/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/sunddi.h>
#include <sys/byteorder.h>
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/kiconv.h>
#include <sys/u8_textprep.h>
#include <sys/kiconv_cck_common.h>
#include <sys/kiconv_sc.h>
#include <sys/kiconv_gb18030_utf8.h>
#include <sys/kiconv_gb2312_utf8.h>
#include <sys/kiconv_utf8_gb18030.h>
#include <sys/kiconv_utf8_gb2312.h>
static int8_t gb2312_to_utf8(uchar_t byte1, uchar_t byte2, uchar_t *ob,
uchar_t *obtail, size_t *ret_val);
static int8_t gbk_to_utf8(uint32_t gbk_val, uchar_t *ob, uchar_t *obtail,
size_t *ret_val, boolean_t isgbk4);
static int8_t utf8_to_gb2312(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
uchar_t *ob, uchar_t *obtail, size_t *ret);
static int8_t utf8_to_gbk(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
uchar_t *ob, uchar_t *obtail, size_t *ret);
static int8_t utf8_to_gb18030(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
uchar_t *ob, uchar_t *obtail, size_t *ret);
#define KICONV_SC_GB18030 (0x01)
#define KICONV_SC_GBK (0x02)
#define KICONV_SC_EUCCN (0x03)
#define KICONV_SC_MAX_MAGIC_ID (0x03)
static void *
open_fr_gb18030()
{
return ((void *)KICONV_SC_GB18030);
}
static void *
open_fr_gbk()
{
return ((void *)KICONV_SC_GBK);
}
static void *
open_fr_euccn()
{
return ((void *)KICONV_SC_EUCCN);
}
static int
close_fr_sc(void *s)
{
if ((uintptr_t)s > KICONV_SC_MAX_MAGIC_ID)
return (EBADF);
return (0);
}
/*
* Encoding convertor from UTF-8 to GB18030.
*/
size_t
kiconv_to_gb18030(void *kcd, char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft, int *errno)
{
return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
outbytesleft, errno, utf8_to_gb18030);
}
/*
* String based encoding convertor from UTF-8 to GB18030.
*/
size_t
kiconvstr_to_gb18030(char *inarray, size_t *inlen, char *outarray,
size_t *outlen, int flag, int *errno)
{
return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
(uchar_t *)outarray, outlen, flag, errno, utf8_to_gb18030);
}
/*
* Encoding convertor from GB18030 to UTF-8.
*/
size_t
kiconv_fr_gb18030(void *kcd, char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft, int *errno)
{
uchar_t *ib;
uchar_t *ob;
uchar_t *ibtail;
uchar_t *obtail;
size_t ret_val;
int8_t sz;
uint32_t gb_val;
boolean_t isgbk4;
/* Check on the kiconv code conversion descriptor. */
if (kcd == NULL || kcd == (void *)-1) {
*errno = EBADF;
return ((size_t)-1);
}
/* If this is a state reset request, process and return. */
if (inbuf == NULL || *inbuf == NULL) {
return (0);
}
ret_val = 0;
ib = (uchar_t *)*inbuf;
ob = (uchar_t *)*outbuf;
ibtail = ib + *inbytesleft;
obtail = ob + *outbytesleft;
while (ib < ibtail) {
if (KICONV_IS_ASCII(*ib)) {
if (ob >= obtail) {
KICONV_SET_ERRNO_AND_BREAK(E2BIG);
}
*ob++ = *ib++;
continue;
}
/*
* Issue EILSEQ error if the first byte is not a
* valid GB18030 leading byte.
*/
if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
}
isgbk4 = (ibtail - ib < 2) ? B_FALSE :
KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1));
if (isgbk4) {
if (ibtail - ib < 4) {
KICONV_SET_ERRNO_AND_BREAK(EINVAL);
}
if (! (KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1)) &&
KICONV_SC_IS_GB18030_3rd_BYTE(*(ib + 2)) &&
KICONV_SC_IS_GB18030_4th_BYTE(*(ib + 3)))) {
KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
}
gb_val = (uint32_t)(*ib) << 24 |
(uint32_t)(*(ib + 1)) << 16 |
(uint32_t)(*(ib + 2)) << 8 | *(ib + 3);
} else {
if (ibtail - ib < 2) {
KICONV_SET_ERRNO_AND_BREAK(EINVAL);
}
if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
}
gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
}
sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, isgbk4);
if (sz < 0) {
KICONV_SET_ERRNO_AND_BREAK(E2BIG);
}
ib += isgbk4 ? 4 : 2;
ob += sz;
}
*inbuf = (char *)ib;
*inbytesleft = ibtail - ib;
*outbuf = (char *)ob;
*outbytesleft = obtail - ob;
return (ret_val);
}
/*
* String based encoding convertor from GB18030 to UTF-8.
*/
size_t
kiconvstr_fr_gb18030(char *inarray, size_t *inlen, char *outarray,
size_t *outlen, int flag, int *errno)
{
uchar_t *ib;
uchar_t *ob;
uchar_t *ibtail;
uchar_t *obtail;
uchar_t *oldib;
size_t ret_val;
int8_t sz;
uint32_t gb_val;
boolean_t isgbk4;
boolean_t do_not_ignore_null;
ret_val = 0;
ib = (uchar_t *)inarray;
ob = (uchar_t *)outarray;
ibtail = ib + *inlen;
obtail = ob + *outlen;
do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
while (ib < ibtail) {
if (*ib == '\0' && do_not_ignore_null)
break;
if (KICONV_IS_ASCII(*ib)) {
if (ob >= obtail) {
KICONV_SET_ERRNO_AND_BREAK(E2BIG);
}
*ob++ = *ib++;
continue;
}
oldib = ib;
if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
}
isgbk4 = (ibtail - ib < 2) ? B_FALSE :
KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1));
if (isgbk4) {
if (ibtail - ib < 4) {
if (flag & KICONV_REPLACE_INVALID) {
ib = ibtail;
goto REPLACE_INVALID;
}
KICONV_SET_ERRNO_AND_BREAK(EINVAL);
}
if (! (KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1)) &&
KICONV_SC_IS_GB18030_3rd_BYTE(*(ib + 2)) &&
KICONV_SC_IS_GB18030_4th_BYTE(*(ib + 3)))) {
KICONV_SET_ERRNO_WITH_FLAG(4, EILSEQ);
}
gb_val = (uint32_t)(*ib) << 24 |
(uint32_t)(*(ib + 1)) << 16 |
(uint32_t)(*(ib + 2)) << 8 | *(ib + 3);
} else {
if (ibtail - ib < 2) {
if (flag & KICONV_REPLACE_INVALID) {
ib = ibtail;
goto REPLACE_INVALID;
}
KICONV_SET_ERRNO_AND_BREAK(EINVAL);
}
if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
}
gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
}
sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, isgbk4);
if (sz < 0) {
KICONV_SET_ERRNO_AND_BREAK(E2BIG);
}
ib += isgbk4 ? 4 : 2;
ob += sz;
continue;
REPLACE_INVALID:
if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
ib = oldib;
KICONV_SET_ERRNO_AND_BREAK(E2BIG);
}
*ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
*ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
*ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
ret_val++;
}
*inlen = ibtail - ib;
*outlen = obtail - ob;
return (ret_val);
}
/*
* Encoding convertor from UTF-8 to GBK.
*/
size_t
kiconv_to_gbk(void *kcd, char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft, int *errno)
{
return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
outbytesleft, errno, utf8_to_gbk);
}
/*
* String based encoding convertor from UTF-8 to GBK.
*/
size_t
kiconvstr_to_gbk(char *inarray, size_t *inlen, char *outarray,
size_t *outlen, int flag, int *errno)
{
return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
(uchar_t *)outarray, outlen, flag, errno, utf8_to_gbk);
}
/*
* Encoding convertor from GBK to UTF-8.
*/
size_t
kiconv_fr_gbk(void *kcd, char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft, int *errno)
{
uchar_t *ib;
uchar_t *ob;
uchar_t *ibtail;
uchar_t *obtail;
size_t ret_val;
int8_t sz;
uint32_t gb_val;
/* Check on the kiconv code conversion descriptor. */
if (kcd == NULL || kcd == (void *)-1) {
*errno = EBADF;
return ((size_t)-1);
}
/* If this is a state reset request, process and return. */
if (inbuf == NULL || *inbuf == NULL) {
return (0);
}
ret_val = 0;
ib = (uchar_t *)*inbuf;
ob = (uchar_t *)*outbuf;
ibtail = ib + *inbytesleft;
obtail = ob + *outbytesleft;
while (ib < ibtail) {
if (KICONV_IS_ASCII(*ib)) {
if (ob >= obtail) {
KICONV_SET_ERRNO_AND_BREAK(E2BIG);
}
*ob++ = *ib++;
continue;
}
/*
* Issue EILSEQ error if the first byte is not a
* valid GBK leading byte.
*/
if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
}
/*
* Issue EINVAL error if input buffer has an incomplete
* character at the end of the buffer.
*/
if (ibtail - ib < 2) {
KICONV_SET_ERRNO_AND_BREAK(EINVAL);
}
/*
* Issue EILSEQ error if the remaining byte is not
* a valid GBK byte.
*/
if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
}
/* Now we have a valid GBK character. */
gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, B_FALSE);
if (sz < 0) {
KICONV_SET_ERRNO_AND_BREAK(E2BIG);
}
ib += 2;
ob += sz;
}
*inbuf = (char *)ib;
*inbytesleft = ibtail - ib;
*outbuf = (char *)ob;
*outbytesleft = obtail - ob;
return (ret_val);
}
/*
* String based encoding convertor from GBK to UTF-8.
*/
size_t
kiconvstr_fr_gbk(char *inarray, size_t *inlen, char *outarray,
size_t *outlen, int flag, int *errno)
{
uchar_t *ib;
uchar_t *ob;
uchar_t *ibtail;
uchar_t *obtail;
uchar_t *oldib;
size_t ret_val;
int8_t sz;
uint32_t gb_val;
boolean_t do_not_ignore_null;
ret_val = 0;
ib = (uchar_t *)inarray;
ob = (uchar_t *)outarray;
ibtail = ib + *inlen;
obtail = ob + *outlen;
do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
while (ib < ibtail) {
if (*ib == '\0' && do_not_ignore_null)
break;
if (KICONV_IS_ASCII(*ib)) {
if (ob >= obtail) {
KICONV_SET_ERRNO_AND_BREAK(E2BIG);
}
*ob++ = *ib++;
continue;
}
oldib = ib;
if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
}
if (ibtail - ib < 2) {
KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
}
if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
}
gb_val = (uint32_t)(*ib << 8) | *(ib + 1);
sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, B_FALSE);
if (sz < 0) {
KICONV_SET_ERRNO_AND_BREAK(E2BIG);
}
ib += 2;
ob += sz;
continue;
REPLACE_INVALID:
if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
ib = oldib;
KICONV_SET_ERRNO_AND_BREAK(E2BIG);
}
*ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
*ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
*ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
ret_val++;
}
*inlen = ibtail - ib;
*outlen = obtail - ob;
return (ret_val);
}
/*
* Encoding convertor from UTF-8 to EUC-CN.
*/
size_t
kiconv_to_euccn(void *kcd, char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft, int *errno)
{
return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
outbytesleft, errno, utf8_to_gb2312);
}
/*
* String based encoding convertor from UTF-8 to EUC-CN.
*/
size_t
kiconvstr_to_euccn(char *inarray, size_t *inlen, char *outarray,
size_t *outlen, int flag, int *errno)
{
return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
(uchar_t *)outarray, outlen, flag, errno, utf8_to_gb2312);
}
/*
* Encoding converto from EUC-CN to UTF-8 code.
*/
size_t
kiconv_fr_euccn(void *kcd, char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft, int *errno)
{
uchar_t *ib;
uchar_t *ob;
uchar_t *ibtail;
uchar_t *obtail;
size_t ret_val;
int8_t sz;
/* Check on the kiconv code conversion descriptor. */
if (kcd == NULL || kcd == (void *)-1) {
*errno = EBADF;
return ((size_t)-1);
}
/* If this is a state reset request, process and return. */
if (inbuf == NULL || *inbuf == NULL) {
return (0);
}
ret_val = 0;
ib = (uchar_t *)*inbuf;
ob = (uchar_t *)*outbuf;
ibtail = ib + *inbytesleft;
obtail = ob + *outbytesleft;
while (ib < ibtail) {
if (KICONV_IS_ASCII(*ib)) {
if (ob >= obtail) {
KICONV_SET_ERRNO_AND_BREAK(E2BIG);
}
*ob++ = *ib++;
continue;
}
/*
* Issue EILSEQ error if the first byte is not a
* valid GB2312 leading byte.
*/
if (! KICONV_SC_IS_GB2312_BYTE(*ib)) {
KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
}
/*
* Issue EINVAL error if input buffer has an incomplete
* character at the end of the buffer.
*/
if (ibtail - ib < 2) {
KICONV_SET_ERRNO_AND_BREAK(EINVAL);
}
/*
* Issue EILSEQ error if the remaining byte is not
* a valid GB2312 byte.
*/
if (! KICONV_SC_IS_GB2312_BYTE(*(ib + 1))) {
KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
}
/* Now we have a valid GB2312 character */
sz = gb2312_to_utf8(*ib, *(ib + 1), ob, obtail, &ret_val);
if (sz < 0) {
KICONV_SET_ERRNO_AND_BREAK(E2BIG);
}
ib += 2;
ob += sz;
}
*inbuf = (char *)ib;
*inbytesleft = ibtail - ib;
*outbuf = (char *)ob;
*outbytesleft = obtail - ob;
return (ret_val);
}
/*
* String based encoding convertor from EUC-CN to UTF-8.
*/
size_t
kiconvstr_fr_euccn(char *inarray, size_t *inlen, char *outarray,
size_t *outlen, int flag, int *errno)
{
uchar_t *ib;
uchar_t *ob;
uchar_t *ibtail;
uchar_t *obtail;
uchar_t *oldib;
size_t ret_val;
int8_t sz;
boolean_t do_not_ignore_null;
ret_val = 0;
ib = (uchar_t *)inarray;
ob = (uchar_t *)outarray;
ibtail = ib + *inlen;
obtail = ob + *outlen;
do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
while (ib < ibtail) {
if (*ib == '\0' && do_not_ignore_null)
break;
if (KICONV_IS_ASCII(*ib)) {
if (ob >= obtail) {
KICONV_SET_ERRNO_AND_BREAK(E2BIG);
}
*ob++ = *ib++;
continue;
}
oldib = ib;
if (! KICONV_SC_IS_GB2312_BYTE(*ib)) {
KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
}
if (ibtail - ib < 2) {
KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
}
if (! KICONV_SC_IS_GB2312_BYTE(*(ib + 1))) {
KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
}
sz = gb2312_to_utf8(*ib, *(ib + 1), ob, obtail, &ret_val);
if (sz < 0) {
KICONV_SET_ERRNO_AND_BREAK(E2BIG);
}
ib += 2;
ob += sz;
continue;
REPLACE_INVALID:
if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
ib = oldib;
KICONV_SET_ERRNO_AND_BREAK(E2BIG);
}
*ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
*ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
*ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
ret_val++;
}
*inlen = ibtail - ib;
*outlen = obtail - ob;
return (ret_val);
}
/*
* Convert single GB2312 character to UTF-8.
* Return: > 0 - Converted successfully
* = -1 - E2BIG
*/
static int8_t
gb2312_to_utf8(uchar_t b1, uchar_t b2, uchar_t *ob, uchar_t *obtail,
size_t *ret_val)
{
size_t index;
int8_t sz;
uchar_t *u8;
/* index = (b1 - KICONV_EUC_START) * 94 + b2 - KICONV_EUC_START; */
index = b1 * 94 + b2 - 0x3BBF;
if (index >= KICONV_GB2312_UTF8_MAX)
index = KICONV_GB2312_UTF8_MAX - 1; /* Map to 0xEFBFBD */
u8 = kiconv_gb2312_utf8[index];
sz = u8_number_of_bytes[u8[0]];
if (obtail - ob < sz) {
*ret_val = (size_t)-1;
return (-1);
}
for (index = 0; index < sz; index++)
*ob++ = u8[index];
/*
* As kiconv_gb2312_utf8 contain muliple KICONV_UTF8_REPLACEMENT_CHAR
* elements, so need to ckeck more.
*/
if (sz == KICONV_UTF8_REPLACEMENT_CHAR_LEN &&
u8[0] == KICONV_UTF8_REPLACEMENT_CHAR1 &&
u8[1] == KICONV_UTF8_REPLACEMENT_CHAR2 &&
u8[2] == KICONV_UTF8_REPLACEMENT_CHAR3)
(*ret_val)++;
return (sz);
}
/*
* Convert single GB18030 or GBK character to UTF-8.
* Return: > 0 - Converted successfully
* = -1 - E2BIG
*/
static int8_t
gbk_to_utf8(uint32_t gbk_val, uchar_t *ob, uchar_t *obtail, size_t *ret_val,
boolean_t isgbk4)
{
size_t index;
int8_t sz;
uchar_t u8array[4];
uchar_t *u8;
if (isgbk4) {
if (gbk_val >= KICONV_SC_PLANE1_GB18030_START) {
uint32_t u32;
/*
* u32 = ((gbk_val >> 24) - 0x90) * 12600 +
* (((gbk_val & 0xFF0000) >> 16) - 0x30) * 1260 +
* (((gbk_val & 0xFF00) >> 8) - 0x81) * 10 +
* (gbk_val & 0xFF - 0x30)+
* KICONV_SC_PLANE1_UCS4_START;
*/
u32 = (gbk_val >> 24) * 12600 +
((gbk_val & 0xFF0000) >> 16) * 1260 +
((gbk_val & 0xFF00) >> 8) * 10 +
(gbk_val & 0xFF) - 0x1BA0FA;
u8array[0] = (uchar_t)(0xF0 | ((u32 & 0x1C0000) >> 18));
u8array[1] = (uchar_t)(0x80 | ((u32 & 0x03F000) >> 12));
u8array[2] = (uchar_t)(0x80 | ((u32 & 0x000FC0) >> 6));
u8array[3] = (uchar_t)(0x80 | (u32 & 0x00003F));
u8 = u8array;
index = 1;
} else {
index = kiconv_binsearch(gbk_val,
kiconv_gbk4_utf8, KICONV_GBK4_UTF8_MAX);
u8 = kiconv_gbk4_utf8[index].u8;
}
} else {
index = kiconv_binsearch(gbk_val,
kiconv_gbk_utf8, KICONV_GBK_UTF8_MAX);
u8 = kiconv_gbk_utf8[index].u8;
}
sz = u8_number_of_bytes[u8[0]];
if (obtail - ob < sz) {
*ret_val = (size_t)-1;
return (-1);
}
if (index == 0)
(*ret_val)++; /* Non-identical conversion */
for (index = 0; index < sz; index++)
*ob++ = u8[index];
return (sz);
}
/*
* Convert single UTF-8 character to GB18030.
* Return: > 0 - Converted successfully
* = -1 - E2BIG
*/
/* ARGSUSED */
static int8_t
utf8_to_gb18030(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
uchar_t *ob, uchar_t *obtail, size_t *ret)
{
size_t index;
int8_t gbklen;
uint32_t gbkcode;
if (utf8 >= KICONV_SC_PLANE1_UTF8_START) {
/* Four bytes GB18030 [0x90308130, 0xe339fe39] handling. */
uint32_t u32;
u32 = (((utf8 & 0x07000000) >> 6) | ((utf8 & 0x3F0000) >> 4) |
((utf8 & 0x3F00) >> 2) | (utf8 & 0x3F)) -
KICONV_SC_PLANE1_UCS4_START;
gbkcode = ((u32 / 12600 + 0x90) << 24) |
(((u32 % 12600) / 1260 + 0x30) << 16) |
(((u32 % 1260) / 10 + 0x81) << 8) | (u32 % 10 + 0x30);
gbklen = 4;
index = 1;
} else {
index = kiconv_binsearch(utf8, kiconv_utf8_gb18030,
KICONV_UTF8_GB18030_MAX);
gbkcode = kiconv_utf8_gb18030[index].value;
KICONV_SC_GET_GB_LEN(gbkcode, gbklen);
}
if (obtail - ob < gbklen) {
*ret = (size_t)-1;
return (-1);
}
if (index == 0)
(*ret)++; /* Non-identical conversion */
if (gbklen == 2) {
*ob++ = (uchar_t)(gbkcode >> 8);
} else if (gbklen == 4) {
*ob++ = (uchar_t)(gbkcode >> 24);
*ob++ = (uchar_t)(gbkcode >> 16);
*ob++ = (uchar_t)(gbkcode >> 8);
}
*ob = (uchar_t)(gbkcode & 0xFF);
return (gbklen);
}
/*
* Convert single UTF-8 character to GBK.
* Return: > 0 - Converted successfully
* = -1 - E2BIG
*/
/* ARGSUSED */
static int8_t
utf8_to_gbk(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
uchar_t *ob, uchar_t *obtail, size_t *ret)
{
size_t index;
int8_t gbklen;
uint32_t gbkcode;
index = kiconv_binsearch(utf8, kiconv_utf8_gb18030,
KICONV_UTF8_GB18030_MAX);
gbkcode = kiconv_utf8_gb18030[index].value;
KICONV_SC_GET_GB_LEN(gbkcode, gbklen);
/* GBK and GB18030 share the same table, so check the length. */
if (gbklen == 4) {
index = 0;
gbkcode = kiconv_utf8_gb18030[index].value;
gbklen = 1;
}
if (obtail - ob < gbklen) {
*ret = (size_t)-1;
return (-1);
}
if (index == 0)
(*ret)++; /* Non-identical conversion */
if (gbklen > 1)
*ob++ = (uchar_t)(gbkcode >> 8);
*ob = (uchar_t)(gbkcode & 0xFF);
return (gbklen);
}
/*
* Convert single UTF-8 character to GB2312.
* Return: > 0 - Converted successfully
* = -1 - E2BIG
*/
/* ARGSUSED */
static int8_t
utf8_to_gb2312(uint32_t utf8, uchar_t **inbuf, uchar_t *intail,
uchar_t *ob, uchar_t *obtail, size_t *ret)
{
size_t index;
int8_t gblen;
uint32_t gbcode;
index = kiconv_binsearch(utf8, kiconv_utf8_gb2312,
KICONV_UTF8_GB2312_MAX);
gbcode = kiconv_utf8_gb2312[index].value;
gblen = (gbcode <= 0xFF) ? 1 : 2;
if (obtail - ob < gblen) {
*ret = (size_t)-1;
return (-1);
}
if (index == 0)
(*ret)++;
if (gblen > 1)
*ob++ = (uchar_t)(gbcode >> 8);
*ob = (uchar_t)(gbcode & 0xFF);
return (gblen);
}
static kiconv_ops_t kiconv_sc_ops_tbl[] = {
{
"gb18030", "utf-8", kiconv_open_to_cck, kiconv_to_gb18030,
kiconv_close_to_cck, kiconvstr_to_gb18030
},
{
"utf-8", "gb18030", open_fr_gb18030, kiconv_fr_gb18030,
close_fr_sc, kiconvstr_fr_gb18030
},
{
"gbk", "utf-8", kiconv_open_to_cck, kiconv_to_gbk,
kiconv_close_to_cck, kiconvstr_to_gbk
},
{
"utf-8", "gbk", open_fr_gbk, kiconv_fr_gbk,
close_fr_sc, kiconvstr_fr_gbk
},
{
"euccn", "utf-8", kiconv_open_to_cck, kiconv_to_euccn,
kiconv_close_to_cck, kiconvstr_to_euccn
},
{
"utf-8", "euccn", open_fr_euccn, kiconv_fr_euccn,
close_fr_sc, kiconvstr_fr_euccn
},
};
static kiconv_module_info_t kiconv_sc_info = {
"kiconv_sc", /* module name */
sizeof (kiconv_sc_ops_tbl) / sizeof (kiconv_sc_ops_tbl[0]),
kiconv_sc_ops_tbl,
0,
NULL,
NULL,
0
};
static struct modlkiconv modlkiconv_sc = {
&mod_kiconvops,
"kiconv Simplified Chinese module 1.0",
&kiconv_sc_info
};
static struct modlinkage modlinkage = {
MODREV_1,
(void *)&modlkiconv_sc,
NULL
};
int
_init(void)
{
int err;
err = mod_install(&modlinkage);
if (err)
cmn_err(CE_WARN, "kiconv_sc: failed to load kernel module");
return (err);
}
int
_fini(void)
{
int err;
/*
* If this module is being used, then, we cannot remove the module.
* The following checking will catch pretty much all usual cases.
*
* Any remaining will be catached by the kiconv_unregister_module()
* during mod_remove() at below.
*/
if (kiconv_module_ref_count(KICONV_MODULE_ID_SC))
return (EBUSY);
err = mod_remove(&modlinkage);
if (err)
cmn_err(CE_WARN, "kiconv_sc: failed to remove kernel module");
return (err);
}
int
_info(struct modinfo *modinfop)
{
return (mod_info(&modlinkage, modinfop));
}