/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/sunddi.h>
#include <sys/byteorder.h>
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/u8_textprep.h>
#include <sys/kiconv.h>
#include <sys/kiconv_cck_common.h>
#include <sys/kiconv_tc.h>
#include <sys/kiconv_big5_utf8.h>
#include <sys/kiconv_euctw_utf8.h>
#include <sys/kiconv_hkscs_utf8.h>
#include <sys/kiconv_cp950hkscs_utf8.h>
#include <sys/kiconv_utf8_big5.h>
#include <sys/kiconv_utf8_euctw.h>
#include <sys/kiconv_utf8_cp950hkscs.h>
#include <sys/kiconv_utf8_hkscs.h>
/* 4 HKSCS-2004 code points map to 2 Unicode code points separately. */
static uchar_t hkscs_special_sequence[][4] = {
{ 0xc3, 0x8a, 0xcc, 0x84 }, /* 0x8862 */
{ 0xc3, 0x8a, 0xcc, 0x8c }, /* 0x8864 */
{ 0xc3, 0xaa, 0xcc, 0x84 }, /* 0x88a3 */
{ 0xc3, 0xaa, 0xcc, 0x8c } /* 0x88a5 */
};
/* 4 Unicode code point pair map to 1 HKSCS-2004 code point. */
static uint32_t ucs_special_sequence[] = {
0x8866, /* U+00ca */
0x8862, /* U+00ca U+0304 */
0x8864, /* U+00ca U+030c */
0x88a7, /* U+00ea */
0x88a3, /* U+00ea U+0304 */
0x88a5 /* U+00ea U+030c */
};
typedef int8_t (*kiconv_big5toutf8_t)(uint32_t value, uchar_t *ob,
uchar_t *obtail, size_t *ret_val);
static int8_t utf8_to_big5(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
uchar_t *ob, uchar_t *obtail, size_t *ret_val);
static int8_t utf8_to_euctw(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
uchar_t *ob, uchar_t *obtail, size_t *ret_val);
static int8_t utf8_to_cp950hkscs(uint32_t utf8, uchar_t **inbuf,
uchar_t *ibtail, uchar_t *ob, uchar_t *obtail, size_t *ret_val);
static int8_t utf8_to_big5hkscs(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
uchar_t *ob, uchar_t *obtail, size_t *ret_val);
static int8_t big5_to_utf8(uint32_t big5_val, uchar_t *ob, uchar_t *obtail,
size_t *ret_val);
static int8_t big5hkscs_to_utf8(uint32_t hkscs_val, uchar_t *ob,
uchar_t *obtail, size_t *ret_val);
static int8_t cp950hkscs_to_utf8(uint32_t hkscs_val, uchar_t *ob,
uchar_t *obtail, size_t *ret_val);
static int8_t euctw_to_utf8(size_t plane_no, uint32_t euctw_val,
uchar_t *ob, uchar_t *obtail, size_t *ret_val);
static uint32_t get_unicode_from_UDA(size_t plane_no, uchar_t byte1,
uchar_t byte2);
#define KICONV_TC_BIG5 (0x01)
#define KICONV_TC_BIG5HKSCS (0x02)
#define KICONV_TC_CP950HKSCS (0x03)
#define KICONV_TC_EUCTW (0x04)
#define KICONV_TC_MAX_MAGIC_ID (0x04)
static void *
open_fr_big5()
{
return ((void *)KICONV_TC_BIG5);
}
static void *
open_fr_big5hkscs()
{
return ((void *)KICONV_TC_BIG5HKSCS);
}
static void *
open_fr_cp950hkscs()
{
return ((void *)KICONV_TC_CP950HKSCS);
}
static void *
open_fr_euctw()
{
return ((void *)KICONV_TC_EUCTW);
}
static int
close_fr_tc(void *s)
{
if ((uintptr_t)s > KICONV_TC_MAX_MAGIC_ID)
return (EBADF);
return (0);
}
/*
* Common convertor from BIG5/HKSCS(BIG5-HKSCS or CP950-HKSCS) to UTF-8.
*/
static size_t
kiconv_fr_big5_common(void *kcd, char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft, int *errno,
kiconv_big5toutf8_t ptr_big5touf8)
{
uchar_t *ib;
uchar_t *ob;
uchar_t *ibtail;
uchar_t *obtail;
size_t ret_val;
int8_t sz;
uint32_t big5_val;
/* Check on the kiconv code conversion descriptor. */
if (kcd == NULL || kcd == (void *)-1) {
*errno = EBADF;
return ((size_t)-1);
}
/* If this is a state reset request, process and return. */
if (inbuf == NULL || *inbuf == NULL) {
return (0);
}
ret_val = 0;
ib = (uchar_t *)*inbuf;
ob = (uchar_t *)*outbuf;
ibtail = ib + *inbytesleft;
obtail = ob + *outbytesleft;
while (ib < ibtail) {
if (KICONV_IS_ASCII(*ib)) {
if (ob >= obtail) {
KICONV_SET_ERRNO_AND_BREAK(E2BIG);
}
*ob++ = *ib++;
continue;
}
/*
* Issue EILSEQ error if the first byte is not a
* valid BIG5/HKSCS leading byte.
*/
if (! KICONV_TC_IS_BIG5_1st_BYTE(*ib)) {
KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
}
/*
* Issue EINVAL error if input buffer has an incomplete
* character at the end of the buffer.
*/
if (ibtail - ib < 2) {
KICONV_SET_ERRNO_AND_BREAK(EINVAL);
}
/*
* Issue EILSEQ error if the remaining bytes is not
* a valid BIG5/HKSCS byte.
*/
if (! KICONV_TC_IS_BIG5_2nd_BYTE(*(ib + 1))) {
KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
}
/* Now we have a valid BIG5/HKSCS character. */
big5_val = (uint32_t)(*ib) << 8 | *(ib + 1);
sz = ptr_big5touf8(big5_val, ob, obtail, &ret_val);
if (sz < 0) {
KICONV_SET_ERRNO_AND_BREAK(E2BIG);
}
ib += 2;
ob += sz;
}
*inbuf = (char *)ib;
*inbytesleft = ibtail - ib;
*outbuf = (char *)ob;
*outbytesleft = obtail - ob;
return (ret_val);
}
/*
* String based Common convertor from BIG5/HKSCS(BIG5-HKSCS or CP950-HKSCS)
* to UTF-8.
*/
static size_t
kiconvstr_fr_big5_common(uchar_t *ib, size_t *inlen, uchar_t *ob,
size_t *outlen, int flag, int *errno,
kiconv_big5toutf8_t ptr_big5touf8)
{
uchar_t *oldib;
uchar_t *ibtail;
uchar_t *obtail;
size_t ret_val;
int8_t sz;
uint32_t big5_val;
boolean_t do_not_ignore_null;
ret_val = 0;
ibtail = ib + *inlen;
obtail = ob + *outlen;
do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
while (ib < ibtail) {
if (*ib == '\0' && do_not_ignore_null)
break;
if (KICONV_IS_ASCII(*ib)) {
if (ob >= obtail) {
KICONV_SET_ERRNO_AND_BREAK(E2BIG);
}
*ob++ = *ib++;
continue;
}
oldib = ib;
if (! KICONV_TC_IS_BIG5_1st_BYTE(*ib)) {
KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
}
if (ibtail - ib < 2) {
KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
}
if (! KICONV_TC_IS_BIG5_2nd_BYTE(*(ib + 1))) {
KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
}
big5_val = *ib++;
big5_val = (big5_val << 8) | *ib++;
sz = ptr_big5touf8(big5_val, ob, obtail, &ret_val);
if (sz < 0) {
ib = oldib;
KICONV_SET_ERRNO_AND_BREAK(E2BIG);
}
ob += sz;
continue;
REPLACE_INVALID:
if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
ib = oldib;
KICONV_SET_ERRNO_AND_BREAK(E2BIG);
}
*ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
*ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
*ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
ret_val++;
}
*inlen = ibtail - ib;
*outlen = obtail - ob;
return (ret_val);
}
/*
* Encoding convertor from BIG5 to UTF-8.
*/
static size_t
kiconv_fr_big5(void *kcd, char **inbuf, size_t *inbytesleft, char **outbuf,
size_t *outbytesleft, int *errno)
{
return (kiconv_fr_big5_common(kcd, inbuf, inbytesleft, outbuf,
outbytesleft, errno, big5_to_utf8));
}
/*
* String based encoding convertor from BIG5 to UTF-8.
*/
static size_t
kiconvstr_fr_big5(char *inarray, size_t *inlen, char *outarray,
size_t *outlen, int flag, int *errno)
{
return (kiconvstr_fr_big5_common((uchar_t *)inarray, inlen,
(uchar_t *)outarray, outlen, flag, errno,
big5_to_utf8));
}
/*
* Encoding convertor from BIG5-HKSCS to UTF-8.
*/
static size_t
kiconv_fr_big5hkscs(void *kcd, char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft, int *errno)
{
return kiconv_fr_big5_common(kcd, inbuf, inbytesleft, outbuf,
outbytesleft, errno, big5hkscs_to_utf8);
}
/*
* String based encoding convertor from BIG5-HKSCS to UTF-8.
*/
static size_t
kiconvstr_fr_big5hkscs(char *inarray, size_t *inlen, char *outarray,
size_t *outlen, int flag, int *errno)
{
return kiconvstr_fr_big5_common((uchar_t *)inarray, inlen,
(uchar_t *)outarray, outlen, flag, errno, big5hkscs_to_utf8);
}
/*
* Encoding convertor from CP950-HKSCS to UTF-8.
*/
static size_t
kiconv_fr_cp950hkscs(void *kcd, char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft, int *errno)
{
return kiconv_fr_big5_common(kcd, inbuf, inbytesleft, outbuf,
outbytesleft, errno, cp950hkscs_to_utf8);
}
/*
* String based encoding convertor from CP950-HKSCS to UTF-8.
*/
static size_t
kiconvstr_fr_cp950hkscs(char *inarray, size_t *inlen, char *outarray,
size_t *outlen, int flag, int *errno)
{
return kiconvstr_fr_big5_common((uchar_t *)inarray, inlen,
(uchar_t *)outarray, outlen, flag, errno, cp950hkscs_to_utf8);
}
/*
* Encoding convertor from EUC-TW to UTF-8.
*/
static size_t
kiconv_fr_euctw(void *kcd, char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft, int *errno)
{
uchar_t *ib;
uchar_t *ob;
uchar_t *ibtail;
uchar_t *obtail;
uchar_t *oldib;
size_t ret_val;
size_t plane_no;
int8_t sz;
uint32_t euctw_val;
boolean_t isplane1;
/* Check on the kiconv code conversion descriptor. */
if (kcd == NULL || kcd == (void *)-1) {
*errno = EBADF;
return ((size_t)-1);
}
/* If this is a state reset request, process and return. */
if (inbuf == NULL || *inbuf == NULL) {
return (0);
}
ret_val = 0;
ib = (uchar_t *)*inbuf;
ob = (uchar_t *)*outbuf;
ibtail = ib + *inbytesleft;
obtail = ob + *outbytesleft;
while (ib < ibtail) {
if (KICONV_IS_ASCII(*ib)) {
if (ob >= obtail) {
KICONV_SET_ERRNO_AND_BREAK(E2BIG);
}
*ob++ = *ib++;
continue;
}
/*
* Issue EILSEQ error if the first byte is not a
* valid EUC-TW leading byte.
*/
if (! KICONV_TC_IS_EUCTW_1st_BYTE(*ib)) {
KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
}
isplane1 = (*ib == KICONV_TC_EUCTW_MBYTE) ?
B_FALSE : B_TRUE;
/*
* Issue EINVAL error if input buffer has an incomplete
* character at the end of the buffer.
*/
if (ibtail - ib < (isplane1 ? 2 : 4)) {
KICONV_SET_ERRNO_AND_BREAK(EINVAL);
}
oldib = ib;
plane_no = isplane1 ? 1 : *(ib + 1) - KICONV_TC_EUCTW_PMASK;
/*
* Issue EILSEQ error if the remaining bytes are not
* valid EUC-TW bytes.
*/
if (! KICONV_TC_IS_VALID_EUCTW_SEQ(ib)) {
KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
}
if (! isplane1)
ib += 2;
/* Now we have a valid EUC-TW character. */
euctw_val = *ib++;
euctw_val = (euctw_val << 8) | *ib++;
sz = euctw_to_utf8(plane_no, euctw_val, ob, obtail, &ret_val);
if (sz < 0) {
ib = oldib;
KICONV_SET_ERRNO_AND_BREAK(E2BIG);
}
ob += sz;
}
*inbuf = (char *)ib;
*inbytesleft = ibtail - ib;
*outbuf = (char *)ob;
*outbytesleft = obtail - ob;
return (ret_val);
}
/*
* String based encoding convertor from EUC-TW to UTF-8.
*/
static size_t
kiconvstr_fr_euctw(char *inarray, size_t *inlen, char *outarray,
size_t *outlen, int flag, int *errno)
{
uchar_t *ib;
uchar_t *ob;
uchar_t *ibtail;
uchar_t *obtail;
uchar_t *oldib;
size_t ret_val;
size_t plane_no;
int8_t sz;
uint32_t euctw_val;
boolean_t isplane1;
boolean_t do_not_ignore_null;
ret_val = 0;
ib = (uchar_t *)inarray;
ob = (uchar_t *)outarray;
ibtail = ib + *inlen;
obtail = ob + *outlen;
do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
while (ib < ibtail) {
if (*ib == '\0' && do_not_ignore_null)
break;
if (KICONV_IS_ASCII(*ib)) {
if (ob >= obtail) {
KICONV_SET_ERRNO_AND_BREAK(E2BIG);
}
*ob++ = *ib++;
continue;
}
oldib = ib;
if (! KICONV_TC_IS_EUCTW_1st_BYTE(*ib)) {
KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
}
isplane1 = (*ib == KICONV_TC_EUCTW_MBYTE) ?
B_FALSE : B_TRUE;
if (ibtail - ib < (isplane1 ? 2 : 4)) {
if (flag & KICONV_REPLACE_INVALID) {
ib = ibtail;
goto REPLACE_INVALID;
}
KICONV_SET_ERRNO_AND_BREAK(EINVAL);
}
plane_no = isplane1 ? 1 : *(ib + 1) - KICONV_TC_EUCTW_PMASK;
if (! KICONV_TC_IS_VALID_EUCTW_SEQ(ib)) {
KICONV_SET_ERRNO_WITH_FLAG(isplane1 ? 2 : 4, EILSEQ);
}
if (! isplane1)
ib += 2;
euctw_val = *ib++;
euctw_val = (euctw_val << 8) | *ib++;
sz = euctw_to_utf8(plane_no, euctw_val, ob, obtail, &ret_val);
if (sz < 0) {
ib = oldib;
KICONV_SET_ERRNO_AND_BREAK(E2BIG);
}
ob += sz;
continue;
REPLACE_INVALID:
if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
ib = oldib;
KICONV_SET_ERRNO_AND_BREAK(E2BIG);
}
*ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
*ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
*ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
ret_val++;
}
*inlen = ibtail - ib;
*outlen = obtail - ob;
return (ret_val);
}
/*
* Encoding convertor from UTF-8 to BIG5.
*/
static size_t
kiconv_to_big5(void *kcd, char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft, int *errno)
{
return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
outbytesleft, errno, utf8_to_big5);
}
/*
* String based encoding convertor from UTF-8 to BIG5.
*/
static size_t
kiconvstr_to_big5(char *inarray, size_t *inlen, char *outarray,
size_t *outlen, int flag, int *errno)
{
return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
(uchar_t *)outarray, outlen, flag, errno, utf8_to_big5);
}
/*
* Encoding convertor from UTF-8 to EUC-TW.
*/
static size_t
kiconv_to_euctw(void *kcd, char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft, int *errno)
{
return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
outbytesleft, errno, utf8_to_euctw);
}
/*
* String based encoding convertor from UTF-8 to EUC-TW.
*/
static size_t
kiconvstr_to_euctw(char *inarray, size_t *inlen, char *outarray,
size_t *outlen, int flag, int *errno)
{
return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
(uchar_t *)outarray, outlen, flag, errno, utf8_to_euctw);
}
/*
* Encoding convertor from UTF-8 to CP950HKSCS.
*/
static size_t
kiconv_to_cp950hkscs(void *kcd, char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft, int *errno)
{
return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
outbytesleft, errno, utf8_to_cp950hkscs);
}
/*
* String based encoding convertor from UTF-8 to CP950HKSCS.
*/
static size_t
kiconvstr_to_cp950hkscs(char *inarray, size_t *inlen, char *outarray,
size_t *outlen, int flag, int *errno)
{
return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
(uchar_t *)outarray, outlen, flag, errno, utf8_to_cp950hkscs);
}
/*
* Encoding convertor from UTF-8 to BIG5HKSCS(HKSCS-2004).
*/
static size_t
kiconv_to_big5hkscs(void *kcd, char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft, int *errno)
{
return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
outbytesleft, errno, utf8_to_big5hkscs);
}
/*
* String based encoding convertor from UTF-8 to BIG5HKSCS(HKSCS-2004).
*/
static size_t
kiconvstr_to_big5hkscs(char *inarray, size_t *inlen, char *outarray,
size_t *outlen, int flag, int *errno)
{
return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
(uchar_t *)outarray, outlen, flag, errno, utf8_to_big5hkscs);
}
/*
* Common convertor from single BIG5/CP950-HKSCS character to UTF-8.
* Return: > 0 - Converted successfully
* = -1 - E2BIG
*/
static int8_t
big5_to_utf8_common(uint32_t big5_val, uchar_t *ob, uchar_t *obtail,
size_t *ret_val, kiconv_table_array_t *table, size_t nitems)
{
size_t index;
int8_t sz;
uchar_t *u8;
index = kiconv_binsearch(big5_val, table, nitems);
u8 = table[index].u8;
sz = u8_number_of_bytes[u8[0]];
if (obtail - ob < sz) {
*ret_val = (size_t)-1;
return (-1);
}
if (index == 0)
(*ret_val)++; /* Non-identical conversion */
for (index = 0; index < sz; index++)
*ob++ = u8[index];
return (sz);
}
/*
* Convert single BIG5 character to UTF-8.
*/
static int8_t
big5_to_utf8(uint32_t big5_val, uchar_t *ob, uchar_t *obtail, size_t *ret_val)
{
return (big5_to_utf8_common(big5_val, ob, obtail, ret_val,
kiconv_big5_utf8, KICONV_BIG5_UTF8_MAX));
}
/*
* Convert single CP950-HKSCS character to UTF-8.
*/
static int8_t
cp950hkscs_to_utf8(uint32_t hkscs_val, uchar_t *ob, uchar_t *obtail,
size_t *ret_val)
{
return (big5_to_utf8_common(hkscs_val, ob, obtail, ret_val,
kiconv_cp950hkscs_utf8, KICONV_CP950HKSCS_UTF8_MAX));
}
/*
* Calculate unicode value for some CNS planes which fall in Unicode
* UDA range.
*/
static uint32_t
get_unicode_from_UDA(size_t plane_no, uchar_t b1, uchar_t b2)
{
/*
* CNS Plane 15 is pre-allocated, so need move Plane 16 to back 15
* to compute the Unicode value.
*/
if (plane_no == 16)
--plane_no;
/* 0xF0000 + (plane_no - 12) * 8836 + (b1 - 0xA1) * 94 + (b2 - 0xA1) */
return (8836 * plane_no + 94 * b1 + b2 + 0xD2611);
}
/*
* Convert single EUC-TW character to UTF-8.
* Return: > 0 - Converted successfully
* = -1 - E2BIG
*/
static int8_t
euctw_to_utf8(size_t plane_no, uint32_t euctw_val, uchar_t *ob,
uchar_t *obtail, size_t *ret_val)
{
uint32_t u32;
size_t index;
int8_t sz;
uchar_t udc[4];
uchar_t *u8;
switch (plane_no) {
case 1:
index = kiconv_binsearch(euctw_val, kiconv_cns1_utf8,
KICONV_CNS1_UTF8_MAX);
u8 = kiconv_cns1_utf8[index].u8;
break;
case 2:
index = kiconv_binsearch(euctw_val, kiconv_cns2_utf8,
KICONV_CNS2_UTF8_MAX);
u8 = kiconv_cns2_utf8[index].u8;
break;
case 3:
index = kiconv_binsearch(euctw_val, kiconv_cns3_utf8,
KICONV_CNS3_UTF8_MAX);
u8 = kiconv_cns3_utf8[index].u8;
break;
case 4:
index = kiconv_binsearch(euctw_val, kiconv_cns4_utf8,
KICONV_CNS4_UTF8_MAX);
u8 = kiconv_cns4_utf8[index].u8;
break;
case 5:
index = kiconv_binsearch(euctw_val, kiconv_cns5_utf8,
KICONV_CNS5_UTF8_MAX);
u8 = kiconv_cns5_utf8[index].u8;
break;
case 6:
index = kiconv_binsearch(euctw_val, kiconv_cns6_utf8,
KICONV_CNS6_UTF8_MAX);
u8 = kiconv_cns6_utf8[index].u8;
break;
case 7:
index = kiconv_binsearch(euctw_val, kiconv_cns7_utf8,
KICONV_CNS7_UTF8_MAX);
u8 = kiconv_cns7_utf8[index].u8;
break;
case 12:
case 13:
case 14:
case 16:
u32 = get_unicode_from_UDA(plane_no,
(euctw_val & 0xFF00) >> 8, euctw_val & 0xFF);
/*
* As U+F0000 <= u32 <= U+F8A0F, so its UTF-8 sequence
* will occupy 4 bytes.
*/
udc[0] = 0xF3;
udc[1] = (uchar_t)(0x80 | (u32 & 0x03F000) >> 12);
udc[2] = (uchar_t)(0x80 | (u32 & 0x000FC0) >> 6);
udc[3] = (uchar_t)(0x80 | (u32 & 0x00003F));
u8 = udc;
index = 1;
break;
case 15:
index = kiconv_binsearch(euctw_val, kiconv_cns15_utf8,
KICONV_CNS15_UTF8_MAX);
u8 = kiconv_cns15_utf8[index].u8;
break;
default:
index = 0;
u8 = kiconv_cns1_utf8[index].u8;
}
sz = u8_number_of_bytes[u8[0]];
if (obtail - ob < sz) {
*ret_val = (size_t)-1;
return (-1);
}
if (index == 0)
(*ret_val)++;
for (index = 0; index < sz; index++)
*ob++ = u8[index];
return (sz);
}
/*
* Convert single HKSCS character to UTF-8.
* Return: > 0 - Converted successfully
* = -1 - E2BIG
*/
static int8_t
big5hkscs_to_utf8(uint32_t hkscs_val, uchar_t *ob, uchar_t *obtail,
size_t *ret_val)
{
size_t index;
int8_t sz;
uchar_t *u8;
index = kiconv_binsearch(hkscs_val, kiconv_hkscs_utf8,
KICONV_HKSCS_UTF8_MAX);
u8 = kiconv_hkscs_utf8[index].u8;
/*
* Single HKSCS-2004 character may map to 2 Unicode
* code points.
*/
if (u8[0] == 0xFF) {
u8 = hkscs_special_sequence[u8[1]];
sz = 4;
} else {
sz = u8_number_of_bytes[u8[0]];
}
if (obtail - ob < sz) {
*ret_val = (size_t)-1;
return (-1);
}
if (index == 0)
(*ret_val)++; /* Non-identical conversion. */
for (index = 0; index < sz; index++)
*ob++ = u8[index];
return (sz);
}
/*
* Convert single UTF-8 character to EUC-TW.
* Return: > 0 - Converted successfully
* = -1 - E2BIG
*/
/* ARGSUSED */
static int8_t
utf8_to_euctw(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
uchar_t *ob, uchar_t *obtail, size_t *ret_val)
{
size_t index;
size_t plane_no;
uchar_t byte1;
uchar_t byte2;
if (utf8 >= KICONV_TC_UDA_UTF8_START &&
utf8 <= KICONV_TC_UDA_UTF8_END) {
/*
* Calculate EUC-TW code if utf8 is in Unicode
* Private Plane 15.
*/
index = (((utf8 & 0x7000000) >> 6) | ((utf8 & 0x3F0000) >> 4) |
((utf8 & 0x3F00) >> 2) | (utf8 & 0x3F)) -
KICONV_TC_UDA_UCS4_START;
plane_no = 12 + index / 8836;
byte1 = 0xA1 + (index % 8836) / 94;
byte2 = 0xA1 + index % 94;
/* CNS Plane 15 is pre-allocated, so place it into Plane 16. */
if (plane_no == 15)
plane_no = 16;
} else {
uint32_t euctw_val;
index = kiconv_binsearch(utf8, kiconv_utf8_euctw,
KICONV_UTF8_EUCTW_MAX);
if (index == 0) {
if (ob >= obtail) {
*ret_val = (size_t)-1;
return (-1);
}
*ob++ = KICONV_ASCII_REPLACEMENT_CHAR;
(*ret_val)++;
return (1);
}
euctw_val = kiconv_utf8_euctw[index].value;
byte1 = (euctw_val & 0xFF00) >> 8;
byte2 = euctw_val & 0xFF;
plane_no = euctw_val >> 16;
}
if (obtail - ob < (plane_no == 1 ? 2 : 4)) {
*ret_val = (size_t)-1;
return (-1);
}
if (plane_no != 1) {
*ob++ = KICONV_TC_EUCTW_MBYTE;
*ob++ = KICONV_TC_EUCTW_PMASK + plane_no;
}
*ob++ = byte1;
*ob = byte2;
return (plane_no == 1 ? 2 : 4);
}
/*
* Convert single UTF-8 character to BIG5-HKSCS
* Return: > 0 - Converted successfully
* = -1 - E2BIG
*/
static int8_t
utf8_to_big5hkscs(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
uchar_t *ob, uchar_t *obtail, size_t *ret_val)
{
size_t index;
int8_t hkscslen;
uint32_t hkscscode;
boolean_t special_sequence = B_FALSE;
index = kiconv_binsearch(utf8, kiconv_utf8_hkscs,
KICONV_UTF8_HKSCS_MAX);
hkscscode = kiconv_utf8_hkscs[index].value;
/*
* There are 4 special code points in HKSCS-2004 which mapped
* to 2 UNICODE code points.
*/
if ((int32_t)hkscscode < 0) {
size_t special_index = (-(int32_t)hkscscode - 1) * 3;
/* Check the following 2 bytes. */
if (ibtail - *inbuf >= 2 && **inbuf == 0xcc &&
(*(*inbuf + 1) == 0x84 || *(*inbuf + 1) == 0x8c)) {
special_index += (*(*inbuf + 1) == 0x84 ? 1 : 2);
special_sequence = B_TRUE;
}
hkscscode = ucs_special_sequence[special_index];
}
hkscslen = (hkscscode <= 0xFF) ? 1 : 2;
if (obtail - ob < hkscslen) {
*ret_val = (size_t)-1;
return (-1);
}
if (index == 0)
(*ret_val)++;
if (hkscslen > 1)
*ob++ = (uchar_t)(hkscscode >> 8);
*ob = (uchar_t)(hkscscode & 0xFF);
if (special_sequence) { /* Advance for special sequence */
(*inbuf) += 2;
}
return (hkscslen);
}
/*
* Common convertor for UTF-8 to BIG5/CP950-HKSCS.
* Return: > 0 - Converted successfully
* = -1 - E2BIG
*/
static int8_t
utf8_to_big5_common(uint32_t utf8, uchar_t *ob, uchar_t *obtail,
size_t *ret_val, kiconv_table_t *table, size_t nitems)
{
size_t index;
int8_t big5len;
uint32_t big5code;
index = kiconv_binsearch(utf8, table, nitems);
big5code = table[index].value;
big5len = (big5code <= 0xFF) ? 1 : 2;
if (obtail - ob < big5len) {
*ret_val = (size_t)-1;
return (-1);
}
if (index == 0)
(*ret_val)++;
if (big5len > 1)
*ob++ = (uchar_t)(big5code >> 8);
*ob = (uchar_t)(big5code & 0xFF);
return (big5len);
}
/*
* Convert single UTF-8 character to BIG5.
*/
/* ARGSUSED */
static int8_t
utf8_to_big5(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
uchar_t *ob, uchar_t *obtail, size_t *ret_val)
{
return (utf8_to_big5_common(utf8, ob, obtail, ret_val,
kiconv_utf8_big5, KICONV_UTF8_BIG5_MAX));
}
/*
* Convert single UTF-8 character to CP950-HKSCS for Windows compatibility.
*/
/* ARGSUSED */
static int8_t
utf8_to_cp950hkscs(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
uchar_t *ob, uchar_t *obtail, size_t *ret_val)
{
return (utf8_to_big5_common(utf8, ob, obtail, ret_val,
kiconv_utf8_cp950hkscs, KICONV_UTF8_CP950HKSCS));
}
static kiconv_ops_t kiconv_tc_ops_tbl[] = {
{
"big5", "utf-8", kiconv_open_to_cck, kiconv_to_big5,
kiconv_close_to_cck, kiconvstr_to_big5
},
{
"utf-8", "big5", open_fr_big5, kiconv_fr_big5,
close_fr_tc, kiconvstr_fr_big5
},
{
"big5-hkscs", "utf-8", kiconv_open_to_cck, kiconv_to_big5hkscs,
kiconv_close_to_cck, kiconvstr_to_big5hkscs
},
{
"utf-8", "big5-hkscs", open_fr_big5hkscs, kiconv_fr_big5hkscs,
close_fr_tc, kiconvstr_fr_big5hkscs
},
{
"euc-tw", "utf-8", kiconv_open_to_cck, kiconv_to_euctw,
kiconv_close_to_cck, kiconvstr_to_euctw
},
{
"utf-8", "euc-tw", open_fr_euctw, kiconv_fr_euctw,
close_fr_tc, kiconvstr_fr_euctw
},
{
"cp950-hkscs", "utf-8", kiconv_open_to_cck,
kiconv_to_cp950hkscs, kiconv_close_to_cck,
kiconvstr_to_cp950hkscs
},
{
"utf-8", "cp950-hkscs", open_fr_cp950hkscs,
kiconv_fr_cp950hkscs, close_fr_tc, kiconvstr_fr_cp950hkscs
},
};
static kiconv_module_info_t kiconv_tc_info = {
"kiconv_tc", /* module name */
sizeof (kiconv_tc_ops_tbl) / sizeof (kiconv_tc_ops_tbl[0]),
kiconv_tc_ops_tbl,
0,
NULL,
NULL,
0
};
static struct modlkiconv modlkiconv_tc = {
&mod_kiconvops,
"kiconv Traditional Chinese module 1.0",
&kiconv_tc_info
};
static struct modlinkage modlinkage = {
MODREV_1,
(void *)&modlkiconv_tc,
NULL
};
int
_init(void)
{
int err;
err = mod_install(&modlinkage);
if (err)
cmn_err(CE_WARN, "kiconv_tc: failed to load kernel module");
return (err);
}
int
_fini(void)
{
int err;
/*
* If this module is being used, then, we cannot remove the module.
* The following checking will catch pretty much all usual cases.
*
* Any remaining will be catached by the kiconv_unregister_module()
* during mod_remove() at below.
*/
if (kiconv_module_ref_count(KICONV_MODULE_ID_TC))
return (EBUSY);
err = mod_remove(&modlinkage);
if (err)
cmn_err(CE_WARN, "kiconv_tc: failed to remove kernel module");
return (err);
}
int
_info(struct modinfo *modinfop)
{
return (mod_info(&modlinkage, modinfop));
}