/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/sysmacros.h>
#include <sys/byteorder.h>
#include <sys/kiconv_ja.h>
#include <sys/kiconv_ja_jis_to_unicode.h>
#include <sys/kiconv_ja_unicode_to_jis.h>
/*
* The following vector shows remaining bytes in a UTF-8 character.
* Index will be the first byte of the character. This is defined in
* u8_textprep.c.
*/
extern const int8_t u8_number_of_bytes[];
/*
* The following is a vector of bit-masks to get used bits in
* the first byte of a UTF-8 character. The index is the number of
* remaining bytes of the character, as given by the vector above.
* This is defined in uconv.c.
*/
extern const uchar_t u8_masks_tbl[];
/*
* The following two vectors are to provide valid minimum and
* maximum values for the 2'nd byte of a multibyte UTF-8 character for
* better illegal sequence checking. The index value must be the value of
* the first byte of the UTF-8 character. These are defined in u8_textprep.c.
*/
extern const uint8_t u8_valid_min_2nd_byte[];
extern const uint8_t u8_valid_max_2nd_byte[];
static kiconv_ja_euc16_t
{
const kiconv_ja_euc16_t *p;
return (p[ucs2 & 0xff]);
return (KICONV_JA_NODEST);
}
static size_t
{
uint_t l; /* to be copied to *p on successful return */
int remaining_bytes;
int u8_size;
if (ic1 < 0x80) {
/* successfully converted */
goto ret;
}
if (u8_size == U8_ILLEGAL_CHAR) {
} else if (u8_size == U8_OUT_OF_RANGE_CHAR) {
}
if (remaining_bytes != 0) {
for (; remaining_bytes > 0; remaining_bytes--) {
if (ic1 != 0U) {
}
ic1 = 0U; /* 2nd byte check done */
} else {
}
}
}
/* successfully converted */
*p = l;
} else {
}
ret:
if (rv == 0) {
/*
* Update rv, *pip, and *pileft on successful return.
*/
}
return (rv);
}
static size_t
{
uint_t l; /* to be copied to *p on successful return */
int remaining_bytes;
int u8_size;
if (ic1 < 0x80) {
/* successfully converted */
goto ret;
}
l = KICONV_JA_DEF_SINGLE;
(*repnum)++;
goto ret;
}
if (remaining_bytes != 0) {
for (; remaining_bytes > 0; remaining_bytes--) {
if (ic1 != 0U) {
l = KICONV_JA_DEF_SINGLE;
(*repnum)++;
break;
}
ic1 = 0U; /* 2nd byte check done */
} else {
l = KICONV_JA_DEF_SINGLE;
(*repnum)++;
break;
}
}
}
} else {
l = KICONV_JA_DEF_SINGLE;
(*repnum)++;
}
ret:
/* successfully converted */
*p = l;
return (rv);
}
static size_t /* return #bytes read, or -1 */
uint_t *p, /* point variable to store UTF-32 */
int *errno, /* point variable to errno */
int flag, /* kiconvstr flag */
{
if (flag & KICONV_REPLACE_INVALID)
else
}
static size_t
char **pop, /* point pointer to output buf */
int *errno) /* point variable to errno */
{
if (u32 <= 0x7f) {
rv = 1;
} else if (u32 <= 0x7ff) {
rv = 2;
} else if (u32 <= 0xffff) {
rv = 3;
} else if (u32 <= 0x10ffff) {
rv = 4;
} else {
}
ret:
/* update *pop and *poleft only on successful return */
}
return (rv);
}
static void *
{
KM_SLEEP);
kcd->bom_processed = 0;
return ((void *)kcd);
}
/*
 * open_eucjp()
 * Hands back whatever _kiconv_ja_open_unicode() returns when it is
 * called with the KICONV_JA_TBLID_EUCJP identifier.
 */
static void *
open_eucjp(void)
{
	void *handle;

	handle = _kiconv_ja_open_unicode(KICONV_JA_TBLID_EUCJP);
	return (handle);
}
/*
 * open_eucjpms()
 * Hands back whatever _kiconv_ja_open_unicode() returns when it is
 * called with the KICONV_JA_TBLID_EUCJP_MS identifier.
 */
static void *
open_eucjpms(void)
{
	void *handle;

	handle = _kiconv_ja_open_unicode(KICONV_JA_TBLID_EUCJP_MS);
	return (handle);
}
/*
 * open_sjis()
 * Hands back whatever _kiconv_ja_open_unicode() returns when it is
 * called with the KICONV_JA_TBLID_SJIS identifier.
 */
static void *
open_sjis(void)
{
	void *handle;

	handle = _kiconv_ja_open_unicode(KICONV_JA_TBLID_SJIS);
	return (handle);
}
/*
 * open_cp932()
 * Hands back whatever _kiconv_ja_open_unicode() returns when it is
 * called with the KICONV_JA_TBLID_CP932 identifier.
 */
static void *
open_cp932(void)
{
	void *handle;

	handle = _kiconv_ja_open_unicode(KICONV_JA_TBLID_CP932);
	return (handle);
}
int
{
return (EBADF);
return (0);
}
static size_t
{
char *op;
return (0);
}
ileft = *inbytesleft;
oleft = *outbytesleft;
while (ileft != 0) {
ic1 &= KICONV_JA_CMASK;
ic2 &= KICONV_JA_CMASK;
if (u32 == KICONV_JA_NODEST) {
}
if (u32 == KICONV_JA_REPLACE)
rv++;
} else { /* 2nd byte check failed */
}
} else { /* 2nd byte check failed */
}
if (KICONV_JA_ISCS3(ic3)) {
/* 3rd byte check passed */
ic2 &= KICONV_JA_CMASK;
ic3 &= KICONV_JA_CMASK;
if (u32 == KICONV_JA_NODEST) {
(ic3 - 0x21));
[index];
}
if (u32 == KICONV_JA_REPLACE)
rv++;
} else { /* 3rd byte check failed */
}
} else { /* 2nd byte check failed */
}
} else if (KICONV_JA_ISC1CTRLEUC(ic1)) {
/* C1 control; 1 byte */
} else { /* 1st byte check failed */
}
/*
* One character successfully converted so update
* values outside of this function's stack.
*/
*inbytesleft = ileft;
*outbytesleft = oleft;
}
ret:
return (rv);
}
static size_t
{
char *op;
return (0);
}
ileft = *inbytesleft;
oleft = *outbytesleft;
while (ileft != 0) {
KICONV_JA_GETU(&ucs4, 0);
if (ucs4 > 0xffff) {
/* non-BMP */
rv++;
goto next;
}
if (euc16 == KICONV_JA_NODEST) {
}
if (euc16 == KICONV_JA_NODEST) {
rv++;
goto next;
}
switch (euc16 & 0x8080) {
case 0x0000: /* CS0 */
break;
case 0x8080: /* CS1 */
break;
case 0x0080: /* CS2 */
break;
case 0x8000: /* CS3 */
break;
}
next:
/*
* One character successfully converted so update
* values outside of this function's stack.
*/
*inbytesleft = ileft;
*outbytesleft = oleft;
}
ret:
return (rv);
}
static size_t
{
char *op;
return (0);
}
ileft = *inbytesleft;
oleft = *outbytesleft;
while (ileft != 0) {
return (0);
}
if (flag & KICONV_REPLACE_INVALID) {
} else {
}
ic1 &= KICONV_JA_CMASK;
ic2 &= KICONV_JA_CMASK;
if (u32 == KICONV_JA_NODEST) {
}
if (u32 == KICONV_JA_REPLACE)
rv++;
} else { /* 2nd byte check failed */
if (flag & KICONV_REPLACE_INVALID) {
rv++;
} else {
}
}
if (flag & KICONV_REPLACE_INVALID) {
} else {
}
} else { /* 2nd byte check failed */
if (flag & KICONV_REPLACE_INVALID) {
rv++;
} else {
}
}
if (flag & KICONV_REPLACE_INVALID) {
} else {
}
if (flag & KICONV_REPLACE_INVALID) {
} else {
}
if (KICONV_JA_ISCS3(ic3)) {
/* 3rd byte check passed */
ic2 &= KICONV_JA_CMASK;
ic3 &= KICONV_JA_CMASK;
if (u32 == KICONV_JA_NODEST) {
(ic3 - 0x21));
[index];
}
if (u32 == KICONV_JA_REPLACE)
rv++;
} else { /* 3rd byte check failed */
if (flag & KICONV_REPLACE_INVALID) {
rv++;
} else {
}
}
} else { /* 2nd byte check failed */
if (flag & KICONV_REPLACE_INVALID) {
rv++;
} else {
}
}
} else if (KICONV_JA_ISC1CTRLEUC(ic1)) {
/* C1 control; 1 byte */
} else { /* 1st byte check failed */
if (flag & KICONV_REPLACE_INVALID) {
rv++;
} else {
}
}
next:
/*
* One character successfully converted so update
* values outside of this function's stack.
*/
*inbytesleft = ileft;
*outbytesleft = oleft;
}
ret:
return (rv);
}
static size_t
{
char *op;
return (0);
}
ileft = *inbytesleft;
oleft = *outbytesleft;
while (ileft != 0) {
return (0);
}
if (ucs4 > 0xffff) {
/* non-BMP */
rv++;
goto next;
}
if (euc16 == KICONV_JA_NODEST) {
}
if (euc16 == KICONV_JA_NODEST) {
rv++;
goto next;
}
switch (euc16 & 0x8080) {
case 0x0000: /* CS0 */
break;
case 0x8080: /* CS1 */
break;
case 0x0080: /* CS2 */
break;
case 0x8000: /* CS3 */
break;
}
next:
/*
* One character successfully converted so update
* values outside of this function's stack.
*/
*inbytesleft = ileft;
*outbytesleft = oleft;
}
ret:
return (rv);
}
static size_t
{
return ((size_t)-1);
}
}
static size_t
{
return ((size_t)-1);
}
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
char *op;
return (0);
}
ileft = *inbytesleft;
oleft = *outbytesleft;
while (ileft != 0) {
if (KICONV_JA_ISSJKANJI2(ic2)) {
if (ic2 >= 0x9f) {
ic1++;
}
if (uni == KICONV_JA_NODEST) {
+ (ic2 - 0x21);
}
if (uni == KICONV_JA_REPLACE)
rv++;
} else { /* 2nd byte check failed */
/* NOTREACHED */
}
if (KICONV_JA_ISSJKANJI2(ic2)) {
if (ic2 >= 0x9f) {
ic1++;
}
if (uni == KICONV_JA_REPLACE)
rv++;
} else { /* 2nd byte check failed */
}
/*
* These codes need special treatment. By adding an
* offset to them, we can process them in the same
* way as the extended IBM chars.
*/
if (KICONV_JA_ISSJKANJI2(ic2)) {
if (dest == 0xffff) {
}
}
/*
* XXX: 0xfa54 and 0xfa5b must be mapped
* to JIS0208 area. Therefore we
* have to do special treatment.
*/
if (dest == 0xfa54) {
upper = 0x22;
lower = 0x4c;
} else {
upper = 0x22;
lower = 0x68;
}
if (uni == KICONV_JA_NODEST) {
[index];
}
if (uni == KICONV_JA_REPLACE)
rv++;
} else {
if (dest == 0xffff) {
}
if (uni == KICONV_JA_NODEST) {
[index];
}
if (uni == KICONV_JA_REPLACE)
rv++;
}
} else { /* 2nd byte check failed */
}
/*
* Based on the draft convention of OSF-JVC CDEWG,
* characters in this area will be mapped to
* "CHIKAN-MOJI." (convertible character)
* We use U+FFFD in this case.
*/
if (KICONV_JA_ISSJKANJI2(ic2)) {
uni = 0xfffd;
} else { /* 2nd byte check failed */
}
} else { /* 1st byte check failed */
}
/*
* One character successfully converted so update
* values outside of this function's stack.
*/
*inbytesleft = ileft;
*outbytesleft = oleft;
}
ret:
return (rv);
}
/*
* _kiconv_ja_lookuptbl()
* Look for the dest value among the sz entries scanned by the loop
* below (sz is sized in units of kiconv_ja_sjtoibmext[] elements).
* For a hit at index i the value returned is
* i + 0xfa40 + ((i / 0xc0) * 0x40), not the bare index.
* If the loop finishes without returning, 0x3f is returned instead.
*/
static ushort_t
{
int i;
sizeof (kiconv_ja_sjtoibmext[0]));
for (i = 0; i < sz; i++) {
/* (i / 0xc0) * 0x40 adds 0x40 for every 0xc0 entries passed */
return ((i + 0xfa40 + ((i / 0xc0) * 0x40)));
}
/* NOTE(review): 0x3f is ASCII '?'; presumably the no-match fallback */
return (0x3f);
}
static size_t
{
char *op;
return (0);
}
ileft = *inbytesleft;
oleft = *outbytesleft;
while (ileft != 0) {
KICONV_JA_GETU(&ucs4, 0);
if (ucs4 > 0xffff) {
/* non-BMP */
rv++;
goto next;
}
if (euc16 == KICONV_JA_NODEST) {
}
if (euc16 == KICONV_JA_NODEST) {
rv++;
goto next;
}
switch (euc16 & 0x8080) {
case 0x0000: /* CS0 */
rv++;
} else {
}
break;
case 0x8080: /* CS1 */
/*
* For an even-numbered row (Ku), add 0x80 to
* look at the latter half of the kiconv_ja_jistosj2[] array.
*/
break;
case 0x0080: /* CS2 */
break;
case 0x8000: /* CS3 */
if (euc16 == 0xa271) {
/* NUMERO SIGN */
KICONV_JA_NPUT(0x87);
KICONV_JA_NPUT(0x82);
if (dest == 0xffff) {
} else {
/* avoid putting NUL ('\0') */
if (dest > 0xff) {
} else {
}
}
} else {
/*
* For an even-numbered row (Ku), add 0x80 to
* look at the latter half of kiconv_ja_jistosj2[].
*/
}
break;
}
next:
/*
* One character successfully converted so update
* values outside of this function's stack.
*/
*inbytesleft = ileft;
*outbytesleft = oleft;
}
ret:
return (rv);
}
static size_t
{
char *op;
return (0);
}
ileft = *inbytesleft;
oleft = *outbytesleft;
while (ileft != 0) {
return (0);
}
} else if (KICONV_JA_ISSJKANA(ic1)) {
/* JIS X 0201 Kana; 1 byte */
} else if (KICONV_JA_ISSJKANJI1(ic1)) {
/* JIS X 0208 or UDC; 2 bytes */
if (flag & KICONV_REPLACE_INVALID) {
} else {
}
if (KICONV_JA_ISSJKANJI2(ic2)) {
if (ic2 >= 0x9f) {
ic1++;
}
if (uni == KICONV_JA_NODEST) {
+ (ic2 - 0x21);
}
if (uni == KICONV_JA_REPLACE)
rv++;
} else { /* 2nd byte check failed */
if (flag & KICONV_REPLACE_INVALID) {
rv++;
} else {
}
/* NOTREACHED */
}
if (flag & KICONV_REPLACE_INVALID) {
} else {
}
if (KICONV_JA_ISSJKANJI2(ic2)) {
if (ic2 >= 0x9f) {
ic1++;
}
if (uni == KICONV_JA_REPLACE)
rv++;
} else { /* 2nd byte check failed */
if (flag & KICONV_REPLACE_INVALID) {
rv++;
} else {
}
}
/*
* These codes need special treatment. By adding an
* offset to them, we can process them in the same
* way as the extended IBM chars.
*/
if (flag & KICONV_REPLACE_INVALID) {
} else {
}
if (KICONV_JA_ISSJKANJI2(ic2)) {
if (dest == 0xffff) {
if (flag &
rv++;
} else {
}
}
}
/*
* XXX: 0xfa54 and 0xfa5b must be mapped
* to JIS0208 area. Therefore we
* have to do special treatment.
*/
if (dest == 0xfa54) {
upper = 0x22;
lower = 0x4c;
} else {
upper = 0x22;
lower = 0x68;
}
if (uni == KICONV_JA_NODEST) {
[index];
}
if (uni == KICONV_JA_REPLACE)
rv++;
} else {
if (dest == 0xffff) {
if (flag &
rv++;
} else {
}
}
if (uni == KICONV_JA_NODEST) {
[index];
}
if (uni == KICONV_JA_REPLACE)
rv++;
}
} else { /* 2nd byte check failed */
if (flag & KICONV_REPLACE_INVALID) {
rv++;
} else {
}
}
/*
* Based on the draft convention of OSF-JVC CDEWG,
* characters in this area will be mapped to
* "CHIKAN-MOJI." (convertible character)
* We use U+FFFD in this case.
*/
if (flag & KICONV_REPLACE_INVALID) {
} else {
}
if (KICONV_JA_ISSJKANJI2(ic2)) {
uni = 0xfffd;
} else { /* 2nd byte check failed */
if (flag & KICONV_REPLACE_INVALID) {
rv++;
} else {
}
}
} else { /* 1st byte check failed */
if (flag & KICONV_REPLACE_INVALID) {
rv++;
} else {
}
}
next:
/*
* One character successfully converted so update
* values outside of this function's stack.
*/
*inbytesleft = ileft;
*outbytesleft = oleft;
}
ret:
return (rv);
}
static size_t
{
char *op;
return (0);
}
ileft = *inbytesleft;
oleft = *outbytesleft;
while (ileft != 0) {
return (0);
}
if (ucs4 > 0xffff) {
/* non-BMP */
rv++;
goto next;
}
if (euc16 == KICONV_JA_NODEST) {
}
if (euc16 == KICONV_JA_NODEST) {
rv++;
goto next;
}
switch (euc16 & 0x8080) {
case 0x0000: /* CS0 */
rv++;
} else {
}
break;
case 0x8080: /* CS1 */
/*
* For an even-numbered row (Ku), add 0x80 to
* look at the latter half of the kiconv_ja_jistosj2[] array.
*/
break;
case 0x0080: /* CS2 */
break;
case 0x8000: /* CS3 */
if (euc16 == 0xa271) {
/* NUMERO SIGN */
KICONV_JA_NPUT(0x87);
KICONV_JA_NPUT(0x82);
if (dest == 0xffff) {
} else {
/* avoid putting NUL ('\0') */
if (dest > 0xff) {
} else {
}
}
} else {
/*
* For an even-numbered row (Ku), add 0x80 to
* look at the latter half of kiconv_ja_jistosj2[].
*/
}
break;
}
next:
/*
* One character successfully converted so update
* values outside of this function's stack.
*/
*inbytesleft = ileft;
*outbytesleft = oleft;
}
ret:
return (rv);
}
static size_t
{
return ((size_t)-1);
}
}
static size_t
{
return ((size_t)-1);
}
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
{
},
{
},
{
},
{
},
{
},
{
},
{
},
{
}
};
/* Number of entries in kiconv_ja_ops_tbl[], counted in kiconv_ops_t units. */
#define KICONV_JA_MAX_JA_OPS \
(sizeof (kiconv_ja_ops_tbl) / sizeof (kiconv_ops_t))
/* Number of entries in kiconv_ja_aliases[], counted in char * units. */
#define KICONV_JA_MAX_JA_ALIAS \
(sizeof (kiconv_ja_aliases) / sizeof (char *))
"kiconv_ja", /* module name */
KICONV_JA_MAX_JA_OPS, /* number of conversion in kiconv_ja */
kiconv_ja_ops_tbl, /* kiconv_ja ops table */
KICONV_JA_MAX_JA_ALIAS, /* number of alias in kiconv_ja */
kiconv_ja_aliases, /* kiconv_ja aliases */
kiconv_ja_canonicals, /* kiconv_ja canonicals */
0
};
"kiconv module for Japanese",
};
(void *)&modlkiconv_ja,
};
/*
* _init()
* A non-zero err is handed straight back to the caller.
*/
int
_init(void)
{
int err;
if (err)
return (err);
}
int
{
}
/*
* _fini()
* Returns EBUSY while the module is in use (see the comment in the
* body); a non-zero err is handed straight back to the caller.
*/
int
_fini(void)
{
int err;
/*
* If this module is being used, then we cannot remove the module.
* The following check will catch pretty much all the usual cases.
*
* Any remaining cases will be caught by kiconv_unregister_module()
* during mod_remove() below.
*/
return (EBUSY);
if (err)
return (err);
}