/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* Kernel iconv code conversion module (kiconv_emea) for Europe, Middle East,
* and South East Asia (PSARC/2007/173).
*/
#include <sys/sysmacros.h>
#include <sys/byteorder.h>
#include <sys/kiconv_emea1.h>
#include <sys/kiconv_emea2.h>
/*
* The following macros are the ids that select the correct code conversion
* mapping data tables to use. The actual tables come from <sys/kiconv_emea1.h>
* and <sys/kiconv_emea2.h>. If you update the header files, you might
* also need to update the table ids below.
*
* The table for KICONV_TBLID_720 is a special case and should come from
* a header file separate from the others at <sys/kiconv_emea1.h>; hence it
* has a rather unusual id that distinguishes it from the others. (And,
* the ids must be of type uint8_t.)
*/
#define KICONV_TBLID_737 (0)
/*
* The following tables are coming from u8_textprep.c. We use them to
* check on validity of UTF-8 characters and their bytes.
*/
extern const int8_t u8_number_of_bytes[];
extern const uint8_t u8_valid_min_2nd_byte[];
extern const uint8_t u8_valid_max_2nd_byte[];
/*
* The following 25 open_to_xxxx() functions are kiconv_open functions for
* the conversions from UTF-8 to xxxx single byte codesets.
*/
static void *
{
s->id = KICONV_TBLID_720;
s->bom_processed = 0;
return ((void *)s);
}
static void *
{
s->id = KICONV_TBLID_737;
s->bom_processed = 0;
return ((void *)s);
}
static void *
{
s->id = KICONV_TBLID_852;
s->bom_processed = 0;
return ((void *)s);
}
static void *
{
s->id = KICONV_TBLID_857;
s->bom_processed = 0;
return ((void *)s);
}
static void *
{
s->id = KICONV_TBLID_862;
s->bom_processed = 0;
return ((void *)s);
}
static void *
{
s->id = KICONV_TBLID_866;
s->bom_processed = 0;
return ((void *)s);
}
static void *
{
s->id = KICONV_TBLID_1250;
s->bom_processed = 0;
return ((void *)s);
}
static void *
{
s->id = KICONV_TBLID_1251;
s->bom_processed = 0;
return ((void *)s);
}
static void *
{
s->id = KICONV_TBLID_1253;
s->bom_processed = 0;
return ((void *)s);
}
static void *
{
s->id = KICONV_TBLID_1254;
s->bom_processed = 0;
return ((void *)s);
}
static void *
{
s->id = KICONV_TBLID_1255;
s->bom_processed = 0;
return ((void *)s);
}
static void *
{
s->id = KICONV_TBLID_1256;
s->bom_processed = 0;
return ((void *)s);
}
static void *
{
s->id = KICONV_TBLID_1257;
s->bom_processed = 0;
return ((void *)s);
}
static void *
{
s->id = KICONV_TBLID_8859_2;
s->bom_processed = 0;
return ((void *)s);
}
static void *
{
s->id = KICONV_TBLID_8859_3;
s->bom_processed = 0;
return ((void *)s);
}
static void *
{
s->id = KICONV_TBLID_8859_4;
s->bom_processed = 0;
return ((void *)s);
}
static void *
{
s->id = KICONV_TBLID_8859_5;
s->bom_processed = 0;
return ((void *)s);
}
static void *
{
s->id = KICONV_TBLID_8859_6;
s->bom_processed = 0;
return ((void *)s);
}
static void *
{
s->id = KICONV_TBLID_8859_7;
s->bom_processed = 0;
return ((void *)s);
}
static void *
{
s->id = KICONV_TBLID_8859_8;
s->bom_processed = 0;
return ((void *)s);
}
static void *
{
s->id = KICONV_TBLID_8859_9;
s->bom_processed = 0;
return ((void *)s);
}
static void *
{
s->id = KICONV_TBLID_8859_10;
s->bom_processed = 0;
return ((void *)s);
}
static void *
{
s->id = KICONV_TBLID_8859_11;
s->bom_processed = 0;
return ((void *)s);
}
static void *
{
s->id = KICONV_TBLID_8859_13;
s->bom_processed = 0;
return ((void *)s);
}
static void *
{
s->id = KICONV_TBLID_KOI8_R;
s->bom_processed = 0;
return ((void *)s);
}
/*
* The following 25 open_fr_xxxx() functions are kiconv_open functions for
* the conversions from xxxx single byte codeset to UTF-8.
*/
static void *
{
return ((void *)KICONV_TBLID_720);
}
static void *
{
return ((void *)KICONV_TBLID_737);
}
static void *
{
return ((void *)KICONV_TBLID_852);
}
static void *
{
return ((void *)KICONV_TBLID_857);
}
static void *
{
return ((void *)KICONV_TBLID_862);
}
static void *
{
return ((void *)KICONV_TBLID_866);
}
static void *
{
return ((void *)KICONV_TBLID_1250);
}
static void *
{
return ((void *)KICONV_TBLID_1251);
}
static void *
{
return ((void *)KICONV_TBLID_1253);
}
static void *
{
return ((void *)KICONV_TBLID_1254);
}
static void *
{
return ((void *)KICONV_TBLID_1255);
}
static void *
{
return ((void *)KICONV_TBLID_1256);
}
static void *
{
return ((void *)KICONV_TBLID_1257);
}
static void *
{
return ((void *)KICONV_TBLID_8859_2);
}
static void *
{
return ((void *)KICONV_TBLID_8859_3);
}
static void *
{
return ((void *)KICONV_TBLID_8859_4);
}
static void *
{
return ((void *)KICONV_TBLID_8859_5);
}
static void *
{
return ((void *)KICONV_TBLID_8859_6);
}
static void *
{
return ((void *)KICONV_TBLID_8859_7);
}
static void *
{
return ((void *)KICONV_TBLID_8859_8);
}
static void *
{
return ((void *)KICONV_TBLID_8859_9);
}
static void *
{
return ((void *)KICONV_TBLID_8859_10);
}
static void *
{
return ((void *)KICONV_TBLID_8859_11);
}
static void *
{
return ((void *)KICONV_TBLID_8859_13);
}
static void *
{
return ((void *)KICONV_TBLID_KOI8_R);
}
/*
* The following is the common kiconv_close function for the conversions from
* UTF-8 to single byte codesets.
*/
static int
close_to_sb(void *s)
{
	/* Assume the descriptor is bad until it proves otherwise. */
	int rv = EBADF;

	/*
	 * Only a descriptor that is neither NULL nor (void *)-1 is released;
	 * it is freed as a kiconv_state_data_t sized allocation.
	 */
	if (s != NULL && s != (void *)-1) {
		kmem_free(s, sizeof (kiconv_state_data_t));
		rv = 0;
	}

	return (rv);
}
/*
* The following is the common kiconv_close function for the conversions from
* single byte codesets to UTF-8.
*/
static int
close_fr_sb(void *s)
{
if ((ulong_t)s > KICONV_MAX_MAPPING_TBLID &&
((ulong_t)s < KICONV_TBLID_RANGE1_START ||
(ulong_t)s > KICONV_TBLID_RANGE1_END))
return (EBADF);
return (0);
}
/*
* The following is the common kiconv function for the conversions from
* UTF-8 to single byte codesets. (This may look very similar to
* kiconvstr_to_sb(), but the two have different features to cover, and
* it's not really worth trying to merge them into a single function: doing
* so would add a performance penalty to both for each character
* conversion, since you would have to figure out whether this is
* kiconv_to_sb() or kiconvstr_to_sb().)
*/
static size_t
{
size_t i;
size_t l;
size_t h;
/* Check on the kiconv code conversion descriptor. */
return ((size_t)-1);
}
/* Get the table id and check on it. */
if (id > KICONV_MAX_MAPPING_TBLID &&
return ((size_t)-1);
}
/* If this is a state reset request, process and return. */
return ((size_t)0);
}
ret_val = 0;
/*
* Get the table we want to use and also calculate the "init_h"
* which is the initial high index for the binary search that we will
* use. While the table sizes are all the same at the moment, to be
* ready for future cases where tables could be of different sizes,
* we calculate the init_h separately here.
*/
if (id == KICONV_TBLID_720) {
init_h = sizeof (u8_to_cp720_tbl);
} else {
}
/*
* If we haven't checked for the UTF-8 signature BOM character at
* the beginning of the conversion data stream, we check for it and, if
* we find one, we skip it since we have no use for it.
*/
ib += 3;
if (sz <= 0) {
break;
}
/*
* If there is no room to write to the output buffer,
* we issue E2BIG and let the caller know about it.
*/
break;
}
/*
* If it is a 7-bit ASCII character, we don't need to
* process further and we just copy the character over.
*
* If not, we collect the character bytes up to four bytes,
* validate the bytes, and binary search for the corresponding
* single byte codeset character byte. If we find it from
* the mapping table, we put that into the output buffer;
* otherwise, we put a replacement character instead as
* a non-identical conversion.
*/
if (sz == 1) {
continue;
}
/*
* Issue EINVAL if the last character at the input buffer
* is an incomplete character missing a byte or more.
*/
break;
}
/*
* We collect UTF-8 character bytes and, at the same time,
* check whether the bytes are valid or not. This follows
* the latest UTF-8 byte representation.
*/
for (i = 1; i < sz; i++) {
if (second) {
goto TO_SB_ILLEGAL_CHAR_ERR;
}
goto TO_SB_ILLEGAL_CHAR_ERR;
}
ib++;
}
i = l = 0;
h = init_h;
while (l <= h) {
i = (l + h) / 2;
break;
l = i + 1;
else
h = i - 1;
}
} else {
/*
* What this means is that we encountered
* a non-identical conversion. In other words,
* input buffer contains a valid character in
* the fromcode but the tocode doesn't have
* any character that can be mapped to.
*
* In this case, we insert an ASCII replacement
* character instead at the output buffer and
* count such non-identical conversions by
* increasing the ret_val.
*
* If the return value of the function is bigger
* than zero, that means we had such non-identical
* conversion(s).
*/
ret_val++;
}
}
return (ret_val);
}
/*
* The following is the common kiconv function for the conversions from
* single byte codesets to UTF-8.
*/
static size_t
{
size_t i;
size_t k;
/* Validate the kiconv code conversion descriptor. */
return ((size_t)-1);
}
/*
* If this is a state reset request, there is nothing to do and so
* we just return.
*/
return ((size_t)0);
ret_val = 0;
/*
* If this is a 7-bit ASCII character, we just copy over and
* that's all we need to do for this character.
*/
if (*ib < 0x80) {
break;
}
continue;
}
/*
* Otherwise, we get the corresponding UTF-8 character bytes
* from the mapping table and copy them over.
*
* We don't need to worry about if the UTF-8 character bytes
* at the mapping tables are valid or not since they are good.
*/
k = *ib - 0x80;
/*
* If (sz <= 0), that means the character in the input buffer
* is an illegal character possibly unassigned or non-character
* at the fromcode single byte codeset.
*/
if (sz <= 0) {
break;
}
break;
}
for (i = 0; i < sz; i++)
ib++;
}
return (ret_val);
}
/*
* The following is the common kiconvstr function for the conversions from
* UTF-8 to single byte codeset.
*/
static size_t
{
size_t i;
size_t l;
size_t h;
/* Let's double check on the table id. */
if (id > KICONV_MAX_MAPPING_TBLID &&
return ((size_t)-1);
}
ret_val = 0;
if (id == KICONV_TBLID_720) {
init_h = sizeof (u8_to_cp720_tbl);
} else {
}
/* Skip any UTF-8 signature BOM character in the beginning. */
ib += 3;
/*
* Basically this is pretty much the same as kiconv_to_sb() except
* that we are now accepting two flag values and doing the processing
* accordingly.
*/
if (sz <= 0) {
if (flag & KICONV_REPLACE_INVALID) {
break;
}
ib++;
}
break;
}
break;
break;
}
if (sz == 1) {
continue;
}
if (flag & KICONV_REPLACE_INVALID) {
}
break;
}
for (i = 1; i < sz; i++) {
if (second) {
if (flag & KICONV_REPLACE_INVALID) {
}
}
if (flag & KICONV_REPLACE_INVALID) {
}
}
ib++;
}
i = l = 0;
h = init_h;
while (l <= h) {
i = (l + h) / 2;
break;
l = i + 1;
else
h = i - 1;
}
} else {
ret_val++;
}
}
return (ret_val);
}
/*
* The following 25 functions are the real entry points that will be
* given to the kiconv framework at the genunix.
*/
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
/*
* The following is the common kiconvstr function for the conversions from
* single byte codeset to UTF-8.
*/
static size_t
{
size_t i;
size_t k;
if (id > KICONV_MAX_MAPPING_TBLID &&
return ((size_t)-1);
}
ret_val = 0;
break;
if (*ib < 0x80) {
break;
}
continue;
}
k = *ib - 0x80;
if (sz <= 0) {
if (flag & KICONV_REPLACE_INVALID) {
break;
}
/* Save KICONV_UTF8_REPLACEMENT_CHAR. */
*ob++ = 0xef;
*ob++ = 0xbf;
*ob++ = 0xbd;
ret_val++;
ib++;
continue;
}
break;
}
break;
}
for (i = 0; i < sz; i++)
ib++;
}
return (ret_val);
}
/*
* The following 25 functions are the real entry points that will be
* given to kiconv framework at the genunix.
*/
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
static size_t
{
}
/*
* The following are the supported code conversions that will be passed to
* and registered from this module. The tocode and fromcode names are
* normalized.
*/
{
"utf8", "cp1250",
},
{
"cp1250", "utf8",
},
{
"utf8", "iso88592",
},
{
"iso88592", "utf8",
},
{
"utf8", "cp852",
},
{
"cp852", "utf8",
},
{
"utf8", "cp1251",
},
{
"cp1251", "utf8",
},
{
"utf8", "iso88595",
},
{
"iso88595", "utf8",
},
{
"utf8", "koi8r",
},
{
"koi8r", "utf8",
},
{
"utf8", "cp866",
},
{
"cp866", "utf8",
},
{
"utf8", "cp1253",
},
{
"cp1253", "utf8",
},
{
"utf8", "iso88597",
},
{
"iso88597", "utf8",
},
{
"utf8", "cp737",
},
{
"cp737", "utf8",
},
{
"utf8", "cp1254",
},
{
"cp1254", "utf8",
},
{
"utf8", "iso88599",
},
{
"iso88599", "utf8",
},
{
"utf8", "cp857",
},
{
"cp857", "utf8",
},
{
"utf8", "cp1256",
},
{
"cp1256", "utf8",
},
{
"utf8", "iso88596",
},
{
"iso88596", "utf8",
},
{
"utf8", "cp720",
},
{
"cp720", "utf8",
},
{
"utf8", "cp1255",
},
{
"cp1255", "utf8",
},
{
"utf8", "iso88598",
},
{
"iso88598", "utf8",
},
{
"utf8", "cp862",
},
{
"cp862", "utf8",
},
{
"utf8", "cp1257",
},
{
"cp1257", "utf8",
},
{
"utf8", "iso885913",
},
{
"iso885913", "utf8",
},
{
"utf8", "iso885910",
},
{
"iso885910", "utf8",
},
{
"utf8", "iso885911",
},
{
"iso885911", "utf8",
},
{
"utf8", "iso88593",
},
{
"iso88593", "utf8",
},
{
"utf8", "iso88594",
},
{
"iso88594", "utf8",
},
};
"kiconv_emea", /* Must be the same as in kiconv framework. */
KICONV_MAX_EMEA_OPS, /* size_t kiconv_num_convs */
kiconv_emea_ops, /* kiconv_ops_t *kiconv_ops_tbl */
0, /* size_t kiconv_num_aliases */
NULL, /* char **aliases */
NULL, /* char **canonicals */
0 /* int nowait */
};
"kiconv module for EMEA",
};
(void *)&kiconv_emea,
};
/*
* _init() takes no arguments and returns an int status.
*
* As visible here, it returns err when err is non-zero.
*
* NOTE(review): no statement that assigns err is visible in this excerpt,
* and there is no return statement for the err == 0 path. Presumably the
* call that produces err and the final return have been elided; confirm
* against the complete file before relying on this function.
*/
int
_init()
{
int err;
/* Propagate a non-zero status to the caller unchanged. */
if (err)
return (err);
}
int
{
}
/*
* _fini() takes no arguments and returns an int status: EBUSY, or err
* when err is non-zero.
*
* NOTE(review): as visible here, the return of EBUSY is unconditional, so
* the err check after it is unreachable, and no statement assigns err.
* Presumably the in-use test guarding the EBUSY return and the call that
* produces err have been elided; confirm against the complete file.
*/
int
_fini()
{
int err;
/*
* If this module is in use, we cannot remove it. The following
* check will catch pretty much all of the usual cases.
*
* Any remaining cases will be caught by kiconv_unregister_module()
* during mod_remove() below.
*/
return (EBUSY);
/* Propagate a non-zero status to the caller unchanged. */
if (err)
return (err);
}