g11n.c revision ee5b3c37dc989e29415b208ebafc684bddf73662
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <errno.h>
#include <locale.h>
#include <langinfo.h>
#include <iconv.h>
#include <ctype.h>
#include <strings.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include "includes.h"
#include "xmalloc.h"
#include "xlist.h"
/*
* Some system headers already provide MIN; drop any earlier definition so
* that the one below is the only one in effect and no macro-redefinition
* diagnostic is raised.
*/
#ifdef MIN
#undef MIN
#endif /* MIN */
/*
* Smaller of x and y. Each argument may be evaluated twice, so do not pass
* expressions with side effects.
*/
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#define LOCALE_PATH "/usr/bin/locale"
/* two-char language code, '-' and two-char country code */
#define LANGTAG_MAX 5
static char *g11n_locale2langtag(char *locale);
/*
* Convert locale string name into a language tag. The caller is responsible for
* freeing the memory allocated for the result.
*/
static char *
g11n_locale2langtag(char *locale)
{
char *langtag;
/* base cases */
return (NULL);
return (xstrdup("i-default"));
/* punt for language codes which are not exactly 2 letters */
return (NULL);
/* we have a primary language sub-tag */
/* do we have country sub-tag? For example: cs_CZ */
return (langtag);
}
/* example: create cs-CZ from cs_CZ */
return (langtag);
}
/* in all other cases we just use the primary language sub-tag */
return (langtag);
}
g11n_langtag_is_default(char *langtag)
{
}
/*
* This lang tag / locale matching function works only for two-character
* language primary sub-tags and two-character country sub-tags.
*
* Return value, as documented on the individual returns below:
*   0 - no match (locale or langtag not of the expected form, or the
*       language parts differ)
*   1 - the language parts match, but a country part is missing, unusable
*       or not matched
*   2 - the language parts match and either neither side has a further
*       component or the country parts match as well
* An "i-default" langtag is answered with 1 or 0 only.
*/
{
/* match "i-default" to the process' current locale if possible */
if (g11n_langtag_is_default(langtag)) {
return (1);
else
return (0);
}
/*
* locale must be at least 2 chars long and the lang part must be
* exactly two characters
*/
return (0);
/* same thing with the langtag */
return (0);
/* primary language sub-tag and the locale's language part must match */
return (0);
/*
* primary language sub-tag and the locale's language match, now
* fuzzy check country part
*/
/* neither langtag nor locale have more than one component */
return (2);
/* langtag has only one sub-tag... */
return (1);
/* locale has no country code... */
return (1);
/* langtag has more than one subtag and the locale has a country code */
/* ignore second subtag if not two chars */
return (1);
return (1);
/* ignore rest of locale if there is no two-character country part */
return (1);
return (1);
/* if the country part matches, return 2 */
return (2);
/* otherwise only the language part matched */
return (1);
}
char *
{
/* we have one text domain - always set it */
(void) textdomain(TEXT_DOMAIN);
/* if the locale is not set, set it from the env vars */
}
void
{
char *curr;
/* we have one text domain - always set it */
(void) textdomain(TEXT_DOMAIN);
if (!locale)
return;
return;
/* if <category> is bogus, setlocale() will do nothing */
}
char **
{
/*
* Collects locale names into a list that is handed back to the caller.
* NULL is returned when no name was collected (n_elems == 0). The input
* stream locale_out is closed with pclose() before returning.
*/
char **list;
return (NULL);
/*
* start with enough room for 192 locales (list_size) - that's a lot
* fewer than all the locales available for installation, but a lot more
* than what most users will need and install
*/
n_elems = 0;
list_size = 192;
/* skip long locale names (if any) */
long_line = 1;
continue;
} else if (long_line) {
/* presumably the tail of an over-long name: skip it and reset the flag */
long_line = 0;
continue;
}
/* ignore locale names like "iso_8859-1" */
continue;
/* double the capacity; the allocation keeps one extra slot (list_size + 1) */
list_size *= 2;
(list_size + 1) * sizeof (char *));
}
}
(void) pclose(locale_out);
/* nothing collected: report that with NULL rather than an empty list */
if (n_elems == 0) {
return (NULL);
}
return (list);
}
char *
{
char *locale;
if (getenv("SSH_LANGS"))
locale = g11n_getlocale();
return (xstrdup("i-default"));
return (g11n_locale2langtag(locale));
}
char *
g11n_locales2langs(char **locale_set)
{
char **p, **r, **q;
for (locales = 0, p = locale_set; p && *p; p++)
locales++;
skip = 0;
continue;
for (q = r; (q - r) < locales; q++) {
if (!*q)
break;
skip = 1;
}
if (!skip)
*(q++) = langtag;
else
*q = NULL;
}
g11n_freelist(r);
return (langs);
}
static int
{
}
int
{
/*
* Result codes, per the comments on the returns below:
*   0 - no match
*   1 - partial match (a country sub-tag is missing on one side, or the
*       country sub-tags differ)
*   2 - exact match
*/
/* no match */
return (0);
/* no country sub-tags - exact match */
return (2);
/* one langtag has a country sub-tag, the other doesn't */
return (1);
/* can't happen - both langtags have a country sub-tag */
return (1);
/* compare country subtags */
return (1);
/* country tags matched - exact match */
return (2);
}
char *
{
char *set3, *lang_subtag;
;
;
/*
* we must not sort the user langtags - sorting or not the server's
* should not affect the outcome
*/
do_append = 0;
for (q = list2; q && *q; q++) {
if (g11n_langtag_match(*p, *q) != 2) continue;
/* append element */
do_append = 1;
if (!*r)
break;
if (strcmp(*p, *r) == 0) {
do_append = 0;
break;
}
}
}
}
}
for (p = list1; p && *p; p++) {
do_append = 0;
for (q = list2; q && *q; q++) {
if (g11n_langtag_match(*p, *q) != 1)
continue;
/* append element */
lang_subtag = xstrdup(*p);
do_append = 1;
if (!*r)
break;
if (strcmp(lang_subtag, *r) == 0) {
do_append = 0;
break;
}
}
} else
}
}
return (set3);
}
char *
{
char **xlist;
/* g11n_langtag_set_intersect uses xmalloc - should not return NULL */
if (!list)
return (NULL);
return (NULL);
return (result);
}
/*
* Compare locales, preferring UTF-8 codesets to others, otherwise doing
* a straight strcmp()
*/
static int
{
char *dot_ptr;
int s1_is_utf8 = 0;
int s2_is_utf8 = 0;
/* check if s1 is a UTF-8 locale */
s1_is_utf8++;
}
/* check if s2 is a UTF-8 locale */
s2_is_utf8++;
}
/* prefer UTF-8 locales */
if (s1_is_utf8 && !s2_is_utf8)
return (-1);
if (s2_is_utf8 && !s1_is_utf8)
return (1);
/* prefer any locale over the default locales */
return (1);
}
return (-1);
}
}
char **
{
char **langtag_list, **result, **p, **q, **r;
char *s;
/* count lang tags and locales */
for (n_locales = 0, p = locale_set; p && *p; p++)
n_locales++;
/* count the number of langtags */
;
;
n_results = 0;
/* more specific matches first */
for (p = langtag_list; p && *p; p++) {
do_append = 0;
for (q = locale_set; q && *q; q++) {
if (g11n_langtag_matches_locale(*p, *q) == 2) {
do_append = 1;
if (!*r)
break;
if (strcmp(*q, *r) == 0) {
do_append = 0;
break;
}
}
}
break;
}
}
}
for (p = langtag_list; p && *p; p++) {
do_append = 0;
for (q = locale_set; q && *q; q++) {
if (g11n_langtag_matches_locale(*p, *q) == 1) {
do_append = 1;
if (!*r)
break;
if (strcmp(*q, *r) == 0) {
do_append = 0;
break;
}
}
}
break;
}
}
}
return (result);
}
char *
{
if (srvr_locales == NULL)
locales = g11n_getlocales();
else
goto err;
err:
if (locales != srvr_locales)
return (result);
}
/*
* Functions for validating ASCII and UTF-8 strings
*
* The error_str parameter is an optional pointer to a char variable
* where to store a string suitable for use with error() or fatal() or
* friends.
*
* The return value is 0 on success, or EILSEQ or EINVAL on failure.
*
*/
{
uchar_t *p;
;
return (EILSEQ);
return (0);
}
{
/*
* Scan the bytes at p. A byte with the high bit clear is accepted as-is;
* a byte with the high bit set must be the lead byte of a well-formed
* multi-byte sequence. EILSEQ is returned for the first problem found,
* 0 if none is found.
*/
uchar_t *p;
/* c: code point value being assembled, l: sequence length in bytes */
uint_t c, l;
if (len == 0)
/* 8-bit chars begin a UTF-8 sequence */
if (*p & 0x80) {
/*
* get sequence length and sanity check first byte: 0x80-0xbf carry the
* continuation-byte pattern tested for below and cannot lead a
* sequence, and 0xfe/0xff are rejected outright
*/
if (*p < 0xc0)
return (EILSEQ);
else if (*p < 0xe0)
l = 2;
else if (*p < 0xf0)
l = 3;
else if (*p < 0xf8)
l = 4;
else if (*p < 0xfc)
l = 5;
else if (*p < 0xfe)
l = 6;
else
return (EILSEQ);
return (EILSEQ);
/* overlong detection - build codepoint */
/*
* NOTE(review): 0x3f keeps six bits of the lead byte whatever l is; for
* l >= 3 that includes marker bits (e.g. 0x20 when l == 3), so c can
* come out larger than the encoded code point - confirm that the
* overlong and surrogate checks below still catch what they should.
*/
c = *p & 0x3f;
/* shift c bits from first byte */
c = c << (6 * (l - 1));
/*
* bytes 2..l: each must be non-zero and carry the 10xxxxxx continuation
* pattern; its six payload bits are OR-ed into c at the matching
* position. After each byte, c is compared against a lower bound and
* too-small values are rejected.
*/
if (l > 1) {
if (*(p + 1) && ((*(p + 1) & 0xc0) == 0x80))
c = c | ((*(p + 1) & 0x3f) <<
(6 * (l - 2)));
else
return (EILSEQ);
if (c < 0x80)
return (EILSEQ);
}
if (l > 2) {
if (*(p + 2) && ((*(p + 2) & 0xc0) == 0x80))
c = c | ((*(p + 2) & 0x3f) <<
(6 * (l - 3)));
else
return (EILSEQ);
if (c < 0x800)
return (EILSEQ);
}
if (l > 3) {
if (*(p + 3) && ((*(p + 3) & 0xc0) == 0x80))
c = c | ((*(p + 3) & 0x3f) <<
(6 * (l - 4)));
else
return (EILSEQ);
if (c < 0x10000)
return (EILSEQ);
}
if (l > 4) {
if (*(p + 4) && ((*(p + 4) & 0xc0) == 0x80))
c = c | ((*(p + 4) & 0x3f) <<
(6 * (l - 5)));
else
return (EILSEQ);
if (c < 0x200000)
return (EILSEQ);
}
if (l > 5) {
if (*(p + 5) && ((*(p + 5) & 0xc0) == 0x80))
c = c | (*(p + 5) & 0x3f);
else
return (EILSEQ);
if (c < 0x4000000)
return (EILSEQ);
}
/*
* check for UTF-16 surrogates (0xd800-0xdfff) and other illegal
* code points (0xfffe, 0xffff)
*/
if (((c <= 0xdfff) && (c >= 0xd800)) ||
(c == 0xfffe) || (c == 0xffff))
return (EILSEQ);
/* sequence accepted: step over all l bytes */
p += l;
}
/* 7-bit chars are fine */
else
p++;
}
return (0);
}
/*
* Functions for converting to ASCII or UTF-8 from the local codeset
* Functions for converting from ASCII or UTF-8 to the local codeset
*
* The error_str parameter is an optional pointer to a char variable
* where to store a string suitable for use with error() or fatal() or
* friends.
*
* The err parameter is an optional pointer to an integer where 0
* (success) or EILSEQ or EINVAL will be stored (failure).
*
* These functions return NULL if the conversion fails.
*
*/
uchar_t *
{
static uint_t initialized = 0;
static uint_t do_convert = 0;
int err;
if (!initialized) {
/*
* same, and there are aliases of codesets to boot...
*/
initialized = 1;
do_convert = 0;
} else {
if (err_ptr)
if (error_str)
"convert ASCII strings to the local"
" codeset";
}
initialized = 1;
do_convert = 1;
}
}
if (!do_convert) {
if (err_ptr)
return (NULL);
} else
}
}
uchar_t *
{
static uint_t initialized = 0;
static uint_t do_convert = 0;
int err;
if (!initialized) {
/*
* same, and there are aliases of codesets to boot...
*/
initialized = 1;
do_convert = 0;
} else {
if (err_ptr)
if (error_str)
"convert UTF-8 strings to the "
"local codeset";
}
initialized = 1;
do_convert = 1;
}
}
if (!do_convert) {
if (err_ptr)
return (NULL);
} else
}
}
char *
{
static uint_t initialized = 0;
static uint_t do_convert = 0;
if (!initialized) {
/*
* same, and there are aliases of codesets to boot...
*/
initialized = 1;
do_convert = 0;
} else {
if (err_ptr)
if (error_str)
"convert UTF-8 strings to the "
"local codeset";
}
initialized = 1;
do_convert = 1;
}
}
if (!do_convert)
}
uchar_t *
{
static uint_t initialized = 0;
static uint_t do_convert = 0;
if (!initialized) {
/*
* same, and there are aliases of codesets to boot...
*/
initialized = 1;
do_convert = 0;
} else {
if (err_ptr)
if (error_str)
"convert UTF-8 strings to the "
"local codeset";
}
initialized = 1;
do_convert = 1;
}
}
if (!do_convert)
}
/*
* Wrapper around iconv()
*
* The caller is responsible for freeing the result and for handling
* (errno && errno != E2BIG) (i.e., EILSEQ, EINVAL, EBADF).
*/
static uchar_t *
{
char *outbuf;
const char *inbuf;
return (NULL);
if (len == 0)
/* reset conversion descriptor */
/* XXX Do we need initial shift sequences for UTF-8??? */
if (mul_ptr)
outbytesleft = len;
do {
(size_t)-1) {
/* UTF-8 codepoints are at most 8 bytes long */
if (mul > 2) {
if (err_str)
"Conversion to UTF-8 failed"
" due to preposterous space"
" requirements";
if (err)
return (NULL);
}
/*
* re-alloc output and ensure that the outbuf
* and outbytesleft values are adjusted
*/
} else {
/*
* let the caller deal with iconv() errors,
* probably by calling fatal(); xfree() does
* not set errno
*/
if (err)
return (NULL);
}
}
} while (inbytesleft);
if (outlen)
if (mul_ptr)
return (converted);
}
/*
* Free all strings in the list and then free the list itself. We know that the
* list ends with a NULL pointer.
*/
void
g11n_freelist(char **list)
{
int i = 0;
i++;
}
}