/*
* Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
*/
/*
* The contents of this file are subject to the Netscape Public
* License Version 1.1 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.mozilla.org/NPL/
*
* Software distributed under the License is distributed on an "AS
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
* implied. See the License for the specific language governing
* rights and limitations under the License.
*
* The Original Code is Mozilla Communicator client code, released
* March 31, 1998.
*
* The Initial Developer of the Original Code is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998-1999 Netscape Communications Corporation. All
* Rights Reserved.
*
* Contributor(s):
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>
#include <ctype.h>
#ifndef HAVE_LIBICU
#ifdef SOLARIS_LDAP_CMD
#include <errno.h>
#include <langinfo.h>
#include <iconv.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
extern char *ldaptool_charset;
char *ldaptool_convdir = NULL;
static int charsetset = 0;
char *ldaptool_local2UTF8( const char *src );
#ifdef SOLARIS_LDAP_CMD
static char *ldaptool_convert( const char *src, const char *fcode,
const char *tcode);
char *ldaptool_UTF82local( const char *src );
#endif /* SOLARIS_LDAP_CMD */
#ifdef SOLARIS_LDAP_CMD
/*
* ICU version always returns string, unless strdup fails.
* As in ICU version, in case of error strdup(src)
* Usually strdup(src) will be ASCII and legal anyways.
*/
static char *
ldaptool_convert( const char *src, const char *fcode,
const char *tcode) {
char *dest, *tptr, *tmp;
char *fptr;
iconv_t cd;
size_t ileft, oleft, ret, size;
if (src == NULL)
return (NULL);
if (fcode == NULL || tcode == NULL)
return (strdup(src));
if (strcasecmp(fcode, tcode) == 0)
return (strdup(src));
if ((cd = iconv_open(tcode, fcode)) == (iconv_t)-1) {
/* conversion table not available */
return (strdup(src));
}
ileft = strlen(src);
oleft = 2 * ileft;
size = oleft;
ret = -1;
if ((dest = (char *)malloc(size)) == NULL) {
(void) iconv_close(cd);
/* maybe sizeof strlen(src) memory still exists */
return (strdup(src));
}
tptr = dest;
fptr = (char *)src;
for (;;) {
ret = iconv(cd, &fptr, &ileft, &tptr, &oleft);
if (ret != (size_t)-1) {
/*
* Success. Place 'cd' into its initial shift
* state before returning.
*/
if (fptr == NULL) /* already in initial state */
break;
fptr = NULL;
ileft = 0;
continue;
} if (errno == E2BIG) {
/*
* Lack of space in output buffer.
* Hence double the size and retry.
* But before calling iconv(), oleft
* and tptr have to re-adjusted, so that
* iconv() doesn't overwrite the data
* which has already been converted.
*/
oleft += size;
size *= 2;
if ((tmp = (char *) realloc(dest, size)) == NULL)
break;
tptr = tmp + (tptr - dest);
dest = tmp;
continue;
} else {
/* Other errors */
break;
}
}
if (dest != NULL) {
if (ret == -1) {
/* Free malloc'ed memory on failure */
free(dest);
dest = NULL;
} else if (oleft > 0) {
/* NULL terminate the return value */
*(dest + (size - oleft)) = '\0';
} else {
/* realloc one more byte and NULL terminate */
if ((tmp = (char *) realloc(dest, size + 1)) == NULL) {
free(dest);
dest = NULL;
} else {
*(dest + size) = '\0';
}
}
}
(void) iconv_close(cd);
if (dest == NULL) {
/* last chance in case some other failure along the way occurs */
return (strdup(src));
}
return (dest);
}
char *
ldaptool_UTF82local( const char *src )
{
char *to_code;
if ((to_code = nl_langinfo(CODESET)) == NULL)
return (strdup(src));
return (ldaptool_convert(src, "UTF-8", (const char *)to_code));
}
#endif /* SOLARIS_LDAP_CMD */
char *
ldaptool_local2UTF8( const char *src )
{
#ifdef SOLARIS_LDAP_CMD
char *from_code;
if ((from_code = nl_langinfo(CODESET)) == NULL)
return (strdup(src));
return (ldaptool_convert(src, (const char *)from_code, "UTF-8"));
#else
char *utf8;
charsetset = 0;
if (src == NULL)
{
return NULL;
}
utf8 = strdup(src);
return ( utf8 );
#endif /* SOLARIS_LDAP_CMD */
}
#else /* HAVE_LIBICU */
#include "unicode/utypes.h"
#include "unicode/ucnv.h"
#define NSPR20
#ifdef XP_WIN32
#define VC_EXTRALEAN
#include <afxwin.h>
#include <winnls.h>
#endif
extern char *ldaptool_charset;
static int charsetset = 0;
extern "C" {
char *ldaptool_convdir = NULL;
char *ldaptool_local2UTF8( const char * );
}
#ifndef XP_WIN32
char * GetNormalizedLocaleName(void);
char *
GetNormalizedLocaleName(void)
{
#ifdef _HPUX_SOURCE
int len;
char *locale;
locale = setlocale(LC_CTYPE, "");
if (locale && *locale) {
len = strlen(locale);
} else {
locale = "C";
len = 1;
}
if ((!strncmp(locale, "/\x03:", 3)) &&
(!strcmp(&locale[len - 2], ";/"))) {
locale += 3;
len -= 5;
}
locale = strdup(locale);
if (locale) {
locale[len] = 0;
}
return locale;
#else
char *locale;
locale = setlocale(LC_CTYPE, "");
if (locale && *locale) {
return strdup(locale);
}
return strdup("C");
#endif
}
#if defined(IRIX)
const char *CHARCONVTABLE[] =
{
"! This table maps the host's locale names to IANA charsets",
"!",
"C: ISO_8859-1:1987",
"cs: ISO_8859-2:1987",
"da: ISO_8859-1:1987",
"de: ISO_8859-1:1987",
"de_AT: ISO_8859-1:1987",
"de_CH: ISO_8859-1:1987",
"en: ISO_8859-1:1987",
"en_AU: ISO_8859-1:1987",
"en_CA: ISO_8859-1:1987",
"en_TH: ISO_8859-1:1987",
"en_US: ISO_8859-1:1987",
"es: ISO_8859-1:1987",
"fi: ISO_8859-1:1987",
"fr: ISO_8859-1:1987",
"fr_BE: ISO_8859-1:1987",
"fr_CA: ISO_8859-1:1987",
"fr_CH: ISO_8859-1:1987",
"is: ISO_8859-1:1987",
"it: ISO_8859-1:1987",
"it_CH: ISO_8859-1:1987",
"ja_JP.EUC: Extended_UNIX_Code_Packed_Format_for_Japanese",
"ko_KR.euc: EUC-KR",
"nl: ISO_8859-1:1987",
"nl_BE: ISO_8859-1:1987",
"no: ISO_8859-1:1987",
"pl: ISO_8859-2:1987",
"pt: ISO_8859-1:1987",
"sh: ISO_8859-2:1987",
"sk: ISO_8859-2:1987",
"sv: ISO_8859-1:1987",
"zh_CN.ugb: GB2312",
"zh_TW.ucns: cns11643_1",
NULL
};
#elif defined(SOLARIS)
const char *CHARCONVTABLE[] =
{
"! This table maps the host's locale names to IANA charsets",
"!",
"C: ISO_8859-1:1987",
"ja: Extended_UNIX_Code_Packed_Format_for_Japanese",
"ja_JP.EUC: Extended_UNIX_Code_Packed_Format_for_Japanese",
"ja_JP.PCK: Shift_JIS",
"en: ISO_8859-1:1987",
"en_AU: ISO_8859-1:1987",
"en_CA: ISO_8859-1:1987",
"en_UK: ISO_8859-1:1987",
"en_US: ISO_8859-1:1987",
"es: ISO_8859-1:1987",
"es_AR: ISO_8859-1:1987",
"es_BO: ISO_8859-1:1987",
"es_CL: ISO_8859-1:1987",
"es_CO: ISO_8859-1:1987",
"es_CR: ISO_8859-1:1987",
"es_EC: ISO_8859-1:1987",
"es_GT: ISO_8859-1:1987",
"es_MX: ISO_8859-1:1987",
"es_NI: ISO_8859-1:1987",
"es_PA: ISO_8859-1:1987",
"es_PE: ISO_8859-1:1987",
"es_PY: ISO_8859-1:1987",
"es_SV: ISO_8859-1:1987",
"es_UY: ISO_8859-1:1987",
"es_VE: ISO_8859-1:1987",
"fr: ISO_8859-1:1987",
"fr_BE: ISO_8859-1:1987",
"fr_CA: ISO_8859-1:1987",
"fr_CH: ISO_8859-1:1987",
"de: ISO_8859-1:1987",
"de_AT: ISO_8859-1:1987",
"de_CH: ISO_8859-1:1987",
"nl: ISO_8859-1:1987",
"nl_BE: ISO_8859-1:1987",
"it: ISO_8859-1:1987",
"sv: ISO_8859-1:1987",
"no: ISO_8859-1:1987",
"da: ISO_8859-1:1987",
"iso_8859_1: ISO_8859-1:1987",
"japanese: Extended_UNIX_Code_Packed_Format_for_Japanese",
"ko: EUC-KR",
"zh: GB2312",
"zh_TW: cns11643_1",
NULL
};
#elif defined(OSF1)
const char *CHARCONVTABLE[] =
{
"! This table maps the host's locale names to IANA charsets",
"!",
"C: ISO_8859-1:1987",
"cs_CZ.ISO8859-2: ISO_8859-2:1987",
"cs_CZ: ISO_8859-2:1987",
"da_DK.ISO8859-1: ISO_8859-1:1987",
"de_CH.ISO8859-1: ISO_8859-1:1987",
"de_DE.ISO8859-1: ISO_8859-1:1987",
"en_GB.ISO8859-1: ISO_8859-1:1987",
"en_US.ISO8859-1: ISO_8859-1:1987",
"es_ES.ISO8859-1: ISO_8859-1:1987",
"fi_FI.ISO8859-1: ISO_8859-1:1987",
"fr_BE.ISO8859-1: ISO_8859-1:1987",
"fr_CA.ISO8859-1: ISO_8859-1:1987",
"fr_CH.ISO8859-1: ISO_8859-1:1987",
"fr_FR.ISO8859-1: ISO_8859-1:1987",
"hu_HU.ISO8859-2: ISO_8859-2:1987",
"hu_HU: ISO_8859-2:1987",
"is_IS.ISO8859-1: ISO_8859-1:1987",
"it_IT.ISO8859-1: ISO_8859-1:1987",
"ja_JP.SJIS: Shift_JIS",
"ja_JP.eucJP: Extended_UNIX_Code_Packed_Format_for_Japanese",
"ja_JP: Extended_UNIX_Code_Packed_Format_for_Japanese",
"ko_KR.eucKR: EUC-KR",
"ko_KR: EUC-KR",
"nl_BE.ISO8859-1: ISO_8859-1:1987",
"nl_NL.ISO8859-1: ISO_8859-1:1987",
"no_NO.ISO8859-1: ISO_8859-1:1987",
"pl_PL.ISO8859-2: ISO_8859-2:1987",
"pl_PL: ISO_8859-2:1987",
"pt_PT.ISO8859-1: ISO_8859-1:1987",
"sk_SK.ISO8859-2: ISO_8859-2:1987",
"sk_SK: ISO_8859-2:1987",
"sv_SE.ISO8859-1: ISO_8859-1:1987",
"zh_CN: GB2312",
"zh_HK.big5: Big5",
"zh_HK.eucTW: cns11643_1",
"zh_TW.big5: Big5",
"zh_TW.big5@chuyin: Big5",
"zh_TW.big5@radical: Big5",
"zh_TW.big5@stroke: Big5",
"zh_TW.eucTW: cns11643_1",
"zh_TW.eucTW@chuyin: cns11643_1",
"zh_TW.eucTW@radical: cns11643_1",
"zh_TW.eucTW@stroke: cns11643_1",
"zh_TW: cns11643_1",
NULL
};
#elif defined(HPUX)
const char *CHARCONVTABLE[] =
{
"! This table maps the host's locale names to IANA charsets",
"!",
"C: ISO_8859-1:1987",
"ja_JP: Extended_UNIX_Code_Packed_Format_for_Japanese",
"ja_JP.SJIS: Shift_JIS",
"ja_JP.eucJP: Extended_UNIX_Code_Packed_Format_for_Japanese",
"es_ES: ISO_8859-1:1987",
"es_ES.iso88591: ISO_8859-1:1987",
"sv_SE: ISO_8859-1:1987",
"sv_SE.iso88591: ISO_8859-1:1987",
"da_DK: ISO_8859-1:1987",
"da_DK.iso88591: ISO_8859-1:1987",
"nl_NL: ISO_8859-1:1987",
"nl_NL.iso88591: ISO_8859-1:1987",
"en: ISO_8859-1:1987",
"en_GB: ISO_8859-1:1987",
"en_GB.iso88591: ISO_8859-1:1987",
"en_US: ISO_8859-1:1987",
"en_US.iso88591: ISO_8859-1:1987",
"fi_FI: ISO_8859-1:1987",
"fi_FI.iso88591: ISO_8859-1:1987",
"fr_CA: ISO_8859-1:1987",
"fr_CA.iso88591: ISO_8859-1:1987",
"fr_FR: ISO_8859-1:1987",
"fr_FR.iso88591: ISO_8859-1:1987",
"de_DE: ISO_8859-1:1987",
"de_DE.iso88591: ISO_8859-1:1987",
"is_IS: ISO_8859-1:1987",
"is_IS.iso88591: ISO_8859-1:1987",
"it_IT: ISO_8859-1:1987",
"it_IT.iso88591: ISO_8859-1:1987",
"no_NO: ISO_8859-1:1987",
"no_NO.iso88591: ISO_8859-1:1987",
"pt_PT: ISO_8859-1:1987",
"pt_PT.iso88591: ISO_8859-1:1987",
"hu_HU: ISO_8859-2:1987",
"hu_HU.iso88592: ISO_8859-2:1987",
"cs_CZ: ISO_8859-2:1987",
"cs_CZ.iso88592: ISO_8859-2:1987",
"pl_PL: ISO_8859-2:1987",
"pl_PL.iso88592: ISO_8859-2:1987",
"ro_RO: ISO_8859-2:1987",
"ro_RO.iso88592: ISO_8859-2:1987",
"hr_HR: ISO_8859-2:1987",
"hr_HR.iso88592: ISO_8859-2:1987",
"sk_SK: ISO_8859-2:1987",
"sk_SK.iso88592: ISO_8859-2:1987",
"sl_SI: ISO_8859-2:1987",
"sl_SI.iso88592: ISO_8859-2:1987",
"american.iso88591: ISO_8859-1:1987",
"bulgarian: ISO_8859-2:1987",
"c-french.iso88591: ISO_8859-1:1987",
"chinese-s: GB2312",
"chinese-t.big5: Big5",
"czech: ISO_8859-2:1987",
"danish.iso88591: ISO_8859-1:1987",
"dutch.iso88591: ISO_8859-1:1987",
"english.iso88591: ISO_8859-1:1987",
"finnish.iso88591: ISO_8859-1:1987",
"french.iso88591: ISO_8859-1:1987",
"german.iso88591: ISO_8859-1:1987",
"hungarian: ISO_8859-2:1987",
"icelandic.iso88591: ISO_8859-1:1987",
"italian.iso88591: ISO_8859-1:1987",
"japanese.euc: Extended_UNIX_Code_Packed_Format_for_Japanese",
"japanese: Shift_JIS",
"katakana: Shift_JIS",
"korean: EUC-KR",
"norwegian.iso88591: ISO_8859-1:1987",
"polish: ISO_8859-2:1987",
"portuguese.iso88591: ISO_8859-1:1987",
"rumanian: ISO_8859-2:1987",
"serbocroatian: ISO_8859-2:1987",
"slovene: ISO_8859-2:1987",
"spanish.iso88591: ISO_8859-1:1987",
"swedish.iso88591: ISO_8859-1:1987",
NULL
};
#elif defined(AIX)
const char *CHARCONVTABLE[] =
{
"! This table maps the host's locale names to IANA charsets",
"!",
"C: ISO_8859-1:1987",
"En_JP.IBM-932: Shift_JIS",
"En_JP: Shift_JIS",
"Ja_JP.IBM-932: Shift_JIS",
"Ja_JP: Shift_JIS",
"da_DK.ISO8859-1: ISO_8859-1:1987",
"da_DK: ISO_8859-1:1987",
"de_CH.ISO8859-1: ISO_8859-1:1987",
"de_CH: ISO_8859-1:1987",
"de_DE.ISO8859-1: ISO_8859-1:1987",
"de_DE: ISO_8859-1:1987",
"en_GB.ISO8859-1: ISO_8859-1:1987",
"en_GB: ISO_8859-1:1987",
"en_JP.IBM-eucJP: Extended_UNIX_Code_Packed_Format_for_Japanese",
"en_JP: Extended_UNIX_Code_Packed_Format_for_Japanese",
"en_KR.IBM-eucKR: EUC-KR",
"en_KR: EUC-KR",
"en_TW.IBM-eucTW: cns11643_1",
"en_TW: cns11643_1",
"en_US.ISO8859-1: ISO_8859-1:1987",
"en_US: ISO_8859-1:1987",
"es_ES.ISO8859-1: ISO_8859-1:1987",
"es_ES: ISO_8859-1:1987",
"fi_FI.ISO8859-1: ISO_8859-1:1987",
"fi_FI: ISO_8859-1:1987",
"fr_BE.ISO8859-1: ISO_8859-1:1987",
"fr_BE: ISO_8859-1:1987",
"fr_CA.ISO8859-1: ISO_8859-1:1987",
"fr_CA: ISO_8859-1:1987",
"fr_CH.ISO8859-1: ISO_8859-1:1987",
"fr_CH: ISO_8859-1:1987",
"fr_FR.ISO8859-1: ISO_8859-1:1987",
"fr_FR: ISO_8859-1:1987",
"is_IS.ISO8859-1: ISO_8859-1:1987",
"is_IS: ISO_8859-1:1987",
"it_IT.ISO8859-1: ISO_8859-1:1987",
"it_IT: ISO_8859-1:1987",
"ja_JP.IBM-eucJP: Extended_UNIX_Code_Packed_Format_for_Japanese",
"ja_JP: Extended_UNIX_Code_Packed_Format_for_Japanese",
"ko_KR.IBM-eucKR: EUC-KR",
"ko_KR: EUC-KR",
"nl_BE.ISO8859-1: ISO_8859-1:1987",
"nl_BE: ISO_8859-1:1987",
"nl_NL.ISO8859-1: ISO_8859-1:1987",
"nl_NL: ISO_8859-1:1987",
"no_NO.ISO8859-1: ISO_8859-1:1987",
"no_NO: ISO_8859-1:1987",
"pt_PT.ISO8859-1: ISO_8859-1:1987",
"pt_PT: ISO_8859-1:1987",
"sv_SE.ISO8859-1: ISO_8859-1:1987",
"sv_SE: ISO_8859-1:1987",
"zh_TW.IBM-eucTW: cns11643_1",
"zh_TW: cns11643_1",
NULL
};
#else // sunos by default
const char *CHARCONVTABLE[] =
{
"! This table maps the host's locale names to IANA charsets",
"!",
"C: ISO_8859-1:1987",
"de: ISO_8859-1:1987",
"en_US: ISO_8859-1:1987",
"es: ISO_8859-1:1987",
"fr: ISO_8859-1:1987",
"iso_8859_1: ISO_8859-1:1987",
"it: ISO_8859-1:1987",
"ja: Extended_UNIX_Code_Packed_Format_for_Japanese",
"ja_JP.EUC: Extended_UNIX_Code_Packed_Format_for_Japanese",
"japanese: Extended_UNIX_Code_Packed_Format_for_Japanese",
"ko: EUC-KR",
"sv: ISO_8859-1:1987",
"zh: GB2312",
"zh_TW: cns11643_1",
NULL
};
#endif
#define BSZ 256
char *
GetCharsetFromLocale(char *locale)
{
char *tmpcharset = NULL;
char buf[BSZ];
char *p;
const char *line;
int i=0;
line = CHARCONVTABLE[i];
while (line != NULL)
{
if (*line == 0)
{
break;
}
strcpy(buf, line);
line = CHARCONVTABLE[++i];
if (strlen(buf) == 0 || buf[0] == '!')
{
continue;
}
p = strchr(buf, ':');
if (p == NULL)
{
tmpcharset = NULL;
break;
}
*p = 0;
if (strcmp(buf, locale) == 0) {
while (*++p == ' ' || *p == '\t')
;
if (isalpha(*p)) {
tmpcharset = strdup(p);
} else
tmpcharset = NULL;
break;
}
}
return tmpcharset;
}
#endif /* Not defined XP_WIN32 */
#ifdef XP_WIN32
char *_convertor(const char *instr, int bFromUTF8)
{
char *outstr = NULL;
int inlen, wclen, outlen;
LPWSTR wcstr;
if (instr == NULL)
return NULL;
if ((inlen = strlen(instr)) <= 0)
return NULL;
/* output never becomes longer than input,
* thus we don't have to ask for the length
*/
wcstr = (LPWSTR) malloc( sizeof( WCHAR ) * (inlen+1) );
if (!wcstr)
return NULL;
wclen = MultiByteToWideChar(bFromUTF8 ? CP_UTF8 : CP_ACP, 0, instr,
inlen, wcstr, inlen);
outlen = WideCharToMultiByte(bFromUTF8 ? CP_ACP : CP_UTF8, 0, wcstr,
wclen, NULL, 0, NULL, NULL);
if (outlen > 0) {
outstr = (char *) malloc(outlen + 2);
outlen = WideCharToMultiByte(bFromUTF8 ? CP_ACP : CP_UTF8, 0, wcstr,
wclen, outstr, outlen, NULL, NULL);
if (outlen > 0)
*(outstr+outlen) = _T('\0');
else
return NULL;
}
free( wcstr );
return outstr;
}
#endif
char *
ldaptool_local2UTF8( const char *src )
{
char *utf8;
#ifndef XP_WIN32
char *locale, *newcharset;
size_t outLen, resultLen;
UErrorCode err = U_ZERO_ERROR;
UConverter *cnv;
if (src == NULL)
{
return NULL;
}
else if (*src == 0 || (ldaptool_charset == NULL)
|| (!strcmp( ldaptool_charset, "" )))
{
/* no option specified, so assume it's already in utf-8 */
utf8 = strdup(src);
return utf8;
}
if( !strcmp( ldaptool_charset, "0" )
&& (!charsetset) )
{
/* zero option specified, so try to get default codepage
this sucker is strdup'd immediately so it's OK to cast */
newcharset = (char *)ucnv_getDefaultName();
if (newcharset != NULL) {
free( ldaptool_charset );
/* the default codepage lives in ICU */
ldaptool_charset = strdup(newcharset);
if (ldaptool_charset == NULL) {
return strdup(src);
}
}
charsetset = 1;
}
else
if( strcmp( ldaptool_charset, "" ) && (!charsetset) )
{
/* -i option specified with charset name */
charsetset = 1;
}
/* do the preflight - get the size needed for the target buffer */
outLen = (size_t) ucnv_convert( "utf-8", ldaptool_charset, NULL, 0, src,
strlen( src ) * sizeof(char), &err);
if ((err != U_BUFFER_OVERFLOW_ERROR) || (outLen == 0)) {
/* default to just a copy of the string - this covers
the case of an illegal charset also */
return strdup(src);
}
utf8 = (char *) malloc( outLen + 1);
if( utf8 == NULL ) {
/* if we're already out of memory, does strdup just return NULL? */
return strdup(src);
}
/* do the actual conversion this time */
err = U_ZERO_ERROR;
resultLen = ucnv_convert( "utf-8", ldaptool_charset, utf8, (outLen + 1), src,
strlen(src) * sizeof(char), &err );
if (!U_SUCCESS(err)) {
free(utf8);
return strdup(src);
}
#else
utf8 = _convertor(src, FALSE);
if( utf8 == NULL )
utf8 = strdup(src);
#endif
return utf8;
}
#endif /* HAVE_LIBICU */
#ifndef HAVE_LIBICU
#ifdef __cplusplus
}
#endif
#endif