charsets.c revision 4bff34e37def8a90f9194d81bc345c52ba20086a
4bff34e37def8a90f9194d81bc345c52ba20086athurlow/*
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * Copyright (c) 2001 Apple Computer, Inc. All rights reserved.
4bff34e37def8a90f9194d81bc345c52ba20086athurlow *
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * @APPLE_LICENSE_HEADER_START@
4bff34e37def8a90f9194d81bc345c52ba20086athurlow *
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * "Portions Copyright (c) 1999 Apple Computer, Inc. All Rights
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * Reserved. This file contains Original Code and/or Modifications of
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * Original Code as defined in and that are subject to the Apple Public
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * Source License Version 1.0 (the 'License'). You may not use this file
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * except in compliance with the License. Please obtain a copy of the
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * License at http://www.apple.com/publicsource and read it before using
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * this file.
4bff34e37def8a90f9194d81bc345c52ba20086athurlow *
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * The Original Code and all software distributed under the License are
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * License for the specific language governing rights and limitations
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * under the License."
4bff34e37def8a90f9194d81bc345c52ba20086athurlow *
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * @APPLE_LICENSE_HEADER_END@
4bff34e37def8a90f9194d81bc345c52ba20086athurlow */
4bff34e37def8a90f9194d81bc345c52ba20086athurlow/* @(#)charsets.c *
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * (c) 2004 Apple Computer, Inc. All Rights Reserved
4bff34e37def8a90f9194d81bc345c52ba20086athurlow *
4bff34e37def8a90f9194d81bc345c52ba20086athurlow *
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * charsets.c -- Routines converting between UTF-8, 16-bit
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * little-endian Unicode, and various Windows
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * code pages.
4bff34e37def8a90f9194d81bc345c52ba20086athurlow *
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * MODIFICATION HISTORY:
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * 28-Nov-2004 Guy Harris New today
4bff34e37def8a90f9194d81bc345c52ba20086athurlow */
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow#pragma ident "%Z%%M% %I% %E% SMI"
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow#include <stdlib.h>
4bff34e37def8a90f9194d81bc345c52ba20086athurlow#include <stdio.h>
4bff34e37def8a90f9194d81bc345c52ba20086athurlow#include <string.h>
4bff34e37def8a90f9194d81bc345c52ba20086athurlow#include <ctype.h>
4bff34e37def8a90f9194d81bc345c52ba20086athurlow#include <iconv.h>
4bff34e37def8a90f9194d81bc345c52ba20086athurlow#include <langinfo.h>
4bff34e37def8a90f9194d81bc345c52ba20086athurlow#include <strings.h>
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow#ifdef NOTPORTED
4bff34e37def8a90f9194d81bc345c52ba20086athurlow#include <CoreFoundation/CoreFoundation.h>
4bff34e37def8a90f9194d81bc345c52ba20086athurlow#include <CoreFoundation/CFStringDefaultEncoding.h>
4bff34e37def8a90f9194d81bc345c52ba20086athurlow#include <CoreFoundation/CFStringEncodingConverter.h>
4bff34e37def8a90f9194d81bc345c52ba20086athurlow#include <sys/mchain.h>
4bff34e37def8a90f9194d81bc345c52ba20086athurlow#endif /* NOTPORTED */
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow#include <netsmb/smb_lib.h>
4bff34e37def8a90f9194d81bc345c52ba20086athurlow#include <netsmb/mchain.h>
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow#include "charsets.h"
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow#ifdef NOTPORTED
4bff34e37def8a90f9194d81bc345c52ba20086athurlowextern uid_t real_uid,eff_uid;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow#endif /* NOTPORTED */
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow/*
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * On Solaris, we will need to do some rewriting to use our iconv
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * routines for the conversions. For now, we're effectively
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * stubbing out code, leaving the details of what happens on
4bff34e37def8a90f9194d81bc345c52ba20086athurlow * Darwin in case it's useful as a guide later.
4bff34e37def8a90f9194d81bc345c52ba20086athurlow */
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * Returns 0-15 for '0'-'9', 'a'-'f' and 'A'-'F', and 16 for every
 * other character, so a caller can reject a bad digit with "> 15".
 *
 * Plain range comparisons are used instead of the <ctype.h>
 * classifiers: handing isdigit() and friends a negative char (any
 * byte >= 0x80 where char is signed) is undefined behavior, and URL
 * escapes are routinely applied to exactly such bytes.
 */
static unsigned
xtoi(char u)
{
	if (u >= '0' && u <= '9')
		return ((unsigned)(u - '0'));
	if (u >= 'a' && u <= 'f')
		return (10u + (unsigned)(u - 'a'));
	if (u >= 'A' && u <= 'F')
		return (10u + (unsigned)(u - 'A'));
	return (16);
}


/*
 * Removes the "%" escape sequences from a URL component, in place.
 * See IETF RFC 2396.
 *
 * Every "%XY" where X and Y are both hex digits is replaced by the
 * single byte with value 0xXY.  A '%' that is not followed by two hex
 * digits is not an escape and is copied through unchanged.  Decoded
 * bytes are never re-examined, so "%2541" becomes "%41", not "A".
 *
 * component	NUL-terminated, writable string; may be NULL.
 *
 * Returns component (NULL if component was NULL).
 */
char *
unpercent(char * component)
{
	char *src, *dst;
	unsigned hi, lo;

	if (component == NULL)
		return (NULL);

	/*
	 * Single pass with a read and a write cursor.  The output can
	 * only be shorter than the input, so dst never overtakes src
	 * and the tail does not need to be shifted after each escape.
	 */
	for (src = dst = component; *src != '\0'; ) {
		/*
		 * The && chain stops at the first non-hex character, so
		 * src[2] is never read when src[1] is the terminator.
		 */
		if (src[0] == '%' &&
		    (hi = xtoi(src[1])) <= 15 &&
		    (lo = xtoi(src[2])) <= 15) {
			*dst++ = (char)(hi * 16 + lo);
			src += 3;
		} else {
			*dst++ = *src++;
		}
	}
	*dst = '\0';

	return (component);
}
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow#ifdef NOTPORTED
4bff34e37def8a90f9194d81bc345c52ba20086athurlowstatic CFStringEncoding
4bff34e37def8a90f9194d81bc345c52ba20086athurlowget_windows_encoding_equivalent( void )
4bff34e37def8a90f9194d81bc345c52ba20086athurlow{
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow CFStringEncoding encoding;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow uint32_t index,region;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow /* important! use root ID so you can read the config file! */
4bff34e37def8a90f9194d81bc345c52ba20086athurlow seteuid(eff_uid);
4bff34e37def8a90f9194d81bc345c52ba20086athurlow __CFStringGetInstallationEncodingAndRegion(&index,&region);
4bff34e37def8a90f9194d81bc345c52ba20086athurlow seteuid(real_uid);
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow switch ( index )
4bff34e37def8a90f9194d81bc345c52ba20086athurlow {
4bff34e37def8a90f9194d81bc345c52ba20086athurlow case kCFStringEncodingMacRoman:
4bff34e37def8a90f9194d81bc345c52ba20086athurlow if (region) /* anything nonzero is not US */
4bff34e37def8a90f9194d81bc345c52ba20086athurlow encoding = kCFStringEncodingDOSLatin1;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow else /* US region */
4bff34e37def8a90f9194d81bc345c52ba20086athurlow encoding = kCFStringEncodingDOSLatinUS;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow break;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow case kCFStringEncodingMacJapanese:
4bff34e37def8a90f9194d81bc345c52ba20086athurlow encoding = kCFStringEncodingDOSJapanese;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow break;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow case kCFStringEncodingMacChineseTrad:
4bff34e37def8a90f9194d81bc345c52ba20086athurlow encoding = kCFStringEncodingDOSChineseTrad;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow break;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow case kCFStringEncodingMacKorean:
4bff34e37def8a90f9194d81bc345c52ba20086athurlow encoding = kCFStringEncodingDOSKorean;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow break;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow case kCFStringEncodingMacArabic:
4bff34e37def8a90f9194d81bc345c52ba20086athurlow encoding = kCFStringEncodingDOSArabic;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow break;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow case kCFStringEncodingMacHebrew:
4bff34e37def8a90f9194d81bc345c52ba20086athurlow encoding = kCFStringEncodingDOSHebrew;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow break;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow case kCFStringEncodingMacGreek:
4bff34e37def8a90f9194d81bc345c52ba20086athurlow encoding = kCFStringEncodingDOSGreek;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow break;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow case kCFStringEncodingMacCyrillic:
4bff34e37def8a90f9194d81bc345c52ba20086athurlow encoding = kCFStringEncodingDOSCyrillic;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow break;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow case kCFStringEncodingMacThai:
4bff34e37def8a90f9194d81bc345c52ba20086athurlow encoding = kCFStringEncodingDOSThai;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow break;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow case kCFStringEncodingMacChineseSimp:
4bff34e37def8a90f9194d81bc345c52ba20086athurlow encoding = kCFStringEncodingDOSChineseSimplif;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow break;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow case kCFStringEncodingMacCentralEurRoman:
4bff34e37def8a90f9194d81bc345c52ba20086athurlow encoding = kCFStringEncodingDOSLatin2;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow break;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow case kCFStringEncodingMacTurkish:
4bff34e37def8a90f9194d81bc345c52ba20086athurlow encoding = kCFStringEncodingDOSTurkish;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow break;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow case kCFStringEncodingMacCroatian:
4bff34e37def8a90f9194d81bc345c52ba20086athurlow encoding = kCFStringEncodingDOSLatin2;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow break;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow case kCFStringEncodingMacIcelandic:
4bff34e37def8a90f9194d81bc345c52ba20086athurlow encoding = kCFStringEncodingDOSIcelandic;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow break;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow case kCFStringEncodingMacRomanian:
4bff34e37def8a90f9194d81bc345c52ba20086athurlow encoding = kCFStringEncodingDOSLatin2;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow break;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow case kCFStringEncodingMacFarsi:
4bff34e37def8a90f9194d81bc345c52ba20086athurlow encoding = kCFStringEncodingDOSArabic;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow break;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow case kCFStringEncodingMacUkrainian:
4bff34e37def8a90f9194d81bc345c52ba20086athurlow encoding = kCFStringEncodingDOSCyrillic;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow break;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow default:
4bff34e37def8a90f9194d81bc345c52ba20086athurlow encoding = kCFStringEncodingDOSLatin1;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow break;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow }
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
4bff34e37def8a90f9194d81bc345c52ba20086athurlow return encoding;
4bff34e37def8a90f9194d81bc345c52ba20086athurlow}
4bff34e37def8a90f9194d81bc345c52ba20086athurlow#endif /* NOTPORTED */
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
/*
 * Convert a string in the Windows code page to UTF-8.
 *
 * XXX - NLS, or CF?  We should probably use the same routine for all
 * conversions.
 *
 * With NOTPORTED defined (the Darwin reference code) the result is a
 * malloc()ed buffer, or NULL if the conversion or the allocation
 * fails.
 *
 * Without NOTPORTED this is still a stub: no conversion is done and
 * the argument itself is returned with its const qualifier cast away.
 * NOTE(review): the two variants therefore disagree on who owns the
 * returned pointer; callers must not free() the stub's result.
 */
char *
convert_wincs_to_utf8(const char *windows_string)
{
#ifdef NOTPORTED
	CFStringRef s;
	CFIndex maxlen;
	char *result;

	s = CFStringCreateWithCString(NULL, windows_string,
	    get_windows_encoding_equivalent());
	if (s == NULL) {
		smb_error("CFStringCreateWithCString for Windows code page failed on \"%s\" ", -1,
		    windows_string);

		/* kCFStringEncodingMacRoman should always succeed */
		s = CFStringCreateWithCString(NULL, windows_string,
		    kCFStringEncodingMacRoman);
		if (s == NULL) {
			smb_error("CFStringCreateWithCString for Windows code page failed on \"%s\" with kCFStringEncodingMacRoman - skipping",
			    -1, windows_string);
			return NULL;
		}
	}

	/* +1 leaves room for the terminating NUL */
	maxlen = CFStringGetMaximumSizeForEncoding(CFStringGetLength(s),
	    kCFStringEncodingUTF8) + 1;
	result = malloc(maxlen);
	if (result == NULL) {
		smb_error("Couldn't allocate buffer for UTF-8 string for \"%s\" - skipping", -1,
		    windows_string);
		CFRelease(s);
		return NULL;
	}
	if (!CFStringGetCString(s, result, maxlen, kCFStringEncodingUTF8)) {
		smb_error("CFStringGetCString for UTF-8 failed on \"%s\" - skipping",
		    -1, windows_string);
		/* don't leak the output buffer on the failure path */
		free(result);
		CFRelease(s);
		return NULL;
	}
	CFRelease(s);
	return result;
#else /* NOTPORTED */
	/* Stub: hand the input back unconverted. */
	return ((char *)windows_string);
#endif /* NOTPORTED */
}
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
/*
 * Convert a UTF-8 string to the Windows code page.
 *
 * XXX - NLS, or CF?  We should probably use the same routine for all
 * conversions.
 *
 * With NOTPORTED defined (the Darwin reference code) the result is a
 * malloc()ed buffer, or NULL if the conversion or the allocation
 * fails.
 *
 * Without NOTPORTED this is still a stub: no conversion is done and
 * the argument itself is returned with its const qualifier cast away.
 * NOTE(review): the two variants therefore disagree on who owns the
 * returned pointer; callers must not free() the stub's result.
 */
char *
convert_utf8_to_wincs(const char *utf8_string)
{
#ifdef NOTPORTED
	CFStringRef s;
	CFStringEncoding encoding;
	CFIndex maxlen;
	char *result;

	s = CFStringCreateWithCString(NULL, utf8_string,
	    kCFStringEncodingUTF8);
	if (s == NULL) {
		smb_error("CFStringCreateWithCString for UTF-8 failed on \"%s\"", -1,
		    utf8_string);
		return NULL;
	}

	/*
	 * Look the target encoding up once: each lookup switches the
	 * effective uid back and forth, and both uses below must agree.
	 */
	encoding = get_windows_encoding_equivalent();

	/* +1 leaves room for the terminating NUL */
	maxlen = CFStringGetMaximumSizeForEncoding(CFStringGetLength(s),
	    encoding) + 1;
	result = malloc(maxlen);
	if (result == NULL) {
		smb_error("Couldn't allocate buffer for Windows code page string for \"%s\" - skipping", -1,
		    utf8_string);
		CFRelease(s);
		return NULL;
	}
	if (!CFStringGetCString(s, result, maxlen, encoding)) {
		smb_error("CFStringGetCString for Windows code page failed on \"%s\" - skipping",
		    -1, utf8_string);
		/* don't leak the output buffer on the failure path */
		free(result);
		CFRelease(s);
		return NULL;
	}
	CFRelease(s);
	return result;
#else /* NOTPORTED */
	/* Stub: hand the input back unconverted. */
	return ((char *)utf8_string);
#endif /* NOTPORTED */
}
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
/*
 * Convert a zero-terminated little-endian 16-bit Unicode string to
 * UTF-8.
 *
 * Side effect: every 16-bit unit of unicode_string before the
 * terminator is rewritten in place through letohs(), i.e. the caller's
 * buffer ends up in host byte order.
 *
 * Returns whatever convert_unicode_to_utf8() returns for the swapped
 * string and its length in bytes (terminator not counted).
 */
char *
convert_leunicode_to_utf8(unsigned short *unicode_string)
{
	unsigned short *wp = unicode_string;
	int nbytes = 0;

	while (*wp != 0) {
		*wp = letohs(*wp);
		wp++;
		nbytes += 2;	/* two bytes per 16-bit unit */
	}

	return (convert_unicode_to_utf8(unicode_string, nbytes));
}
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
/*
 * Convert a 16-bit Unicode (UTF-16) string to UTF-8 using iconv.
 *
 * unicode_string	UTF-16 code units; expected to be in host byte
 *			order (convert_leunicode_to_utf8() swaps its
 *			input to host order before calling here).
 * len			length of unicode_string in BYTES, not units.
 *
 * Returns a NUL-terminated, malloc()ed UTF-8 string that the caller
 * must free(), or NULL if the arguments are invalid, memory cannot be
 * allocated, no converter is available, or the input is not valid
 * UTF-16 (including an odd trailing byte).
 *
 * This used to copy the input into a BUFSIZ-sized stack buffer with
 * no length check and return a pointer into a second stack buffer,
 * which overflowed on long input and handed the caller a dangling
 * pointer.  The output is now heap-allocated and sized from len.
 */
char *
convert_unicode_to_utf8(unsigned short *unicode_string, int len)
{
	static const unsigned short endian_probe = 1;
	const char *from_code;
	const char *fptr;
	char *out, *tptr;
	size_t ileft, oleft, outsz;
	iconv_t cd;

	if (unicode_string == NULL || len < 0)
		return (NULL);

	/*
	 * Plain "UTF-16" input with no byte-order mark is decoded with
	 * an implementation-dependent byte order, so name the host
	 * order explicitly.  Fall back to the generic name only if the
	 * explicit one is not supported.
	 */
	from_code = (*(const unsigned char *)&endian_probe == 1) ?
	    "UTF-16LE" : "UTF-16BE";
	cd = iconv_open("UTF-8", from_code);
	if (cd == (iconv_t)-1)
		cd = iconv_open("UTF-8", "UTF-16");
	if (cd == (iconv_t)-1)
		return (NULL);

	/*
	 * Worst case is three UTF-8 bytes per 16-bit unit (a surrogate
	 * pair is two units and only four bytes); +1 for the NUL.
	 */
	outsz = ((size_t)len / 2) * 3 + 1;
	out = malloc(outsz);
	if (out == NULL) {
		(void) iconv_close(cd);
		return (NULL);
	}

	fptr = (const char *)unicode_string;
	ileft = (size_t)len;
	tptr = out;
	oleft = outsz - 1;		/* keep one byte for the NUL */

	/*
	 * The second argument is "const char **" on some systems and
	 * "char **" on others; going through void * satisfies both.
	 */
	if (iconv(cd, (void *)&fptr, &ileft, &tptr, &oleft) == (size_t)-1) {
		free(out);
		out = NULL;
	} else {
		*tptr = '\0';
	}

	(void) iconv_close(cd);
	return (out);
}
4bff34e37def8a90f9194d81bc345c52ba20086athurlow
/*
 * Convert UTF-8 string to little-endian Unicode.
 *
 * utf8_string	NUL-terminated UTF-8 text.
 *
 * Returns a malloc()ed array of 16-bit units in little-endian byte
 * order, terminated by a zero unit, which the caller must free();
 * or NULL if utf8_string is NULL, memory cannot be allocated, no
 * converter is available, or the input is not valid UTF-8.
 *
 * The non-Darwin variant used to return utf8_string itself cast to a
 * 16-bit pointer, i.e. UTF-8 bytes mislabelled as Unicode (and
 * possibly misaligned).  It now does the conversion with iconv, so
 * both variants hand back an allocated buffer.
 */
unsigned short *
convert_utf8_to_leunicode(const char *utf8_string)
{
#ifdef NOTPORTED
	CFStringRef s;
	CFIndex maxlen;
	unsigned short *result;
	CFRange range;
	int i;

	s = CFStringCreateWithCString(NULL, utf8_string,
	    kCFStringEncodingUTF8);
	if (s == NULL) {
		smb_error("CFStringCreateWithCString for UTF-8 failed on \"%s\"", -1,
		    utf8_string);
		return NULL;
	}

	maxlen = CFStringGetLength(s);
	/* two bytes per unit, plus one unit for the terminator */
	result = malloc(2*(maxlen + 1));
	if (result == NULL) {
		smb_error("Couldn't allocate buffer for Unicode string for \"%s\" - skipping", -1,
		    utf8_string);
		CFRelease(s);
		return NULL;
	}
	range.location = 0;
	range.length = maxlen;
	CFStringGetCharacters(s, range, result);
	for (i = 0; i < maxlen; i++)
		result[i] = CFSwapInt16HostToLittle(result[i]);
	result[maxlen] = 0;
	CFRelease(s);
	return result;
#else /* NOTPORTED */
	unsigned short *result;
	const char *fptr;
	char *tptr;
	size_t ileft, oleft, maxunits, outbytes;
	iconv_t cd;

	if (utf8_string == NULL)
		return (NULL);

	/* "UTF-16LE" fixes the byte order and emits no byte-order mark */
	cd = iconv_open("UTF-16LE", "UTF-8");
	if (cd == (iconv_t)-1)
		return (NULL);

	/*
	 * No UTF-8 sequence yields more 16-bit units than it has bytes
	 * (1-3 bytes -> one unit, 4 bytes -> two units), so the input
	 * byte count bounds the output unit count.  calloc() checks
	 * the size multiplication; +1 is the terminating zero unit.
	 */
	ileft = strlen(utf8_string);
	maxunits = ileft;
	result = calloc(maxunits + 1, sizeof (*result));
	if (result == NULL) {
		(void) iconv_close(cd);
		return (NULL);
	}
	outbytes = maxunits * sizeof (*result);

	fptr = utf8_string;
	tptr = (char *)result;
	oleft = outbytes;

	/*
	 * The second argument is "const char **" on some systems and
	 * "char **" on others; going through void * satisfies both.
	 */
	if (iconv(cd, (void *)&fptr, &ileft, &tptr, &oleft) == (size_t)-1) {
		free(result);
		result = NULL;
	} else {
		result[(outbytes - oleft) / sizeof (*result)] = 0;
	}

	(void) iconv_close(cd);
	return (result);
#endif /* NOTPORTED */
}