ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry/*
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * Use is subject to license terms.
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry/*
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * util/support/utf8_conv.c
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * Copyright 2008 by the Massachusetts Institute of Technology.
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * All Rights Reserved.
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * Export of this software from the United States of America may
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * require a specific license from the United States Government.
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * It is the responsibility of any person or organization contemplating
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * export to obtain such a license before exporting.
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * distribute this software and its documentation for any purpose and
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * without fee is hereby granted, provided that the above copyright
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * notice appear in all copies and that both that copyright notice and
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * this permission notice appear in supporting documentation, and that
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * the name of M.I.T. not be used in advertising or publicity pertaining
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * to distribution of the software without specific, written prior
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * permission. Furthermore if you modify this software you must label
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * your software as modified software and not distribute it in such a
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * fashion that it might be confused with the original M.I.T. software.
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * M.I.T. makes no representations about the suitability of
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * this software for any purpose. It is provided "as is" without express
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * or implied warranty.
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * Copyright 1998-2008 The OpenLDAP Foundation.
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * All rights reserved.
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * Redistribution and use in source and binary forms, with or without
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * modification, are permitted only as authorized by the OpenLDAP
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * Public License.
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * A copy of this license is available in the file LICENSE in the
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * top-level directory of the distribution or, alternatively, at
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * <http://www.OpenLDAP.org/license.html>.
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry/* Portions Copyright (C) 1999, 2000 Novell, Inc. All Rights Reserved.
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * THIS WORK IS SUBJECT TO U.S. AND INTERNATIONAL COPYRIGHT LAWS AND
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * TREATIES. USE, MODIFICATION, AND REDISTRIBUTION OF THIS WORK IS SUBJECT
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * TO VERSION 2.0.1 OF THE OPENLDAP PUBLIC LICENSE, A COPY OF WHICH IS
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * AVAILABLE AT HTTP://WWW.OPENLDAP.ORG/LICENSE.HTML OR IN THE FILE "LICENSE"
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * IN THE TOP-LEVEL DIRECTORY OF THE DISTRIBUTION. ANY USE OR EXPLOITATION
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * OF THIS WORK OTHER THAN AS AUTHORIZED IN VERSION 2.0.1 OF THE OPENLDAP
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * PUBLIC LICENSE, OR OTHER PRIOR WRITTEN CONSENT FROM NOVELL, COULD SUBJECT
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * THE PERPETRATOR TO CRIMINAL AND CIVIL LIABILITY.
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry/*
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * UTF-8 Conversion Routines
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * These routines convert between Wide Character and UTF-8,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * or between MultiByte and UTF-8 encodings.
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry * Both single character and string versions of the functions are provided.
 * The static helpers in this file return -1 if the string cannot be
 * converted; the exported krb5int_* string functions return 0 on success
 * or an errno value (EINVAL, ENOMEM or ERANGE) on failure.
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry#include "k5-platform.h"
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry#include "k5-utf8.h"
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry#include "supp-int.h"
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry#include "errno.h" /* SUNW17PACresync */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
/*
 * Payload-bit masks for the first byte of a UTF-8 sequence, indexed by the
 * sequence length in bytes.  ANDing the lead byte with mask[len] strips the
 * length tag and leaves only the data bits.  Index 0 is a placeholder: a
 * length of 0 is rejected before the table is consulted.
 */
static const unsigned char mask[] = { 0, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barrystatic ssize_t
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barryk5_utf8s_to_ucs2s(krb5_ucs2 *ucs2str,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry const char *utf8str,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry size_t count,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry int little_endian)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry{
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry size_t ucs2len = 0;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry size_t utflen, i;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry krb5_ucs2 ch;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry /* If input ptr is NULL or empty... */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (utf8str == NULL || *utf8str == '\0') {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *ucs2str = 0;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return 0;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry /* Examine next UTF-8 character. */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry while (*utf8str && ucs2len < count) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry /* Get UTF-8 sequence length from 1st byte */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry utflen = KRB5_UTF8_CHARLEN2(utf8str, utflen);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (utflen == 0 || utflen > KRB5_MAX_UTF8_LEN)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return -1;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry /* First byte minus length tag */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry ch = (krb5_ucs2)(utf8str[0] & mask[utflen]);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry for (i = 1; i < utflen; i++) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry /* Subsequent bytes must start with 10 */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if ((utf8str[i] & 0xc0) != 0x80)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return -1;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry ch <<= 6; /* 6 bits of data in each subsequent byte */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry ch |= (krb5_ucs2)(utf8str[i] & 0x3f);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (ucs2str != NULL) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry#ifdef K5_BE
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry#ifndef SWAP16
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry#define SWAP16(X) ((((X) << 8) | ((X) >> 8)) & 0xFFFF)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry#endif
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (little_endian)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry ucs2str[ucs2len] = SWAP16(ch);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry else
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry#endif
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry ucs2str[ucs2len] = ch;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry utf8str += utflen; /* Move to next UTF-8 character */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry ucs2len++; /* Count number of wide chars stored/required */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry assert(ucs2len < count);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (ucs2str != NULL) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry /* Add null terminator if there's room in the buffer. */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry ucs2str[ucs2len] = 0;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return ucs2len;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry}
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barryint
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barrykrb5int_utf8s_to_ucs2s(const char *utf8s,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry krb5_ucs2 **ucs2s,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry size_t *ucs2chars)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry{
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry ssize_t len;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry size_t chars;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry chars = krb5int_utf8_chars(utf8s);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *ucs2s = (krb5_ucs2 *)malloc((chars + 1) * sizeof(krb5_ucs2));
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (*ucs2s == NULL) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return ENOMEM;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry len = k5_utf8s_to_ucs2s(*ucs2s, utf8s, chars + 1, 0);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (len < 0) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry free(*ucs2s);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *ucs2s = NULL;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return EINVAL;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (ucs2chars != NULL) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *ucs2chars = chars;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return 0;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry}
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barryint
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barrykrb5int_utf8cs_to_ucs2s(const char *utf8s,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry size_t utf8slen,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry krb5_ucs2 **ucs2s,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry size_t *ucs2chars)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry{
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry ssize_t len;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry size_t chars;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry chars = krb5int_utf8c_chars(utf8s, utf8slen);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *ucs2s = (krb5_ucs2 *)malloc((chars + 1) * sizeof(krb5_ucs2));
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (*ucs2s == NULL) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return ENOMEM;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry len = k5_utf8s_to_ucs2s(*ucs2s, utf8s, chars + 1, 0);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (len < 0) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry free(*ucs2s);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *ucs2s = NULL;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return EINVAL;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (ucs2chars != NULL) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *ucs2chars = chars;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return 0;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry}
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barryint
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barrykrb5int_utf8s_to_ucs2les(const char *utf8s,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry unsigned char **ucs2les,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry size_t *ucs2leslen)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry{
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry ssize_t len;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry size_t chars;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry chars = krb5int_utf8_chars(utf8s);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *ucs2les = (unsigned char *)malloc((chars + 1) * sizeof(krb5_ucs2));
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (*ucs2les == NULL) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return ENOMEM;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry len = k5_utf8s_to_ucs2s((krb5_ucs2 *)*ucs2les, utf8s, chars + 1, 1);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (len < 0) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry free(*ucs2les);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *ucs2les = NULL;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return EINVAL;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (ucs2leslen != NULL) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *ucs2leslen = chars * sizeof(krb5_ucs2);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return 0;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry}
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barryint
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barrykrb5int_utf8cs_to_ucs2les(const char *utf8s,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry size_t utf8slen,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry unsigned char **ucs2les,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry size_t *ucs2leslen)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry{
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry ssize_t len;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry size_t chars;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry chars = krb5int_utf8c_chars(utf8s, utf8slen);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *ucs2les = (unsigned char *)malloc((chars + 1) * sizeof(krb5_ucs2));
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (*ucs2les == NULL) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return ENOMEM;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry len = k5_utf8s_to_ucs2s((krb5_ucs2 *)*ucs2les, utf8s, chars + 1, 1);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (len < 0) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry free(*ucs2les);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *ucs2les = NULL;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return EINVAL;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (ucs2leslen != NULL) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *ucs2leslen = chars * sizeof(krb5_ucs2);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return 0;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry}
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry/*-----------------------------------------------------------------------------
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry Convert a wide char string to a UTF-8 string.
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry No more than 'count' bytes will be written to the output buffer.
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry Return the # of bytes written to the output buffer, excl null terminator.
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry ucs2len is -1 if the UCS-2 string is NUL terminated, otherwise it is the
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry length of the UCS-2 string in characters
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry*/
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barrystatic ssize_t
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barryk5_ucs2s_to_utf8s(char *utf8str, const krb5_ucs2 *ucs2str,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry size_t count, ssize_t ucs2len, int little_endian)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry{
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry int len = 0;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry int n;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry char *p = utf8str;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry krb5_ucs2 empty = 0, ch;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (ucs2str == NULL) /* Treat input ptr NULL as an empty string */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry ucs2str = &empty;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (utf8str == NULL) /* Just compute size of output, excl null */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry while (ucs2len == -1 ? *ucs2str : --ucs2len >= 0) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry /* Get UTF-8 size of next wide char */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry ch = *ucs2str++;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry#ifdef K5_BE
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (little_endian)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry ch = SWAP16(ch);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry#endif
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry n = krb5int_ucs2_to_utf8(ch, NULL);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (n < 1)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return -1;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (len + n < len)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return -1; /* overflow */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry len += n;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return len;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry /* Do the actual conversion. */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry n = 1; /* In case of empty ucs2str */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry while (ucs2len == -1 ? *ucs2str != 0 : --ucs2len >= 0) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry ch = *ucs2str++;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry#ifdef K5_BE
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (little_endian)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry ch = SWAP16(ch);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry#endif
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry n = krb5int_ucs2_to_utf8(ch, p);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (n < 1)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry break;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry p += n;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry count -= n; /* Space left in output buffer */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry /* If not enough room for last character, pad remainder with null
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry so that return value = original count, indicating buffer full. */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (n == 0) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry while (count--)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *p++ = 0;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry /* Add a null terminator if there's room. */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry else if (count)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *p = 0;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (n == -1) /* Conversion encountered invalid wide char. */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return -1;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry /* Return the number of bytes written to output buffer, excl null. */
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return (p - utf8str);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry}
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barryint
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barrykrb5int_ucs2s_to_utf8s(const krb5_ucs2 *ucs2s,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry char **utf8s,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry size_t *utf8slen)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry{
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry ssize_t len;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry len = k5_ucs2s_to_utf8s(NULL, ucs2s, 0, -1, 0);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (len < 0) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return EINVAL;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *utf8s = (char *)malloc((size_t)len + 1);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (*utf8s == NULL) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return ENOMEM;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry len = k5_ucs2s_to_utf8s(*utf8s, ucs2s, (size_t)len + 1, -1, 0);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (len < 0) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry free(*utf8s);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *utf8s = NULL;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return EINVAL;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (utf8slen != NULL) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *utf8slen = len;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return 0;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry}
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barryint
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barrykrb5int_ucs2les_to_utf8s(const unsigned char *ucs2les,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry char **utf8s,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry size_t *utf8slen)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry{
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry ssize_t len;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry len = k5_ucs2s_to_utf8s(NULL, (krb5_ucs2 *)ucs2les, 0, -1, 1);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (len < 0)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return EINVAL;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *utf8s = (char *)malloc((size_t)len + 1);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (*utf8s == NULL) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return ENOMEM;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry len = k5_ucs2s_to_utf8s(*utf8s, (krb5_ucs2 *)ucs2les, (size_t)len + 1, -1, 1);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (len < 0) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry free(*utf8s);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *utf8s = NULL;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return EINVAL;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (utf8slen != NULL) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *utf8slen = len;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return 0;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry}
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barryint
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barrykrb5int_ucs2cs_to_utf8s(const krb5_ucs2 *ucs2s,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry size_t ucs2slen,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry char **utf8s,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry size_t *utf8slen)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry{
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry ssize_t len;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (ucs2slen > SSIZE_MAX)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return ERANGE;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry len = k5_ucs2s_to_utf8s(NULL, (krb5_ucs2 *)ucs2s, 0,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry (ssize_t)ucs2slen, 0);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (len < 0)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return EINVAL;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *utf8s = (char *)malloc((size_t)len + 1);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (*utf8s == NULL) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return ENOMEM;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry len = k5_ucs2s_to_utf8s(*utf8s, (krb5_ucs2 *)ucs2s,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry (size_t)len + 1, (ssize_t)ucs2slen, 0);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (len < 0) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry free(*utf8s);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *utf8s = NULL;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return EINVAL;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (utf8slen != NULL) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *utf8slen = len;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return 0;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry}
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barryint
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barrykrb5int_ucs2lecs_to_utf8s(const unsigned char *ucs2les,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry size_t ucs2leslen,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry char **utf8s,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry size_t *utf8slen)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry{
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry ssize_t len;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (ucs2leslen > SSIZE_MAX)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return ERANGE;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry len = k5_ucs2s_to_utf8s(NULL, (krb5_ucs2 *)ucs2les, 0,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry (ssize_t)ucs2leslen, 1);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (len < 0)
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return EINVAL;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *utf8s = (char *)malloc((size_t)len + 1);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (*utf8s == NULL) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return ENOMEM;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry len = k5_ucs2s_to_utf8s(*utf8s, (krb5_ucs2 *)ucs2les,
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry (size_t)len + 1, (ssize_t)ucs2leslen, 1);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (len < 0) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry free(*utf8s);
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *utf8s = NULL;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return EINVAL;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry if (utf8slen != NULL) {
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry *utf8slen = len;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry }
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry return 0;
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry}
ba7b222e36bac28710a7f43739283302b617e7f5Glenn Barry