/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
*/
/*
* Multibyte/wide-char conversion routines. Wide-char encoding is
* a fixed size character encoding that maps to the Unicode 16-bit
* (UCS-2) character set standard. Multibyte or UCS transformation
* format (UTF) encoding is a variable length character encoding scheme
* that is compatible with existing ASCII characters and guarantees that
* the resultant strings do not contain embedded null characters. Both
* types of encoding provide a null terminator: single byte for UTF-8
* and a wide-char null for Unicode. See RFC 2044.
*
* The table below illustrates the UTF-8 encoding scheme. The letter x
* indicates bits available for encoding the character value.
*
* UCS-2 UTF-8 octet sequence (binary)
* 0x0000-0x007F 0xxxxxxx
* 0x0080-0x07FF 110xxxxx 10xxxxxx
* 0x0800-0xFFFF 1110xxxx 10xxxxxx 10xxxxxx
*
* RFC 2044
* UTF-8, a transformation format of Unicode and ISO 10646
* F. Yergeau
* Alis Technologies
* October 1996
*/
#if defined(_KERNEL) || defined(_FAKE_KERNEL)
#else
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <strings.h>
#endif
/*
* mbstowcs
*
* The mbstowcs() function converts a multibyte character string
* mbstring into a wide character string wcstring. No more than
* nwchars wide characters are stored. A terminating null wide
* character is appended if there is room.
*
* Returns the number of wide characters converted, not counting
* any terminating null wide character. Returns -1 if an invalid
* multibyte character is encountered.
*/
{
/*
 * NOTE(review): the function signature and the declarations of
 * mbstring, wcstring and nwchars are not visible in this view.
 * Presumably this is the body of the mbstowcs-style routine described
 * in the comment above -- TODO confirm against the full file.
 */
int len;
/* Runs at most nwchars times; wcstring advances once per iteration. */
while (nwchars--) {
/*
 * NOTE(review): len is tested here without any visible assignment;
 * the per-character conversion call that sets it appears to be
 * missing from this view -- TODO confirm.
 */
if (len < 0) {
/* Failure: null-terminate the output and return (size_t)-1. */
*wcstring = 0;
return ((size_t)-1);
}
/* Stop once the input's terminating null byte is reached. */
if (*mbstring == 0)
break;
++wcstring;
}
/* NOTE(review): no return statement is visible on the normal path. */
}
/*
* mbtowc
*
* The mbtowc() function converts a multibyte character mbchar into
* a wide character and stores the result in the object pointed to
* by wcharp. Up to nbytes bytes are examined.
*
* If mbchar is NULL, mbtowc() returns zero to indicate that shift
* states are not supported. Shift states are used to switch between
* representation modes using reserved bytes to signal shifting
* without them being interpreted as characters. If mbchar is null,
* mbtowc should return non-zero if the current locale requires shift
* states. Otherwise it should return 0.
*
* If mbchar is non-null, returns the number of bytes processed in
* mbchar. If mbchar is invalid, returns -1.
*/
int /*ARGSUSED*/
{
/*
 * NOTE(review): the function name and parameter list are not visible
 * in this view, and wide_char is used below without a visible
 * declaration. Presumably this is the mbtowc-style routine described
 * in the comment above -- TODO confirm against the full file.
 */
unsigned char mbyte;	/* lead byte of the sequence being decoded */
int count;		/* bytes consumed so far; returned on success */
int bytes_left;		/* continuation bytes still expected */
/*
 * NOTE(review): as shown, this return is unconditional and makes the
 * rest of the body unreachable; the guarding test (presumably a NULL
 * check on the input pointer) appears to be missing -- TODO confirm.
 */
return (0); /* no shift states */
/* 0xxxxxxx -> 1 byte ASCII encoding */
/*
 * NOTE(review): the test that opens this branch and the store through
 * wcharp are not visible. The visible return yields 1 for a non-zero
 * byte and 0 for a null byte.
 */
if (wcharp)
return (mbyte ? 1 : 0);
}
/* 10xxxxxx -> invalid first byte */
if ((mbyte & 0x40) == 0)
return (-1);
if ((mbyte & 0x20) == 0) {
/* 110xxxxx: keep the low 5 bits; one continuation byte follows. */
wide_char &= 0x1f;
bytes_left = 1;
} else if ((mbyte & 0x10) == 0) {
/* 1110xxxx: keep the low 4 bits; two continuation bytes follow. */
wide_char &= 0x0f;
bytes_left = 2;
} else {
/* Lead bytes with bit 0x10 also set are rejected. */
return (-1);
}
count = 1;
while (bytes_left--) {
/*
 * NOTE(review): as shown, this return is unconditional; the check
 * that guards it (and any accumulation into wide_char) appears to be
 * missing from this view -- TODO confirm.
 */
return (-1);
count++;
}
/*
 * NOTE(review): the statement controlled by this if is presumably a
 * store through wcharp that is missing; as shown, the return below is
 * what the if controls -- TODO confirm.
 */
if (wcharp)
return (count);
}
/*
* wctomb
*
* The wctomb() function converts a wide character wchar into a multibyte
* character and stores the result in mbchar. The object pointed to by
* mbchar must be large enough to accommodate the multibyte character.
*
* Returns the number of bytes written to mbchar.
*/
int
{
/*
 * NOTE(review): the function name and parameter list are not visible
 * in this view, and neither are any stores to the output buffer.
 * Presumably this is the wctomb-style routine described in the comment
 * above -- TODO confirm. Only the returned lengths are visible.
 */
/* No bits above the low 7 are set: return a length of 1. */
if ((wchar & ~0x7f) == 0) {
return (1);
}
/* No bits above the low 11 are set: return a length of 2. */
if ((wchar & ~0x7ff) == 0) {
return (2);
}
/* Every other value: return a length of 3. */
return (3);
}
/*
* wcstombs
*
* The wcstombs() function converts a wide character string wcstring
* into a multibyte character string mbstring. Up to nbytes bytes are
* stored in mbstring. Partial multibyte characters at the end of the
* string are not stored. The multibyte character string is null
* terminated if there is room.
*
* Returns the number of bytes converted, not counting the terminating
* null byte.
*/
{
/*
 * NOTE(review): the function signature, local declarations and most
 * of the body are not visible in this view, and the braces below do
 * not balance as shown. Presumably this is the wcstombs-style routine
 * described in the comment above -- TODO confirm against the full file.
 */
/*
 * NOTE(review): as shown, this return is unconditional; its guarding
 * test appears to be missing -- TODO confirm.
 */
return (0);
/* Loops only while nbytes is greater than MTS_MB_CHAR_MAX. */
while (nbytes > MTS_MB_CHAR_MAX) {
/* A zero wide character is tested for here; the action is not visible. */
if (wide_char == 0)
/*LINTED E_PTRDIFF_OVERFLOW*/
}
/* Writes a null byte at the current output position, then leaves the loop. */
*mbstring = 0;
break;
}
}
/* NOTE(review): the final return statement is not visible. */
/*LINTED E_PTRDIFF_OVERFLOW*/
}
/*
* Returns the number of bytes that would be written if the multi-
* byte string mbs was converted to a wide character string, not
* counting the terminating null wide character.
*/
{
/*
 * NOTE(review): the function signature and the declaration and
 * initialisation of len are not visible in this view -- TODO confirm
 * against the full file.
 */
/* Walk the input until its terminating null byte. */
while (*mbs) {
/*
 * NOTE(review): as shown, this return is unconditional; the conversion
 * call and the error test that guard it, and the advance of mbs,
 * appear to be missing -- TODO confirm.
 */
return ((size_t)-1);
/* Each character adds sizeof (smb_wchar_t) bytes to the total. */
len += sizeof (smb_wchar_t);
}
return (len);
}
/*
* Returns the number of bytes that would be written if the multi-
* byte string mbs was converted to a single byte character string,
* not counting the terminating null character.
*/
{
/*
 * NOTE(review): the function signature and the declarations of len
 * and wide_char are not visible in this view -- TODO confirm against
 * the full file.
 */
/* Walk the input until its terminating null byte. */
while (*mbs) {
/*
 * NOTE(review): as shown, this return is unconditional; the conversion
 * call and the error test that guard it, and the advance of mbs,
 * appear to be missing -- TODO confirm.
 */
return ((size_t)-1);
/*
 * A character with any of the upper 8 bits set adds
 * sizeof (smb_wchar_t) bytes; any other character adds one byte.
 */
if (wide_char & 0xFF00)
len += sizeof (smb_wchar_t);
else
++len;
}
return (len);
}
/*
* stombs
*
* Convert a regular null terminated string 'string' to a UTF-8 encoded
* null terminated multi-byte string 'mbstring'. Only fully converted
* UTF-8 characters will be written to 'mbstring'. If a character will not
* fit within the remaining buffer space or 'mbstring' will overflow
* max_mblen, the conversion process will be terminated and 'mbstring'
* will be null terminated.
*
* Returns the number of bytes written to 'mbstring', excluding the
* terminating null character.
*
* If either mbstring or string is a null pointer, -1 is returned.
*/
int
{
/*
 * NOTE(review): the function name, parameter list and the declarations
 * of wide_char and space_left are not visible in this view, and the
 * braces below do not balance as shown. Presumably this is the stombs
 * routine described in the comment above -- TODO confirm.
 */
/* Read the input as unsigned bytes. */
unsigned char *p = (unsigned char *)string;
int len;
/*
 * NOTE(review): as shown, this return is unconditional; the guarding
 * test (presumably the null-pointer check mentioned in the comment
 * above) appears to be missing -- TODO confirm.
 */
return (-1);
/* Continue while input remains and space_left is greater than 2. */
while (*p && space_left > 2) {
wide_char = *p++;
/*
 * NOTE(review): len is used without a visible assignment; the
 * conversion call that sets it appears to be missing -- TODO confirm.
 */
space_left -= len;
}
/* Input remained when the loop above ended. */
if (*p) {
wide_char = *p;
/* NOTE(review): same missing assignment to len as above. */
space_left -= len;
}
}
/* Null-terminate the output. */
*mbstring = '\0';
/* NOTE(review): the final return statement is not visible. */
/*LINTED E_PTRDIFF_OVERFLOW*/
}
/*
* mbstos
*
* Convert a null terminated multi-byte string 'mbstring' to a regular
* null terminated string 'string'. A 1-byte character in 'mbstring'
* maps to a 1-byte character in 'string'. A 2-byte character in
* 'mbstring' will be mapped to 2-bytes, if the upper byte is non-null.
* Otherwise the upper byte null will be discarded to ensure that the
* output stream does not contain embedded null characters.
*
* If the input stream contains invalid multi-byte characters, a value
* of -1 will be returned. Otherwise the length of 'string', excluding
* the terminating null character, is returned.
*
* If either mbstring or string is a null pointer, -1 is returned.
*/
int
{
/*
 * NOTE(review): the function name, parameter list and the declaration
 * of wc are not visible in this view, and the braces below do not
 * balance as shown. Presumably this is the mbstos routine described
 * in the comment above -- TODO confirm against the full file.
 */
int len;
/*
 * NOTE(review): as shown, this return is unconditional; the guarding
 * test (presumably the null-pointer check mentioned in the comment
 * above) appears to be missing -- TODO confirm.
 */
return (-1);
/* Walk the input until its terminating null byte. */
while (*mbstring) {
/*
 * Failure path: null-terminate the output and return -1.
 * NOTE(review): the conversion call and the error test that guard
 * this path are not visible -- TODO confirm.
 */
*string = 0;
return (-1);
}
/*
 * A character with any of the upper 8 bits set advances the output by
 * sizeof (smb_wchar_t) bytes; any other character advances it by one
 * byte. NOTE(review): the stores into 'string' are not visible.
 */
if (wc & 0xFF00) {
/*LINTED E_BAD_PTR_CAST_ALIGN*/
string += sizeof (smb_wchar_t);
}
else
{
string++;
}
}
/* Null-terminate the output. */
*string = 0;
/* NOTE(review): the final return statement is not visible. */
/*LINTED E_PTRDIFF_OVERFLOW*/
}