/*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/* Misc functions for conversion of Unicode and UTF-8 and platform encoding */
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include "jni.h"
#include "utf.h"
/*
* Error handler
*/
/* NOTE(review): the function name and parameter list are not visible between
 * the return type and the opening brace in this view -- confirm against the
 * complete file before relying on this block.
 */
void
{
/* Abnormal termination: abort() does not return to the caller. */
abort();
}
/*
* Convert UTF-8 to UTF-16
* Returns length or -1 if output overflows.
*/
/* NOTE(review): the function name and parameter list are not visible in this
 * view. The body reads the names utf8, len and outputMaxLen, which are
 * presumably parameters -- confirm against the complete file.
 *
 * Visible behavior: walks the input bytes, returns -1 as soon as outputLen
 * has reached outputMaxLen while input remains, otherwise returns outputLen.
 */
int JNICALL
{
int outputLen;
int i;
UTF_ASSERT(len>=0);
i = 0;
outputLen = 0;
while ( i<len ) {
unsigned code, x, y, z;
/* Overflow check is done before consuming the next input sequence. */
if ( outputLen >= outputMaxLen ) {
return -1;
}
x = (unsigned char)utf8[i++];
code = x;
/* Lead byte with the top three bits set: two more bytes are consumed. */
if ( (x & 0xE0)==0xE0 ) {
/* NOTE(review): the following bytes are read without comparing i against
 * len, so a truncated sequence at the end of the input would read past
 * len -- confirm whether callers guarantee well-formed input.
 */
y = (unsigned char)utf8[i++];
z = (unsigned char)utf8[i++];
/* Lead byte with the top two bits set: one more byte is consumed. */
} else if ( (x & 0xC0)==0xC0 ) {
y = (unsigned char)utf8[i++];
}
/* NOTE(review): no statement that combines x/y/z into code, stores code
 * to the output, or increments outputLen is visible here; those lines
 * appear to be missing from this view.
 */
}
return outputLen;
}
/*
* Convert UTF-16 to Modified UTF-8
* Returns length or -1 if output overflows.
*/
/* NOTE(review): the function name and parameter list are not visible in this
 * view. The body reads the names len and outputMaxLen, which are presumably
 * parameters -- confirm against the complete file.
 *
 * NOTE(review): the braces in this block do not balance as shown and the
 * loop body contains only a declaration; the encoding statements appear to
 * be missing from this view.
 */
int JNICALL
{
int i;
int outputLen;
UTF_ASSERT(len>=0);
outputLen = 0;
for (i = 0; i < len; i++) {
unsigned code;
}
/* Overflow is reported with -1; otherwise outputLen is returned below. */
if ( outputLen > outputMaxLen ) {
return -1;
}
}
return outputLen;
}
/* NOTE(review): the function name and parameter list are not visible in this
 * view -- confirm against the complete file.
 *
 * Unconditionally returns -1; the FIXUP marker indicates the body is a
 * placeholder.
 */
int JNICALL
{
return -1; /* FIXUP */
}
/* Determine length of this Standard UTF-8 in Modified UTF-8.
* Validation is done of the basic UTF encoding rules, returns
* length (no change) when errors are detected in the UTF encoding.
*
* Note: Accepts Modified UTF-8 also, no verification on the
* correctness of Standard UTF-8 is done, e.g., 0xC080 input is OK.
*/
/* NOTE(review): the function name and parameter list are not visible in this
 * view. The body reads the name length, which is presumably a parameter --
 * confirm against the complete file.
 *
 * NOTE(review): the conditions that select between the 1-, 2-, 3- and 4-byte
 * cases, the checks guarding each "break", and the assignment to byte are
 * not visible; the braces do not balance as shown. Those lines appear to be
 * missing from this view.
 *
 * Visible behavior: newLength is accumulated while scanning; if the scan
 * stops before i reaches length, the original length is returned instead.
 */
int JNICALL
{
int newLength;
int i;
newLength = 0;
for ( i = 0 ; i < length ; i++ ) {
unsigned byte;
newLength++;
if ( byte == 0 ) {
newLength++; /* We gain one byte in length on NULL bytes */
}
/* Check encoding of following bytes */
break; /* Error condition */
}
i++; /* Skip next byte */
newLength += 2;
/* Check encoding of following bytes */
break; /* Error condition */
}
i += 2; /* Skip next two bytes */
newLength += 3;
/* Check encoding of following bytes */
break; /* Error condition */
}
i += 3; /* Skip next 3 bytes */
} else {
break; /* Error condition */
}
}
/* An early "break" leaves i short of length; that is how errors are detected. */
if ( i != length ) {
/* Error in finding new length, return old length so no conversion */
/* FIXUP: ERROR_MESSAGE? */
return length;
}
return newLength;
}
/* Convert Standard UTF-8 to Modified UTF-8.
* Assumes the UTF-8 encoding was validated by utf8mLength() above.
*
* Note: Accepts Modified UTF-8 also, no verification on the
* correctness of Standard UTF-8 is done, e.g., 0xC080 input is OK.
*/
/* NOTE(review): the function name and parameter list are not visible in this
 * view. The body reads the names length and newLength, which are presumably
 * parameters -- confirm against the complete file.
 *
 * NOTE(review): the assignment to byte1, every statement that writes output
 * or advances j, and the branch conditions for the multi-byte cases are not
 * visible; only the explanatory comments remain. Those lines appear to be
 * missing from this view.
 */
void JNICALL
{
int i;
int j;
j = 0;
for ( i = 0 ; i < length ; i++ ) {
unsigned byte1;
/* NULL bytes and bytes starting with 11110xxx are special */
if ( byte1 == 0 ) {
/* Bits out: 11000000 10000000 */
} else {
/* Single byte */
}
/* Beginning of 4byte encoding, turn into 2 3byte encodings */
/* Bits in: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
/* Reconstruct full 21bit value */
/* Bits out: 11101101 1010xxxx 10xxxxxx */
/* Bits out: 11101101 1011xxxx 10xxxxxx */
}
}
/* Post-conditions: all input consumed and exactly newLength bytes produced. */
UTF_ASSERT(i==length);
UTF_ASSERT(j==newLength);
}
/* Given a Modified UTF-8 string, calculate the Standard UTF-8 length.
* Basic validation of the UTF encoding rules is done, and length is
* returned (no change) when errors are detected.
*
* Note: No validation is made that this is indeed Modified UTF-8 coming in.
*
*/
/* NOTE(review): the function name and parameter list are not visible in this
 * view. The body reads the name length, which is presumably a parameter --
 * confirm against the complete file.
 *
 * NOTE(review): the byte reads, the conditions that select between the
 * encoding cases, and the checks guarding each "break" are not visible; the
 * braces do not balance as shown. Those lines appear to be missing from
 * this view.
 *
 * Visible behavior: newLength is accumulated while scanning; if the scan
 * stops before i reaches length, the original length is returned instead.
 */
int JNICALL
{
int newLength;
int i;
newLength = 0;
for ( i = 0 ; i < length ; i++ ) {
newLength++;
/* Check encoding of following bytes */
break; /* Error condition */
}
} else {
newLength++; /* We will turn 0xC080 into 0 */
}
/* Check encoding of following bytes */
break; /* Error condition */
}
newLength += 3;
/* Possibly process a second 3byte encoding */
/* See if this is a pair of 3byte encodings */
/* Check encoding of 3rd byte */
break; /* Error condition */
}
newLength++; /* New string will have 4byte encoding */
i += 3; /* Skip next 3 bytes */
}
}
} else {
break; /* Error condition */
}
}
/* An early "break" leaves i short of length; that is how errors are detected. */
if ( i != length ) {
/* Error in UTF encoding */
/* FIXUP: ERROR_MESSAGE()? */
return length;
}
return newLength;
}
/* Convert a Modified UTF-8 string into a Standard UTF-8 string
* It is assumed that this string has been validated in terms of the
* basic UTF encoding rules by utf8Length() above.
*
* Note: No validation is made that this is indeed Modified UTF-8 coming in.
*
*/
/* NOTE(review): the function name and parameter list are not visible in this
 * view. The body reads the names length, newLength and newString, which are
 * presumably parameters -- confirm against the complete file.
 *
 * NOTE(review): the byte reads, branch conditions and most output statements
 * are not visible; the braces do not balance as shown. Those lines appear to
 * be missing from this view.
 */
void JNICALL
{
int i;
int j;
j = 0;
for ( i = 0 ; i < length ; i++ ) {
/* Single byte */
} else {
/* A single zero byte is emitted on this path. */
newString[j++] = 0;
}
/* See if this is a pair of 3byte encodings */
unsigned u21;
/* Bits in: 11101101 1010xxxx 10xxxxxx */
/* Bits in: 11101101 1011xxxx 10xxxxxx */
i += 3;
/* Reconstruct 21 bit code */
/* Bits out: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
/* Convert to 4byte encoding */
continue;
}
}
/* Normal 3byte encoding */
}
}
/* Post-conditions: all input consumed and exactly newLength bytes produced. */
UTF_ASSERT(i==length);
UTF_ASSERT(j==newLength);
/* A terminating zero is written at index j, i.e. one position past the
 * newLength bytes asserted above -- presumably the caller sizes newString
 * with room for it; verify against callers.
 */
newString[j] = 0;
}
/* ================================================================= */
#if 1 /* Test program */
/*
* Convert any byte array into a printable string.
* Returns length or -1 if output overflows.
*/
/* NOTE(review): the function name and parameter list are not visible in this
 * view. The body reads the names len and outputMaxLen, which are presumably
 * parameters -- confirm against the complete file.
 *
 * NOTE(review): the assignment to byte, the condition that pairs with the
 * "else" branch, and the statements that write output are not visible.
 * Those lines appear to be missing from this view.
 *
 * Visible behavior: returns -1 as soon as outputLen has reached outputMaxLen
 * while input remains, otherwise returns outputLen.
 */
static int
{
int outputLen;
int i;
UTF_ASSERT(len>=0);
UTF_ASSERT(outputMaxLen>=0);
outputLen = 0;
for ( i=0; i<len ; i++ ) {
unsigned byte;
if ( outputLen >= outputMaxLen ) {
return -1;
}
} else {
/* This branch accounts for four output characters per input byte. */
outputLen += 4;
}
}
return outputLen;
}
/* Test driver over a fixed, NULL-terminated table of sample strings.
 *
 * NOTE(review): the loop header and the statements that use str, len1, len2
 * and len3 are not visible; only the declarations and the index increment
 * remain, and the braces do not balance as shown. Those lines appear to be
 * missing from this view.
 */
static void
test(void)
{
/* Sample inputs; the NULL entry marks the end of the table. */
static char *strings[] = {
"characters",
"abcdefghijklmnopqrstuvwxyz",
"0123456789",
"!@#$%^&*()_+=-{}[]:;",
NULL };
int i;
i = 0;
char *str;
int len1;
int len2;
int len3;
i++;
}
}
/* NOTE(review): the function name and parameter list are not visible in this
 * view; this is presumably the test program's entry point -- confirm against
 * the complete file.
 *
 * Runs test() and then returns 0.
 */
int
{
test();
return 0;
}
#endif