utf8.cpp revision 2273
2273N/A * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. 0N/A * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 0N/A * This code is free software; you can redistribute it and/or modify it 0N/A * under the terms of the GNU General Public License version 2 only, as 0N/A * published by the Free Software Foundation. 0N/A * This code is distributed in the hope that it will be useful, but WITHOUT 0N/A * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 0N/A * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 0N/A * version 2 for more details (a copy is included in the LICENSE file that 0N/A * accompanied this code). 0N/A * You should have received a copy of the GNU General Public License version 0N/A * 2 along with this work; if not, write to the Free Software Foundation, 0N/A * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1472N/A * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 0N/A// Assume the utf8 string is in legal form and has been 0N/A unsigned const char *
ptr = (
const unsigned char *)
str;
0N/A case 0x8:
case 0x9:
case 0xA:
case 0xB:
case 0xF:
0N/A /* Shouldn't happen. */ 0N/A /* 110xxxxx 10xxxxxx */ 0N/A /* 1110xxxx 10xxxxxx 10xxxxxx */ 0N/A }
/* end of switch */ 0N/A return (
char*)(
ptr +
1);
// make progress somehow 0N/A // The assert is correct but the .class file is wrong 0N/A // assert(UNICODE::utf8_size(result) == length, "checking reverse computation"); 0N/A unsigned const char *
ptr = (
const unsigned char *)
str;
0N/A /* See if it's a legal supplementary character: 0N/A 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx */ 0N/A// Count bytes of the form 10xxxxxx and deduct this count 0N/A// from the total byte count. The utf8 string must be in 0N/A// legal form which has been verified in the format checker. 0N/A for (
int i = 0; i <
len; i++) {
0N/A if ((
str[i] &
0xC0) ==
0x80) {
0N/A// Count bytes of the utf8 string except those in form 0N/A// 10xxxxxx which only appear in multibyte characters. 0N/A// The utf8 string must be in legal form and has been 0N/A// verified in the format checker. 0N/A for (
const char* p =
str; *p; p++) {
0N/A if (((*p) &
0xC0) !=
0x80) {
0N/A// Writes a jchar as utf8 and returns the end 0N/A /* 11 bits or less. */ 0N/A /* possibly full 16 bits. */ 0N/A /* ASCII case loop optimization */ 0N/A// Returns NULL if 'c' is not found. This only works as long 0N/A// as 'c' is an ASCII character 0N/A assert(c >= 0,
"does not work for non-ASCII characters");
0N/A // Skip backwards in string until 'c' is found or end is reached 0N/A // Length must be the same 0N/A return ((
str[0] &
0xFF) ==
0xED) && ((
str[
1] &
0xF0) ==
0xA0) && ((
str[
2] &
0xC0) ==
0x80)
0N/A && ((
str[
3] &
0xFF) ==
0xED) && ((
str[
4] &
0xF0) ==
0xB0) && ((
str[
5] &
0xC0) ==
0x80);
0N/A return 0x10000 + ((
str[
1] &
0x0f) <<
16) + ((
str[
2] &
0x3f) <<
10)
0N/A//------------------------------------------------------------------------------------- 0N/A if ((
0x0001 <= c) && (c <=
0x007F))
return 1;
0N/A if (c <=
0x07FF)
return 2;
0N/A if ((
0x0001 <= c) && (c <=
0x007F))
result +=
1;