2362N/A * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. 0N/A * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 0N/A * This code is free software; you can redistribute it and/or modify it 0N/A * under the terms of the GNU General Public License version 2 only, as 2362N/A * published by the Free Software Foundation. Oracle designates this 0N/A * particular file as subject to the "Classpath" exception as provided 2362N/A * by Oracle in the LICENSE file that accompanied this code. 0N/A * This code is distributed in the hope that it will be useful, but WITHOUT 0N/A * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 0N/A * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 0N/A * version 2 for more details (a copy is included in the LICENSE file that 0N/A * accompanied this code). 0N/A * You should have received a copy of the GNU General Public License version 0N/A * 2 along with this work; if not, write to the Free Software Foundation, 0N/A * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 2362N/A * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 2362N/A * or visit www.oracle.com if you need additional information or have any 0N/A/* Legal UTF-8 Byte Sequences * U+0080..U+07FF C2..DF 80..BF * 3 16 1110xxxx 10xxxxxx 10xxxxxx * U+0800..U+0FFF E0 A0..BF 80..BF * U+1000..U+FFFF E1..EF 80..BF 80..BF * 4 21 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx * U+10000..U+3FFFF F0 90..BF 80..BF 80..BF * U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF * U+100000..U10FFFF F4 80..8F 80..BF 80..BF return (b &
0xc0) !=
0x80;
return (
b1 &
0x1e) ==
0x0 || (
b2 &
0xc0) !=
0x80;
// [E0] [A0..BF] [80..BF] // [E1..EF] [80..BF] [80..BF] return (
b1 == (
byte)
0xe0 && (
b2 &
0xe0) ==
0x80) ||
(
b2 &
0xc0) !=
0x80 || (
b3 &
0xc0) !=
0x80;
// [F0] [90..BF] [80..BF] [80..BF] // [F1..F3] [80..BF] [80..BF] [80..BF] // [F4] [80..8F] [80..BF] [80..BF] // only check 80-be range here, the [0xf0,0x80...] and [0xf4,0x90-...] // will be checked by Character.isSupplementaryCodePoint(uc) return (
b2 &
0xc0) !=
0x80 || (
b3 &
0xc0) !=
0x80 ||
for (
int i =
1; i < n; i++) {
// 5 bytes 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx // 6 bytes 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx int b2 =
src.
get();
// no need to lookup b3 ((
b1 == (
byte)
0xe0 && (
b2 &
0xe0) ==
0x80) ||
case 4:
// we don't care the speed here (
b1 ==
0xf0 && (
b2 <
0x90 ||
b2 >
0xbf)) ||
(
b1 ==
0xf4 && (
b2 &
0xf0) !=
0x80) ||
// This method is optimized for ASCII input. // 1 byte, 7 bits: 0xxxxxxx }
else if ((
b1 >>
5) == -
2) {
// 2 bytes, 11 bits: 110xxxxx 10xxxxxx }
else if ((
b1 >>
4) == -
2) {
// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx }
else if ((
b1 >>
3) == -
2) {
// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx // 1 byte, 7 bits: 0xxxxxxx }
else if ((
b1 >>
5) == -
2) {
// 2 bytes, 11 bits: 110xxxxx 10xxxxxx }
else if ((
b1 >>
4) == -
2) {
// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx }
else if ((
b1 >>
3) == -
2) {
// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx // returns -1 if there is malformed byte(s) and the // "action" for malformed input is not REPLACE. // ASCII only optimized loop // 1 byte, 7 bits: 0xxxxxxx }
else if ((
b1 >>
5) == -
2) {
// 2 bytes, 11 bits: 110xxxxx 10xxxxxx sp--;
// malformedN(bb, 2) always returns 1 }
else if ((
b1 >>
4) == -
2) {
// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx da[
dp++] = (
char)((
b1 <<
12) ^
}
else if ((
b1 >>
3) == -
2) {
// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx // leading byte for 5 or 6-byte, but don't have enough // bytes in buffer to check. Consumed rest as malformed. // Have at most seven bits da[
dp++] = (
byte)(
0xc0 | (c >>
6));
da[
dp++] = (
byte)(
0x80 | (c &
0x3f));
da[
dp++] = (
byte)(
0xf0 | ((
uc >>
18)));
da[
dp++] = (
byte)(
0x80 | ((
uc >>
12) &
0x3f));
da[
dp++] = (
byte)(
0x80 | ((
uc >>
6) &
0x3f));
da[
dp++] = (
byte)(
0x80 | (
uc &
0x3f));
da[
dp++] = (
byte)(
0xe0 | ((c >>
12)));
da[
dp++] = (
byte)(
0x80 | ((c >>
6) &
0x3f));
da[
dp++] = (
byte)(
0x80 | (c &
0x3f));
// Have at most seven bits dst.
put((
byte)(
0xc0 | (c >>
6)));
dst.
put((
byte)(
0x80 | (c &
0x3f)));
dst.
put((
byte)(
0x80 | ((
uc >>
12) &
0x3f)));
dst.
put((
byte)(
0x80 | ((
uc >>
6) &
0x3f)));
dst.
put((
byte)(
0xe0 | ((c >>
12))));
dst.
put((
byte)(
0x80 | ((c >>
6) &
0x3f)));
dst.
put((
byte)(
0x80 | (c &
0x3f)));
// returns -1 if there is malformed char(s) and the // "action" for malformed input is not REPLACE. // ASCII only optimized loop // Have at most seven bits da[
dp++] = (
byte)(
0xc0 | (c >>
6));
da[
dp++] = (
byte)(
0x80 | (c &
0x3f));
da[
dp++] = (
byte)(
0xf0 | ((
uc >>
18)));
da[
dp++] = (
byte)(
0x80 | ((
uc >>
12) &
0x3f));
da[
dp++] = (
byte)(
0x80 | ((
uc >>
6) &
0x3f));
da[
dp++] = (
byte)(
0x80 | (
uc &
0x3f));
da[
dp++] = (
byte)(
0xe0 | ((c >>
12)));
da[
dp++] = (
byte)(
0x80 | ((c >>
6) &
0x3f));
da[
dp++] = (
byte)(
0x80 | (c &
0x3f));