/*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/* Legal UTF-8 Byte Sequences
*
* 1 7 0xxxxxxx
* U+0000..U+007F 00..7F
*
* 2 11 110xxxxx 10xxxxxx
* U+0080..U+07FF C2..DF 80..BF
*
* 3 16 1110xxxx 10xxxxxx 10xxxxxx
* U+0800..U+0FFF E0 A0..BF 80..BF
* U+1000..U+FFFF E1..EF 80..BF 80..BF
*
* 4 21 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
* U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
* U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF
* U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
*
*/
{
/**
 * Creates a {@code UTF_8} instance.
 *
 * NOTE(review): the body contains no statements in this view; confirm
 * whether a superclass constructor call is expected here.
 */
public UTF_8() {
}
return "UTF8";
}
return new Decoder(this);
}
return new Encoder(this);
}
}
implements ArrayDecoder {
}
/**
 * Tells whether {@code b} is NOT a UTF-8 continuation byte.
 *
 * A continuation byte has the bit pattern {@code 10xxxxxx}; only bits 6
 * and 7 of {@code b} are inspected, so a sign-extended byte value gives
 * the same answer as its unsigned form.
 *
 * @param b the byte value to test
 * @return {@code true} if bits 7..6 of {@code b} are anything but {@code 10}
 */
private static boolean isNotContinuation(int b) {
    // Move bits 7..6 down to bits 1..0 and drop everything else.
    final int topTwoBits = (b >> 6) & 0x3;
    return topTwoBits != 0b10;
}
// [C2..DF] [80..BF]
}
// [E0] [A0..BF] [80..BF]
// [E1..EF] [80..BF] [80..BF]
}
// [F0] [90..BF] [80..BF] [80..BF]
// [F1..F3] [80..BF] [80..BF] [80..BF]
// [F4] [80..8F] [80..BF] [80..BF]
// only check the 80..BF range here; the [F0 80..8F ...] and [F4 90..BF ...]
// cases will be checked by Character.isSupplementaryCodePoint(uc)
}
{
for (int i = 1; i < n; i++) {
return CoderResult.malformedForLength(i);
}
return CoderResult.malformedForLength(n);
}
switch (nb) {
case 1:
// 5 bytes 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
return CoderResult.UNDERFLOW;
}
// 6 bytes 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
return CoderResult.UNDERFLOW;
}
case 2: // always 1
case 3:
return CoderResult.malformedForLength(
case 4: // we don't care the speed here
if (b1 > 0xf4 ||
default:
assert false;
return null;
}
}
int nb)
{
return cr;
}
{
return cr;
}
}
return cr;
}
{
// This method is optimized for ASCII input.
// ASCII only loop
if (b1 >= 0) {
// 1 byte, 7 bits: 0xxxxxxx
sp++;
// 2 bytes, 11 bits: 110xxxxx 10xxxxxx
^
(((byte) 0xC0 << 6) ^
((byte) 0x80 << 0)));
sp += 2;
// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
((b1 << 12) ^
(b2 << 6) ^
(b3 ^
(((byte) 0xE0 << 12) ^
((byte) 0x80 << 6) ^
((byte) 0x80 << 0))));
sp += 3;
// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
(b2 << 12) ^
(b3 << 6) ^
(b4 ^
(((byte) 0xF0 << 18) ^
((byte) 0x80 << 12) ^
((byte) 0x80 << 6) ^
((byte) 0x80 << 0))));
// shortest form check
}
sp += 4;
} else
}
}
{
if (b1 >= 0) {
// 1 byte, 7 bits: 0xxxxxxx
mark++;
// 2 bytes, 11 bits: 110xxxxx 10xxxxxx
^
(((byte) 0xC0 << 6) ^
((byte) 0x80 << 0))));
mark += 2;
// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
((b1 << 12) ^
(b2 << 6) ^
(b3 ^
(((byte) 0xE0 << 12) ^
((byte) 0x80 << 6) ^
((byte) 0x80 << 0)))));
mark += 3;
// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
(b2 << 12) ^
(b3 << 6) ^
(b4 ^
(((byte) 0xF0 << 18) ^
((byte) 0x80 << 12) ^
((byte) 0x80 << 6) ^
((byte) 0x80 << 0))));
// shortest form check
}
mark += 4;
} else {
}
}
}
{
else
}
{
return bb;
}
// returns -1 if the input contains malformed byte(s) and the
// "action" for malformed input is not REPLACE.
int dp = 0;
// ASCII only optimized loop
if (b1 >= 0) {
// 1 byte, 7 bits: 0xxxxxxx
// 2 bytes, 11 bits: 110xxxxx 10xxxxxx
return -1;
sp--; // malformedN(bb, 2) always returns 1
} else {
(((byte) 0xC0 << 6) ^
((byte) 0x80 << 0)));
}
continue;
}
return -1;
return dp;
// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
return -1;
sp -=3;
} else {
(b2 << 6) ^
(b3 ^
(((byte) 0xE0 << 12) ^
((byte) 0x80 << 6) ^
((byte) 0x80 << 0))));
}
continue;
}
return -1;
return dp;
// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
(b2 << 12) ^
(b3 << 6) ^
(b4 ^
(((byte) 0xF0 << 18) ^
((byte) 0x80 << 12) ^
((byte) 0x80 << 6) ^
((byte) 0x80 << 0))));
// shortest form check
return -1;
sp -= 4;
} else {
}
continue;
}
return -1;
return dp;
} else {
return -1;
sp--;
// leading byte of a 5- or 6-byte sequence, but there are not enough
// bytes in the buffer to check. The rest is consumed as malformed.
return dp;
}
}
}
return dp;
}
}
implements ArrayEncoder {
}
/**
 * Reports whether the single {@code char} {@code c} can be encoded.
 *
 * @param c the character to test
 * @return {@code false} if {@code c} is a surrogate code unit,
 *         {@code true} otherwise
 */
public boolean canEncode(char c) {
    if (Character.isSurrogate(c)) {
        return false;
    }
    return true;
}
super.isLegalReplacement(repl));
}
return CoderResult.OVERFLOW;
}
return CoderResult.OVERFLOW;
}
{
// ASCII only loop
if (c < 0x80) {
// Have at most seven bits
} else if (c < 0x800) {
// 2 bytes, 11 bits
} else if (Character.isSurrogate(c)) {
// Have a surrogate pair
if (uc < 0) {
}
sp++; // 2 chars
} else {
// 3 bytes, 16 bits
}
sp++;
}
return CoderResult.UNDERFLOW;
}
{
while (src.hasRemaining()) {
if (c < 0x80) {
// Have at most seven bits
if (!dst.hasRemaining())
} else if (c < 0x800) {
// 2 bytes, 11 bits
} else if (Character.isSurrogate(c)) {
// Have a surrogate pair
if (uc < 0) {
}
mark++; // 2 chars
} else {
// 3 bytes, 16 bits
}
mark++;
}
return CoderResult.UNDERFLOW;
}
{
else
}
// returns -1 if the input contains malformed char(s) and the
// "action" for malformed input is not REPLACE.
int dp = 0;
// ASCII only optimized loop
if (c < 0x80) {
// Have at most seven bits
} else if (c < 0x800) {
// 2 bytes, 11 bits
} else if (Character.isSurrogate(c)) {
if (uc < 0) {
return -1;
} else {
sp++; // 2 chars
}
} else {
// 3 bytes, 16 bits
}
}
return dp;
}
}
}