ByteToCharUTF8.java revision 2116
0N/A/*
0N/A * Copyright 1996-1997 Sun Microsystems, Inc. All Rights Reserved.
0N/A * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
0N/A *
0N/A * This code is free software; you can redistribute it and/or modify it
0N/A * under the terms of the GNU General Public License version 2 only, as
0N/A * published by the Free Software Foundation. Sun designates this
0N/A * particular file as subject to the "Classpath" exception as provided
0N/A * by Sun in the LICENSE file that accompanied this code.
0N/A *
0N/A * This code is distributed in the hope that it will be useful, but WITHOUT
0N/A * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0N/A * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
0N/A * version 2 for more details (a copy is included in the LICENSE file that
0N/A * accompanied this code).
0N/A *
0N/A * You should have received a copy of the GNU General Public License version
0N/A * 2 along with this work; if not, write to the Free Software Foundation,
0N/A * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
0N/A *
0N/A * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
0N/A * CA 95054 USA or visit www.sun.com if you need additional information or
0N/A * have any questions.
0N/A */
0N/Apackage sun.io;
0N/A
0N/A
0N/A/**
0N/A * UCS Transformation Format 8 (UTF-8) -> UCS2 (UTF16) converter
0N/A *
0N/A * see CharToByteUTF8.java about UTF-8 format
0N/A */
0N/A
0N/Apublic class ByteToCharUTF8 extends ByteToCharConverter {
0N/A
0N/A private int savedSize;
0N/A private byte[] savedBytes;
0N/A
0N/A public ByteToCharUTF8() {
0N/A super();
0N/A savedSize = 0;
0N/A savedBytes = new byte[5];
0N/A }
0N/A
0N/A public int flush(char[] output, int outStart, int outEnd)
0N/A throws MalformedInputException
0N/A {
0N/A if (savedSize != 0) {
0N/A savedSize = 0;
0N/A badInputLength = 0;
0N/A throw new MalformedInputException();
0N/A }
0N/A byteOff = charOff = 0;
0N/A return 0;
0N/A }
0N/A
0N/A /**
0N/A * Character converson
0N/A */
0N/A public int convert(byte[] input, int inOff, int inEnd,
0N/A char[] output, int outOff, int outEnd)
0N/A throws MalformedInputException, ConversionBufferFullException
0N/A {
0N/A int byte1, byte2, byte3, byte4;
0N/A char[] outputChar = new char[2];
0N/A int outputSize;
0N/A int byteOffAdjustment = 0;
0N/A
0N/A if (savedSize != 0) {
0N/A byte[] newBuf;
0N/A newBuf = new byte[inEnd - inOff + savedSize];
0N/A for (int i = 0; i < savedSize; i++) {
0N/A newBuf[i] = savedBytes[i];
0N/A }
0N/A System.arraycopy(input, inOff, newBuf, savedSize, inEnd - inOff);
0N/A input = newBuf;
0N/A inOff = 0;
0N/A inEnd = newBuf.length;
0N/A byteOffAdjustment = -savedSize;
0N/A savedSize = 0;
0N/A }
0N/A
0N/A charOff = outOff;
0N/A byteOff = inOff;
0N/A int startByteOff;
0N/A
0N/A while(byteOff < inEnd) {
0N/A
0N/A startByteOff = byteOff;
0N/A byte1 = input[byteOff++] & 0xff;
0N/A
0N/A if ((byte1 & 0x80) == 0){
0N/A outputChar[0] = (char)byte1;
0N/A outputSize = 1;
0N/A } else if ((byte1 & 0xe0) == 0xc0) {
0N/A if (byteOff >= inEnd) {
0N/A savedSize = 1;
0N/A savedBytes[0] = (byte)byte1;
0N/A break;
0N/A }
0N/A byte2 = input[byteOff++] & 0xff;
0N/A if ((byte2 & 0xc0) != 0x80) {
0N/A badInputLength = 2;
0N/A byteOff += byteOffAdjustment;
0N/A throw new MalformedInputException();
0N/A }
0N/A outputChar[0] = (char)(((byte1 & 0x1f) << 6) | (byte2 & 0x3f));
0N/A outputSize = 1;
0N/A } else if ((byte1 & 0xf0) == 0xe0){
0N/A if (byteOff + 1 >= inEnd) {
0N/A savedBytes[0] = (byte)byte1;
0N/A if (byteOff >= inEnd) {
0N/A savedSize = 1;
0N/A } else {
0N/A savedSize = 2;
2116N/A savedBytes[1] = input[byteOff++];
0N/A }
0N/A break;
0N/A }
0N/A byte2 = input[byteOff++] & 0xff;
0N/A byte3 = input[byteOff++] & 0xff;
0N/A if ((byte2 & 0xc0) != 0x80 || (byte3 & 0xc0) != 0x80) {
0N/A badInputLength = 3;
0N/A byteOff += byteOffAdjustment;
0N/A throw new MalformedInputException();
0N/A }
0N/A outputChar[0] = (char)(((byte1 & 0x0f) << 12)
0N/A | ((byte2 & 0x3f) << 6)
0N/A | (byte3 & 0x3f));
0N/A outputSize = 1;
0N/A } else if ((byte1 & 0xf8) == 0xf0) {
0N/A if (byteOff + 2 >= inEnd) {
0N/A savedBytes[0] = (byte)byte1;
0N/A if (byteOff >= inEnd) {
0N/A savedSize = 1;
0N/A } else if (byteOff + 1 >= inEnd) {
0N/A savedSize = 2;
2116N/A savedBytes[1] = input[byteOff++];
0N/A } else {
0N/A savedSize = 3;
2116N/A savedBytes[1] = input[byteOff++];
2116N/A savedBytes[2] = input[byteOff++];
0N/A }
0N/A break;
0N/A }
0N/A byte2 = input[byteOff++] & 0xff;
0N/A byte3 = input[byteOff++] & 0xff;
0N/A byte4 = input[byteOff++] & 0xff;
0N/A if ((byte2 & 0xc0) != 0x80 ||
0N/A (byte3 & 0xc0) != 0x80 ||
0N/A (byte4 & 0xc0) != 0x80) {
0N/A badInputLength = 4;
0N/A byteOff += byteOffAdjustment;
0N/A throw new MalformedInputException();
0N/A }
0N/A // this byte sequence is UTF16 character
2116N/A int ucs4 = (0x07 & byte1) << 18 |
2116N/A (0x3f & byte2) << 12 |
2116N/A (0x3f & byte3) << 6 |
2116N/A (0x3f & byte4);
0N/A outputChar[0] = (char)((ucs4 - 0x10000) / 0x400 + 0xd800);
0N/A outputChar[1] = (char)((ucs4 - 0x10000) % 0x400 + 0xdc00);
0N/A outputSize = 2;
0N/A } else {
0N/A badInputLength = 1;
0N/A byteOff += byteOffAdjustment;
0N/A throw new MalformedInputException();
0N/A }
0N/A
0N/A if (charOff + outputSize > outEnd) {
0N/A byteOff = startByteOff;
0N/A byteOff += byteOffAdjustment;
0N/A throw new ConversionBufferFullException();
0N/A }
0N/A
0N/A for (int i = 0; i < outputSize; i++) {
0N/A output[charOff + i] = outputChar[i];
0N/A }
0N/A charOff += outputSize;
0N/A }
0N/A
0N/A byteOff += byteOffAdjustment;
0N/A return charOff - outOff;
0N/A }
0N/A
0N/A /*
0N/A * Return the character set id
0N/A */
0N/A public String getCharacterEncoding() {
0N/A return "UTF8";
0N/A }
0N/A
0N/A /*
0N/A * Reset after finding bad input
0N/A */
0N/A public void reset() {
0N/A byteOff = charOff = 0;
0N/A savedSize = 0;
0N/A }
0N/A}