/*
 * Copyright (c) 1996, 1997, Oracle and/or its affiliates. All rights reserved.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
0N/Apackage sun.io;
/**
 * UCS2 (UTF16) -> UCS Transformation Format 8 (UTF-8) converter.
 * The UTF-8 byte sequences are laid out as shown below
 * (# is the number of bytes in the sequence, Bits the number of
 * payload bits it carries).
 *
 *   #   Bits   Bit pattern
 *   1    7     0xxxxxxx
 *   2   11     110xxxxx 10xxxxxx
 *   3   16     1110xxxx 10xxxxxx 10xxxxxx
 *   4   21     11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   5   26     111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   6   31     1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * UCS2 uses 1-3 / UTF16 uses 1-4 / UCS4 uses 1-6
 */
0N/Apublic class CharToByteUTF8 extends CharToByteConverter {
0N/A private char highHalfZoneCode;
0N/A public int flush(byte[] output, int outStart, int outEnd)
0N/A throws MalformedInputException
0N/A {
0N/A if (highHalfZoneCode != 0) {
0N/A highHalfZoneCode = 0;
0N/A badInputLength = 0;
0N/A throw new MalformedInputException();
0N/A }
0N/A byteOff = charOff = 0;
0N/A return 0;
0N/A }
0N/A /**
0N/A * Character conversion
0N/A */
0N/A public int convert(char[] input, int inOff, int inEnd,
0N/A byte[] output, int outOff, int outEnd)
0N/A throws ConversionBufferFullException, MalformedInputException
0N/A {
0N/A char inputChar;
0N/A byte[] outputByte = new byte[6];
0N/A int inputSize;
0N/A int outputSize;
0N/A charOff = inOff;
0N/A byteOff = outOff;
0N/A if (highHalfZoneCode != 0) {
0N/A inputChar = highHalfZoneCode;
0N/A highHalfZoneCode = 0;
0N/A if (input[inOff] >= 0xdc00 && input[inOff] <= 0xdfff) {
0N/A // This is legal UTF16 sequence.
0N/A int ucs4 = (highHalfZoneCode - 0xd800) * 0x400
0N/A + (input[inOff] - 0xdc00) + 0x10000;
0N/A output[0] = (byte)(0xf0 | ((ucs4 >> 18)) & 0x07);
0N/A output[1] = (byte)(0x80 | ((ucs4 >> 12) & 0x3f));
0N/A output[2] = (byte)(0x80 | ((ucs4 >> 6) & 0x3f));
0N/A output[3] = (byte)(0x80 | (ucs4 & 0x3f));
0N/A charOff++;
0N/A highHalfZoneCode = 0;
0N/A } else {
0N/A // This is illegal UTF16 sequence.
0N/A badInputLength = 0;
0N/A throw new MalformedInputException();
0N/A }
0N/A }
0N/A while(charOff < inEnd) {
0N/A inputChar = input[charOff];
0N/A if (inputChar < 0x80) {
0N/A outputByte[0] = (byte)inputChar;
0N/A inputSize = 1;
0N/A outputSize = 1;
0N/A } else if (inputChar < 0x800) {
0N/A outputByte[0] = (byte)(0xc0 | ((inputChar >> 6) & 0x1f));
0N/A outputByte[1] = (byte)(0x80 | (inputChar & 0x3f));
0N/A inputSize = 1;
0N/A outputSize = 2;
0N/A } else if (inputChar >= 0xd800 && inputChar <= 0xdbff) {
0N/A // this is <high-half zone code> in UTF-16
0N/A if (charOff + 1 >= inEnd) {
0N/A highHalfZoneCode = inputChar;
0N/A break;
0N/A }
0N/A // check next char is valid <low-half zone code>
0N/A char lowChar = input[charOff + 1];
0N/A if (lowChar < 0xdc00 || lowChar > 0xdfff) {
0N/A badInputLength = 1;
0N/A throw new MalformedInputException();
0N/A }
0N/A int ucs4 = (inputChar - 0xd800) * 0x400 + (lowChar - 0xdc00)
0N/A + 0x10000;
0N/A outputByte[0] = (byte)(0xf0 | ((ucs4 >> 18)) & 0x07);
0N/A outputByte[1] = (byte)(0x80 | ((ucs4 >> 12) & 0x3f));
0N/A outputByte[2] = (byte)(0x80 | ((ucs4 >> 6) & 0x3f));
0N/A outputByte[3] = (byte)(0x80 | (ucs4 & 0x3f));
0N/A outputSize = 4;
0N/A inputSize = 2;
0N/A } else {
0N/A outputByte[0] = (byte)(0xe0 | ((inputChar >> 12)) & 0x0f);
0N/A outputByte[1] = (byte)(0x80 | ((inputChar >> 6) & 0x3f));
0N/A outputByte[2] = (byte)(0x80 | (inputChar & 0x3f));
0N/A inputSize = 1;
0N/A outputSize = 3;
0N/A }
0N/A if (byteOff + outputSize > outEnd) {
0N/A throw new ConversionBufferFullException();
0N/A }
0N/A for (int i = 0; i < outputSize; i++) {
0N/A output[byteOff++] = outputByte[i];
0N/A }
0N/A charOff += inputSize;
0N/A }
0N/A return byteOff - outOff;
0N/A }
0N/A public boolean canConvert(char ch) {
0N/A return true;
0N/A }
0N/A public int getMaxBytesPerChar() {
0N/A return 3;
0N/A }
0N/A public void reset() {
0N/A byteOff = charOff = 0;
0N/A highHalfZoneCode = 0;
0N/A }
0N/A public String getCharacterEncoding() {
0N/A return "UTF8";
0N/A }