0N/A/*
3261N/A * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
0N/A * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
0N/A *
0N/A * This code is free software; you can redistribute it and/or modify it
0N/A * under the terms of the GNU General Public License version 2 only, as
2362N/A * published by the Free Software Foundation. Oracle designates this
0N/A * particular file as subject to the "Classpath" exception as provided
2362N/A * by Oracle in the LICENSE file that accompanied this code.
0N/A *
0N/A * This code is distributed in the hope that it will be useful, but WITHOUT
0N/A * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0N/A * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
0N/A * version 2 for more details (a copy is included in the LICENSE file that
0N/A * accompanied this code).
0N/A *
0N/A * You should have received a copy of the GNU General Public License version
0N/A * 2 along with this work; if not, write to the Free Software Foundation,
0N/A * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
0N/A *
2362N/A * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
2362N/A * or visit www.oracle.com if you need additional information or have any
2362N/A * questions.
0N/A */
0N/Apackage sun.io;
0N/A
0N/Aimport java.io.UnsupportedEncodingException;
0N/Aimport sun.nio.cs.ext.JISAutoDetect;
0N/A
0N/Apublic class ByteToCharJISAutoDetect extends ByteToCharConverter {
0N/A
0N/A private final static int EUCJP_MASK = 0x01;
0N/A private final static int SJIS2B_MASK = 0x02;
0N/A private final static int SJIS1B_MASK = 0x04;
0N/A private final static int EUCJP_KANA1_MASK = 0x08;
0N/A private final static int EUCJP_KANA2_MASK = 0x10;
2472N/A private final static byte[] maskTable1 = JISAutoDetect.getByteMask1();
2472N/A private final static byte[] maskTable2 = JISAutoDetect.getByteMask2();
0N/A
0N/A private final static int SS2 = 0x8e;
0N/A private final static int SS3 = 0x8f;
0N/A
0N/A // SJISName is set to either "SJIS" or "MS932"
0N/A private String SJISName;
0N/A private String EUCJPName;
0N/A
0N/A private String convName = null;
0N/A private ByteToCharConverter detectedConv = null;
0N/A private ByteToCharConverter defaultConv = null;
0N/A
0N/A public ByteToCharJISAutoDetect() {
0N/A super();
0N/A SJISName = CharacterEncoding.getSJISName();
0N/A EUCJPName = CharacterEncoding.getEUCJPName();
0N/A defaultConv = new ByteToCharISO8859_1();
0N/A defaultConv.subChars = subChars;
0N/A defaultConv.subMode = subMode;
0N/A }
0N/A
0N/A public int flush(char [] output, int outStart, int outEnd)
0N/A throws MalformedInputException, ConversionBufferFullException
0N/A {
0N/A badInputLength = 0;
0N/A if(detectedConv != null)
0N/A return detectedConv.flush(output, outStart, outEnd);
0N/A else
0N/A return defaultConv.flush(output, outStart, outEnd);
0N/A }
0N/A
0N/A
0N/A /**
0N/A * Character conversion
0N/A */
0N/A public int convert(byte[] input, int inOff, int inEnd,
0N/A char[] output, int outOff, int outEnd)
0N/A throws UnknownCharacterException, MalformedInputException,
0N/A ConversionBufferFullException
0N/A {
0N/A int num = 0;
0N/A
0N/A charOff = outOff;
0N/A byteOff = inOff;
0N/A
0N/A try {
0N/A if (detectedConv == null) {
0N/A int euckana = 0;
0N/A int ss2count = 0;
0N/A int firstmask = 0;
0N/A int secondmask = 0;
0N/A int cnt;
0N/A boolean nonAsciiFound = false;
0N/A
0N/A for (cnt = inOff; cnt < inEnd; cnt++) {
0N/A firstmask = 0;
0N/A secondmask = 0;
0N/A int byte1 = input[cnt]&0xff;
0N/A int byte2;
0N/A
0N/A // TODO: should check valid escape sequences!
0N/A if (byte1 == 0x1b) {
0N/A convName = "ISO2022JP";
0N/A break;
0N/A }
0N/A
0N/A // Try to convert all leading ASCII characters.
0N/A if ((nonAsciiFound == false) && (byte1 < 0x80)) {
0N/A if (charOff >= outEnd)
0N/A throw new ConversionBufferFullException();
0N/A output[charOff++] = (char) byte1;
0N/A byteOff++;
0N/A num++;
0N/A continue;
0N/A }
0N/A
0N/A // We can no longer convert ASCII.
0N/A nonAsciiFound = true;
0N/A
0N/A firstmask = maskTable1[byte1];
0N/A if (byte1 == SS2)
0N/A ss2count++;
0N/A
0N/A if (firstmask != 0) {
0N/A if (cnt+1 < inEnd) {
0N/A byte2 = input[++cnt] & 0xff;
0N/A secondmask = maskTable2[byte2];
0N/A int mask = firstmask & secondmask;
0N/A if (mask == EUCJP_MASK) {
0N/A convName = EUCJPName;
0N/A break;
0N/A }
0N/A if ((mask == SJIS2B_MASK) || (mask == SJIS1B_MASK)
2472N/A || (JISAutoDetect.canBeSJIS1B(firstmask) && secondmask == 0)) {
0N/A convName = SJISName;
0N/A break;
0N/A }
0N/A
0N/A // If the first byte is a SS3 and the third byte
0N/A // is not an EUC byte, it should be SJIS.
0N/A // Otherwise, we can't determine it yet, but it's
0N/A // very likely SJIS. So we don't take the EUCJP CS3
0N/A // character boundary. If we tried both
0N/A // possibilities here, it might be able to be
0N/A // determined correctly.
2472N/A if ((byte1 == SS3) && JISAutoDetect.canBeEUCJP(secondmask)) {
0N/A if (cnt+1 < inEnd) {
0N/A int nextbyte = input[cnt+1] & 0xff;
2472N/A if (! JISAutoDetect.canBeEUCJP(maskTable2[nextbyte]))
0N/A convName = SJISName;
0N/A } else
0N/A convName = SJISName;
0N/A }
2472N/A if (JISAutoDetect.canBeEUCKana(firstmask, secondmask))
0N/A euckana++;
0N/A } else {
0N/A if ((firstmask & SJIS1B_MASK) != 0) {
0N/A convName = SJISName;
0N/A break;
0N/A }
0N/A }
0N/A }
0N/A }
0N/A
0N/A if (nonAsciiFound && (convName == null)) {
0N/A if ((euckana > 1) || (ss2count > 1))
0N/A convName = EUCJPName;
0N/A else
0N/A convName = SJISName;
0N/A }
0N/A
0N/A if (convName != null) {
0N/A try {
0N/A detectedConv = ByteToCharConverter.getConverter(convName);
0N/A detectedConv.subChars = subChars;
0N/A detectedConv.subMode = subMode;
0N/A } catch (UnsupportedEncodingException e){
0N/A detectedConv = null;
0N/A convName = null;
0N/A }
0N/A }
0N/A }
0N/A } catch (ConversionBufferFullException bufferFullException) {
0N/A throw bufferFullException;
0N/A } catch (Exception e) {
0N/A // If we fail to detect the converter needed for any reason,
0N/A // use the default converter.
0N/A detectedConv = defaultConv;
0N/A }
0N/A
0N/A // If we've converted all ASCII characters, then return.
0N/A if (byteOff == inEnd) {
0N/A return num;
0N/A }
0N/A
0N/A if(detectedConv != null) {
0N/A try {
0N/A num += detectedConv.convert(input, inOff + num, inEnd,
0N/A output, outOff + num, outEnd);
0N/A } finally {
0N/A charOff = detectedConv.nextCharIndex();
0N/A byteOff = detectedConv.nextByteIndex();
0N/A badInputLength = detectedConv.badInputLength;
0N/A }
0N/A } else {
0N/A try {
0N/A num += defaultConv.convert(input, inOff + num, inEnd,
0N/A output, outOff + num, outEnd);
0N/A } finally {
0N/A charOff = defaultConv.nextCharIndex();
0N/A byteOff = defaultConv.nextByteIndex();
0N/A badInputLength = defaultConv.badInputLength;
0N/A }
0N/A }
0N/A return num;
0N/A }
0N/A
0N/A public void reset() {
0N/A if(detectedConv != null) {
0N/A detectedConv.reset();
0N/A detectedConv = null;
0N/A convName = null;
0N/A } else
0N/A defaultConv.reset();
0N/A charOff = byteOff = 0;
0N/A }
0N/A
0N/A public String getCharacterEncoding() {
0N/A return "JISAutoDetect";
0N/A }
0N/A
0N/A public String toString() {
0N/A String s = getCharacterEncoding();
0N/A if (detectedConv != null) {
0N/A s += "[" + detectedConv.getCharacterEncoding() + "]";
0N/A } else {
0N/A s += "[unknown]";
0N/A }
0N/A return s;
0N/A }
0N/A}