sun/io/CharToByteCp970.java

	CharToByteCp970.java revision 0
/*
 * Copyright 1997-2003 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
0N/Apackage sun.io;
0N/A
0N/Aimport sun.nio.cs.ext.IBM970;
0N/A
0N/A/**
0N/A* @author Malcolm Ayres
0N/A*/
0N/A
0N/Apublic class CharToByteCp970 extends CharToByteConverter
0N/A{
0N/A    private static final char SBase = '\uAC00';
0N/A    private static final char LBase = '\u1100';
0N/A    private static final char VBase = '\u1161';
0N/A    private static final char TBase = '\u11A7';
0N/A    private static final int  VCount = 21;
0N/A    private static final int  TCount = 28;
0N/A    private static final byte G0 = 0;
0N/A    private static final byte G1 = 1;
0N/A    private static final byte G2 = 2;
0N/A    private static final byte G3 = 3;
0N/A    private byte   charState = G0;
0N/A    private char   l, v, t;
0N/A
0N/A    private byte[] outputByte;
0N/A
0N/A    private char highHalfZoneCode;
0N/A    private int  mask1;
0N/A    private int  mask2;
0N/A    private int  shift;
0N/A
0N/A    private short[] index1;
0N/A    private String index2;
0N/A    private String index2a;
0N/A
0N/A    private final static IBM970 nioCoder = new IBM970();
0N/A
0N/A    public CharToByteCp970() {
0N/A       super();
0N/A       highHalfZoneCode = 0;
0N/A       outputByte = new byte[2];
0N/A       mask1 = 0xFFF8;
0N/A       mask2 = 0x0007;
0N/A       shift = 3;
0N/A       index1 = nioCoder.getEncoderIndex1();
0N/A       index2 = nioCoder.getEncoderIndex2();
0N/A       index2a = nioCoder.getEncoderIndex2a();
0N/A    }
0N/A
0N/A    /**
0N/A      * flush out any residual data and reset the buffer state
0N/A      */
0N/A    public int flush(byte[] output, int outStart, int outEnd)
0N/A        throws MalformedInputException,
0N/A               ConversionBufferFullException
0N/A    {
0N/A       int bytesOut;
0N/A
0N/A       byteOff = outStart;
0N/A
0N/A       if (highHalfZoneCode != 0) {
0N/A           reset();
0N/A           badInputLength = 0;
0N/A           throw new MalformedInputException();
0N/A       }
0N/A
0N/A       if (charState != G0) {
0N/A           try {
0N/A              unicodeToBuffer(composeHangul() ,output, outEnd);
0N/A           }
0N/A           catch(UnknownCharacterException e) {
0N/A              reset();
0N/A              badInputLength = 0;
0N/A              throw new MalformedInputException();
0N/A           }
0N/A           charState = G0;
0N/A       }
0N/A
0N/A       bytesOut = byteOff - outStart;
0N/A
0N/A       reset();
0N/A       return bytesOut;
0N/A    }
0N/A
0N/A    /**
0N/A     * Resets converter to its initial state.
0N/A     */
0N/A    public void reset() {
0N/A       highHalfZoneCode = 0;
0N/A       charState = G0;
0N/A       charOff = byteOff = 0;
0N/A    }
0N/A
0N/A    /**
0N/A     * Returns true if the given character can be converted to the
0N/A     * target character encoding.
0N/A     */
0N/A    public boolean canConvert(char ch) {
0N/A       int  index;
0N/A       int  theBytes;
0N/A
0N/A       index = index1[((ch & mask1) >> shift)] + (ch & mask2);
0N/A       if (index < 15000)
0N/A         theBytes = (int)(index2.charAt(index));
0N/A       else
0N/A         theBytes = (int)(index2a.charAt(index-15000));
0N/A
0N/A       if (theBytes != 0)
0N/A          return (true);
0N/A
0N/A       // only return true if input char was unicode null - all others are
0N/A       //    undefined
0N/A       return( ch == '\u0000');
0N/A    }
0N/A
0N/A    /**
0N/A     * Character conversion
0N/A     */
0N/A
0N/A    public int convert(char[] input, int inOff, int inEnd,
0N/A                       byte[] output, int outOff, int outEnd)
0N/A        throws UnknownCharacterException, MalformedInputException,
0N/A               ConversionBufferFullException
0N/A    {
0N/A       char    inputChar;
0N/A       int     inputSize;
0N/A
0N/A       charOff = inOff;
0N/A       byteOff = outOff;
0N/A
0N/A       while (charOff < inEnd) {
0N/A
0N/A          if (highHalfZoneCode == 0) {
0N/A             inputChar = input[charOff];
0N/A             inputSize = 1;
0N/A          } else {
0N/A             inputChar = highHalfZoneCode;
0N/A             inputSize = 0;
0N/A             highHalfZoneCode = 0;
0N/A          }
0N/A
0N/A          switch (charState) {
0N/A          case G0:
0N/A
0N/A             l = LBase;
0N/A             v = VBase;
0N/A             t = TBase;
0N/A
0N/A             if ( isLeadingC(inputChar) ) {     // Leading Consonant
0N/A                l = inputChar;
0N/A                charState = G1;
0N/A                break;
0N/A             }
0N/A
0N/A             if ( isVowel(inputChar) ) {        // Vowel
0N/A                v = inputChar;
0N/A                charState = G2;
0N/A                break;
0N/A             }
0N/A
0N/A             if ( isTrailingC(inputChar) ) {    // Trailing Consonant
0N/A                t = inputChar;
0N/A                charState = G3;
0N/A                break;
0N/A             }
0N/A
0N/A             break;
0N/A
0N/A          case G1:
0N/A             if ( isLeadingC(inputChar) ) {     // Leading Consonant
0N/A                l = composeLL(l, inputChar);
0N/A                break;
0N/A             }
0N/A
0N/A             if ( isVowel(inputChar) ) {        // Vowel
0N/A                v = inputChar;
0N/A                charState = G2;
0N/A                break;
0N/A             }
0N/A
0N/A             if ( isTrailingC(inputChar) ) {    // Trailing Consonant
0N/A                t = inputChar;
0N/A                charState = G3;
0N/A                break;
0N/A             }
0N/A
0N/A             unicodeToBuffer(composeHangul(), output, outEnd);
0N/A
0N/A             charState = G0;
0N/A             break;
0N/A
0N/A          case G2:
0N/A             if ( isLeadingC(inputChar) ) {     // Leading Consonant
0N/A
0N/A                unicodeToBuffer(composeHangul(), output, outEnd);
0N/A
0N/A                l = inputChar;
0N/A                v = VBase;
0N/A                t = TBase;
0N/A                charState = G1;
0N/A                break;
0N/A             }
0N/A
0N/A             if ( isVowel(inputChar) ) {        // Vowel
0N/A                v = composeVV(l, inputChar);
0N/A                charState = G2;
0N/A                break;
0N/A             }
0N/A
0N/A             if ( isTrailingC(inputChar) ) {    // Trailing Consonant
0N/A                t = inputChar;
0N/A                charState = G3;
0N/A                break;
0N/A             }
0N/A
0N/A             unicodeToBuffer(composeHangul(), output, outEnd);
0N/A
0N/A             charState = G0;
0N/A
0N/A             break;
0N/A
0N/A          case G3:
0N/A             if ( isTrailingC(inputChar) ) {    // Trailing Consonant
0N/A                t = composeTT(t, inputChar);
0N/A                charState = G3;
0N/A                break;
0N/A             }
0N/A
0N/A             unicodeToBuffer(composeHangul(), output, outEnd);
0N/A
0N/A             charState = G0;
0N/A
0N/A             break;
0N/A          }
0N/A
0N/A          if (charState != G0)
0N/A             charOff++;
0N/A          else {
0N/A
0N/A             // Is this a high surrogate?
0N/A             if(inputChar >= '\ud800' && inputChar <= '\udbff') {
0N/A                // Is this the last character of the input?
0N/A                if (charOff + inputSize >= inEnd) {
0N/A                   highHalfZoneCode = inputChar;
0N/A                   charOff += inputSize;
0N/A                   break;
0N/A                }
0N/A
0N/A                // Is there a low surrogate following?
0N/A                inputChar = input[charOff + inputSize];
0N/A                if (inputChar >= '\udc00' && inputChar <= '\udfff') {
0N/A                   // We have a valid surrogate pair.  Too bad we don't do
0N/A                   // surrogates.  Is substitution enabled?
0N/A                   if (subMode) {
0N/A                      if (subBytes.length == 1) {
0N/A                         outputByte[0] = 0x00;
0N/A                         outputByte[1] = subBytes[0];
0N/A                      } else {
0N/A                         outputByte[0] = subBytes[0];
0N/A                         outputByte[1] = subBytes[1];
0N/A                      }
0N/A
0N/A                      bytesToBuffer(outputByte, output, outEnd);
0N/A                      inputSize++;
0N/A                   } else {
0N/A                      badInputLength = 2;
0N/A                      throw new UnknownCharacterException();
0N/A                   }
0N/A                } else {
0N/A                   // We have a malformed surrogate pair
0N/A                   badInputLength = 1;
0N/A                   throw new MalformedInputException();
0N/A                }
0N/A             }
0N/A
0N/A               // Is this an unaccompanied low surrogate?
0N/A             else
0N/A                if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
0N/A                   badInputLength = 1;
0N/A                   throw new MalformedInputException();
0N/A                } else {
0N/A                   unicodeToBuffer(inputChar, output, outEnd);
0N/A                }
0N/A
0N/A             charOff += inputSize;
0N/A
0N/A          }
0N/A
0N/A       }
0N/A
0N/A       return byteOff - outOff;
0N/A
0N/A    }
0N/A
0N/A    private char composeHangul() {
0N/A       int lIndex, vIndex, tIndex;
0N/A
0N/A       lIndex = l - LBase;
0N/A       vIndex = v - VBase;
0N/A       tIndex = t - TBase;
0N/A
0N/A       return (char)((lIndex * VCount + vIndex) * TCount + tIndex + SBase);
0N/A    }
0N/A
0N/A    private char composeLL(char l1, char l2) {
0N/A       return l2;
0N/A    }
0N/A
0N/A    private char composeVV(char v1, char v2) {
0N/A       return v2;
0N/A    }
0N/A
0N/A    private char composeTT(char t1, char t2) {
0N/A       return t2;
0N/A    }
0N/A
0N/A    private boolean isLeadingC(char c) {
0N/A       return (c >= LBase && c <= '\u1159');
0N/A    }
0N/A
0N/A    private boolean isVowel(char c) {
0N/A       return (c >= VBase && c <= '\u11a2');
0N/A    }
0N/A
0N/A    private boolean isTrailingC(char c) {
0N/A       return (c >= TBase && c <= '\u11f9');
0N/A    }
0N/A
0N/A    /**
0N/A     * returns the maximum number of bytes needed to convert a char
0N/A     */
0N/A    public int getMaxBytesPerChar() {
0N/A       return 2;
0N/A    }
0N/A
0N/A
0N/A    /**
0N/A     * Return the character set ID
0N/A     */
0N/A    public String getCharacterEncoding() {
0N/A       return "Cp970";
0N/A    }
0N/A
0N/A    /**
0N/A     * private function to add the bytes to the output buffer
0N/A     */
0N/A    private void bytesToBuffer(byte[] theBytes, byte[] output, int outEnd)
0N/A        throws ConversionBufferFullException,
0N/A               UnknownCharacterException {
0N/A
0N/A       int spaceNeeded;
0N/A
0N/A       // ensure sufficient space for the bytes(s)
0N/A
0N/A       if (theBytes[0] == 0x00)
0N/A          spaceNeeded = 1;
0N/A       else
0N/A          spaceNeeded = 2;
0N/A
0N/A       if (byteOff + spaceNeeded > outEnd)
0N/A          throw new ConversionBufferFullException();
0N/A
0N/A       // move the data into the buffer
0N/A
0N/A       if (spaceNeeded == 1)
0N/A          output[byteOff++] = theBytes[1];
0N/A       else {
0N/A          output[byteOff++] = theBytes[0];
0N/A          output[byteOff++] = theBytes[1];
0N/A       }
0N/A
0N/A    }
0N/A
0N/A    /**
0N/A     * private function to add a unicode character to the output buffer
0N/A     */
0N/A    private void unicodeToBuffer(char unicode, byte[] output, int outEnd)
0N/A        throws ConversionBufferFullException,
0N/A               UnknownCharacterException {
0N/A
0N/A       int index;
0N/A       int theBytes;
0N/A
0N/A       // first we convert the unicode to its byte representation
0N/A
0N/A       index = index1[((unicode & mask1) >> shift)] + (unicode & mask2);
0N/A       if (index < 15000) {
0N/A         theBytes = (int)(index2.charAt(index));
0N/A       } else {
0N/A         theBytes = (int)(index2a.charAt(index-15000));
0N/A       }
0N/A       outputByte[0] = (byte)((theBytes & 0x0000ff00)>>8);
0N/A       outputByte[1] = (byte)(theBytes & 0x000000ff);
0N/A
0N/A       // if the unicode was not mappable - look for the substitution bytes
0N/A
0N/A       if (outputByte[0] == 0x00 && outputByte[1] == 0x00
0N/A                          && unicode != '\u0000') {
0N/A          if (subMode) {
0N/A             if (subBytes.length == 1) {
0N/A                outputByte[0] = 0x00;
0N/A                outputByte[1] = subBytes[0];
0N/A             } else {
0N/A                outputByte[0] = subBytes[0];
0N/A                outputByte[1] = subBytes[1];
0N/A             }
0N/A          } else {
0N/A             badInputLength = 1;
0N/A             throw new UnknownCharacterException();
0N/A          }
0N/A       }
0N/A
0N/A       // now put the bytes in the buffer
0N/A
0N/A       bytesToBuffer(outputByte, output, outEnd);
0N/A
0N/A    }
0N/A
0N/A}