CharToByteCp970.java revision 0
/*
 * Copyright 1997-2003 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
0N/Apackage sun.io;
0N/A
0N/Aimport sun.nio.cs.ext.IBM970;
0N/A
0N/A/**
0N/A* @author Malcolm Ayres
0N/A*/
0N/A
0N/Apublic class CharToByteCp970 extends CharToByteConverter
0N/A{
0N/A private static final char SBase = '\uAC00';
0N/A private static final char LBase = '\u1100';
0N/A private static final char VBase = '\u1161';
0N/A private static final char TBase = '\u11A7';
0N/A private static final int VCount = 21;
0N/A private static final int TCount = 28;
0N/A private static final byte G0 = 0;
0N/A private static final byte G1 = 1;
0N/A private static final byte G2 = 2;
0N/A private static final byte G3 = 3;
0N/A private byte charState = G0;
0N/A private char l, v, t;
0N/A
0N/A private byte[] outputByte;
0N/A
0N/A private char highHalfZoneCode;
0N/A private int mask1;
0N/A private int mask2;
0N/A private int shift;
0N/A
0N/A private short[] index1;
0N/A private String index2;
0N/A private String index2a;
0N/A
0N/A private final static IBM970 nioCoder = new IBM970();
0N/A
0N/A public CharToByteCp970() {
0N/A super();
0N/A highHalfZoneCode = 0;
0N/A outputByte = new byte[2];
0N/A mask1 = 0xFFF8;
0N/A mask2 = 0x0007;
0N/A shift = 3;
0N/A index1 = nioCoder.getEncoderIndex1();
0N/A index2 = nioCoder.getEncoderIndex2();
0N/A index2a = nioCoder.getEncoderIndex2a();
0N/A }
0N/A
0N/A /**
0N/A * flush out any residual data and reset the buffer state
0N/A */
0N/A public int flush(byte[] output, int outStart, int outEnd)
0N/A throws MalformedInputException,
0N/A ConversionBufferFullException
0N/A {
0N/A int bytesOut;
0N/A
0N/A byteOff = outStart;
0N/A
0N/A if (highHalfZoneCode != 0) {
0N/A reset();
0N/A badInputLength = 0;
0N/A throw new MalformedInputException();
0N/A }
0N/A
0N/A if (charState != G0) {
0N/A try {
0N/A unicodeToBuffer(composeHangul() ,output, outEnd);
0N/A }
0N/A catch(UnknownCharacterException e) {
0N/A reset();
0N/A badInputLength = 0;
0N/A throw new MalformedInputException();
0N/A }
0N/A charState = G0;
0N/A }
0N/A
0N/A bytesOut = byteOff - outStart;
0N/A
0N/A reset();
0N/A return bytesOut;
0N/A }
0N/A
0N/A /**
0N/A * Resets converter to its initial state.
0N/A */
0N/A public void reset() {
0N/A highHalfZoneCode = 0;
0N/A charState = G0;
0N/A charOff = byteOff = 0;
0N/A }
0N/A
0N/A /**
0N/A * Returns true if the given character can be converted to the
0N/A * target character encoding.
0N/A */
0N/A public boolean canConvert(char ch) {
0N/A int index;
0N/A int theBytes;
0N/A
0N/A index = index1[((ch & mask1) >> shift)] + (ch & mask2);
0N/A if (index < 15000)
0N/A theBytes = (int)(index2.charAt(index));
0N/A else
0N/A theBytes = (int)(index2a.charAt(index-15000));
0N/A
0N/A if (theBytes != 0)
0N/A return (true);
0N/A
0N/A // only return true if input char was unicode null - all others are
0N/A // undefined
0N/A return( ch == '\u0000');
0N/A }
0N/A
0N/A /**
0N/A * Character conversion
0N/A */
0N/A
0N/A public int convert(char[] input, int inOff, int inEnd,
0N/A byte[] output, int outOff, int outEnd)
0N/A throws UnknownCharacterException, MalformedInputException,
0N/A ConversionBufferFullException
0N/A {
0N/A char inputChar;
0N/A int inputSize;
0N/A
0N/A charOff = inOff;
0N/A byteOff = outOff;
0N/A
0N/A while (charOff < inEnd) {
0N/A
0N/A if (highHalfZoneCode == 0) {
0N/A inputChar = input[charOff];
0N/A inputSize = 1;
0N/A } else {
0N/A inputChar = highHalfZoneCode;
0N/A inputSize = 0;
0N/A highHalfZoneCode = 0;
0N/A }
0N/A
0N/A switch (charState) {
0N/A case G0:
0N/A
0N/A l = LBase;
0N/A v = VBase;
0N/A t = TBase;
0N/A
0N/A if ( isLeadingC(inputChar) ) { // Leading Consonant
0N/A l = inputChar;
0N/A charState = G1;
0N/A break;
0N/A }
0N/A
0N/A if ( isVowel(inputChar) ) { // Vowel
0N/A v = inputChar;
0N/A charState = G2;
0N/A break;
0N/A }
0N/A
0N/A if ( isTrailingC(inputChar) ) { // Trailing Consonant
0N/A t = inputChar;
0N/A charState = G3;
0N/A break;
0N/A }
0N/A
0N/A break;
0N/A
0N/A case G1:
0N/A if ( isLeadingC(inputChar) ) { // Leading Consonant
0N/A l = composeLL(l, inputChar);
0N/A break;
0N/A }
0N/A
0N/A if ( isVowel(inputChar) ) { // Vowel
0N/A v = inputChar;
0N/A charState = G2;
0N/A break;
0N/A }
0N/A
0N/A if ( isTrailingC(inputChar) ) { // Trailing Consonant
0N/A t = inputChar;
0N/A charState = G3;
0N/A break;
0N/A }
0N/A
0N/A unicodeToBuffer(composeHangul(), output, outEnd);
0N/A
0N/A charState = G0;
0N/A break;
0N/A
0N/A case G2:
0N/A if ( isLeadingC(inputChar) ) { // Leading Consonant
0N/A
0N/A unicodeToBuffer(composeHangul(), output, outEnd);
0N/A
0N/A l = inputChar;
0N/A v = VBase;
0N/A t = TBase;
0N/A charState = G1;
0N/A break;
0N/A }
0N/A
0N/A if ( isVowel(inputChar) ) { // Vowel
0N/A v = composeVV(l, inputChar);
0N/A charState = G2;
0N/A break;
0N/A }
0N/A
0N/A if ( isTrailingC(inputChar) ) { // Trailing Consonant
0N/A t = inputChar;
0N/A charState = G3;
0N/A break;
0N/A }
0N/A
0N/A unicodeToBuffer(composeHangul(), output, outEnd);
0N/A
0N/A charState = G0;
0N/A
0N/A break;
0N/A
0N/A case G3:
0N/A if ( isTrailingC(inputChar) ) { // Trailing Consonant
0N/A t = composeTT(t, inputChar);
0N/A charState = G3;
0N/A break;
0N/A }
0N/A
0N/A unicodeToBuffer(composeHangul(), output, outEnd);
0N/A
0N/A charState = G0;
0N/A
0N/A break;
0N/A }
0N/A
0N/A if (charState != G0)
0N/A charOff++;
0N/A else {
0N/A
0N/A // Is this a high surrogate?
0N/A if(inputChar >= '\ud800' && inputChar <= '\udbff') {
0N/A // Is this the last character of the input?
0N/A if (charOff + inputSize >= inEnd) {
0N/A highHalfZoneCode = inputChar;
0N/A charOff += inputSize;
0N/A break;
0N/A }
0N/A
0N/A // Is there a low surrogate following?
0N/A inputChar = input[charOff + inputSize];
0N/A if (inputChar >= '\udc00' && inputChar <= '\udfff') {
0N/A // We have a valid surrogate pair. Too bad we don't do
0N/A // surrogates. Is substitution enabled?
0N/A if (subMode) {
0N/A if (subBytes.length == 1) {
0N/A outputByte[0] = 0x00;
0N/A outputByte[1] = subBytes[0];
0N/A } else {
0N/A outputByte[0] = subBytes[0];
0N/A outputByte[1] = subBytes[1];
0N/A }
0N/A
0N/A bytesToBuffer(outputByte, output, outEnd);
0N/A inputSize++;
0N/A } else {
0N/A badInputLength = 2;
0N/A throw new UnknownCharacterException();
0N/A }
0N/A } else {
0N/A // We have a malformed surrogate pair
0N/A badInputLength = 1;
0N/A throw new MalformedInputException();
0N/A }
0N/A }
0N/A
0N/A // Is this an unaccompanied low surrogate?
0N/A else
0N/A if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
0N/A badInputLength = 1;
0N/A throw new MalformedInputException();
0N/A } else {
0N/A unicodeToBuffer(inputChar, output, outEnd);
0N/A }
0N/A
0N/A charOff += inputSize;
0N/A
0N/A }
0N/A
0N/A }
0N/A
0N/A return byteOff - outOff;
0N/A
0N/A }
0N/A
0N/A private char composeHangul() {
0N/A int lIndex, vIndex, tIndex;
0N/A
0N/A lIndex = l - LBase;
0N/A vIndex = v - VBase;
0N/A tIndex = t - TBase;
0N/A
0N/A return (char)((lIndex * VCount + vIndex) * TCount + tIndex + SBase);
0N/A }
0N/A
0N/A private char composeLL(char l1, char l2) {
0N/A return l2;
0N/A }
0N/A
0N/A private char composeVV(char v1, char v2) {
0N/A return v2;
0N/A }
0N/A
0N/A private char composeTT(char t1, char t2) {
0N/A return t2;
0N/A }
0N/A
0N/A private boolean isLeadingC(char c) {
0N/A return (c >= LBase && c <= '\u1159');
0N/A }
0N/A
0N/A private boolean isVowel(char c) {
0N/A return (c >= VBase && c <= '\u11a2');
0N/A }
0N/A
0N/A private boolean isTrailingC(char c) {
0N/A return (c >= TBase && c <= '\u11f9');
0N/A }
0N/A
0N/A /**
0N/A * returns the maximum number of bytes needed to convert a char
0N/A */
0N/A public int getMaxBytesPerChar() {
0N/A return 2;
0N/A }
0N/A
0N/A
0N/A /**
0N/A * Return the character set ID
0N/A */
0N/A public String getCharacterEncoding() {
0N/A return "Cp970";
0N/A }
0N/A
0N/A /**
0N/A * private function to add the bytes to the output buffer
0N/A */
0N/A private void bytesToBuffer(byte[] theBytes, byte[] output, int outEnd)
0N/A throws ConversionBufferFullException,
0N/A UnknownCharacterException {
0N/A
0N/A int spaceNeeded;
0N/A
0N/A // ensure sufficient space for the bytes(s)
0N/A
0N/A if (theBytes[0] == 0x00)
0N/A spaceNeeded = 1;
0N/A else
0N/A spaceNeeded = 2;
0N/A
0N/A if (byteOff + spaceNeeded > outEnd)
0N/A throw new ConversionBufferFullException();
0N/A
0N/A // move the data into the buffer
0N/A
0N/A if (spaceNeeded == 1)
0N/A output[byteOff++] = theBytes[1];
0N/A else {
0N/A output[byteOff++] = theBytes[0];
0N/A output[byteOff++] = theBytes[1];
0N/A }
0N/A
0N/A }
0N/A
0N/A /**
0N/A * private function to add a unicode character to the output buffer
0N/A */
0N/A private void unicodeToBuffer(char unicode, byte[] output, int outEnd)
0N/A throws ConversionBufferFullException,
0N/A UnknownCharacterException {
0N/A
0N/A int index;
0N/A int theBytes;
0N/A
0N/A // first we convert the unicode to its byte representation
0N/A
0N/A index = index1[((unicode & mask1) >> shift)] + (unicode & mask2);
0N/A if (index < 15000) {
0N/A theBytes = (int)(index2.charAt(index));
0N/A } else {
0N/A theBytes = (int)(index2a.charAt(index-15000));
0N/A }
0N/A outputByte[0] = (byte)((theBytes & 0x0000ff00)>>8);
0N/A outputByte[1] = (byte)(theBytes & 0x000000ff);
0N/A
0N/A // if the unicode was not mappable - look for the substitution bytes
0N/A
0N/A if (outputByte[0] == 0x00 && outputByte[1] == 0x00
0N/A && unicode != '\u0000') {
0N/A if (subMode) {
0N/A if (subBytes.length == 1) {
0N/A outputByte[0] = 0x00;
0N/A outputByte[1] = subBytes[0];
0N/A } else {
0N/A outputByte[0] = subBytes[0];
0N/A outputByte[1] = subBytes[1];
0N/A }
0N/A } else {
0N/A badInputLength = 1;
0N/A throw new UnknownCharacterException();
0N/A }
0N/A }
0N/A
0N/A // now put the bytes in the buffer
0N/A
0N/A bytesToBuffer(outputByte, output, outEnd);
0N/A
0N/A }
0N/A
0N/A}