2N/A/*
2N/A * CDDL HEADER START
2N/A *
2N/A * The contents of this file are subject to the terms of the
2N/A * Common Development and Distribution License (the "License").
2N/A * You may not use this file except in compliance with the License.
2N/A *
2N/A * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
2N/A * or http://www.opensolaris.org/os/licensing.
2N/A * See the License for the specific language governing permissions
2N/A * and limitations under the License.
2N/A *
2N/A * When distributing Covered Code, include this CDDL HEADER in each
2N/A * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
2N/A * If applicable, add the following below this CDDL HEADER, with the
2N/A * fields enclosed by brackets "[]" replaced with your own identifying
2N/A * information: Portions Copyright [yyyy] [name of copyright owner]
2N/A *
2N/A * CDDL HEADER END
2N/A */
2N/A/*
2N/A * Copyright (c) 2001 by Sun Microsystems, Inc.
2N/A * All rights reserved.
2N/A *
2N/A */
2N/A
2N/A// IANACharCode.java: SLPv1 Character encoding support
2N/A// Author: James Kempf
2N/A// Created On: Fri Sep 11 13:24:02 1998
2N/A// Last Modified By: James Kempf
2N/A// Last Modified On: Wed Oct 28 14:33:02 1998
2N/A// Update Count: 7
2N/A//
2N/A
2N/A
2N/Apackage com.sun.slp;
2N/A
2N/Aimport java.util.*;
2N/Aimport java.io.*;
2N/A
2N/A/**
2N/A * The IANACharCode class supports static methods for decoding IANA
2N/A * character codes into strings appropriate for the Java Writer subclass
2N/A * encoding String arguments, and for encoding the String descriptions
2N/A * of character codings into the integer codes. Ideally, Java itself
2N/A * should support this.
2N/A *
2N/A * @author James Kempf
2N/A */
2N/A
2N/Aabstract class IANACharCode extends Object {
2N/A
2N/A // Character code descriptors. These can be used with the Java
2N/A // character encoding utilities. For Unicode, we use little on
2N/A // input,
2N/A
2N/A static final String ASCII = "Default";
2N/A static final String LATIN1 = "latin1";
2N/A static final String UTF8 = "UTF8";
2N/A static final String UNICODE = "Unicode";
2N/A static final String UNICODE_LITTLE = "UnicodeLittle";
2N/A static final String UNICODE_BIG = "UnicodeBig";
2N/A static final String UNICODE_BIG_NO_HDR = "UnicodeBigNoHdr";
2N/A
2N/A // Error code for misidentified character set.
2N/A
2N/A static final short CHARSET_NOT_UNDERSTOOD = 5;
2N/A
2N/A // Character codes.
2N/A
2N/A protected static final int CHAR_ASCII = 3;
2N/A protected static final int CHAR_LATIN1 = 4;
2N/A protected static final int CHAR_UTF8 = 6;
2N/A protected static final int CHAR_UNICODE = 1000;
2N/A
2N/A // First two bytes indicate that string is big/little endian Unicode.
2N/A // If this flag isn't set, then big endian is assumed and we
2N/A // must add the big endian bytes on every call.
2N/A
2N/A protected static final byte[] UNICODE_LITTLE_FLAG =
2N/A {(byte)0xFF, (byte)0xFE};
2N/A
2N/A protected static final byte[] UNICODE_BIG_FLAG =
2N/A {(byte)0xFE, (byte)0xFF};
2N/A
2N/A /**
2N/A * Encode the String describing a character encoding into
2N/A * the approprate integer descriptor code.
2N/A *
2N/A * @param encoding The String describing the encoding.
2N/A * @exception ServiceLocationCharSetNotUnderstoodException Thrown if the
2N/A * String is not recognized.
2N/A */
2N/A
2N/A static int encodeCharacterEncoding(String encoding)
2N/A throws ServiceLocationException {
2N/A
2N/A if (encoding.equals(ASCII)) {
2N/A return CHAR_ASCII;
2N/A } else if (encoding.equals(LATIN1)) {
2N/A return CHAR_LATIN1;
2N/A } else if (encoding.equals(UTF8)) {
2N/A return CHAR_UTF8;
2N/A } else if (encoding.equals(UNICODE)) {
2N/A return CHAR_UNICODE;
2N/A } else if (encoding.equals(UNICODE_BIG)) {
2N/A return CHAR_UNICODE;
2N/A } else if (encoding.equals(UNICODE_LITTLE)) {
2N/A return CHAR_UNICODE;
2N/A } else if (encoding.equals(UNICODE_BIG_NO_HDR)) {
2N/A return CHAR_UNICODE;
2N/A }
2N/A
2N/A throw
2N/A new ServiceLocationException(
2N/A CHARSET_NOT_UNDERSTOOD,
2N/A "v1_unsupported_encoding",
2N/A new Object[] {encoding});
2N/A }
2N/A
2N/A /**
2N/A * Decode the integer describing a character encoding into
2N/A * the approprate String descriptor.
2N/A *
2N/A * @param code The integer coding the String set.
2N/A * @exception ServiceLocationCharSetNotUnderstoodException Thrown if the
2N/A * integer is not recognized.
2N/A */
2N/A
2N/A static String decodeCharacterEncoding(int code)
2N/A throws ServiceLocationException {
2N/A
2N/A switch (code) {
2N/A case CHAR_ASCII: return ASCII;
2N/A case CHAR_LATIN1: return LATIN1;
2N/A case CHAR_UTF8: return UTF8;
2N/A case CHAR_UNICODE: return UNICODE;
2N/A }
2N/A
2N/A throw
2N/A new ServiceLocationException(
2N/A CHARSET_NOT_UNDERSTOOD,
2N/A "v1_unsupported_encoding",
2N/A new Object[] {Integer.toString(code)});
2N/A }
2N/A
2N/A /**
2N/A * Return a string of integers giving the character's encoding in
2N/A * the character set passed in as encoding.
2N/A *
2N/A * @param c The character to escape.
2N/A * @param encoding The character set encoding to use.
2N/A * @return The character as a string of integers for the encoding.
2N/A * @exception ServiceLocationException Thrown if the encoding is not
2N/A * recognized, if the character's encoding
2N/A * has more than 8 bytes or if the sign bit gets turned on.
2N/A */
2N/A
2N/A static String escapeChar(char c, String encoding)
2N/A throws ServiceLocationException {
2N/A
2N/A ByteArrayOutputStream baos = new ByteArrayOutputStream();
2N/A
2N/A try {
2N/A OutputStreamWriter osw = new OutputStreamWriter(baos, encoding);
2N/A
2N/A osw.write(c);
2N/A osw.flush();
2N/A
2N/A } catch (UnsupportedEncodingException ex) {
2N/A
2N/A throw
2N/A new ServiceLocationException(
2N/A CHARSET_NOT_UNDERSTOOD,
2N/A "v1_unsupported_encoding",
2N/A new Object[] {encoding});
2N/A
2N/A } catch (IOException ex) {
2N/A
2N/A }
2N/A
2N/A byte b[] = baos.toByteArray();
2N/A int code = 0;
2N/A
2N/A // Assemble the character code based on the encoding type.
2N/A
2N/A if (encoding.equals(UNICODE) ||
2N/A encoding.equals(UNICODE_BIG) ||
2N/A encoding.equals(UNICODE_LITTLE)) {
2N/A
2N/A code = (int)(b[0] & 0xFF); // control bytes...
2N/A code = (int)(code | ((b[1] & 0xFF) << 8));
2N/A code = (int)(code | ((b[2] & 0xFF) << 16));
2N/A code = (int)(code | ((b[3] & 0xFF) << 24));
2N/A
2N/A if (b.length <= 4) {
2N/A throw
2N/A new ServiceLocationException(
2N/A ServiceLocationException.PARSE_ERROR,
2N/A "v1_charcode_error",
2N/A new Object[] {new Character(c), encoding});
2N/A }
2N/A
2N/A } else if (encoding.equals(ASCII) || encoding.equals(LATIN1)) {
2N/A
2N/A code = (int)(b[0] & 0xFF);
2N/A
2N/A if (b.length > 1) {
2N/A throw
2N/A new ServiceLocationException(
2N/A ServiceLocationException.PARSE_ERROR,
2N/A "v1_charcode_error",
2N/A new Object[] {new Character(c), encoding});
2N/A }
2N/A } else if (encoding.equals(UTF8)) {
2N/A
2N/A if (b.length > 3) {
2N/A throw
2N/A new ServiceLocationException(
2N/A ServiceLocationException.PARSE_ERROR,
2N/A "v1_charcode_error",
2N/A new Object[] {new Character(c), encoding});
2N/A }
2N/A
2N/A
2N/A code = (int)(b[0] & 0xFF);
2N/A
2N/A if (b.length > 1) {
2N/A code = (int)(code | ((b[1] & 0xFF) << 8));
2N/A }
2N/A
2N/A if (b.length > 2) {
2N/A code = (int)(code | ((b[2] & 0xFF) << 16));
2N/A }
2N/A }
2N/A
2N/A return Integer.toString(code);
2N/A }
2N/A
2N/A /**
2N/A * Unescape the character encoded as the string.
2N/A *
2N/A * @param ch The character as a string of Integers.
2N/A * @param encoding The character set encoding to use.
2N/A * @return The character.
2N/A * @exception ServiceLocationException Thrown if the string can't
2N/A * be parsed into an integer or if the encoding isn't
2N/A * recognized.
2N/A */
2N/A
2N/A static String unescapeChar(String ch, String encoding)
2N/A throws ServiceLocationException {
2N/A
2N/A int code = 0;
2N/A
2N/A try {
2N/A code = Integer.parseInt(ch);
2N/A
2N/A } catch (NumberFormatException ex) {
2N/A throw
2N/A new ServiceLocationException(
2N/A ServiceLocationException.PARSE_ERROR,
2N/A "v1_stringcode_error",
2N/A new Object[] {ch, encoding});
2N/A
2N/A }
2N/A
2N/A // Convert to bytes. We need to taylor the array size to the
2N/A // number of bytes because otherwise, in encodings that
2N/A // take less bytes, the resulting string will have garbage
2N/A // in it.
2N/A
2N/A String str = null;
2N/A byte b0 = 0, b1 = 0, b2 = 0, b3 = 0;
2N/A byte b[] = null;
2N/A
2N/A b0 = (byte) (code & 0xFF);
2N/A b1 = (byte) ((code >> 8) & 0xFF);
2N/A b2 = (byte) ((code >> 16) & 0xFF);
2N/A b3 = (byte) ((code >> 24) & 0xFf);
2N/A
2N/A // We create an array sized to the encoding.
2N/A
2N/A if (encoding.equals(UNICODE_BIG) ||
2N/A encoding.equals(UNICODE_LITTLE)) {
2N/A b = new byte[4];
2N/A b[0] = b0;
2N/A b[1] = b1;
2N/A b[2] = b2;
2N/A b[3] = b3;
2N/A
2N/A } else if (encoding.equals(LATIN1) || encoding.equals(ASCII)) {
2N/A // single byte
2N/A b = new byte[1];
2N/A b[0] = b0;
2N/A
2N/A if (b1 != 0 || b2 != 0) {
2N/A throw
2N/A new ServiceLocationException(
2N/A ServiceLocationException.PARSE_ERROR,
2N/A "v1_stringcode_error",
2N/A new Object[] {ch, encoding});
2N/A }
2N/A
2N/A
2N/A } else if (encoding.equals(UTF8)) {// vari-byte
2N/A
2N/A if (b3 != 0) {
2N/A throw
2N/A new ServiceLocationException(
2N/A ServiceLocationException.PARSE_ERROR,
2N/A "v1_stringcode_error",
2N/A new Object[] {ch, encoding});
2N/A }
2N/A
2N/A if (b2 != 0) {
2N/A b = new byte[3];
2N/A b[2] = b2;
2N/A b[1] = b1;
2N/A b[0] = b0;
2N/A } else if (b1 != 0) {
2N/A b = new byte[2];
2N/A b[1] = b1;
2N/A b[0] = b0;
2N/A } else {
2N/A b = new byte[1];
2N/A b[0] = b0;
2N/A }
2N/A }
2N/A
2N/A // Make a string out of it.
2N/A
2N/A try {
2N/A str = new String(b, encoding);
2N/A
2N/A } catch (UnsupportedEncodingException ex) {
2N/A Assert.slpassert(false,
2N/A "v1_unsupported_encoding",
2N/A new Object[] {encoding});
2N/A }
2N/A
2N/A return str;
2N/A }
2N/A
2N/A // Determine from the flag bytes whether this is big or little endian
2N/A // Unicode. If there are no flag bytes, then just return UNICODE.
2N/A
2N/A static String getUnicodeEndianess(byte[] bytes) {
2N/A
2N/A if (bytes.length >= 2) {
2N/A
2N/A if (bytes[0] == UNICODE_LITTLE_FLAG[0] &&
2N/A bytes[1] == UNICODE_LITTLE_FLAG[1]) {
2N/A return UNICODE_LITTLE;
2N/A
2N/A } else if (bytes[0] == UNICODE_BIG_FLAG[0] &&
2N/A bytes[1] == UNICODE_BIG_FLAG[1]) {
2N/A return UNICODE_BIG;
2N/A
2N/A }
2N/A }
2N/A
2N/A // We can`t tell from the byte header, so it's big endian. But
2N/A // since we need to add the byte header, we say we don't know.
2N/A
2N/A return UNICODE;
2N/A
2N/A }
2N/A
2N/A // Add the big endian flag to a Unicode string.
2N/A
2N/A static byte[] addBigEndianFlag(byte[] bytes) {
2N/A
2N/A byte[] flaggedBytes = new byte[bytes.length + 2];
2N/A
2N/A flaggedBytes[0] = UNICODE_BIG_FLAG[0];
2N/A flaggedBytes[1] = UNICODE_BIG_FLAG[1];
2N/A
2N/A System.arraycopy(flaggedBytes, 2, bytes, 0, bytes.length);
2N/A
2N/A return flaggedBytes;
2N/A
2N/A }
2N/A}