StringCoding.java revision 0
/*
 * Copyright 2000-2006 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
0N/A
0N/Apackage java.lang;
0N/A
0N/Aimport java.io.CharConversionException;
0N/Aimport java.io.UnsupportedEncodingException;
0N/Aimport java.lang.ref.SoftReference;
0N/Aimport java.nio.ByteBuffer;
0N/Aimport java.nio.CharBuffer;
0N/Aimport java.nio.BufferOverflowException;
0N/Aimport java.nio.BufferUnderflowException;
0N/Aimport java.nio.charset.Charset;
0N/Aimport java.nio.charset.CharsetDecoder;
0N/Aimport java.nio.charset.CharsetEncoder;
0N/Aimport java.nio.charset.CharacterCodingException;
0N/Aimport java.nio.charset.CoderResult;
0N/Aimport java.nio.charset.CodingErrorAction;
0N/Aimport java.nio.charset.IllegalCharsetNameException;
0N/Aimport java.nio.charset.MalformedInputException;
964N/Aimport java.nio.charset.UnsupportedCharsetException;
964N/Aimport java.util.Arrays;
0N/Aimport sun.misc.MessageUtils;
0N/Aimport sun.nio.cs.HistoricallyNamedCharset;
0N/A
/**
 * Utility class for string encoding and decoding.
 */
0N/A
0N/Aclass StringCoding {
0N/A
18N/A private StringCoding() { }
18N/A
3323N/A /* The cached coders for each thread
18N/A */
3323N/A private static ThreadLocal decoder = new ThreadLocal();
0N/A private static ThreadLocal encoder = new ThreadLocal();
0N/A
0N/A private static boolean warnUnsupportedCharset = true;
18N/A
18N/A private static Object deref(ThreadLocal tl) {
0N/A SoftReference sr = (SoftReference)tl.get();
0N/A if (sr == null)
0N/A return null;
0N/A return sr.get();
0N/A }
18N/A
3386N/A private static void set(ThreadLocal tl, Object ob) {
0N/A tl.set(new SoftReference(ob));
0N/A }
0N/A
0N/A // Trim the given byte array to the given length
964N/A //
964N/A private static byte[] safeTrim(byte[] ba, int len, Charset cs) {
0N/A if (len == ba.length
0N/A && (System.getSecurityManager() == null
0N/A || cs.getClass().getClassLoader0() == null))
0N/A return ba;
0N/A else
0N/A return Arrays.copyOf(ba, len);
0N/A }
964N/A
964N/A // Trim the given char array to the given length
964N/A //
0N/A private static char[] safeTrim(char[] ca, int len, Charset cs) {
0N/A if (len == ca.length
0N/A && (System.getSecurityManager() == null
0N/A || cs.getClass().getClassLoader0() == null))
0N/A return ca;
0N/A else
0N/A return Arrays.copyOf(ca, len);
0N/A }
0N/A
0N/A private static int scale(int len, float expansionFactor) {
0N/A // We need to perform double, not float, arithmetic; otherwise
0N/A // we lose low order bits when len is larger than 2**24.
0N/A return (int)(len * (double)expansionFactor);
0N/A }
0N/A
0N/A private static Charset lookupCharset(String csn) {
0N/A if (Charset.isSupported(csn)) {
0N/A try {
0N/A return Charset.forName(csn);
0N/A } catch (UnsupportedCharsetException x) {
0N/A throw new Error(x);
0N/A }
0N/A }
0N/A return null;
0N/A }
0N/A
0N/A private static void warnUnsupportedCharset(String csn) {
0N/A if (warnUnsupportedCharset) {
0N/A // Use sun.misc.MessageUtils rather than the Logging API or
0N/A // System.err since this method may be called during VM
0N/A // initialization before either is available.
0N/A MessageUtils.err("WARNING: Default charset " + csn +
0N/A " not supported, using ISO-8859-1 instead");
0N/A warnUnsupportedCharset = false;
0N/A }
0N/A }
0N/A
0N/A
0N/A // -- Decoding --
964N/A private static class StringDecoder {
0N/A private final String requestedCharsetName;
0N/A private final Charset cs;
0N/A private final CharsetDecoder cd;
0N/A
0N/A private StringDecoder(Charset cs, String rcn) {
0N/A this.requestedCharsetName = rcn;
0N/A this.cs = cs;
964N/A this.cd = cs.newDecoder()
0N/A .onMalformedInput(CodingErrorAction.REPLACE)
0N/A .onUnmappableCharacter(CodingErrorAction.REPLACE);
0N/A }
0N/A
0N/A String charsetName() {
0N/A if (cs instanceof HistoricallyNamedCharset)
0N/A return ((HistoricallyNamedCharset)cs).historicalName();
0N/A return cs.name();
0N/A }
0N/A
0N/A final String requestedCharsetName() {
0N/A return requestedCharsetName;
0N/A }
0N/A
0N/A char[] decode(byte[] ba, int off, int len) {
0N/A int en = scale(len, cd.maxCharsPerByte());
0N/A char[] ca = new char[en];
964N/A if (len == 0)
964N/A return ca;
964N/A cd.reset();
964N/A ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
964N/A CharBuffer cb = CharBuffer.wrap(ca);
964N/A try {
964N/A CoderResult cr = cd.decode(bb, cb, true);
964N/A if (!cr.isUnderflow())
964N/A cr.throwException();
964N/A cr = cd.flush(cb);
964N/A if (!cr.isUnderflow())
964N/A cr.throwException();
964N/A } catch (CharacterCodingException x) {
964N/A // Substitution is always enabled,
964N/A // so this shouldn't happen
964N/A throw new Error(x);
964N/A }
964N/A return safeTrim(ca, cb.position(), cs);
964N/A }
964N/A
0N/A }
0N/A
0N/A static char[] decode(String charsetName, byte[] ba, int off, int len)
0N/A throws UnsupportedEncodingException
0N/A {
0N/A StringDecoder sd = (StringDecoder)deref(decoder);
0N/A String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
18N/A if ((sd == null) || !(csn.equals(sd.requestedCharsetName())
0N/A || csn.equals(sd.charsetName()))) {
0N/A sd = null;
0N/A try {
0N/A Charset cs = lookupCharset(csn);
0N/A if (cs != null)
0N/A sd = new StringDecoder(cs, csn);
0N/A } catch (IllegalCharsetNameException x) {}
0N/A if (sd == null)
0N/A throw new UnsupportedEncodingException(csn);
0N/A set(decoder, sd);
0N/A }
0N/A return sd.decode(ba, off, len);
0N/A }
0N/A
0N/A static char[] decode(Charset cs, byte[] ba, int off, int len) {
0N/A StringDecoder sd = new StringDecoder(cs, cs.name());
0N/A byte[] b = Arrays.copyOf(ba, ba.length);
964N/A return sd.decode(b, off, len);
964N/A }
964N/A
964N/A static char[] decode(byte[] ba, int off, int len) {
964N/A String csn = Charset.defaultCharset().name();
964N/A try {
964N/A return decode(csn, ba, off, len);
964N/A } catch (UnsupportedEncodingException x) {
964N/A warnUnsupportedCharset(csn);
964N/A }
964N/A try {
964N/A return decode("ISO-8859-1", ba, off, len);
964N/A } catch (UnsupportedEncodingException x) {
964N/A // If this code is hit during VM initialization, MessageUtils is
964N/A // the only way we will be able to get any kind of error message.
964N/A MessageUtils.err("ISO-8859-1 charset not available: "
964N/A + x.toString());
964N/A // If we can not find ISO-8859-1 (a required encoding) then things
964N/A // are seriously wrong with the installation.
964N/A System.exit(1);
964N/A return null;
964N/A }
964N/A }
964N/A
964N/A
964N/A
964N/A
964N/A // -- Encoding --
964N/A private static class StringEncoder {
964N/A private Charset cs;
964N/A private CharsetEncoder ce;
964N/A private final String requestedCharsetName;
964N/A
964N/A private StringEncoder(Charset cs, String rcn) {
964N/A this.requestedCharsetName = rcn;
964N/A this.cs = cs;
964N/A this.ce = cs.newEncoder()
964N/A .onMalformedInput(CodingErrorAction.REPLACE)
964N/A .onUnmappableCharacter(CodingErrorAction.REPLACE);
964N/A }
964N/A
964N/A String charsetName() {
964N/A if (cs instanceof HistoricallyNamedCharset)
964N/A return ((HistoricallyNamedCharset)cs).historicalName();
964N/A return cs.name();
964N/A }
964N/A
964N/A final String requestedCharsetName() {
964N/A return requestedCharsetName;
964N/A }
964N/A
0N/A byte[] encode(char[] ca, int off, int len) {
0N/A int en = scale(len, ce.maxBytesPerChar());
0N/A byte[] ba = new byte[en];
0N/A if (len == 0)
0N/A return ba;
0N/A
0N/A ce.reset();
0N/A ByteBuffer bb = ByteBuffer.wrap(ba);
0N/A CharBuffer cb = CharBuffer.wrap(ca, off, len);
0N/A try {
0N/A CoderResult cr = ce.encode(cb, bb, true);
0N/A if (!cr.isUnderflow())
0N/A cr.throwException();
0N/A cr = ce.flush(bb);
0N/A if (!cr.isUnderflow())
0N/A cr.throwException();
0N/A } catch (CharacterCodingException x) {
0N/A // Substitution is always enabled,
0N/A // so this shouldn't happen
0N/A throw new Error(x);
0N/A }
0N/A return safeTrim(ba, bb.position(), cs);
0N/A }
0N/A }
0N/A
0N/A static byte[] encode(String charsetName, char[] ca, int off, int len)
0N/A throws UnsupportedEncodingException
0N/A {
964N/A StringEncoder se = (StringEncoder)deref(encoder);
0N/A String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
0N/A if ((se == null) || !(csn.equals(se.requestedCharsetName())
0N/A || csn.equals(se.charsetName()))) {
0N/A se = null;
0N/A try {
0N/A Charset cs = lookupCharset(csn);
0N/A if (cs != null)
964N/A se = new StringEncoder(cs, csn);
0N/A } catch (IllegalCharsetNameException x) {}
0N/A if (se == null)
0N/A throw new UnsupportedEncodingException (csn);
0N/A set(encoder, se);
0N/A }
0N/A return se.encode(ca, off, len);
0N/A }
0N/A
0N/A static byte[] encode(Charset cs, char[] ca, int off, int len) {
0N/A StringEncoder se = new StringEncoder(cs, cs.name());
0N/A char[] c = Arrays.copyOf(ca, ca.length);
0N/A return se.encode(c, off, len);
0N/A }
0N/A
0N/A static byte[] encode(char[] ca, int off, int len) {
0N/A String csn = Charset.defaultCharset().name();
0N/A try {
964N/A return encode(csn, ca, off, len);
964N/A } catch (UnsupportedEncodingException x) {
964N/A warnUnsupportedCharset(csn);
964N/A }
964N/A try {
964N/A return encode("ISO-8859-1", ca, off, len);
964N/A } catch (UnsupportedEncodingException x) {
964N/A // If this code is hit during VM initialization, MessageUtils is
964N/A // the only way we will be able to get any kind of error message.
964N/A MessageUtils.err("ISO-8859-1 charset not available: "
964N/A + x.toString());
964N/A // If we can not find ISO-8859-1 (a required encoding) then things
964N/A // are seriously wrong with the installation.
964N/A System.exit(1);
964N/A return null;
964N/A }
964N/A }
964N/A}
964N/A