// StringCoding.java revision 3386
/*
 * Copyright (c) 2000, 2009, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
0N/A
0N/Apackage java.lang;
0N/A
0N/Aimport java.io.UnsupportedEncodingException;
0N/Aimport java.lang.ref.SoftReference;
0N/Aimport java.nio.ByteBuffer;
0N/Aimport java.nio.CharBuffer;
0N/Aimport java.nio.charset.Charset;
0N/Aimport java.nio.charset.CharsetDecoder;
0N/Aimport java.nio.charset.CharsetEncoder;
0N/Aimport java.nio.charset.CharacterCodingException;
0N/Aimport java.nio.charset.CoderResult;
0N/Aimport java.nio.charset.CodingErrorAction;
0N/Aimport java.nio.charset.IllegalCharsetNameException;
0N/Aimport java.nio.charset.UnsupportedCharsetException;
0N/Aimport java.util.Arrays;
0N/Aimport sun.misc.MessageUtils;
0N/Aimport sun.nio.cs.HistoricallyNamedCharset;
964N/Aimport sun.nio.cs.ArrayDecoder;
964N/Aimport sun.nio.cs.ArrayEncoder;
0N/A
0N/A/**
0N/A * Utility class for string encoding and decoding.
0N/A */
0N/A
0N/Aclass StringCoding {
0N/A
0N/A private StringCoding() { }
0N/A
18N/A /** The cached coders for each thread */
18N/A private final static ThreadLocal<SoftReference<StringDecoder>> decoder =
3323N/A new ThreadLocal<>();
18N/A private final static ThreadLocal<SoftReference<StringEncoder>> encoder =
3323N/A new ThreadLocal<>();
0N/A
0N/A private static boolean warnUnsupportedCharset = true;
0N/A
18N/A private static <T> T deref(ThreadLocal<SoftReference<T>> tl) {
18N/A SoftReference<T> sr = tl.get();
0N/A if (sr == null)
0N/A return null;
0N/A return sr.get();
0N/A }
0N/A
18N/A private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) {
3386N/A tl.set(new SoftReference<T>(ob));
0N/A }
0N/A
0N/A // Trim the given byte array to the given length
0N/A //
964N/A private static byte[] safeTrim(byte[] ba, int len, Charset cs, boolean isTrusted) {
964N/A if (len == ba.length && (isTrusted || System.getSecurityManager() == null))
0N/A return ba;
0N/A else
0N/A return Arrays.copyOf(ba, len);
0N/A }
0N/A
0N/A // Trim the given char array to the given length
0N/A //
964N/A private static char[] safeTrim(char[] ca, int len,
964N/A Charset cs, boolean isTrusted) {
964N/A if (len == ca.length && (isTrusted || System.getSecurityManager() == null))
0N/A return ca;
0N/A else
0N/A return Arrays.copyOf(ca, len);
0N/A }
0N/A
0N/A private static int scale(int len, float expansionFactor) {
0N/A // We need to perform double, not float, arithmetic; otherwise
0N/A // we lose low order bits when len is larger than 2**24.
0N/A return (int)(len * (double)expansionFactor);
0N/A }
0N/A
0N/A private static Charset lookupCharset(String csn) {
0N/A if (Charset.isSupported(csn)) {
0N/A try {
0N/A return Charset.forName(csn);
0N/A } catch (UnsupportedCharsetException x) {
0N/A throw new Error(x);
0N/A }
0N/A }
0N/A return null;
0N/A }
0N/A
0N/A private static void warnUnsupportedCharset(String csn) {
0N/A if (warnUnsupportedCharset) {
0N/A // Use sun.misc.MessageUtils rather than the Logging API or
0N/A // System.err since this method may be called during VM
0N/A // initialization before either is available.
0N/A MessageUtils.err("WARNING: Default charset " + csn +
0N/A " not supported, using ISO-8859-1 instead");
0N/A warnUnsupportedCharset = false;
0N/A }
0N/A }
0N/A
0N/A
0N/A // -- Decoding --
0N/A private static class StringDecoder {
0N/A private final String requestedCharsetName;
0N/A private final Charset cs;
0N/A private final CharsetDecoder cd;
964N/A private final boolean isTrusted;
0N/A
0N/A private StringDecoder(Charset cs, String rcn) {
0N/A this.requestedCharsetName = rcn;
0N/A this.cs = cs;
0N/A this.cd = cs.newDecoder()
0N/A .onMalformedInput(CodingErrorAction.REPLACE)
0N/A .onUnmappableCharacter(CodingErrorAction.REPLACE);
964N/A this.isTrusted = (cs.getClass().getClassLoader0() == null);
0N/A }
0N/A
0N/A String charsetName() {
0N/A if (cs instanceof HistoricallyNamedCharset)
0N/A return ((HistoricallyNamedCharset)cs).historicalName();
0N/A return cs.name();
0N/A }
0N/A
0N/A final String requestedCharsetName() {
0N/A return requestedCharsetName;
0N/A }
0N/A
0N/A char[] decode(byte[] ba, int off, int len) {
0N/A int en = scale(len, cd.maxCharsPerByte());
0N/A char[] ca = new char[en];
0N/A if (len == 0)
0N/A return ca;
964N/A if (cd instanceof ArrayDecoder) {
964N/A int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
964N/A return safeTrim(ca, clen, cs, isTrusted);
964N/A } else {
964N/A cd.reset();
964N/A ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
964N/A CharBuffer cb = CharBuffer.wrap(ca);
964N/A try {
964N/A CoderResult cr = cd.decode(bb, cb, true);
964N/A if (!cr.isUnderflow())
964N/A cr.throwException();
964N/A cr = cd.flush(cb);
964N/A if (!cr.isUnderflow())
964N/A cr.throwException();
964N/A } catch (CharacterCodingException x) {
964N/A // Substitution is always enabled,
964N/A // so this shouldn't happen
964N/A throw new Error(x);
964N/A }
964N/A return safeTrim(ca, cb.position(), cs, isTrusted);
0N/A }
0N/A }
0N/A }
0N/A
0N/A static char[] decode(String charsetName, byte[] ba, int off, int len)
0N/A throws UnsupportedEncodingException
0N/A {
18N/A StringDecoder sd = deref(decoder);
0N/A String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
0N/A if ((sd == null) || !(csn.equals(sd.requestedCharsetName())
0N/A || csn.equals(sd.charsetName()))) {
0N/A sd = null;
0N/A try {
0N/A Charset cs = lookupCharset(csn);
0N/A if (cs != null)
0N/A sd = new StringDecoder(cs, csn);
0N/A } catch (IllegalCharsetNameException x) {}
0N/A if (sd == null)
0N/A throw new UnsupportedEncodingException(csn);
0N/A set(decoder, sd);
0N/A }
0N/A return sd.decode(ba, off, len);
0N/A }
0N/A
0N/A static char[] decode(Charset cs, byte[] ba, int off, int len) {
964N/A // (1)We never cache the "external" cs, the only benefit of creating
964N/A // an additional StringDe/Encoder object to wrap it is to share the
964N/A // de/encode() method. These SD/E objects are short-lifed, the young-gen
964N/A // gc should be able to take care of them well. But the best approash
964N/A // is still not to generate them if not really necessary.
964N/A // (2)The defensive copy of the input byte/char[] has a big performance
964N/A // impact, as well as the outgoing result byte/char[]. Need to do the
964N/A // optimization check of (sm==null && classLoader0==null) for both.
964N/A // (3)getClass().getClassLoader0() is expensive
964N/A // (4)There might be a timing gap in isTrusted setting. getClassLoader0()
964N/A // is only chcked (and then isTrusted gets set) when (SM==null). It is
964N/A // possible that the SM==null for now but then SM is NOT null later
964N/A // when safeTrim() is invoked...the "safe" way to do is to redundant
964N/A // check (... && (isTrusted || SM == null || getClassLoader0())) in trim
964N/A // but it then can be argued that the SM is null when the opertaion
964N/A // is started...
964N/A CharsetDecoder cd = cs.newDecoder();
964N/A int en = scale(len, cd.maxCharsPerByte());
964N/A char[] ca = new char[en];
964N/A if (len == 0)
964N/A return ca;
964N/A boolean isTrusted = false;
964N/A if (System.getSecurityManager() != null) {
964N/A if (!(isTrusted = (cs.getClass().getClassLoader0() == null))) {
964N/A ba = Arrays.copyOfRange(ba, off, off + len);
964N/A off = 0;
964N/A }
964N/A }
964N/A if (cd instanceof ArrayDecoder) {
964N/A int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
964N/A return safeTrim(ca, clen, cs, isTrusted);
964N/A } else {
964N/A cd.onMalformedInput(CodingErrorAction.REPLACE)
964N/A .onUnmappableCharacter(CodingErrorAction.REPLACE)
964N/A .reset();
964N/A ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
964N/A CharBuffer cb = CharBuffer.wrap(ca);
964N/A try {
964N/A CoderResult cr = cd.decode(bb, cb, true);
964N/A if (!cr.isUnderflow())
964N/A cr.throwException();
964N/A cr = cd.flush(cb);
964N/A if (!cr.isUnderflow())
964N/A cr.throwException();
964N/A } catch (CharacterCodingException x) {
964N/A // Substitution is always enabled,
964N/A // so this shouldn't happen
964N/A throw new Error(x);
964N/A }
964N/A return safeTrim(ca, cb.position(), cs, isTrusted);
964N/A }
0N/A }
0N/A
0N/A static char[] decode(byte[] ba, int off, int len) {
0N/A String csn = Charset.defaultCharset().name();
0N/A try {
0N/A return decode(csn, ba, off, len);
0N/A } catch (UnsupportedEncodingException x) {
0N/A warnUnsupportedCharset(csn);
0N/A }
0N/A try {
0N/A return decode("ISO-8859-1", ba, off, len);
0N/A } catch (UnsupportedEncodingException x) {
0N/A // If this code is hit during VM initialization, MessageUtils is
0N/A // the only way we will be able to get any kind of error message.
0N/A MessageUtils.err("ISO-8859-1 charset not available: "
0N/A + x.toString());
0N/A // If we can not find ISO-8859-1 (a required encoding) then things
0N/A // are seriously wrong with the installation.
0N/A System.exit(1);
0N/A return null;
0N/A }
0N/A }
0N/A
0N/A // -- Encoding --
0N/A private static class StringEncoder {
0N/A private Charset cs;
0N/A private CharsetEncoder ce;
0N/A private final String requestedCharsetName;
964N/A private final boolean isTrusted;
0N/A
0N/A private StringEncoder(Charset cs, String rcn) {
0N/A this.requestedCharsetName = rcn;
0N/A this.cs = cs;
0N/A this.ce = cs.newEncoder()
0N/A .onMalformedInput(CodingErrorAction.REPLACE)
0N/A .onUnmappableCharacter(CodingErrorAction.REPLACE);
964N/A this.isTrusted = (cs.getClass().getClassLoader0() == null);
0N/A }
0N/A
0N/A String charsetName() {
0N/A if (cs instanceof HistoricallyNamedCharset)
0N/A return ((HistoricallyNamedCharset)cs).historicalName();
0N/A return cs.name();
0N/A }
0N/A
0N/A final String requestedCharsetName() {
0N/A return requestedCharsetName;
0N/A }
0N/A
0N/A byte[] encode(char[] ca, int off, int len) {
0N/A int en = scale(len, ce.maxBytesPerChar());
0N/A byte[] ba = new byte[en];
0N/A if (len == 0)
0N/A return ba;
964N/A if (ce instanceof ArrayEncoder) {
964N/A int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
964N/A return safeTrim(ba, blen, cs, isTrusted);
964N/A } else {
964N/A ce.reset();
964N/A ByteBuffer bb = ByteBuffer.wrap(ba);
964N/A CharBuffer cb = CharBuffer.wrap(ca, off, len);
964N/A try {
964N/A CoderResult cr = ce.encode(cb, bb, true);
964N/A if (!cr.isUnderflow())
964N/A cr.throwException();
964N/A cr = ce.flush(bb);
964N/A if (!cr.isUnderflow())
964N/A cr.throwException();
964N/A } catch (CharacterCodingException x) {
964N/A // Substitution is always enabled,
964N/A // so this shouldn't happen
964N/A throw new Error(x);
964N/A }
964N/A return safeTrim(ba, bb.position(), cs, isTrusted);
0N/A }
0N/A }
0N/A }
0N/A
0N/A static byte[] encode(String charsetName, char[] ca, int off, int len)
0N/A throws UnsupportedEncodingException
0N/A {
18N/A StringEncoder se = deref(encoder);
0N/A String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
0N/A if ((se == null) || !(csn.equals(se.requestedCharsetName())
0N/A || csn.equals(se.charsetName()))) {
0N/A se = null;
0N/A try {
0N/A Charset cs = lookupCharset(csn);
0N/A if (cs != null)
0N/A se = new StringEncoder(cs, csn);
0N/A } catch (IllegalCharsetNameException x) {}
0N/A if (se == null)
0N/A throw new UnsupportedEncodingException (csn);
0N/A set(encoder, se);
0N/A }
0N/A return se.encode(ca, off, len);
0N/A }
0N/A
0N/A static byte[] encode(Charset cs, char[] ca, int off, int len) {
964N/A CharsetEncoder ce = cs.newEncoder();
964N/A int en = scale(len, ce.maxBytesPerChar());
964N/A byte[] ba = new byte[en];
964N/A if (len == 0)
964N/A return ba;
964N/A boolean isTrusted = false;
964N/A if (System.getSecurityManager() != null) {
964N/A if (!(isTrusted = (cs.getClass().getClassLoader0() == null))) {
964N/A ca = Arrays.copyOfRange(ca, off, off + len);
964N/A off = 0;
964N/A }
964N/A }
964N/A if (ce instanceof ArrayEncoder) {
964N/A int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
964N/A return safeTrim(ba, blen, cs, isTrusted);
964N/A } else {
964N/A ce.onMalformedInput(CodingErrorAction.REPLACE)
964N/A .onUnmappableCharacter(CodingErrorAction.REPLACE)
964N/A .reset();
964N/A ByteBuffer bb = ByteBuffer.wrap(ba);
964N/A CharBuffer cb = CharBuffer.wrap(ca, off, len);
964N/A try {
964N/A CoderResult cr = ce.encode(cb, bb, true);
964N/A if (!cr.isUnderflow())
964N/A cr.throwException();
964N/A cr = ce.flush(bb);
964N/A if (!cr.isUnderflow())
964N/A cr.throwException();
964N/A } catch (CharacterCodingException x) {
964N/A throw new Error(x);
964N/A }
964N/A return safeTrim(ba, bb.position(), cs, isTrusted);
964N/A }
0N/A }
0N/A
0N/A static byte[] encode(char[] ca, int off, int len) {
0N/A String csn = Charset.defaultCharset().name();
0N/A try {
0N/A return encode(csn, ca, off, len);
0N/A } catch (UnsupportedEncodingException x) {
0N/A warnUnsupportedCharset(csn);
0N/A }
0N/A try {
0N/A return encode("ISO-8859-1", ca, off, len);
0N/A } catch (UnsupportedEncodingException x) {
0N/A // If this code is hit during VM initialization, MessageUtils is
0N/A // the only way we will be able to get any kind of error message.
0N/A MessageUtils.err("ISO-8859-1 charset not available: "
0N/A + x.toString());
0N/A // If we can not find ISO-8859-1 (a required encoding) then things
0N/A // are seriously wrong with the installation.
0N/A System.exit(1);
0N/A return null;
0N/A }
0N/A }
0N/A}