325N/A/*
325N/A * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
325N/A * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
325N/A *
325N/A * This code is free software; you can redistribute it and/or modify it
325N/A * under the terms of the GNU General Public License version 2 only, as
325N/A * published by the Free Software Foundation. Oracle designates this
325N/A * particular file as subject to the "Classpath" exception as provided
325N/A * by Oracle in the LICENSE file that accompanied this code.
325N/A *
325N/A * This code is distributed in the hope that it will be useful, but WITHOUT
325N/A * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
325N/A * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
325N/A * version 2 for more details (a copy is included in the LICENSE file that
325N/A * accompanied this code).
325N/A *
325N/A * You should have received a copy of the GNU General Public License version
325N/A * 2 along with this work; if not, write to the Free Software Foundation,
325N/A * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
325N/A *
325N/A * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
325N/A * or visit www.oracle.com if you need additional information or have any
325N/A * questions.
325N/A */
325N/A
325N/A/*
325N/A * @(#)MimeUtility.java 1.45 03/03/10
325N/A */
325N/A
325N/A
325N/A
325N/Apackage com.sun.xml.internal.messaging.saaj.packaging.mime.internet;
325N/A
325N/Aimport java.io.*;
325N/Aimport java.util.*;
325N/A
325N/Aimport javax.activation.DataHandler;
325N/Aimport javax.activation.DataSource;
325N/A
325N/Aimport com.sun.xml.internal.messaging.saaj.packaging.mime.MessagingException;
325N/Aimport com.sun.xml.internal.messaging.saaj.packaging.mime.util.*;
325N/Aimport com.sun.xml.internal.messaging.saaj.util.SAAJUtil;
325N/A
325N/A/**
325N/A * This is a utility class that provides various MIME related
325N/A * functionality. <p>
325N/A *
325N/A * There are a set of methods to encode and decode MIME headers as
325N/A * per RFC 2047. A brief description on handling such headers is
325N/A * given below: <p>
325N/A *
325N/A * RFC 822 mail headers <strong>must</strong> contain only US-ASCII
325N/A * characters. Headers that contain non US-ASCII characters must be
325N/A * encoded so that they contain only US-ASCII characters. Basically,
325N/A * this process involves using either BASE64 or QP to encode certain
325N/A * characters. RFC 2047 describes this in detail. <p>
325N/A *
325N/A * In Java, Strings contain (16 bit) Unicode characters. ASCII is a
325N/A * subset of Unicode (and occupies the range 0 - 127). A String
325N/A * that contains only ASCII characters is already mail-safe. If the
325N/A * String contains non US-ASCII characters, it must be encoded. An
325N/A * additional complexity in this step is that since Unicode is not
325N/A * yet a widely used charset, one might want to first charset-encode
325N/A * the String into another charset and then do the transfer-encoding.
325N/A * <p>
325N/A * Note that to get the actual bytes of a mail-safe String (say,
325N/A * for sending over SMTP), one must do
325N/A * <p><blockquote><pre>
325N/A *
325N/A * byte[] bytes = string.getBytes("iso-8859-1");
325N/A *
325N/A * </pre></blockquote><p>
325N/A *
325N/A * The <code>setHeader</code> and <code>addHeader</code> methods
325N/A * on MimeMessage and MimeBodyPart assume that the given header values
325N/A * are Unicode strings that contain only US-ASCII characters. Hence
 * the callers of those methods must ensure that the values they pass
325N/A * do not contain non US-ASCII characters. The methods in this class
325N/A * help do this. <p>
325N/A *
325N/A * The <code>getHeader</code> family of methods on MimeMessage and
325N/A * MimeBodyPart return the raw header value. These might be encoded
325N/A * as per RFC 2047, and if so, must be decoded into Unicode Strings.
325N/A * The methods in this class help to do this. <p>
325N/A *
325N/A * Several System properties control strict conformance to the MIME
325N/A * spec. Note that these are not session properties but must be set
325N/A * globally as System properties. <p>
325N/A *
325N/A * The <code>mail.mime.decodetext.strict</code> property controls
325N/A * decoding of MIME encoded words. The MIME spec requires that encoded
325N/A * words start at the beginning of a whitespace separated word. Some
325N/A * mailers incorrectly include encoded words in the middle of a word.
325N/A * If the <code>mail.mime.decodetext.strict</code> System property is
325N/A * set to <code>"false"</code>, an attempt will be made to decode these
325N/A * illegal encoded words. The default is true. <p>
325N/A *
325N/A * The <code>mail.mime.encodeeol.strict</code> property controls the
325N/A * choice of Content-Transfer-Encoding for MIME parts that are not of
325N/A * type "text". Often such parts will contain textual data for which
325N/A * an encoding that allows normal end of line conventions is appropriate.
325N/A * In rare cases, such a part will appear to contain entirely textual
325N/A * data, but will require an encoding that preserves CR and LF characters
 * without change. If the <code>mail.mime.encodeeol.strict</code>
 * System property is set to <code>"true"</code>, such an encoding will
 * be used when necessary. The default is false. <p>
325N/A *
325N/A * In addition, the <code>mail.mime.charset</code> System property can
325N/A * be used to specify the default MIME charset to use for encoded words
325N/A * and text parts that don't otherwise specify a charset. Normally, the
325N/A * default MIME charset is derived from the default Java charset, as
325N/A * specified in the <code>file.encoding</code> System property. Most
325N/A * applications will have no need to explicitly set the default MIME
325N/A * charset. In cases where the default MIME charset to be used for
325N/A * mail messages is different than the charset used for files stored on
325N/A * the system, this property should be set.
325N/A *
325N/A * @version 1.45, 03/03/10
325N/A * @author John Mani
325N/A * @author Bill Shannon
325N/A */
325N/A
325N/Apublic class MimeUtility {
325N/A
325N/A // This class cannot be instantiated
325N/A private MimeUtility() { }
325N/A
325N/A public static final int ALL = -1;
325N/A
325N/A private static final int BUFFER_SIZE = 1024;
325N/A private static boolean decodeStrict = true;
325N/A private static boolean encodeEolStrict = false;
325N/A private static boolean foldEncodedWords = false;
325N/A private static boolean foldText = true;
325N/A
325N/A static {
325N/A try {
325N/A String s = SAAJUtil.getSystemProperty("mail.mime.decodetext.strict");
325N/A // default to true
325N/A decodeStrict = s == null || !s.equalsIgnoreCase("false");
325N/A s = SAAJUtil.getSystemProperty("mail.mime.encodeeol.strict");
325N/A // default to false
325N/A encodeEolStrict = s != null && s.equalsIgnoreCase("true");
325N/A s = SAAJUtil.getSystemProperty("mail.mime.foldencodedwords");
325N/A // default to false
325N/A foldEncodedWords = s != null && s.equalsIgnoreCase("true");
325N/A s = SAAJUtil.getSystemProperty("mail.mime.foldtext");
325N/A // default to true
325N/A foldText = s == null || !s.equalsIgnoreCase("false");
325N/A } catch (SecurityException sex) {
325N/A // ignore it
325N/A }
325N/A }
325N/A
325N/A
325N/A /**
325N/A * Get the content-transfer-encoding that should be applied
325N/A * to the input stream of this datasource, to make it mailsafe. <p>
325N/A *
325N/A * The algorithm used here is: <br>
325N/A * <ul>
325N/A * <li>
325N/A * If the primary type of this datasource is "text" and if all
325N/A * the bytes in its input stream are US-ASCII, then the encoding
325N/A * is "7bit". If more than half of the bytes are non-US-ASCII, then
325N/A * the encoding is "base64". If less than half of the bytes are
325N/A * non-US-ASCII, then the encoding is "quoted-printable".
325N/A * <li>
325N/A * If the primary type of this datasource is not "text", then if
325N/A * all the bytes of its input stream are US-ASCII, the encoding
325N/A * is "7bit". If there is even one non-US-ASCII character, the
325N/A * encoding is "base64".
325N/A * </ul>
325N/A *
325N/A * @param ds DataSource
325N/A * @return the encoding. This is either "7bit",
325N/A * "quoted-printable" or "base64"
325N/A */
325N/A public static String getEncoding(DataSource ds) {
325N/A ContentType cType = null;
325N/A InputStream is = null;
325N/A String encoding = null;
325N/A
325N/A try {
325N/A cType = new ContentType(ds.getContentType());
325N/A is = ds.getInputStream();
325N/A } catch (Exception ex) {
325N/A return "base64"; // what else ?!
325N/A }
325N/A
325N/A boolean isText = cType.match("text/*");
325N/A // if not text, stop processing when we see non-ASCII
325N/A int i = checkAscii(is, ALL, !isText);
325N/A switch (i) {
325N/A case ALL_ASCII:
325N/A encoding = "7bit"; // all ascii
325N/A break;
325N/A case MOSTLY_ASCII:
325N/A encoding = "quoted-printable"; // mostly ascii
325N/A break;
325N/A default:
325N/A encoding = "base64"; // mostly binary
325N/A break;
325N/A }
325N/A
325N/A // Close the input stream
325N/A try {
325N/A is.close();
325N/A } catch (IOException ioex) { }
325N/A
325N/A return encoding;
325N/A }
325N/A
325N/A /**
325N/A * Same as <code>getEncoding(DataSource)</code> except that instead
325N/A * of reading the data from an <code>InputStream</code> it uses the
325N/A * <code>writeTo</code> method to examine the data. This is more
325N/A * efficient in the common case of a <code>DataHandler</code>
325N/A * created with an object and a MIME type (for example, a
325N/A * "text/plain" String) because all the I/O is done in this
325N/A * thread. In the case requiring an <code>InputStream</code> the
325N/A * <code>DataHandler</code> uses a thread, a pair of pipe streams,
325N/A * and the <code>writeTo</code> method to produce the data. <p>
325N/A *
325N/A * @since JavaMail 1.2
325N/A */
325N/A public static String getEncoding(DataHandler dh) {
325N/A ContentType cType = null;
325N/A String encoding = null;
325N/A
325N/A /*
325N/A * Try to pick the most efficient means of determining the
325N/A * encoding. If this DataHandler was created using a DataSource,
325N/A * the getEncoding(DataSource) method is typically faster. If
325N/A * the DataHandler was created with an object, this method is
325N/A * much faster. To distinguish the two cases, we use a heuristic.
325N/A * A DataHandler created with an object will always have a null name.
325N/A * A DataHandler created with a DataSource will usually have a
325N/A * non-null name.
325N/A *
325N/A * XXX - This is actually quite a disgusting hack, but it makes
325N/A * a common case run over twice as fast.
325N/A */
325N/A if (dh.getName() != null)
325N/A return getEncoding(dh.getDataSource());
325N/A
325N/A try {
325N/A cType = new ContentType(dh.getContentType());
325N/A } catch (Exception ex) {
325N/A return "base64"; // what else ?!
325N/A }
325N/A
325N/A if (cType.match("text/*")) {
325N/A // Check all of the available bytes
325N/A AsciiOutputStream aos = new AsciiOutputStream(false, false);
325N/A try {
325N/A dh.writeTo(aos);
325N/A } catch (IOException ex) { } // ignore it
325N/A switch (aos.getAscii()) {
325N/A case ALL_ASCII:
325N/A encoding = "7bit"; // all ascii
325N/A break;
325N/A case MOSTLY_ASCII:
325N/A encoding = "quoted-printable"; // mostly ascii
325N/A break;
325N/A default:
325N/A encoding = "base64"; // mostly binary
325N/A break;
325N/A }
325N/A } else { // not "text"
325N/A // Check all of available bytes, break out if we find
325N/A // at least one non-US-ASCII character
325N/A AsciiOutputStream aos =
325N/A new AsciiOutputStream(true, encodeEolStrict);
325N/A try {
325N/A dh.writeTo(aos);
325N/A } catch (IOException ex) { } // ignore it
325N/A if (aos.getAscii() == ALL_ASCII) // all ascii
325N/A encoding = "7bit";
325N/A else // found atleast one non-ascii character, use b64
325N/A encoding = "base64";
325N/A }
325N/A
325N/A return encoding;
325N/A }
325N/A
325N/A /**
325N/A * Decode the given input stream. The Input stream returned is
325N/A * the decoded input stream. All the encodings defined in RFC 2045
325N/A * are supported here. They include "base64", "quoted-printable",
325N/A * "7bit", "8bit", and "binary". In addition, "uuencode" is also
325N/A * supported.
325N/A *
325N/A * @param is input stream
325N/A * @param encoding the encoding of the stream.
325N/A * @return decoded input stream.
325N/A */
325N/A public static InputStream decode(InputStream is, String encoding)
325N/A throws MessagingException {
325N/A if (encoding.equalsIgnoreCase("base64"))
325N/A return new BASE64DecoderStream(is);
325N/A else if (encoding.equalsIgnoreCase("quoted-printable"))
325N/A return new QPDecoderStream(is);
325N/A else if (encoding.equalsIgnoreCase("uuencode") ||
325N/A encoding.equalsIgnoreCase("x-uuencode") ||
325N/A encoding.equalsIgnoreCase("x-uue"))
325N/A return new UUDecoderStream(is);
325N/A else if (encoding.equalsIgnoreCase("binary") ||
325N/A encoding.equalsIgnoreCase("7bit") ||
325N/A encoding.equalsIgnoreCase("8bit"))
325N/A return is;
325N/A else
325N/A throw new MessagingException("Unknown encoding: " + encoding);
325N/A }
325N/A
325N/A /**
325N/A * Wrap an encoder around the given output stream.
325N/A * All the encodings defined in RFC 2045 are supported here.
325N/A * They include "base64", "quoted-printable", "7bit", "8bit" and
325N/A * "binary". In addition, "uuencode" is also supported.
325N/A *
325N/A * @param os output stream
325N/A * @param encoding the encoding of the stream.
325N/A * @return output stream that applies the
325N/A * specified encoding.
325N/A */
325N/A public static OutputStream encode(OutputStream os, String encoding)
325N/A throws MessagingException {
325N/A if (encoding == null)
325N/A return os;
325N/A else if (encoding.equalsIgnoreCase("base64"))
325N/A return new BASE64EncoderStream(os);
325N/A else if (encoding.equalsIgnoreCase("quoted-printable"))
325N/A return new QPEncoderStream(os);
325N/A else if (encoding.equalsIgnoreCase("uuencode") ||
325N/A encoding.equalsIgnoreCase("x-uuencode") ||
325N/A encoding.equalsIgnoreCase("x-uue"))
325N/A return new UUEncoderStream(os);
325N/A else if (encoding.equalsIgnoreCase("binary") ||
325N/A encoding.equalsIgnoreCase("7bit") ||
325N/A encoding.equalsIgnoreCase("8bit"))
325N/A return os;
325N/A else
325N/A throw new MessagingException("Unknown encoding: " +encoding);
325N/A }
325N/A
325N/A /**
325N/A * Wrap an encoder around the given output stream.
325N/A * All the encodings defined in RFC 2045 are supported here.
325N/A * They include "base64", "quoted-printable", "7bit", "8bit" and
325N/A * "binary". In addition, "uuencode" is also supported.
325N/A * The <code>filename</code> parameter is used with the "uuencode"
325N/A * encoding and is included in the encoded output.
325N/A *
325N/A * @param os output stream
325N/A * @param encoding the encoding of the stream.
325N/A * @param filename name for the file being encoded (only used
325N/A * with uuencode)
325N/A * @return output stream that applies the
325N/A * specified encoding.
325N/A * @since JavaMail 1.2
325N/A */
325N/A public static OutputStream encode(OutputStream os, String encoding,
325N/A String filename)
325N/A throws MessagingException {
325N/A if (encoding == null)
325N/A return os;
325N/A else if (encoding.equalsIgnoreCase("base64"))
325N/A return new BASE64EncoderStream(os);
325N/A else if (encoding.equalsIgnoreCase("quoted-printable"))
325N/A return new QPEncoderStream(os);
325N/A else if (encoding.equalsIgnoreCase("uuencode") ||
325N/A encoding.equalsIgnoreCase("x-uuencode") ||
325N/A encoding.equalsIgnoreCase("x-uue"))
325N/A return new UUEncoderStream(os, filename);
325N/A else if (encoding.equalsIgnoreCase("binary") ||
325N/A encoding.equalsIgnoreCase("7bit") ||
325N/A encoding.equalsIgnoreCase("8bit"))
325N/A return os;
325N/A else
325N/A throw new MessagingException("Unknown encoding: " +encoding);
325N/A }
325N/A
325N/A /**
325N/A * Encode a RFC 822 "text" token into mail-safe form as per
325N/A * RFC 2047. <p>
325N/A *
325N/A * The given Unicode string is examined for non US-ASCII
325N/A * characters. If the string contains only US-ASCII characters,
325N/A * it is returned as-is. If the string contains non US-ASCII
325N/A * characters, it is first character-encoded using the platform's
325N/A * default charset, then transfer-encoded using either the B or
325N/A * Q encoding. The resulting bytes are then returned as a Unicode
325N/A * string containing only ASCII characters. <p>
325N/A *
325N/A * Note that this method should be used to encode only
325N/A * "unstructured" RFC 822 headers. <p>
325N/A *
325N/A * Example of usage:
325N/A * <p><blockquote><pre>
325N/A *
325N/A * MimeBodyPart part = ...
325N/A * String rawvalue = "FooBar Mailer, Japanese version 1.1"
325N/A * try {
325N/A * // If we know for sure that rawvalue contains only US-ASCII
325N/A * // characters, we can skip the encoding part
325N/A * part.setHeader("X-mailer", MimeUtility.encodeText(rawvalue));
325N/A * } catch (UnsupportedEncodingException e) {
325N/A * // encoding failure
325N/A * } catch (MessagingException me) {
325N/A * // setHeader() failure
325N/A * }
325N/A *
325N/A * </pre></blockquote><p>
325N/A *
325N/A * @param text unicode string
325N/A * @return Unicode string containing only US-ASCII characters
325N/A * @exception UnsupportedEncodingException if the encoding fails
325N/A */
325N/A public static String encodeText(String text)
325N/A throws UnsupportedEncodingException {
325N/A return encodeText(text, null, null);
325N/A }
325N/A
325N/A /**
325N/A * Encode a RFC 822 "text" token into mail-safe form as per
325N/A * RFC 2047. <p>
325N/A *
325N/A * The given Unicode string is examined for non US-ASCII
325N/A * characters. If the string contains only US-ASCII characters,
325N/A * it is returned as-is. If the string contains non US-ASCII
325N/A * characters, it is first character-encoded using the specified
325N/A * charset, then transfer-encoded using either the B or Q encoding.
325N/A * The resulting bytes are then returned as a Unicode string
325N/A * containing only ASCII characters. <p>
325N/A *
325N/A * Note that this method should be used to encode only
325N/A * "unstructured" RFC 822 headers.
325N/A *
325N/A * @param text the header value
325N/A * @param charset the charset. If this parameter is null, the
325N/A * platform's default chatset is used.
325N/A * @param encoding the encoding to be used. Currently supported
325N/A * values are "B" and "Q". If this parameter is null, then
325N/A * the "Q" encoding is used if most of characters to be
325N/A * encoded are in the ASCII charset, otherwise "B" encoding
325N/A * is used.
325N/A * @return Unicode string containing only US-ASCII characters
325N/A */
325N/A public static String encodeText(String text, String charset,
325N/A String encoding)
325N/A throws UnsupportedEncodingException {
325N/A return encodeWord(text, charset, encoding, false);
325N/A }
325N/A
325N/A /**
325N/A * Decode "unstructured" headers, that is, headers that are defined
325N/A * as '*text' as per RFC 822. <p>
325N/A *
325N/A * The string is decoded using the algorithm specified in
325N/A * RFC 2047, Section 6.1.1. If the charset-conversion fails
325N/A * for any sequence, an UnsupportedEncodingException is thrown.
325N/A * If the String is not an RFC 2047 style encoded header, it is
325N/A * returned as-is <p>
325N/A *
325N/A * Example of usage:
325N/A * <p><blockquote><pre>
325N/A *
325N/A * MimeBodyPart part = ...
325N/A * String rawvalue = null;
325N/A * String value = null;
325N/A * try {
325N/A * if ((rawvalue = part.getHeader("X-mailer")[0]) != null)
325N/A * value = MimeUtility.decodeText(rawvalue);
325N/A * } catch (UnsupportedEncodingException e) {
325N/A * // Don't care
325N/A * value = rawvalue;
325N/A * } catch (MessagingException me) { }
325N/A *
325N/A * return value;
325N/A *
325N/A * </pre></blockquote><p>
325N/A *
325N/A * @param etext the possibly encoded value
325N/A * @exception UnsupportedEncodingException if the charset
325N/A * conversion failed.
325N/A */
325N/A public static String decodeText(String etext)
325N/A throws UnsupportedEncodingException {
325N/A /*
325N/A * We look for sequences separated by "linear-white-space".
325N/A * (as per RFC 2047, Section 6.1.1)
325N/A * RFC 822 defines "linear-white-space" as SPACE | HT | CR | NL.
325N/A */
325N/A String lwsp = " \t\n\r";
325N/A StringTokenizer st;
325N/A
325N/A /*
325N/A * First, lets do a quick run thru the string and check
325N/A * whether the sequence "=?" exists at all. If none exists,
325N/A * we know there are no encoded-words in here and we can just
325N/A * return the string as-is, without suffering thru the later
325N/A * decoding logic.
325N/A * This handles the most common case of unencoded headers
325N/A * efficiently.
325N/A */
325N/A if (etext.indexOf("=?") == -1)
325N/A return etext;
325N/A
325N/A // Encoded words found. Start decoding ...
325N/A
325N/A st = new StringTokenizer(etext, lwsp, true);
325N/A StringBuffer sb = new StringBuffer(); // decode buffer
325N/A StringBuffer wsb = new StringBuffer(); // white space buffer
325N/A boolean prevWasEncoded = false;
325N/A
325N/A while (st.hasMoreTokens()) {
325N/A char c;
325N/A String s = st.nextToken();
325N/A // If whitespace, append it to the whitespace buffer
325N/A if (((c = s.charAt(0)) == ' ') || (c == '\t') ||
325N/A (c == '\r') || (c == '\n'))
325N/A wsb.append(c);
325N/A else {
325N/A // Check if token is an 'encoded-word' ..
325N/A String word;
325N/A try {
325N/A word = decodeWord(s);
325N/A // Yes, this IS an 'encoded-word'.
325N/A if (!prevWasEncoded && wsb.length() > 0) {
325N/A // if the previous word was also encoded, we
325N/A // should ignore the collected whitespace. Else
325N/A // we include the whitespace as well.
325N/A sb.append(wsb);
325N/A }
325N/A prevWasEncoded = true;
325N/A } catch (ParseException pex) {
325N/A // This is NOT an 'encoded-word'.
325N/A word = s;
325N/A // possibly decode inner encoded words
325N/A if (!decodeStrict)
325N/A word = decodeInnerWords(word);
325N/A // include colleced whitespace ..
325N/A if (wsb.length() > 0)
325N/A sb.append(wsb);
325N/A prevWasEncoded = false;
325N/A }
325N/A sb.append(word); // append the actual word
325N/A wsb.setLength(0); // reset wsb for reuse
325N/A }
325N/A }
325N/A return sb.toString();
325N/A }
325N/A
325N/A /**
325N/A * Encode a RFC 822 "word" token into mail-safe form as per
325N/A * RFC 2047. <p>
325N/A *
325N/A * The given Unicode string is examined for non US-ASCII
325N/A * characters. If the string contains only US-ASCII characters,
325N/A * it is returned as-is. If the string contains non US-ASCII
325N/A * characters, it is first character-encoded using the platform's
325N/A * default charset, then transfer-encoded using either the B or
325N/A * Q encoding. The resulting bytes are then returned as a Unicode
325N/A * string containing only ASCII characters. <p>
325N/A *
325N/A * This method is meant to be used when creating RFC 822 "phrases".
325N/A * The InternetAddress class, for example, uses this to encode
325N/A * it's 'phrase' component.
325N/A *
325N/A * @param text unicode string
325N/A * @return Array of Unicode strings containing only US-ASCII
325N/A * characters.
325N/A * @exception UnsupportedEncodingException if the encoding fails
325N/A */
325N/A public static String encodeWord(String word)
325N/A throws UnsupportedEncodingException {
325N/A return encodeWord(word, null, null);
325N/A }
325N/A
325N/A /**
325N/A * Encode a RFC 822 "word" token into mail-safe form as per
325N/A * RFC 2047. <p>
325N/A *
325N/A * The given Unicode string is examined for non US-ASCII
325N/A * characters. If the string contains only US-ASCII characters,
325N/A * it is returned as-is. If the string contains non US-ASCII
325N/A * characters, it is first character-encoded using the specified
325N/A * charset, then transfer-encoded using either the B or Q encoding.
325N/A * The resulting bytes are then returned as a Unicode string
325N/A * containing only ASCII characters. <p>
325N/A *
325N/A * @param text unicode string
325N/A * @param charset the MIME charset
325N/A * @param encoding the encoding to be used. Currently supported
325N/A * values are "B" and "Q". If this parameter is null, then
325N/A * the "Q" encoding is used if most of characters to be
325N/A * encoded are in the ASCII charset, otherwise "B" encoding
325N/A * is used.
325N/A * @return Unicode string containing only US-ASCII characters
325N/A * @exception UnsupportedEncodingException if the encoding fails
325N/A */
325N/A public static String encodeWord(String word, String charset,
325N/A String encoding)
325N/A throws UnsupportedEncodingException {
325N/A return encodeWord(word, charset, encoding, true);
325N/A }
325N/A
325N/A /*
325N/A * Encode the given string. The parameter 'encodingWord' should
325N/A * be true if a RFC 822 "word" token is being encoded and false if a
325N/A * RFC 822 "text" token is being encoded. This is because the
325N/A * "Q" encoding defined in RFC 2047 has more restrictions when
325N/A * encoding "word" tokens. (Sigh)
325N/A */
325N/A private static String encodeWord(String string, String charset,
325N/A String encoding, boolean encodingWord)
325N/A throws UnsupportedEncodingException {
325N/A
325N/A // If 'string' contains only US-ASCII characters, just
325N/A // return it.
325N/A int ascii = checkAscii(string);
325N/A if (ascii == ALL_ASCII)
325N/A return string;
325N/A
325N/A // Else, apply the specified charset conversion.
325N/A String jcharset;
325N/A if (charset == null) { // use default charset
325N/A jcharset = getDefaultJavaCharset(); // the java charset
325N/A charset = getDefaultMIMECharset(); // the MIME equivalent
325N/A } else // MIME charset -> java charset
325N/A jcharset = javaCharset(charset);
325N/A
325N/A // If no transfer-encoding is specified, figure one out.
325N/A if (encoding == null) {
325N/A if (ascii != MOSTLY_NONASCII)
325N/A encoding = "Q";
325N/A else
325N/A encoding = "B";
325N/A }
325N/A
325N/A boolean b64;
325N/A if (encoding.equalsIgnoreCase("B"))
325N/A b64 = true;
325N/A else if (encoding.equalsIgnoreCase("Q"))
325N/A b64 = false;
325N/A else
325N/A throw new UnsupportedEncodingException(
325N/A "Unknown transfer encoding: " + encoding);
325N/A
325N/A StringBuffer outb = new StringBuffer(); // the output buffer
325N/A doEncode(string, b64, jcharset,
325N/A // As per RFC 2047, size of an encoded string should not
325N/A // exceed 75 bytes.
325N/A // 7 = size of "=?", '?', 'B'/'Q', '?', "?="
325N/A 75 - 7 - charset.length(), // the available space
325N/A "=?" + charset + "?" + encoding + "?", // prefix
325N/A true, encodingWord, outb);
325N/A
325N/A return outb.toString();
325N/A }
325N/A
325N/A private static void doEncode(String string, boolean b64,
325N/A String jcharset, int avail, String prefix,
325N/A boolean first, boolean encodingWord, StringBuffer buf)
325N/A throws UnsupportedEncodingException {
325N/A
325N/A // First find out what the length of the encoded version of
325N/A // 'string' would be.
325N/A byte[] bytes = string.getBytes(jcharset);
325N/A int len;
325N/A if (b64) // "B" encoding
325N/A len = BEncoderStream.encodedLength(bytes);
325N/A else // "Q"
325N/A len = QEncoderStream.encodedLength(bytes, encodingWord);
325N/A
325N/A int size;
325N/A if ((len > avail) && ((size = string.length()) > 1)) {
325N/A // If the length is greater than 'avail', split 'string'
325N/A // into two and recurse.
325N/A doEncode(string.substring(0, size/2), b64, jcharset,
325N/A avail, prefix, first, encodingWord, buf);
325N/A doEncode(string.substring(size/2, size), b64, jcharset,
325N/A avail, prefix, false, encodingWord, buf);
325N/A } else {
325N/A // length <= than 'avail'. Encode the given string
325N/A ByteArrayOutputStream os = new ByteArrayOutputStream(BUFFER_SIZE);
325N/A OutputStream eos; // the encoder
325N/A if (b64) // "B" encoding
325N/A eos = new BEncoderStream(os);
325N/A else // "Q" encoding
325N/A eos = new QEncoderStream(os, encodingWord);
325N/A
325N/A try { // do the encoding
325N/A eos.write(bytes);
325N/A eos.close();
325N/A } catch (IOException ioex) { }
325N/A
325N/A byte[] encodedBytes = os.toByteArray(); // the encoded stuff
325N/A // Now write out the encoded (all ASCII) bytes into our
325N/A // StringBuffer
325N/A if (!first) // not the first line of this sequence
325N/A if (foldEncodedWords)
325N/A buf.append("\r\n "); // start a continuation line
325N/A else
325N/A buf.append(" "); // line will be folded later
325N/A
325N/A buf.append(prefix);
325N/A for (int i = 0; i < encodedBytes.length; i++)
325N/A buf.append((char)encodedBytes[i]);
325N/A buf.append("?="); // terminate the current sequence
325N/A }
325N/A }
325N/A
325N/A /**
325N/A * The string is parsed using the rules in RFC 2047 for parsing
325N/A * an "encoded-word". If the parse fails, a ParseException is
325N/A * thrown. Otherwise, it is transfer-decoded, and then
325N/A * charset-converted into Unicode. If the charset-conversion
325N/A * fails, an UnsupportedEncodingException is thrown.<p>
325N/A *
325N/A * @param eword the possibly encoded value
325N/A * @exception ParseException if the string is not an
325N/A * encoded-word as per RFC 2047.
325N/A * @exception UnsupportedEncodingException if the charset
325N/A * conversion failed.
325N/A */
325N/A public static String decodeWord(String eword)
325N/A throws ParseException, UnsupportedEncodingException {
325N/A
325N/A if (!eword.startsWith("=?")) // not an encoded word
325N/A throw new ParseException();
325N/A
325N/A // get charset
325N/A int start = 2; int pos;
325N/A if ((pos = eword.indexOf('?', start)) == -1)
325N/A throw new ParseException();
325N/A String charset = javaCharset(eword.substring(start, pos));
325N/A
325N/A // get encoding
325N/A start = pos+1;
325N/A if ((pos = eword.indexOf('?', start)) == -1)
325N/A throw new ParseException();
325N/A String encoding = eword.substring(start, pos);
325N/A
325N/A // get encoded-sequence
325N/A start = pos+1;
325N/A if ((pos = eword.indexOf("?=", start)) == -1)
325N/A throw new ParseException();
325N/A String word = eword.substring(start, pos);
325N/A
325N/A try {
325N/A // Extract the bytes from word
325N/A ByteArrayInputStream bis =
325N/A new ByteArrayInputStream(ASCIIUtility.getBytes(word));
325N/A
325N/A // Get the appropriate decoder
325N/A InputStream is;
325N/A if (encoding.equalsIgnoreCase("B"))
325N/A is = new BASE64DecoderStream(bis);
325N/A else if (encoding.equalsIgnoreCase("Q"))
325N/A is = new QDecoderStream(bis);
325N/A else
325N/A throw new UnsupportedEncodingException(
325N/A "unknown encoding: " + encoding);
325N/A
325N/A // For b64 & q, size of decoded word <= size of word. So
325N/A // the decoded bytes must fit into the 'bytes' array. This
325N/A // is certainly more efficient than writing bytes into a
325N/A // ByteArrayOutputStream and then pulling out the byte[]
325N/A // from it.
325N/A int count = bis.available();
325N/A byte[] bytes = new byte[count];
325N/A // count is set to the actual number of decoded bytes
325N/A count = is.read(bytes, 0, count);
325N/A
325N/A // Finally, convert the decoded bytes into a String using
325N/A // the specified charset
325N/A String s = new String(bytes, 0, count, charset);
325N/A if (pos + 2 < eword.length()) {
325N/A // there's still more text in the string
325N/A String rest = eword.substring(pos + 2);
325N/A if (!decodeStrict)
325N/A rest = decodeInnerWords(rest);
325N/A s += rest;
325N/A }
325N/A return s;
325N/A } catch (UnsupportedEncodingException uex) {
325N/A // explicitly catch and rethrow this exception, otherwise
325N/A // the below IOException catch will swallow this up!
325N/A throw uex;
325N/A } catch (IOException ioex) {
325N/A // Shouldn't happen.
325N/A throw new ParseException();
325N/A } catch (IllegalArgumentException iex) {
325N/A /* An unknown charset of the form ISO-XXX-XXX, will cause
325N/A * the JDK to throw an IllegalArgumentException ... Since the
325N/A * JDK will attempt to create a classname using this string,
325N/A * but valid classnames must not contain the character '-',
325N/A * and this results in an IllegalArgumentException, rather than
325N/A * the expected UnsupportedEncodingException. Yikes
325N/A */
325N/A throw new UnsupportedEncodingException();
325N/A }
325N/A }
325N/A
325N/A /**
325N/A * Look for encoded words within a word. The MIME spec doesn't
325N/A * allow this, but many broken mailers, especially Japanese mailers,
325N/A * produce such incorrect encodings.
325N/A */
325N/A private static String decodeInnerWords(String word)
325N/A throws UnsupportedEncodingException {
325N/A int start = 0, i;
325N/A StringBuffer buf = new StringBuffer();
325N/A while ((i = word.indexOf("=?", start)) >= 0) {
325N/A buf.append(word.substring(start, i));
325N/A int end = word.indexOf("?=", i);
325N/A if (end < 0)
325N/A break;
325N/A String s = word.substring(i, end + 2);
325N/A try {
325N/A s = decodeWord(s);
325N/A } catch (ParseException pex) {
325N/A // ignore it, just use the original string
325N/A }
325N/A buf.append(s);
325N/A start = end + 2;
325N/A }
325N/A if (start == 0)
325N/A return word;
325N/A if (start < word.length())
325N/A buf.append(word.substring(start));
325N/A return buf.toString();
325N/A }
325N/A
325N/A /**
325N/A * A utility method to quote a word, if the word contains any
325N/A * characters from the specified 'specials' list.<p>
325N/A *
325N/A * The <code>HeaderTokenizer</code> class defines two special
325N/A * sets of delimiters - MIME and RFC 822. <p>
325N/A *
325N/A * This method is typically used during the generation of
325N/A * RFC 822 and MIME header fields.
325N/A *
325N/A * @param word word to be quoted
325N/A * @param specials the set of special characters
325N/A * @return the possibly quoted word
325N/A * @see javax.mail.internet.HeaderTokenizer#MIME
325N/A * @see javax.mail.internet.HeaderTokenizer#RFC822
325N/A */
325N/A public static String quote(String word, String specials) {
325N/A int len = word.length();
325N/A
325N/A /*
325N/A * Look for any "bad" characters, Escape and
325N/A * quote the entire string if necessary.
325N/A */
325N/A boolean needQuoting = false;
325N/A for (int i = 0; i < len; i++) {
325N/A char c = word.charAt(i);
325N/A if (c == '"' || c == '\\' || c == '\r' || c == '\n') {
325N/A // need to escape them and then quote the whole string
325N/A StringBuffer sb = new StringBuffer(len + 3);
325N/A sb.append('"');
325N/A sb.append(word.substring(0, i));
325N/A int lastc = 0;
325N/A for (int j = i; j < len; j++) {
325N/A char cc = word.charAt(j);
325N/A if ((cc == '"') || (cc == '\\') ||
325N/A (cc == '\r') || (cc == '\n'))
325N/A if (cc == '\n' && lastc == '\r')
325N/A ; // do nothing, CR was already escaped
325N/A else
325N/A sb.append('\\'); // Escape the character
325N/A sb.append(cc);
325N/A lastc = cc;
325N/A }
325N/A sb.append('"');
325N/A return sb.toString();
325N/A } else if (c < 040 || c >= 0177 || specials.indexOf(c) >= 0)
325N/A // These characters cause the string to be quoted
325N/A needQuoting = true;
325N/A }
325N/A
325N/A if (needQuoting) {
325N/A StringBuffer sb = new StringBuffer(len + 2);
325N/A sb.append('"').append(word).append('"');
325N/A return sb.toString();
325N/A } else
325N/A return word;
325N/A }
325N/A
325N/A /**
325N/A * Fold a string at linear whitespace so that each line is no longer
325N/A * than 76 characters, if possible. If there are more than 76
325N/A * non-whitespace characters consecutively, the string is folded at
325N/A * the first whitespace after that sequence. The parameter
325N/A * <code>used</code> indicates how many characters have been used in
325N/A * the current line; it is usually the length of the header name. <p>
325N/A *
325N/A * Note that line breaks in the string aren't escaped; they probably
325N/A * should be.
325N/A *
325N/A * @param used characters used in line so far
325N/A * @param s the string to fold
325N/A * @return the folded string
325N/A */
325N/A /*public*/ static String fold(int used, String s) {
325N/A if (!foldText)
325N/A return s;
325N/A
325N/A int end;
325N/A char c;
325N/A // Strip trailing spaces
325N/A for (end = s.length() - 1; end >= 0; end--) {
325N/A c = s.charAt(end);
325N/A if (c != ' ' && c != '\t')
325N/A break;
325N/A }
325N/A if (end != s.length() - 1)
325N/A s = s.substring(0, end + 1);
325N/A
325N/A // if the string fits now, just return it
325N/A if (used + s.length() <= 76)
325N/A return s;
325N/A
325N/A // have to actually fold the string
325N/A StringBuffer sb = new StringBuffer(s.length() + 4);
325N/A char lastc = 0;
325N/A while (used + s.length() > 76) {
325N/A int lastspace = -1;
325N/A for (int i = 0; i < s.length(); i++) {
325N/A if (lastspace != -1 && used + i > 76)
325N/A break;
325N/A c = s.charAt(i);
325N/A if (c == ' ' || c == '\t')
325N/A if (!(lastc == ' ' || lastc == '\t'))
325N/A lastspace = i;
325N/A lastc = c;
325N/A }
325N/A if (lastspace == -1) {
325N/A // no space, use the whole thing
325N/A sb.append(s);
325N/A s = "";
325N/A used = 0;
325N/A break;
325N/A }
325N/A sb.append(s.substring(0, lastspace));
325N/A sb.append("\r\n");
325N/A lastc = s.charAt(lastspace);
325N/A sb.append(lastc);
325N/A s = s.substring(lastspace + 1);
325N/A used = 1;
325N/A }
325N/A sb.append(s);
325N/A return sb.toString();
325N/A }
325N/A
325N/A /**
325N/A * Unfold a folded header. Any line breaks that aren't escaped and
325N/A * are followed by whitespace are removed.
325N/A *
325N/A * @param s the string to unfold
325N/A * @return the unfolded string
325N/A */
325N/A /*public*/ static String unfold(String s) {
325N/A if (!foldText)
325N/A return s;
325N/A
325N/A StringBuffer sb = null;
325N/A int i;
325N/A while ((i = indexOfAny(s, "\r\n")) >= 0) {
325N/A int start = i;
325N/A int l = s.length();
325N/A i++; // skip CR or NL
325N/A if (i < l && s.charAt(i - 1) == '\r' && s.charAt(i) == '\n')
325N/A i++; // skip LF
325N/A if (start == 0 || s.charAt(start - 1) != '\\') {
325N/A char c;
325N/A // if next line starts with whitespace, skip all of it
325N/A // XXX - always has to be true?
325N/A if (i < l && ((c = s.charAt(i)) == ' ' || c == '\t')) {
325N/A i++; // skip whitespace
325N/A while (i < l && ((c = s.charAt(i)) == ' ' || c == '\t'))
325N/A i++;
325N/A if (sb == null)
325N/A sb = new StringBuffer(s.length());
325N/A if (start != 0) {
325N/A sb.append(s.substring(0, start));
325N/A sb.append(' ');
325N/A }
325N/A s = s.substring(i);
325N/A continue;
325N/A }
325N/A // it's not a continuation line, just leave it in
325N/A if (sb == null)
325N/A sb = new StringBuffer(s.length());
325N/A sb.append(s.substring(0, i));
325N/A s = s.substring(i);
325N/A } else {
325N/A // there's a backslash at "start - 1"
325N/A // strip it out, but leave in the line break
325N/A if (sb == null)
325N/A sb = new StringBuffer(s.length());
325N/A sb.append(s.substring(0, start - 1));
325N/A sb.append(s.substring(start, i));
325N/A s = s.substring(i);
325N/A }
325N/A }
325N/A if (sb != null) {
325N/A sb.append(s);
325N/A return sb.toString();
325N/A } else
325N/A return s;
325N/A }
325N/A
325N/A /**
325N/A * Return the first index of any of the characters in "any" in "s",
325N/A * or -1 if none are found.
325N/A *
325N/A * This should be a method on String.
325N/A */
325N/A private static int indexOfAny(String s, String any) {
325N/A return indexOfAny(s, any, 0);
325N/A }
325N/A
325N/A private static int indexOfAny(String s, String any, int start) {
325N/A try {
325N/A int len = s.length();
325N/A for (int i = start; i < len; i++) {
325N/A if (any.indexOf(s.charAt(i)) >= 0)
325N/A return i;
325N/A }
325N/A return -1;
325N/A } catch (StringIndexOutOfBoundsException e) {
325N/A return -1;
325N/A }
325N/A }
325N/A
325N/A /**
325N/A * Convert a MIME charset name into a valid Java charset name. <p>
325N/A *
325N/A * @param charset the MIME charset name
325N/A * @return the Java charset equivalent. If a suitable mapping is
325N/A * not available, the passed in charset is itself returned.
325N/A */
325N/A public static String javaCharset(String charset) {
325N/A if (mime2java == null || charset == null)
325N/A // no mapping table, or charset parameter is null
325N/A return charset;
325N/A
325N/A String alias = (String)mime2java.get(charset.toLowerCase());
325N/A return alias == null ? charset : alias;
325N/A }
325N/A
325N/A /**
325N/A * Convert a java charset into its MIME charset name. <p>
325N/A *
325N/A * Note that a future version of JDK (post 1.2) might provide
325N/A * this functionality, in which case, we may deprecate this
325N/A * method then.
325N/A *
325N/A * @param charset the JDK charset
325N/A * @return the MIME/IANA equivalent. If a mapping
325N/A * is not possible, the passed in charset itself
325N/A * is returned.
325N/A * @since JavaMail 1.1
325N/A */
325N/A public static String mimeCharset(String charset) {
325N/A if (java2mime == null || charset == null)
325N/A // no mapping table or charset param is null
325N/A return charset;
325N/A
325N/A String alias = (String)java2mime.get(charset.toLowerCase());
325N/A return alias == null ? charset : alias;
325N/A }
325N/A
325N/A private static String defaultJavaCharset;
325N/A private static String defaultMIMECharset;
325N/A
325N/A /**
325N/A * Get the default charset corresponding to the system's current
325N/A * default locale. If the System property <code>mail.mime.charset</code>
325N/A * is set, a system charset corresponding to this MIME charset will be
325N/A * returned. <p>
325N/A *
325N/A * @return the default charset of the system's default locale,
325N/A * as a Java charset. (NOT a MIME charset)
325N/A * @since JavaMail 1.1
325N/A */
325N/A public static String getDefaultJavaCharset() {
325N/A if (defaultJavaCharset == null) {
325N/A /*
325N/A * If mail.mime.charset is set, it controls the default
325N/A * Java charset as well.
325N/A */
325N/A String mimecs = null;
325N/A
325N/A mimecs = SAAJUtil.getSystemProperty("mail.mime.charset");
325N/A
325N/A if (mimecs != null && mimecs.length() > 0) {
325N/A defaultJavaCharset = javaCharset(mimecs);
325N/A return defaultJavaCharset;
325N/A }
325N/A
325N/A try {
325N/A defaultJavaCharset = System.getProperty("file.encoding",
325N/A "8859_1");
325N/A } catch (SecurityException sex) {
325N/A
325N/A class NullInputStream extends InputStream {
325N/A public int read() {
325N/A return 0;
325N/A }
325N/A }
325N/A InputStreamReader reader =
325N/A new InputStreamReader(new NullInputStream());
325N/A defaultJavaCharset = reader.getEncoding();
325N/A if (defaultJavaCharset == null)
325N/A defaultJavaCharset = "8859_1";
325N/A }
325N/A }
325N/A
325N/A return defaultJavaCharset;
325N/A }
325N/A
325N/A /*
325N/A * Get the default MIME charset for this locale.
325N/A */
325N/A static String getDefaultMIMECharset() {
325N/A if (defaultMIMECharset == null) {
325N/A defaultMIMECharset = SAAJUtil.getSystemProperty("mail.mime.charset");
325N/A }
325N/A if (defaultMIMECharset == null)
325N/A defaultMIMECharset = mimeCharset(getDefaultJavaCharset());
325N/A return defaultMIMECharset;
325N/A }
325N/A
325N/A // Tables to map MIME charset names to Java names and vice versa.
325N/A // XXX - Should eventually use J2SE 1.4 java.nio.charset.Charset
325N/A private static Hashtable mime2java;
325N/A private static Hashtable java2mime;
325N/A
325N/A static {
325N/A java2mime = new Hashtable(40);
325N/A mime2java = new Hashtable(10);
325N/A
325N/A try {
325N/A // Use this class's classloader to load the mapping file
325N/A // XXX - we should use SecuritySupport, but it's in another package
325N/A InputStream is =
325N/A com.sun.xml.internal.messaging.saaj.packaging.mime.internet.MimeUtility.class.getResourceAsStream(
325N/A "/META-INF/javamail.charset.map");
325N/A
325N/A if (is != null) {
325N/A is = new LineInputStream(is);
325N/A
325N/A // Load the JDK-to-MIME charset mapping table
325N/A loadMappings((LineInputStream)is, java2mime);
325N/A
325N/A // Load the MIME-to-JDK charset mapping table
325N/A loadMappings((LineInputStream)is, mime2java);
325N/A }
325N/A } catch (Exception ex) { }
325N/A
325N/A // If we didn't load the tables, e.g., because we didn't have
325N/A // permission, load them manually. The entries here should be
325N/A // the same as the default javamail.charset.map.
325N/A if (java2mime.isEmpty()) {
325N/A java2mime.put("8859_1", "ISO-8859-1");
325N/A java2mime.put("iso8859_1", "ISO-8859-1");
325N/A java2mime.put("ISO8859-1", "ISO-8859-1");
325N/A
325N/A java2mime.put("8859_2", "ISO-8859-2");
325N/A java2mime.put("iso8859_2", "ISO-8859-2");
325N/A java2mime.put("ISO8859-2", "ISO-8859-2");
325N/A
325N/A java2mime.put("8859_3", "ISO-8859-3");
325N/A java2mime.put("iso8859_3", "ISO-8859-3");
325N/A java2mime.put("ISO8859-3", "ISO-8859-3");
325N/A
325N/A java2mime.put("8859_4", "ISO-8859-4");
325N/A java2mime.put("iso8859_4", "ISO-8859-4");
325N/A java2mime.put("ISO8859-4", "ISO-8859-4");
325N/A
325N/A java2mime.put("8859_5", "ISO-8859-5");
325N/A java2mime.put("iso8859_5", "ISO-8859-5");
325N/A java2mime.put("ISO8859-5", "ISO-8859-5");
325N/A
325N/A java2mime.put("8859_6", "ISO-8859-6");
325N/A java2mime.put("iso8859_6", "ISO-8859-6");
325N/A java2mime.put("ISO8859-6", "ISO-8859-6");
325N/A
325N/A java2mime.put("8859_7", "ISO-8859-7");
325N/A java2mime.put("iso8859_7", "ISO-8859-7");
325N/A java2mime.put("ISO8859-7", "ISO-8859-7");
325N/A
325N/A java2mime.put("8859_8", "ISO-8859-8");
325N/A java2mime.put("iso8859_8", "ISO-8859-8");
325N/A java2mime.put("ISO8859-8", "ISO-8859-8");
325N/A
325N/A java2mime.put("8859_9", "ISO-8859-9");
325N/A java2mime.put("iso8859_9", "ISO-8859-9");
325N/A java2mime.put("ISO8859-9", "ISO-8859-9");
325N/A
325N/A java2mime.put("SJIS", "Shift_JIS");
325N/A java2mime.put("MS932", "Shift_JIS");
325N/A java2mime.put("JIS", "ISO-2022-JP");
325N/A java2mime.put("ISO2022JP", "ISO-2022-JP");
325N/A java2mime.put("EUC_JP", "euc-jp");
325N/A java2mime.put("KOI8_R", "koi8-r");
325N/A java2mime.put("EUC_CN", "euc-cn");
325N/A java2mime.put("EUC_TW", "euc-tw");
325N/A java2mime.put("EUC_KR", "euc-kr");
325N/A }
325N/A if (mime2java.isEmpty()) {
325N/A mime2java.put("iso-2022-cn", "ISO2022CN");
325N/A mime2java.put("iso-2022-kr", "ISO2022KR");
325N/A mime2java.put("utf-8", "UTF8");
325N/A mime2java.put("utf8", "UTF8");
325N/A mime2java.put("ja_jp.iso2022-7", "ISO2022JP");
325N/A mime2java.put("ja_jp.eucjp", "EUCJIS");
325N/A mime2java.put("euc-kr", "KSC5601");
325N/A mime2java.put("euckr", "KSC5601");
325N/A mime2java.put("us-ascii", "ISO-8859-1");
325N/A mime2java.put("x-us-ascii", "ISO-8859-1");
325N/A }
325N/A }
325N/A
325N/A private static void loadMappings(LineInputStream is, Hashtable table) {
325N/A String currLine;
325N/A
325N/A while (true) {
325N/A try {
325N/A currLine = is.readLine();
325N/A } catch (IOException ioex) {
325N/A break; // error in reading, stop
325N/A }
325N/A
325N/A if (currLine == null) // end of file, stop
325N/A break;
325N/A if (currLine.startsWith("--") && currLine.endsWith("--"))
325N/A // end of this table
325N/A break;
325N/A
325N/A // ignore empty lines and comments
325N/A if (currLine.trim().length() == 0 || currLine.startsWith("#"))
325N/A continue;
325N/A
325N/A // A valid entry is of the form <key><separator><value>
325N/A // where, <separator> := SPACE | HT. Parse this
325N/A StringTokenizer tk = new StringTokenizer(currLine, " \t");
325N/A try {
325N/A String key = tk.nextToken();
325N/A String value = tk.nextToken();
325N/A table.put(key.toLowerCase(), value);
325N/A } catch (NoSuchElementException nex) { }
325N/A }
325N/A }
325N/A
325N/A static final int ALL_ASCII = 1;
325N/A static final int MOSTLY_ASCII = 2;
325N/A static final int MOSTLY_NONASCII = 3;
325N/A
325N/A /**
325N/A * Check if the given string contains non US-ASCII characters.
325N/A * @param s string
325N/A * @return ALL_ASCII if all characters in the string
325N/A * belong to the US-ASCII charset. MOSTLY_ASCII
325N/A * if more than half of the available characters
325N/A * are US-ASCII characters. Else MOSTLY_NONASCII.
325N/A */
325N/A static int checkAscii(String s) {
325N/A int ascii = 0, non_ascii = 0;
325N/A int l = s.length();
325N/A
325N/A for (int i = 0; i < l; i++) {
325N/A if (nonascii((int)s.charAt(i))) // non-ascii
325N/A non_ascii++;
325N/A else
325N/A ascii++;
325N/A }
325N/A
325N/A if (non_ascii == 0)
325N/A return ALL_ASCII;
325N/A if (ascii > non_ascii)
325N/A return MOSTLY_ASCII;
325N/A
325N/A return MOSTLY_NONASCII;
325N/A }
325N/A
325N/A /**
325N/A * Check if the given byte array contains non US-ASCII characters.
325N/A * @param b byte array
325N/A * @return ALL_ASCII if all characters in the string
325N/A * belong to the US-ASCII charset. MOSTLY_ASCII
325N/A * if more than half of the available characters
325N/A * are US-ASCII characters. Else MOSTLY_NONASCII.
325N/A *
325N/A * XXX - this method is no longer used
325N/A */
325N/A static int checkAscii(byte[] b) {
325N/A int ascii = 0, non_ascii = 0;
325N/A
325N/A for (int i=0; i < b.length; i++) {
325N/A // The '&' operator automatically causes b[i] to be promoted
325N/A // to an int, and we mask out the higher bytes in the int
325N/A // so that the resulting value is not a negative integer.
325N/A if (nonascii(b[i] & 0xff)) // non-ascii
325N/A non_ascii++;
325N/A else
325N/A ascii++;
325N/A }
325N/A
325N/A if (non_ascii == 0)
325N/A return ALL_ASCII;
325N/A if (ascii > non_ascii)
325N/A return MOSTLY_ASCII;
325N/A
325N/A return MOSTLY_NONASCII;
325N/A }
325N/A
325N/A /**
325N/A * Check if the given input stream contains non US-ASCII characters.
325N/A * Upto <code>max</code> bytes are checked. If <code>max</code> is
325N/A * set to <code>ALL</code>, then all the bytes available in this
325N/A * input stream are checked. If <code>breakOnNonAscii</code> is true
325N/A * the check terminates when the first non-US-ASCII character is
325N/A * found and MOSTLY_NONASCII is returned. Else, the check continues
325N/A * till <code>max</code> bytes or till the end of stream.
325N/A *
325N/A * @param is the input stream
325N/A * @param max maximum bytes to check for. The special value
325N/A * ALL indicates that all the bytes in this input
325N/A * stream must be checked.
325N/A * @param breakOnNonAscii if <code>true</code>, then terminate the
325N/A * the check when the first non-US-ASCII character
325N/A * is found.
325N/A * @return ALL_ASCII if all characters in the string
325N/A * belong to the US-ASCII charset. MOSTLY_ASCII
325N/A * if more than half of the available characters
325N/A * are US-ASCII characters. Else MOSTLY_NONASCII.
325N/A */
325N/A static int checkAscii(InputStream is, int max, boolean breakOnNonAscii) {
325N/A int ascii = 0, non_ascii = 0;
325N/A int len;
325N/A int block = 4096;
325N/A int linelen = 0;
325N/A boolean longLine = false, badEOL = false;
325N/A boolean checkEOL = encodeEolStrict && breakOnNonAscii;
325N/A byte buf[] = null;
325N/A if (max != 0) {
325N/A block = (max == ALL) ? 4096 : Math.min(max, 4096);
325N/A buf = new byte[block];
325N/A }
325N/A while (max != 0) {
325N/A try {
325N/A if ((len = is.read(buf, 0, block)) == -1)
325N/A break;
325N/A int lastb = 0;
325N/A for (int i = 0; i < len; i++) {
325N/A // The '&' operator automatically causes b[i] to
325N/A // be promoted to an int, and we mask out the higher
325N/A // bytes in the int so that the resulting value is
325N/A // not a negative integer.
325N/A int b = buf[i] & 0xff;
325N/A if (checkEOL &&
325N/A ((lastb == '\r' && b != '\n') ||
325N/A (lastb != '\r' && b == '\n')))
325N/A badEOL = true;
325N/A if (b == '\r' || b == '\n')
325N/A linelen = 0;
325N/A else {
325N/A linelen++;
325N/A if (linelen > 998) // 1000 - CRLF
325N/A longLine = true;
325N/A }
325N/A if (nonascii(b)) { // non-ascii
325N/A if (breakOnNonAscii) // we are done
325N/A return MOSTLY_NONASCII;
325N/A else
325N/A non_ascii++;
325N/A } else
325N/A ascii++;
325N/A lastb = b;
325N/A }
325N/A } catch (IOException ioex) {
325N/A break;
325N/A }
325N/A if (max != ALL)
325N/A max -= len;
325N/A }
325N/A
325N/A if (max == 0 && breakOnNonAscii)
325N/A // We have been told to break on the first non-ascii character.
325N/A // We haven't got any non-ascii character yet, but then we
325N/A // have not checked all of the available bytes either. So we
325N/A // cannot say for sure that this input stream is ALL_ASCII,
325N/A // and hence we must play safe and return MOSTLY_NONASCII
325N/A
325N/A return MOSTLY_NONASCII;
325N/A
325N/A if (non_ascii == 0) { // no non-us-ascii characters so far
325N/A // If we're looking at non-text data, and we saw CR without LF
325N/A // or vice versa, consider this mostly non-ASCII so that it
325N/A // will be base64 encoded (since the quoted-printable encoder
325N/A // doesn't encode this case properly).
325N/A if (badEOL)
325N/A return MOSTLY_NONASCII;
325N/A // if we've seen a long line, we degrade to mostly ascii
325N/A else if (longLine)
325N/A return MOSTLY_ASCII;
325N/A else
325N/A return ALL_ASCII;
325N/A }
325N/A if (ascii > non_ascii) // mostly ascii
325N/A return MOSTLY_ASCII;
325N/A return MOSTLY_NONASCII;
325N/A }
325N/A
325N/A static final boolean nonascii(int b) {
325N/A return b >= 0177 || (b < 040 && b != '\r' && b != '\n' && b != '\t');
325N/A }
325N/A}
325N/A
325N/A/**
325N/A * An OutputStream that determines whether the data written to
325N/A * it is all ASCII, mostly ASCII, or mostly non-ASCII.
325N/A */
325N/Aclass AsciiOutputStream extends OutputStream {
325N/A private boolean breakOnNonAscii;
325N/A private int ascii = 0, non_ascii = 0;
325N/A private int linelen = 0;
325N/A private boolean longLine = false;
325N/A private boolean badEOL = false;
325N/A private boolean checkEOL = false;
325N/A private int lastb = 0;
325N/A private int ret = 0;
325N/A
325N/A public AsciiOutputStream(boolean breakOnNonAscii, boolean encodeEolStrict) {
325N/A this.breakOnNonAscii = breakOnNonAscii;
325N/A checkEOL = encodeEolStrict && breakOnNonAscii;
325N/A }
325N/A
325N/A public void write(int b) throws IOException {
325N/A check(b);
325N/A }
325N/A
325N/A public void write(byte b[]) throws IOException {
325N/A write(b, 0, b.length);
325N/A }
325N/A
325N/A public void write(byte b[], int off, int len) throws IOException {
325N/A len += off;
325N/A for (int i = off; i < len ; i++)
325N/A check(b[i]);
325N/A }
325N/A
325N/A private final void check(int b) throws IOException {
325N/A b &= 0xff;
325N/A if (checkEOL &&
325N/A ((lastb == '\r' && b != '\n') || (lastb != '\r' && b == '\n')))
325N/A badEOL = true;
325N/A if (b == '\r' || b == '\n')
325N/A linelen = 0;
325N/A else {
325N/A linelen++;
325N/A if (linelen > 998) // 1000 - CRLF
325N/A longLine = true;
325N/A }
325N/A if (MimeUtility.nonascii(b)) { // non-ascii
325N/A non_ascii++;
325N/A if (breakOnNonAscii) { // we are done
325N/A ret = MimeUtility.MOSTLY_NONASCII;
325N/A throw new EOFException();
325N/A }
325N/A } else
325N/A ascii++;
325N/A lastb = b;
325N/A }
325N/A
325N/A /**
325N/A * Return ASCII-ness of data stream.
325N/A */
325N/A public int getAscii() {
325N/A if (ret != 0)
325N/A return ret;
325N/A // If we're looking at non-text data, and we saw CR without LF
325N/A // or vice versa, consider this mostly non-ASCII so that it
325N/A // will be base64 encoded (since the quoted-printable encoder
325N/A // doesn't encode this case properly).
325N/A if (badEOL)
325N/A return MimeUtility.MOSTLY_NONASCII;
325N/A else if (non_ascii == 0) { // no non-us-ascii characters so far
325N/A // if we've seen a long line, we degrade to mostly ascii
325N/A if (longLine)
325N/A return MimeUtility.MOSTLY_ASCII;
325N/A else
325N/A return MimeUtility.ALL_ASCII;
325N/A }
325N/A if (ascii > non_ascii) // mostly ascii
325N/A return MimeUtility.MOSTLY_ASCII;
325N/A return MimeUtility.MOSTLY_NONASCII;
325N/A }
325N/A}