tools/native2ascii/A2NFilter.java

	A2NFilter.java revision 0
0N/A/*
0N/A * Copyright 2001-2005 Sun Microsystems, Inc.  All Rights Reserved.
0N/A * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
0N/A *
0N/A * This code is free software; you can redistribute it and/or modify it
0N/A * under the terms of the GNU General Public License version 2 only, as
0N/A * published by the Free Software Foundation.  Sun designates this
0N/A * particular file as subject to the "Classpath" exception as provided
0N/A * by Sun in the LICENSE file that accompanied this code.
0N/A *
0N/A * This code is distributed in the hope that it will be useful, but WITHOUT
0N/A * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0N/A * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
0N/A * version 2 for more details (a copy is included in the LICENSE file that
0N/A * accompanied this code).
0N/A *
0N/A * You should have received a copy of the GNU General Public License version
0N/A * 2 along with this work; if not, write to the Free Software Foundation,
0N/A * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
0N/A *
0N/A * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
0N/A * CA 95054 USA or visit www.sun.com if you need additional information or
0N/A * have any questions.
0N/A */
0N/A
/**
 * This FilterReader class processes a sequence of characters from
 * a source stream containing a mixture of 7-bit ASCII data and
 * backslash-u escape sequences (a backslash, the letter 'u' and four
 * hexadecimal digits) representing characters which have
 * the possibility of being encoded in a user specified encoding.
 * The filter relies on knowing the target encoding and makes a
 * determination as to whether a given supplied character in its
 * source character stream is encodeable in the target encoding.
 * If not, it remains in its backslash-u escaped form.
 */
0N/A
0N/Apackage sun.tools.native2ascii;
0N/Aimport java.io.*;
0N/A
0N/A
0N/Aclass A2NFilter extends FilterReader {
0N/A
0N/A    // maintain a trailing buffer to hold any incompleted
0N/A    // unicode escaped sequences
0N/A    private char[] trailChars = null;
0N/A
0N/A    public A2NFilter(Reader in) {
0N/A        super(in);
0N/A    }
0N/A
0N/A    public int read(char[] buf, int off, int len) throws IOException {
0N/A        int numChars = 0;        // how many characters have been read
0N/A        int retChars = 0;        // how many characters we'll return
0N/A
0N/A        char[] cBuf = new char[len];
0N/A        int cOffset = 0;         // offset at which we'll start reading
0N/A        boolean eof = false;
0N/A
0N/A        // copy trailing chars from previous invocation to input buffer
0N/A        if (trailChars != null) {
0N/A            for (int i = 0; i < trailChars.length; i++)
0N/A                cBuf[i] = trailChars[i];
0N/A            numChars = trailChars.length;
0N/A            trailChars = null;
0N/A        }
0N/A
0N/A        int n = in.read(cBuf, numChars, len - numChars);
0N/A        if (n < 0) {
0N/A            eof = true;
0N/A            if (numChars == 0)
0N/A                return -1;              // EOF;
0N/A        } else {
0N/A            numChars += n;
0N/A        }
0N/A
0N/A        for (int i = 0; i < numChars;) {
0N/A            char c = cBuf[i++];
0N/A
0N/A            if (c != '\\' || (eof && numChars <= 5)) {
0N/A                // Not a backslash, so copy and continue
0N/A                // Always pass non backslash chars straight thru
0N/A                // for regular encoding. If backslash occurs in
0N/A                // input stream at the final 5 chars then don't
0N/A                // attempt to read-ahead and de-escape since these
0N/A                // are literal occurrences of U+005C which need to
0N/A                // be encoded verbatim in the target encoding.
0N/A                buf[retChars++] = c;
0N/A                continue;
0N/A            }
0N/A
0N/A            int remaining = numChars - i;
0N/A            if (remaining < 5) {
0N/A                // Might be the first character of a unicode escape, but we
0N/A                // don't have enough characters to tell, so save it and finish
0N/A                trailChars = new char[1 + remaining];
0N/A                trailChars[0] = c;
0N/A                for (int j = 0; j < remaining; j++)
0N/A                    trailChars[1 + j] = cBuf[i + j];
0N/A                break;
0N/A            }
0N/A            // At this point we have at least five characters remaining
0N/A
0N/A            c = cBuf[i++];
0N/A            if (c != 'u') {
0N/A                // Not a unicode escape, so copy and continue
0N/A                buf[retChars++] = '\\';
0N/A                buf[retChars++] = c;
0N/A                continue;
0N/A            }
0N/A
0N/A            // The next four characters are the hex part of a unicode escape
0N/A            char rc = 0;
0N/A            boolean isUE = true;
0N/A            try {
0N/A                rc = (char)Integer.parseInt(new String(cBuf, i, 4), 16);
0N/A            } catch (NumberFormatException x) {
0N/A                isUE = false;
0N/A            }
0N/A            if (isUE && Main.canConvert(rc)) {
0N/A                // We'll be able to convert this
0N/A                buf[retChars++] = rc;
0N/A                i += 4; // Align beyond the current uXXXX sequence
0N/A            } else {
0N/A                // We won't, so just retain the original sequence
0N/A                buf[retChars++] = '\\';
0N/A                buf[retChars++] = 'u';
0N/A                continue;
0N/A            }
0N/A
0N/A        }
0N/A
0N/A        return retChars;
0N/A    }
0N/A
0N/A    public int read() throws IOException {
0N/A        char[] buf = new char[1];
0N/A
0N/A        if (read(buf, 0, 1) == -1)
0N/A            return -1;
0N/A        else
0N/A            return (int)buf[0];
0N/A    }
0N/A
0N/A}