javac/parser/Scanner.java

	Scanner.java revision 408
3863N/A/*
4168N/A * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
3863N/A * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
3863N/A *
3863N/A * This code is free software; you can redistribute it and/or modify it
3863N/A * under the terms of the GNU General Public License version 2 only, as
3863N/A * published by the Free Software Foundation.  Sun designates this
3863N/A * particular file as subject to the "Classpath" exception as provided
3863N/A * by Sun in the LICENSE file that accompanied this code.
3863N/A *
3863N/A * This code is distributed in the hope that it will be useful, but WITHOUT
3863N/A * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
3863N/A * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
3863N/A * version 2 for more details (a copy is included in the LICENSE file that
3863N/A * accompanied this code).
3863N/A *
3863N/A * You should have received a copy of the GNU General Public License version
3863N/A * 2 along with this work; if not, write to the Free Software Foundation,
3863N/A * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
3863N/A *
3863N/A * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
3863N/A * CA 95054 USA or visit www.sun.com if you need additional information or
3863N/A * have any questions.
3863N/A */
3863N/A
3863N/Apackage com.sun.tools.javac.parser;
3863N/A
3863N/Aimport java.nio.*;
3863N/A
3863N/Aimport com.sun.tools.javac.code.Source;
3863N/Aimport com.sun.tools.javac.file.JavacFileManager;
3863N/Aimport com.sun.tools.javac.util.*;
3863N/A
3863N/A
3863N/Aimport static com.sun.tools.javac.parser.Token.*;
3863N/Aimport static com.sun.tools.javac.util.LayoutCharacters.*;
3863N/A
3863N/A/** The lexical analyzer maps an input stream consisting of
3863N/A *  ASCII characters and Unicode escapes into a token sequence.
3863N/A *
4141N/A *  <p><b>This is NOT part of any API supported by Sun Microsystems.  If
3863N/A *  you write code that depends on this, you do so at your own risk.
4064N/A *  This code and its internal interfaces are subject to change or
4186N/A *  deletion without notice.</b>
3863N/A */
3863N/Apublic class Scanner implements Lexer {
3863N/A
3863N/A    private static boolean scannerDebug = false;
3863N/A
3863N/A    /** A factory for creating scanners. */
3863N/A    public static class Factory {
3863N/A        /** The context key for the scanner factory. */
3863N/A        public static final Context.Key<Scanner.Factory> scannerFactoryKey =
3863N/A            new Context.Key<Scanner.Factory>();
3863N/A
3863N/A        /** Get the Factory instance for this context. */
3863N/A        public static Factory instance(Context context) {
3863N/A            Factory instance = context.get(scannerFactoryKey);
3863N/A            if (instance == null)
4064N/A                instance = new Factory(context);
3863N/A            return instance;
3863N/A        }
3863N/A
3863N/A        final Log log;
3863N/A        final Names names;
3863N/A        final Source source;
3863N/A        final Keywords keywords;
3863N/A
3863N/A        /** Create a new scanner factory. */
3863N/A        protected Factory(Context context) {
3863N/A            context.put(scannerFactoryKey, this);
3863N/A            this.log = Log.instance(context);
3863N/A            this.names = Names.instance(context);
3863N/A            this.source = Source.instance(context);
3863N/A            this.keywords = Keywords.instance(context);
3863N/A        }
3863N/A
4064N/A        public Scanner newScanner(CharSequence input) {
4064N/A            if (input instanceof CharBuffer) {
4064N/A                return new Scanner(this, (CharBuffer)input);
4064N/A            } else {
4064N/A                char[] array = input.toString().toCharArray();
3863N/A                return newScanner(array, array.length);
3863N/A            }
3863N/A        }
3863N/A
3863N/A        public Scanner newScanner(char[] input, int inputLength) {
3863N/A            return new Scanner(this, input, inputLength);
3863N/A        }
3863N/A    }
3863N/A
3863N/A    /* Output variables; set by nextToken():
3863N/A     */
3863N/A
3863N/A    /** The token, set by nextToken().
4064N/A     */
3863N/A    private Token token;
3863N/A
3863N/A    /** Allow hex floating-point literals.
3863N/A     */
3863N/A    private boolean allowHexFloats;
3863N/A
3863N/A    /** Allow binary literals.
3863N/A     */
3863N/A    private boolean allowBinaryLiterals;
4064N/A
3863N/A    /** Allow underscores in literals.
3863N/A     */
3863N/A    private boolean allowUnderscoresInLiterals;
3863N/A
3863N/A    /** The source language setting.
4064N/A     */
4064N/A    private Source source;
3863N/A
3863N/A    /** The token's position, 0-based offset from beginning of text.
4064N/A     */
4064N/A    private int pos;
4064N/A
4066N/A    /** Character position just after the last character of the token.
4066N/A     */
4066N/A    private int endPos;
4066N/A
4066N/A    /** The last character position of the previous token.
4066N/A     */
4066N/A    private int prevEndPos;
4066N/A
4066N/A    /** The position where a lexical error occurred;
4066N/A     */
4066N/A    private int errPos = Position.NOPOS;
4066N/A
4066N/A    /** The name of an identifier or token:
4066N/A     */
4066N/A    private Name name;
4066N/A
4066N/A    /** The radix of a numeric literal token.
4066N/A     */
4066N/A    private int radix;
3863N/A
3863N/A    /** Has a @deprecated been encountered in last doc comment?
4064N/A     *  this needs to be reset by client.
3863N/A     */
3863N/A    protected boolean deprecatedFlag = false;
3863N/A
3863N/A    /** A character buffer for literals.
3863N/A     */
3863N/A    private char[] sbuf = new char[128];
3863N/A    private int sp;
3863N/A
3863N/A    /** The input buffer, index of next chacter to be read,
3863N/A     *  index of one past last character in buffer.
3863N/A     */
3863N/A    private char[] buf;
3863N/A    private int bp;
3863N/A    private int buflen;
3863N/A    private int eofPos;
4064N/A
4064N/A    /** The current character.
3863N/A     */
4064N/A    private char ch;
4064N/A
4064N/A    /** The buffer index of the last converted unicode character
3863N/A     */
4064N/A    private int unicodeConversionBp = -1;
3863N/A
3863N/A    /** The log to be used for error reporting.
4064N/A     */
4064N/A    private final Log log;
4064N/A
4064N/A    /** The name table. */
4064N/A    private final Names names;
4064N/A
4064N/A    /** The keyword table. */
3863N/A    private final Keywords keywords;
3863N/A
3863N/A    /** Common code for constructors. */
3863N/A    private Scanner(Factory fac) {
3863N/A        log = fac.log;
3863N/A        names = fac.names;
3863N/A        keywords = fac.keywords;
3863N/A        source = fac.source;
3863N/A        allowBinaryLiterals = source.allowBinaryLiterals();
4064N/A        allowHexFloats = source.allowHexFloats();
4064N/A        allowUnderscoresInLiterals = source.allowBinaryLiterals();
4064N/A    }
3863N/A
4064N/A    private static final boolean hexFloatsWork = hexFloatsWork();
3863N/A    private static boolean hexFloatsWork() {
3863N/A        try {
3863N/A            Float.valueOf("0x1.0p1");
4064N/A            return true;
3863N/A        } catch (NumberFormatException ex) {
3863N/A            return false;
3863N/A        }
3863N/A    }
3863N/A
3863N/A    /** Create a scanner from the input buffer.  buffer must implement
4064N/A     *  array() and compact(), and remaining() must be less than limit().
4064N/A     */
4064N/A    protected Scanner(Factory fac, CharBuffer buffer) {
4064N/A        this(fac, JavacFileManager.toArray(buffer), buffer.limit());
4064N/A    }
4064N/A
3863N/A    /**
4064N/A     * Create a scanner from the input array.  This method might
4066N/A     * modify the array.  To avoid copying the input array, ensure
3863N/A     * that {@code inputLength < input.length} or
4064N/A     * {@code input[input.length -1]} is a white space character.
4064N/A     *
4064N/A     * @param fac the factory which created this Scanner
4064N/A     * @param input the input, might be modified
4064N/A     * @param inputLength the size of the input.
4064N/A     * Must be positive and less than or equal to input.length.
3863N/A     */
3863N/A    protected Scanner(Factory fac, char[] input, int inputLength) {
4064N/A        this(fac);
3863N/A        eofPos = inputLength;
4064N/A        if (inputLength == input.length) {
3863N/A            if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
3863N/A                inputLength--;
4064N/A            } else {
3863N/A                char[] newInput = new char[inputLength + 1];
3863N/A                System.arraycopy(input, 0, newInput, 0, input.length);
4064N/A                input = newInput;
4064N/A            }
4064N/A        }
4064N/A        buf = input;
4064N/A        buflen = inputLength;
4064N/A        buf[buflen] = EOI;
4064N/A        bp = -1;
4064N/A        scanChar();
4064N/A    }
4064N/A
3863N/A    /** Report an error at the given position using the provided arguments.
3863N/A     */
3863N/A    private void lexError(int pos, String key, Object... args) {
3863N/A        log.error(pos, key, args);
3863N/A        token = ERROR;
3863N/A        errPos = pos;
3863N/A    }
3863N/A
3863N/A    /** Report an error at the current token position using the provided
3863N/A     *  arguments.
4064N/A     */
4064N/A    private void lexError(String key, Object... args) {
4064N/A        lexError(pos, key, args);
4064N/A    }
3863N/A
4064N/A    /** Convert an ASCII digit from its base (8, 10, or 16)
4064N/A     *  to its value.
4064N/A     */
4064N/A    private int digit(int base) {
4064N/A        char c = ch;
4064N/A        int result = Character.digit(c, base);
4064N/A        if (result >= 0 && c > 0x7f) {
4064N/A            lexError(pos+1, "illegal.nonascii.digit");
4064N/A            ch = "0123456789abcdef".charAt(result);
4064N/A        }
4064N/A        return result;
3863N/A    }
3863N/A
4064N/A    /** Convert unicode escape; bp points to initial '\' character
3863N/A     *  (Spec 3.3).
3863N/A     */
3863N/A    private void convertUnicode() {
3863N/A        if (ch == '\\' && unicodeConversionBp != bp) {
3863N/A            bp++; ch = buf[bp];
3863N/A            if (ch == 'u') {
4064N/A                do {
4064N/A                    bp++; ch = buf[bp];
4064N/A                } while (ch == 'u');
4064N/A                int limit = bp + 3;
4064N/A                if (limit < buflen) {
4064N/A                    int d = digit(16);
4064N/A                    int code = d;
4064N/A                    while (bp < limit && d >= 0) {
4064N/A                        bp++; ch = buf[bp];
4064N/A                        d = digit(16);
4064N/A                        code = (code << 4) + d;
4064N/A                    }
4064N/A                    if (d >= 0) {
4064N/A                        ch = (char)code;
4064N/A                        unicodeConversionBp = bp;
4064N/A                        return;
4064N/A                    }
3863N/A                }
3863N/A                lexError(bp, "illegal.unicode.esc");
4064N/A            } else {
4064N/A                bp--;
4064N/A                ch = '\\';
4064N/A            }
4064N/A        }
3863N/A    }
4064N/A
4064N/A    /** Read next character.
3863N/A     */
3863N/A    private void scanChar() {
4064N/A        ch = buf[++bp];
4064N/A        if (ch == '\\') {
4064N/A            convertUnicode();
4064N/A        }
4064N/A    }
4064N/A
4064N/A    /** Read next character in comment, skipping over double '\' characters.
4064N/A     */
4064N/A    private void scanCommentChar() {
4064N/A        scanChar();
4064N/A        if (ch == '\\') {
4064N/A            if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
4064N/A                bp++;
3863N/A            } else {
4064N/A                convertUnicode();
4064N/A            }
4064N/A        }
4064N/A    }
4064N/A
4064N/A    /** Append a character to sbuf.
4064N/A     */
4064N/A    private void putChar(char ch) {
4064N/A        if (sp == sbuf.length) {
4064N/A            char[] newsbuf = new char[sbuf.length * 2];
4064N/A            System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
4064N/A            sbuf = newsbuf;
4064N/A        }
4064N/A        sbuf[sp++] = ch;
4064N/A    }
4064N/A
4064N/A    /** For debugging purposes: print character.
4064N/A     */
4064N/A    private void dch() {
4064N/A        System.err.print(ch); System.out.flush();
4064N/A    }
4064N/A
4064N/A    /** Read next character in character or string literal and copy into sbuf.
4064N/A     */
4064N/A    private void scanLitChar(boolean forBytecodeName) {
4064N/A        if (ch == '\\') {
4064N/A            if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
4064N/A                bp++;
4064N/A                putChar('\\');
4064N/A                scanChar();
4064N/A            } else {
4064N/A                scanChar();
3863N/A                switch (ch) {
3863N/A                case '0': case '1': case '2': case '3':
3863N/A                case '4': case '5': case '6': case '7':
3863N/A                    char leadch = ch;
3863N/A                    int oct = digit(8);
3863N/A                    scanChar();
3863N/A                    if ('0' <= ch && ch <= '7') {
3863N/A                        oct = oct * 8 + digit(8);
3863N/A                        scanChar();
3863N/A                        if (leadch <= '3' && '0' <= ch && ch <= '7') {
4168N/A                            oct = oct * 8 + digit(8);
3863N/A                            scanChar();
3863N/A                        }
3863N/A                    }
3863N/A                    putChar((char)oct);
3863N/A                    break;
3863N/A                case 'b':
3863N/A                    putChar('\b'); scanChar(); break;
3863N/A                case 't':
3863N/A                    putChar('\t'); scanChar(); break;
3863N/A                case 'n':
3863N/A                    putChar('\n'); scanChar(); break;
3863N/A                case 'f':
3863N/A                    putChar('\f'); scanChar(); break;
3863N/A                case 'r':
3863N/A                    putChar('\r'); scanChar(); break;
3863N/A                case '\'':
3863N/A                    putChar('\''); scanChar(); break;
3863N/A                case '\"':
3863N/A                    putChar('\"'); scanChar(); break;
3863N/A                case '\\':
3863N/A                    putChar('\\'); scanChar(); break;
3863N/A                case '|': case ',': case '?': case '%':
3863N/A                case '^': case '_': case '{': case '}':
3863N/A                case '!': case '-': case '=':
3863N/A                    if (forBytecodeName) {
4064N/A                        // Accept escape sequences for dangerous bytecode chars.
3863N/A                        // This is illegal in normal Java string or character literals.
3863N/A                        // Note that the escape sequence itself is passed through.
3863N/A                        putChar('\\'); putChar(ch); scanChar();
3863N/A                    } else {
3863N/A                        lexError(bp, "illegal.esc.char");
3863N/A                    }
3863N/A                    break;
3863N/A                default:
3863N/A                    lexError(bp, "illegal.esc.char");
3863N/A                }
3863N/A            }
3863N/A        } else if (bp != buflen) {
3863N/A            putChar(ch); scanChar();
3863N/A        }
3863N/A    }
3863N/A    private void scanLitChar() {
3863N/A        scanLitChar(false);
3863N/A    }
3863N/A
3863N/A    /** Read next character in an exotic name #"foo"
3863N/A     */
3863N/A    private void scanBytecodeNameChar() {
3863N/A        switch (ch) {
3863N/A        // reject any "dangerous" char which is illegal somewhere in the JVM spec
3863N/A        // cf. http://blogs.sun.com/jrose/entry/symbolic_freedom_in_the_vm
3863N/A        case '/': case '.': case ';':  // illegal everywhere
3863N/A        case '<': case '>':  // illegal in methods, dangerous in classes
3863N/A        case '[':  // illegal in classes
3863N/A            lexError(bp, "illegal.bytecode.ident.char", String.valueOf((int)ch));
3863N/A            break;
3863N/A        }
3863N/A        scanLitChar(true);
3863N/A    }
3863N/A
3863N/A    private void scanDigits(int digitRadix) {
3863N/A        char saveCh;
3863N/A        int savePos;
3863N/A        do {
3863N/A            if (ch != '_') {
3863N/A                putChar(ch);
3863N/A            } else {
3863N/A                if (!allowUnderscoresInLiterals) {
3863N/A                    lexError("unsupported.underscore", source.name);
3863N/A                    allowUnderscoresInLiterals = true;
3863N/A                }
3863N/A            }
3863N/A            saveCh = ch;
3863N/A            savePos = bp;
3863N/A            scanChar();
3863N/A        } while (digit(digitRadix) >= 0 || ch == '_');
3863N/A        if (saveCh == '_')
3863N/A            lexError(savePos, "illegal.underscore");
3863N/A    }
3863N/A
3863N/A    /** Read fractional part of hexadecimal floating point number.
3863N/A     */
3863N/A    private void scanHexExponentAndSuffix() {
3863N/A        if (ch == 'p' || ch == 'P') {
3863N/A            putChar(ch);
3863N/A            scanChar();
3863N/A            skipIllegalUnderscores();
3863N/A            if (ch == '+' || ch == '-') {
3863N/A                putChar(ch);
3863N/A                scanChar();
3863N/A            }
3863N/A            skipIllegalUnderscores();
3863N/A            if ('0' <= ch && ch <= '9') {
3863N/A                scanDigits(10);
3863N/A                if (!allowHexFloats) {
3863N/A                    lexError("unsupported.fp.lit", source.name);
3863N/A                    allowHexFloats = true;
3863N/A                }
3863N/A                else if (!hexFloatsWork)
3863N/A                    lexError("unsupported.cross.fp.lit");
3863N/A            } else
3863N/A                lexError("malformed.fp.lit");
3863N/A        } else {
3863N/A            lexError("malformed.fp.lit");
3863N/A        }
3863N/A        if (ch == 'f' || ch == 'F') {
3863N/A            putChar(ch);
3863N/A            scanChar();
3863N/A            token = FLOATLITERAL;
3863N/A        } else {
3863N/A            if (ch == 'd' || ch == 'D') {
3863N/A                putChar(ch);
3863N/A                scanChar();
3863N/A            }
3863N/A            token = DOUBLELITERAL;
3863N/A        }
    }

    /** Read the digits after the decimal point and an optional decimal
     *  exponent of a floating point number.  If the exponent has no
     *  digits, an error is reported and the characters stored for the
     *  exponent are dropped from sbuf again.
     */
    private void scanFraction() {
        skipIllegalUnderscores();
        if (ch >= '0' && ch <= '9') {
            scanDigits(10);
        }
        if (ch != 'e' && ch != 'E') {
            return;
        }
        int beforeExponent = sp;
        putChar(ch);
        scanChar();
        skipIllegalUnderscores();
        if (ch == '+' || ch == '-') {
            putChar(ch);
            scanChar();
        }
        skipIllegalUnderscores();
        if (ch >= '0' && ch <= '9') {
            scanDigits(10);
        } else {
            lexError("malformed.fp.lit");
            sp = beforeExponent;
        }
    }

    /** Read fractional part and 'd' or 'f' suffix of a decimal floating
     *  point number, and set the token to FLOATLITERAL or DOUBLELITERAL.
     */
    private void scanFractionAndSuffix() {
        this.radix = 10;
        scanFraction();
        boolean floatSuffix = (ch == 'f' || ch == 'F');
        if (floatSuffix || ch == 'd' || ch == 'D') {
            putChar(ch);
            scanChar();
        }
        token = floatSuffix ? FLOATLITERAL : DOUBLELITERAL;
    }

    /** Read the fractional part, exponent and 'd' or 'f' suffix of a
     *  hexadecimal floating point number; the current character must be
     *  the '.'.
     *
     *  @param seendigit whether a hex digit was already read before the '.'
     */
    private void scanHexFractionAndSuffix(boolean seendigit) {
        this.radix = 16;
        assert ch == '.';
        putChar(ch);
        scanChar();
        skipIllegalUnderscores();
        boolean sawDigit = seendigit;
        if (digit(16) >= 0) {
            sawDigit = true;
            scanDigits(16);
        }
        if (sawDigit) {
            scanHexExponentAndSuffix();
        } else {
            // no digit on either side of the '.'
            lexError("invalid.hex.number");
        }
    }

    /** If the current character is an underscore, report it as illegal
     *  (once) and skip the whole run of underscores.
     */
    private void skipIllegalUnderscores() {
        if (ch != '_') {
            return;
        }
        lexError(bp, "illegal.underscore");
        do {
            scanChar();
        } while (ch == '_');
    }

    /** Read a number and set the token to the matching literal kind.
     *  @param radix  The radix of the number; one of 2, 8, 10, 16.
     */
    private void scanNumber(int radix) {
        this.radix = radix;
        // for octal, allow base-10 digit in case it's a float literal
        final int digitRadix = (radix == 8) ? 10 : radix;
        final boolean seendigit = digit(digitRadix) >= 0;
        if (seendigit) {
            scanDigits(digitRadix);
        }
        final boolean hex = (radix == 16);
        final boolean decimalDigits = (digitRadix == 10);
        if (hex && ch == '.') {
            scanHexFractionAndSuffix(seendigit);
            return;
        }
        if (hex && seendigit && (ch == 'p' || ch == 'P')) {
            scanHexExponentAndSuffix();
            return;
        }
        if (decimalDigits && ch == '.') {
            putChar(ch);
            scanChar();
            scanFractionAndSuffix();
            return;
        }
        if (decimalDigits && "eEfFdD".indexOf(ch) >= 0) {
            scanFractionAndSuffix();
            return;
        }
        // an integer literal; the 'l' suffix is consumed but not stored
        if (ch == 'l' || ch == 'L') {
            scanChar();
            token = LONGLITERAL;
        } else {
            token = INTLITERAL;
        }
    }

    /** Read an identifier.  Characters are appended to sbuf for as long as
     *  they are identifier parts; at the first character that is not one
     *  (or at the end of input) the collected text is turned into a name
     *  and the token is looked up in the keyword table.
     */
    private void scanIdent() {
        boolean isJavaIdentifierPart;
        char high;
        do {
            // store the current character; putChar is only needed when sbuf must grow
            if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;
            // optimization, was: putChar(ch);

            scanChar();
            switch (ch) {
            // ASCII letters, digits, '$', '_' and the listed control
            // characters continue the identifier
            case 'A': case 'B': case 'C': case 'D': case 'E':
            case 'F': case 'G': case 'H': case 'I': case 'J':
            case 'K': case 'L': case 'M': case 'N': case 'O':
            case 'P': case 'Q': case 'R': case 'S': case 'T':
            case 'U': case 'V': case 'W': case 'X': case 'Y':
            case 'Z':
            case 'a': case 'b': case 'c': case 'd': case 'e':
            case 'f': case 'g': case 'h': case 'i': case 'j':
            case 'k': case 'l': case 'm': case 'n': case 'o':
            case 'p': case 'q': case 'r': case 's': case 't':
            case 'u': case 'v': case 'w': case 'x': case 'y':
            case 'z':
            case '$': case '_':
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
            case '\u0000': case '\u0001': case '\u0002': case '\u0003':
            case '\u0004': case '\u0005': case '\u0006': case '\u0007':
            case '\u0008': case '\u000E': case '\u000F': case '\u0010':
            case '\u0011': case '\u0012': case '\u0013': case '\u0014':
            case '\u0015': case '\u0016': case '\u0017':
            case '\u0018': case '\u0019': case '\u001B':
            case '\u007F':
                break;
            case '\u001A': // EOI is also a legal identifier part
                // at or past buflen this is the real end of input, so the
                // identifier is complete; otherwise it is kept as a part
                if (bp >= buflen) {
                    name = names.fromChars(sbuf, 0, sp);
                    token = keywords.key(name);
                    return;
                }
                break;
            default:
                if (ch < '\u0080') {
                    // all ASCII range chars already handled, above
                    isJavaIdentifierPart = false;
                } else {
                    // for a surrogate pair, scanSurrogates leaves the low
                    // surrogate in ch and returns the high one
                    high = scanSurrogates();
                    if (high != 0) {
                        // store the high surrogate now; the low surrogate in ch
                        // is stored at the top of the next iteration
                        if (sp == sbuf.length) {
                            putChar(high);
                        } else {
                            sbuf[sp++] = high;
                        }
                        isJavaIdentifierPart = Character.isJavaIdentifierPart(
                            Character.toCodePoint(high, ch));
                    } else {
                        isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);
                    }
                }
                if (!isJavaIdentifierPart) {
                    // end of the identifier: build its name and look up the token
                    name = names.fromChars(sbuf, 0, sp);
                    token = keywords.key(name);
                    return;
                }
            }
        } while (true);
    }

    /** Are surrogates supported?  Probed once when the class is initialized.
     */
    final static boolean surrogatesSupported = surrogatesSupported();

    /** Call Character.isHighSurrogate and report whether the method is
     *  available on the running platform.
     */
    private static boolean surrogatesSupported() {
        boolean available;
        try {
            Character.isHighSurrogate('a');
            available = true;
        } catch (NoSuchMethodError ex) {
            available = false;
        }
        return available;
    }

    /** Scan surrogate pairs.  If 'ch' is a high surrogate and
     *  the next character is a low surrogate, then put the low
     *  surrogate in 'ch', and return the high surrogate.
     *  Otherwise, just return 0; when a high surrogate was not followed
     *  by a low one, 'ch' is set back to the high surrogate (the buffer
     *  index is not moved back).
     */
    private char scanSurrogates() {
        if (!surrogatesSupported || !Character.isHighSurrogate(ch)) {
            return 0;
        }
        char lead = ch;

        scanChar();

        if (Character.isLowSurrogate(ch)) {
            return lead;
        }
        ch = lead;
        return 0;
    }

    /** Return true if ch can be part of an operator, i.e. if it is one of
     *  {@code ! % & * ? + - : < = > ^ | ~ @}.
     */
    private boolean isSpecial(char ch) {
        return "!%&*?+-:<=>^|~@".indexOf(ch) >= 0;
    }

    /** Read longest possible sequence of special characters and convert
     *  to token.  The sequence is extended one character at a time for as
     *  long as the keyword table maps the extended text to something other
     *  than IDENTIFIER.
     */
    private void scanOperator() {
        do {
            putChar(ch);
            Name candidate = names.fromChars(sbuf, 0, sp);
            if (keywords.key(candidate) == IDENTIFIER) {
                // the longer text is not a known operator: drop the
                // character again and keep what was recognized so far
                sp--;
                return;
            }
            name = candidate;
            token = keywords.key(candidate);
            scanChar();
        } while (isSpecial(ch));
    }

    /**
     * Scan a documentation comment; determine if a deprecated tag is present.
     * Called once the initial /, * have been skipped, positioned at the second *
     * (which is treated as the beginning of the first line).
     * Stops positioned at the closing '/'.
     * Sets deprecatedFlag when a line starts (after optional white space and
     * stars) with "@deprecated" followed by white space or by the end of the
     * comment.
     */
    @SuppressWarnings("fallthrough")
    private void scanDocComment() {
        boolean deprecatedPrefix = false;

        forEachLine:
        while (bp < buflen) {

            // Skip optional WhiteSpace at beginning of line
            while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
                scanCommentChar();
            }

            // Skip optional consecutive Stars
            while (bp < buflen && ch == '*') {
                scanCommentChar();
                // a '/' right after a star ends the comment
                if (ch == '/') {
                    return;
                }
            }

            // Skip optional WhiteSpace after Stars
            while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
                scanCommentChar();
            }

            deprecatedPrefix = false;
            // At beginning of line in the JavaDoc sense.
            // Match "@deprecated" one character at a time; the match is not
            // attempted once deprecatedFlag is already set.
            if (bp < buflen && ch == '@' && !deprecatedFlag) {
                scanCommentChar();
                if (bp < buflen && ch == 'd') {
                    scanCommentChar();
                    if (bp < buflen && ch == 'e') {
                        scanCommentChar();
                        if (bp < buflen && ch == 'p') {
                            scanCommentChar();
                            if (bp < buflen && ch == 'r') {
                                scanCommentChar();
                                if (bp < buflen && ch == 'e') {
                                    scanCommentChar();
                                    if (bp < buflen && ch == 'c') {
                                        scanCommentChar();
                                        if (bp < buflen && ch == 'a') {
                                            scanCommentChar();
                                            if (bp < buflen && ch == 't') {
                                                scanCommentChar();
                                                if (bp < buflen && ch == 'e') {
                                                    scanCommentChar();
                                                    if (bp < buflen && ch == 'd') {
                                                        deprecatedPrefix = true;
                                                        scanCommentChar();
                                                    }}}}}}}}}}}
            // The tag only counts if it is followed by white space or by the
            // end of the comment; otherwise it is the prefix of a longer word.
            if (deprecatedPrefix && bp < buflen) {
                if (Character.isWhitespace(ch)) {
                    deprecatedFlag = true;
                } else if (ch == '*') {
                    scanCommentChar();
                    if (ch == '/') {
                        deprecatedFlag = true;
                        return;
                    }
                }
            }

            // Skip rest of line
            while (bp < buflen) {
                switch (ch) {
                case '*':
                    scanCommentChar();
                    // star followed by '/' ends the comment
                    if (ch == '/') {
                        return;
                    }
                    break;
                case CR: // (Spec 3.4)
                    scanCommentChar();
                    // a CR that is not followed by LF ends the line by itself
                    if (ch != LF) {
                        continue forEachLine;
                    }
                    /* fall through to LF case */
                case LF: // (Spec 3.4)
                    scanCommentChar();
                    continue forEachLine;
                default:
                    scanCommentChar();
                }
            } // rest of line
        } // forEachLine
        return;
    }

    /** The value of a literal token, recorded as a string: the characters
     *  collected in sbuf for the current token.
     *  For integers, leading 0x and 'l' suffixes are suppressed.
     */
    public String stringVal() {
        return String.valueOf(sbuf, 0, sp);
    }

    /** Read the next token from the input and make it the current token.
     *
     *  <p>Whitespace, line terminators and comments are consumed first
     *  (each one is reported through processWhiteSpace(),
     *  processLineTerminator() or processComment()). The method returns
     *  as soon as a token has been scanned or a lexical error has been
     *  reported through lexError().
     *
     *  <p>On entry the old value of endPos is saved in prevEndPos and the
     *  literal buffer index sp is reset to 0. While scanning, pos is set
     *  to the buffer index at which the current item starts. On every exit
     *  path endPos is set to the buffer index at which scanning stopped.
     */
    public void nextToken() {

        try {
            // Remember where the previous token ended before endPos is overwritten.
            prevEndPos = endPos;
            // Start with an empty literal buffer (stringVal() reads sbuf[0..sp)).
            sp = 0;

            // Each iteration looks at the current character. The whitespace,
            // line terminator and comment cases 'break' out of the switch and
            // loop again; every other case returns from the method.
            while (true) {
                pos = bp;
                switch (ch) {
                case ' ': // (Spec 3.6)
                case '\t': // (Spec 3.6)
                case FF: // (Spec 3.6)
                    // Consume the whole run of blanks, tabs and form feeds at once.
                    do {
                        scanChar();
                    } while (ch == ' ' || ch == '\t' || ch == FF);
                    endPos = bp;
                    processWhiteSpace();
                    break;
                case LF: // (Spec 3.4)
                    scanChar();
                    endPos = bp;
                    processLineTerminator();
                    break;
                case CR: // (Spec 3.4)
                    scanChar();
                    // A CR immediately followed by LF is reported as one terminator.
                    if (ch == LF) {
                        scanChar();
                    }
                    endPos = bp;
                    processLineTerminator();
                    break;
                // ASCII letters, '$' and '_' start an identifier (or keyword).
                case 'A': case 'B': case 'C': case 'D': case 'E':
                case 'F': case 'G': case 'H': case 'I': case 'J':
                case 'K': case 'L': case 'M': case 'N': case 'O':
                case 'P': case 'Q': case 'R': case 'S': case 'T':
                case 'U': case 'V': case 'W': case 'X': case 'Y':
                case 'Z':
                case 'a': case 'b': case 'c': case 'd': case 'e':
                case 'f': case 'g': case 'h': case 'i': case 'j':
                case 'k': case 'l': case 'm': case 'n': case 'o':
                case 'p': case 'q': case 'r': case 's': case 't':
                case 'u': case 'v': case 'w': case 'x': case 'y':
                case 'z':
                case '$': case '_':
                    scanIdent();
                    return;
                case '0':
                    // A leading zero selects the radix: 0x/0X hexadecimal,
                    // 0b/0B binary, anything else is scanned with radix 8.
                    scanChar();
                    if (ch == 'x' || ch == 'X') {
                        // The "0x" prefix itself is not stored in the literal buffer.
                        scanChar();
                        skipIllegalUnderscores();
                        if (ch == '.') {
                            // Hex literal that starts with its fraction part.
                            scanHexFractionAndSuffix(false);
                        } else if (digit(16) < 0) {
                            // NOTE(review): a negative digit(16) presumably means the
                            // current character is not a hex digit -- confirm in digit().
                            lexError("invalid.hex.number");
                        } else {
                            scanNumber(16);
                        }
                    } else if (ch == 'b' || ch == 'B') {
                        if (!allowBinaryLiterals) {
                            lexError("unsupported.binary.lit", source.name);
                            // Setting the flag keeps this branch from being taken
                            // again on later binary literals.
                            allowBinaryLiterals = true;
                        }
                        scanChar();
                        skipIllegalUnderscores();
                        scanNumber(2);
                    } else {
                        // Keep the '0' in the literal buffer and scan with radix 8.
                        putChar('0');
                        if (ch == '_') {
                            // Skip the run of underscores after the leading zero and
                            // report the position of the first one when digit(10)
                            // is negative afterwards (presumably: no digit follows).
                            int savePos = bp;
                            do {
                                scanChar();
                            } while (ch == '_');
                            if (digit(10) < 0) {
                                lexError(savePos, "illegal.underscore");
                            }
                        }
                        scanNumber(8);
                    }
                    return;
                case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                    scanNumber(10);
                    return;
                case '.':
                    // One of: a floating-point literal starting with its fraction
                    // (".5"), the ELLIPSIS token ("..."), or a plain DOT.
                    scanChar();
                    if ('0' <= ch && ch <= '9') {
                        putChar('.');
                        scanFractionAndSuffix();
                    } else if (ch == '.') {
                        putChar('.'); putChar('.');
                        scanChar();
                        if (ch == '.') {
                            scanChar();
                            putChar('.');
                            token = ELLIPSIS;
                        } else {
                            // Exactly two dots: reported as a malformed
                            // floating-point literal.
                            lexError("malformed.fp.lit");
                        }
                    } else {
                        token = DOT;
                    }
                    return;
                // Single-character separators.
                case ',':
                    scanChar(); token = COMMA; return;
                case ';':
                    scanChar(); token = SEMI; return;
                case '(':
                    scanChar(); token = LPAREN; return;
                case ')':
                    scanChar(); token = RPAREN; return;
                case '[':
                    scanChar(); token = LBRACKET; return;
                case ']':
                    scanChar(); token = RBRACKET; return;
                case '{':
                    scanChar(); token = LBRACE; return;
                case '}':
                    scanChar(); token = RBRACE; return;
                case '/':
                    // A slash starts a line comment, a block or doc comment,
                    // the "/=" operator, or is the "/" operator on its own.
                    scanChar();
                    if (ch == '/') {
                        // Line comment: consume up to the line end or the end
                        // of the buffer, whichever comes first.
                        do {
                            scanCommentChar();
                        } while (ch != CR && ch != LF && bp < buflen);
                        // The comment hook is only invoked when the comment
                        // stopped before the end of the buffer.
                        if (bp < buflen) {
                            endPos = bp;
                            processComment(CommentStyle.LINE);
                        }
                        break;
                    } else if (ch == '*') {
                        scanChar();
                        CommentStyle style;
                        if (ch == '*') {
                            // A second '*' right after the opening marks a doc comment.
                            // NOTE(review): scanDocComment() is expected to stop with
                            // ch on the closing '/', as the shared check below
                            // requires -- confirm.
                            style = CommentStyle.JAVADOC;
                            scanDocComment();
                        } else {
                            style = CommentStyle.BLOCK;
                            // Advance until a '*' is directly followed by '/',
                            // or the buffer is exhausted.
                            while (bp < buflen) {
                                if (ch == '*') {
                                    scanChar();
                                    if (ch == '/') break;
                                } else {
                                    scanCommentChar();
                                }
                            }
                        }
                        if (ch == '/') {
                            // Properly closed: consume the '/' and report the comment.
                            scanChar();
                            endPos = bp;
                            processComment(style);
                            break;
                        } else {
                            lexError("unclosed.comment");
                            return;
                        }
                    } else if (ch == '=') {
                        name = names.slashequals;
                        token = SLASHEQ;
                        scanChar();
                    } else {
                        name = names.slash;
                        token = SLASH;
                    }
                    return;
                case '\'':
                    // Character literal.
                    scanChar();
                    if (ch == '\'') {
                        lexError("empty.char.lit");
                    } else {
                        // A line end inside the literal is reported, but scanning
                        // of the literal continues afterwards.
                        if (ch == CR || ch == LF)
                            lexError(pos, "illegal.line.end.in.char.lit");
                        scanLitChar();
                        if (ch == '\'') {
                            scanChar();
                            token = CHARLITERAL;
                        } else {
                            lexError(pos, "unclosed.char.lit");
                        }
                    }
                    return;
                case '\"':
                    // String literal: read characters until the closing quote,
                    // a line end, or the end of the buffer.
                    scanChar();
                    while (ch != '\"' && ch != CR && ch != LF && bp < buflen)
                        scanLitChar();
                    if (ch == '\"') {
                        token = STRINGLITERAL;
                        scanChar();
                    } else {
                        lexError(pos, "unclosed.str.lit");
                    }
                    return;
                case '#':
                    // '#' is only accepted as the start of a quoted identifier
                    // of the form #"..." ; any other use is an illegal character.
                    scanChar();
                    if (ch == '\"') {
                        scanChar();
                        // An empty name is reported, but scanning still falls
                        // through to the closing-quote check below.
                        if (ch == '\"')
                            lexError(pos, "empty.bytecode.ident");
                        while (ch != '\"' && ch != CR && ch != LF && bp < buflen) {
                            scanBytecodeNameChar();
                        }
                        if (ch == '\"') {
                            // The name is taken from the literal buffer contents.
                            name = names.fromChars(sbuf, 0, sp);
                            token = IDENTIFIER;  // even if #"int" or #"do"
                            scanChar();
                        } else {
                            lexError(pos, "unclosed.bytecode.ident");
                        }
                    } else {
                        lexError("illegal.char", String.valueOf((int)'#'));
                    }
                    return;
                default:
                    // Everything not matched above: operator characters,
                    // non-ASCII identifier starts, end of input, or an
                    // illegal character.
                    if (isSpecial(ch)) {
                        scanOperator();
                    } else {
                        boolean isJavaIdentifierStart;
                        if (ch < '\u0080') {
                            // all ASCII range chars already handled, above
                            isJavaIdentifierStart = false;
                        } else {
                            // A non-zero result is the high half of a surrogate
                            // pair; ch is then used as the low half below.
                            char high = scanSurrogates();
                            if (high != 0) {
                                // Store the high surrogate in the literal buffer.
                                // putChar() is used when the buffer is full
                                // (presumably it grows the buffer -- confirm).
                                if (sp == sbuf.length) {
                                    putChar(high);
                                } else {
                                    sbuf[sp++] = high;
                                }

                                isJavaIdentifierStart = Character.isJavaIdentifierStart(
                                    Character.toCodePoint(high, ch));
                            } else {
                                isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);
                            }
                        }
                        if (isJavaIdentifierStart) {
                            scanIdent();
                        } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5
                            // End of input: either the buffer is exhausted, or the
                            // very last character of the buffer is EOI.
                            token = EOF;
                            pos = bp = eofPos;
                        } else {
                            // Report the offending character by its numeric value
                            // and skip over it.
                            lexError("illegal.char", String.valueOf((int)ch));
                            scanChar();
                        }
                    }
                    return;
                }
            }
        } finally {
            // Runs on every exit path, including all the early returns above.
            endPos = bp;
            if (scannerDebug)
                System.out.println("nextToken(" + pos
                                   + "," + endPos + ")=|" +
                                   new String(getRawCharacters(pos, endPos))
                                   + "|");
        }
    }

    /** Gives back the current token, i.e. the one most recently
     *  stored by nextToken() or by token(Token).
     */
    public Token token() {
        return this.token;
    }

    /** Sets the current token, replacing whatever nextToken() stored.
     *  Only the token field is changed; no other scanner state is touched.
     *
     *  @param token the token that token() should return from now on
     */
    public void token(Token token) {
        this.token = token;
    }

    /** Start position of the current token, expressed as a 0-based
     *  offset from the beginning of the raw input stream
     *  (before unicode translation).
     */
    public int pos() {
        return this.pos;
    }

    /** Return the end position of the current token.
     *  nextToken() stores the scanner's buffer index here when it
     *  finishes, and its debug output passes the value to
     *  getRawCharacters(int, int) as the exclusive end index.
     */
    public int endPos() {
        return this.endPos;
    }

    /** Return the end position of the previous token, i.e. the value
     *  endPos() had when the latest call to nextToken() started.
     */
    public int prevEndPos() {
        return this.prevEndPos;
    }

    /** Return the position at which a lexical error occurred.
     */
    public int errPos() {
        return this.errPos;
    }

    /** Record the position at which a lexical error occurred.
     *
     *  @param pos the error position to be returned by errPos()
     */
    public void errPos(int pos) {
        this.errPos = pos;
    }

    /** Return the name associated with the current token
     *  (the name of an identifier or of the token itself).
     */
    public Name name() {
        return this.name;
    }

    /** Return the radix recorded for a numeric literal token.
     */
    public int radix() {
        return this.radix;
    }

    /** Tells whether a @deprecated was encountered in the last doc comment.
     *  The flag is not cleared here; the client has to reset it
     *  with resetDeprecatedFlag.
     */
    public boolean deprecatedFlag() {
        return this.deprecatedFlag;
    }

    /** Clears the flag reported by deprecatedFlag().
     */
    public void resetDeprecatedFlag() {
        this.deprecatedFlag = false;
    }

    /**
     * Returns the documentation string of the current token.
     * This implementation keeps no documentation strings and
     * always returns <code>null</code>.
     * NOTE(review): presumably a subclass that records doc comments
     * overrides this method -- confirm.
     *
     * @return always <code>null</code> in this implementation
     */
    public String docComment() {
        return null;
    }

    /**
     * Returns a copy of the first <code>buflen</code> characters of
     * the input buffer.
     * Unicode escape sequences are not translated.
     *
     * @return a newly allocated array of length <code>buflen</code>
     */
    public char[] getRawCharacters() {
        final int count = buflen;
        final char[] copy = new char[count];
        System.arraycopy(buf, 0, copy, 0, count);
        return copy;
    }

    /**
     * Returns a copy of a character array subset of the input buffer.
     * The returned array begins at the <code>beginIndex</code> and
     * extends to the character at index <code>endIndex - 1</code>.
     * Thus the length of the substring is <code>endIndex-beginIndex</code>.
     * This behavior is like
     * <code>String.substring(beginIndex, endIndex)</code>.
     * Unicode escape sequences are not translated.
     *
     * @param beginIndex the beginning index, inclusive.
     * @param endIndex the ending index, exclusive.
     * @return a newly allocated array holding the requested characters
     * @throws IndexOutOfBoundsException if the requested length
     *         <code>endIndex-beginIndex</code> is negative, or if
     *         either offset is outside of the array bounds
     */
    public char[] getRawCharacters(int beginIndex, int endIndex) {
        int length = endIndex - beginIndex;
        if (length < 0) {
            // Without this check the allocation below would fail with a
            // NegativeArraySizeException rather than the documented exception.
            throw new IndexOutOfBoundsException(
                "beginIndex: " + beginIndex + ", endIndex: " + endIndex);
        }
        char[] chars = new char[length];
        // Offsets outside the buffer are rejected by arraycopy itself.
        System.arraycopy(buf, beginIndex, chars, 0, length);
        return chars;
    }

    /**
     * The kinds of comment that the scanner reports to
     * <code>processComment</code>.
     */
    public enum CommentStyle {
        /** A comment introduced by two slashes. */
        LINE,

        /** A slash-star comment that is not a doc comment. */
        BLOCK,

        /** A comment opened by a slash followed by two stars. */
        JAVADOC
    }

    /**
     * Hook invoked by the scanner once a complete comment has been read.
     * At that point pos and endPos mark the comment boundary.
     * This implementation only prints a trace line, and only when
     * scannerDebug is set.
     *
     * @param style the kind of comment that was scanned
     */
    protected void processComment(CommentStyle style) {
        if (!scannerDebug) {
            return;
        }
        String raw = new String(getRawCharacters(pos, endPos));
        System.out.println("processComment(" + pos + "," + endPos + "," + style + ")=|" + raw + "|");
    }

    /**
     * Hook invoked by the scanner once a complete run of whitespace has
     * been read. At that point pos and endPos mark the whitespace boundary.
     * This implementation only prints a trace line, and only when
     * scannerDebug is set.
     */
    protected void processWhiteSpace() {
        if (!scannerDebug) {
            return;
        }
        String raw = new String(getRawCharacters(pos, endPos));
        System.out.println("processWhitespace(" + pos + "," + endPos + ")=|" + raw + "|");
    }

    /**
     * Hook invoked by the scanner once a line terminator has been read.
     * This implementation only prints a trace line covering pos to endPos,
     * and only when scannerDebug is set.
     */
    protected void processLineTerminator() {
        if (!scannerDebug) {
            return;
        }
        String raw = new String(getRawCharacters(pos, endPos));
        System.out.println("processTerminator(" + pos + "," + endPos + ")=|" + raw + "|");
    }

    /** Creates a map for translating between line numbers and
     *  positions in the input, built from the first buflen
     *  characters of the input buffer.
     *
     * @return a LineMap */
    public Position.LineMap getLineMap() {
        final Position.LineMap lineMap = Position.makeLineMap(buf, buflen, false);
        return lineMap;
    }

}