Scanner.java revision 596
2830N/A/*
2830N/A * Copyright (c) 1999, 2008, Oracle and/or its affiliates. All rights reserved.
2830N/A * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
2830N/A *
2830N/A * This code is free software; you can redistribute it and/or modify it
2830N/A * under the terms of the GNU General Public License version 2 only, as
2830N/A * published by the Free Software Foundation. Oracle designates this
2830N/A * particular file as subject to the "Classpath" exception as provided
2830N/A * by Oracle in the LICENSE file that accompanied this code.
2830N/A *
2830N/A * This code is distributed in the hope that it will be useful, but WITHOUT
2830N/A * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
2830N/A * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
2830N/A * version 2 for more details (a copy is included in the LICENSE file that
2830N/A * accompanied this code).
2830N/A *
2830N/A * You should have received a copy of the GNU General Public License version
2830N/A * 2 along with this work; if not, write to the Free Software Foundation,
2830N/A * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
2830N/A *
2830N/A * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
2830N/A * or visit www.oracle.com if you need additional information or have any
2830N/A * questions.
2830N/A */
2830N/A
2830N/Apackage com.sun.tools.javac.parser;
2830N/A
3478N/Aimport java.nio.*;
2830N/A
2830N/Aimport com.sun.tools.javac.code.Source;
2830N/Aimport com.sun.tools.javac.file.JavacFileManager;
3416N/Aimport com.sun.tools.javac.util.*;
2830N/A
2830N/A
2830N/Aimport static com.sun.tools.javac.parser.Token.*;
2830N/Aimport static com.sun.tools.javac.util.LayoutCharacters.*;
2830N/A
2830N/A/** The lexical analyzer maps an input stream consisting of
2830N/A * ASCII characters and Unicode escapes into a token sequence.
2830N/A *
2830N/A * <p><b>This is NOT part of any supported API.
2830N/A * If you write code that depends on this, you do so at your own risk.
2830N/A * This code and its internal interfaces are subject to change or
3478N/A * deletion without notice.</b>
3478N/A */
3478N/Apublic class Scanner implements Lexer {
3478N/A
3416N/A private static boolean scannerDebug = false;
3416N/A
2830N/A /** A factory for creating scanners. */
2830N/A public static class Factory {
2830N/A /** The context key for the scanner factory. */
2830N/A public static final Context.Key<Scanner.Factory> scannerFactoryKey =
3416N/A new Context.Key<Scanner.Factory>();
3416N/A
3416N/A /** Get the Factory instance for this context. */
3416N/A public static Factory instance(Context context) {
3416N/A Factory instance = context.get(scannerFactoryKey);
3416N/A if (instance == null)
3416N/A instance = new Factory(context);
3416N/A return instance;
3416N/A }
3416N/A
3416N/A final Log log;
3478N/A final Names names;
3416N/A final Source source;
3416N/A final Keywords keywords;
3478N/A
3416N/A /** Create a new scanner factory. */
3416N/A protected Factory(Context context) {
3416N/A context.put(scannerFactoryKey, this);
3416N/A this.log = Log.instance(context);
3416N/A this.names = Names.instance(context);
3416N/A this.source = Source.instance(context);
3416N/A this.keywords = Keywords.instance(context);
2830N/A }
2830N/A
2830N/A public Scanner newScanner(CharSequence input) {
2830N/A if (input instanceof CharBuffer) {
2830N/A return new Scanner(this, (CharBuffer)input);
2830N/A } else {
3216N/A char[] array = input.toString().toCharArray();
2830N/A return newScanner(array, array.length);
2830N/A }
2830N/A }
2830N/A
3478N/A public Scanner newScanner(char[] input, int inputLength) {
3478N/A return new Scanner(this, input, inputLength);
3478N/A }
3478N/A }
3478N/A
3478N/A /* Output variables; set by nextToken():
3478N/A */
3478N/A
3478N/A /** The token, set by nextToken().
3478N/A */
3478N/A private Token token;
3478N/A
2830N/A /** Allow hex floating-point literals.
2830N/A */
2830N/A private boolean allowHexFloats;
3478N/A
2830N/A /** Allow binary literals.
2830N/A */
2830N/A private boolean allowBinaryLiterals;
2830N/A
2830N/A /** Allow underscores in literals.
2830N/A */
2830N/A private boolean allowUnderscoresInLiterals;
3478N/A
2830N/A /** The source language setting.
2830N/A */
2830N/A private Source source;
2830N/A
2830N/A /** The token's position, 0-based offset from beginning of text.
3478N/A */
2830N/A private int pos;
2830N/A
2830N/A /** Character position just after the last character of the token.
2830N/A */
3478N/A private int endPos;
3478N/A
2830N/A /** The last character position of the previous token.
2830N/A */
2830N/A private int prevEndPos;
2830N/A
3478N/A /** The position where a lexical error occurred;
2830N/A */
2830N/A private int errPos = Position.NOPOS;
2830N/A
3478N/A /** The name of an identifier or token:
2830N/A */
3478N/A private Name name;
3478N/A
2830N/A /** The radix of a numeric literal token.
2830N/A */
2830N/A private int radix;
2830N/A
2830N/A /** Has a @deprecated been encountered in last doc comment?
3478N/A * this needs to be reset by client.
2830N/A */
2830N/A protected boolean deprecatedFlag = false;
2830N/A
2830N/A /** A character buffer for literals.
2830N/A */
2830N/A private char[] sbuf = new char[128];
2830N/A private int sp;
2830N/A
2830N/A /** The input buffer, index of next chacter to be read,
2830N/A * index of one past last character in buffer.
2830N/A */
2830N/A private char[] buf;
2830N/A private int bp;
2830N/A private int buflen;
2830N/A private int eofPos;
2830N/A
2830N/A /** The current character.
2830N/A */
3478N/A private char ch;
2830N/A
2830N/A /** The buffer index of the last converted unicode character
2830N/A */
3478N/A private int unicodeConversionBp = -1;
3478N/A
2830N/A /** The log to be used for error reporting.
3478N/A */
3478N/A private final Log log;
2830N/A
2830N/A /** The name table. */
2830N/A private final Names names;
2830N/A
2830N/A /** The keyword table. */
2830N/A private final Keywords keywords;
2830N/A
/** Common code for constructors: copies the shared log, name table and
 *  keyword table from the factory and caches the language-feature
 *  switches derived from the source level.
 *
 *  @param fac the factory which created this Scanner
 */
private Scanner(Factory fac) {
    log = fac.log;
    names = fac.names;
    keywords = fac.keywords;
    source = fac.source;
    allowBinaryLiterals = source.allowBinaryLiterals();
    allowHexFloats = source.allowHexFloats();
    // Was initialised from allowBinaryLiterals(), which only gave the right
    // answer while both features happened to share a source level.
    // NOTE(review): relies on Source.allowUnderscoresInLiterals() existing
    // alongside allowBinaryLiterals(); confirm against Source.java.
    allowUnderscoresInLiterals = source.allowUnderscoresInLiterals();
}
2830N/A
2830N/A private static final boolean hexFloatsWork = hexFloatsWork();
2830N/A private static boolean hexFloatsWork() {
2830N/A try {
2830N/A Float.valueOf("0x1.0p1");
2830N/A return true;
2830N/A } catch (NumberFormatException ex) {
2830N/A return false;
2830N/A }
2830N/A }
2830N/A
/** Create a scanner from the given character buffer. The buffer must
 *  implement array() and compact(), and remaining() must be less than
 *  limit(). The characters are obtained through
 *  JavacFileManager.toArray(buffer) and buffer.limit() is used as the
 *  input length.
 *
 *  @param fac    the factory which created this Scanner
 *  @param buffer the input buffer
 */
protected Scanner(Factory fac, CharBuffer buffer) {
    this(fac, JavacFileManager.toArray(buffer), buffer.limit());
}
2830N/A
2830N/A /**
2830N/A * Create a scanner from the input array. This method might
2830N/A * modify the array. To avoid copying the input array, ensure
2830N/A * that {@code inputLength < input.length} or
2830N/A * {@code input[input.length -1]} is a white space character.
2830N/A *
2830N/A * @param fac the factory which created this Scanner
2830N/A * @param input the input, might be modified
2830N/A * @param inputLength the size of the input.
2830N/A * Must be positive and less than or equal to input.length.
2830N/A */
2830N/A protected Scanner(Factory fac, char[] input, int inputLength) {
2830N/A this(fac);
2830N/A eofPos = inputLength;
2830N/A if (inputLength == input.length) {
2830N/A if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
2830N/A inputLength--;
2830N/A } else {
2830N/A char[] newInput = new char[inputLength + 1];
2830N/A System.arraycopy(input, 0, newInput, 0, input.length);
2830N/A input = newInput;
2830N/A }
2830N/A }
2830N/A buf = input;
2830N/A buflen = inputLength;
2830N/A buf[buflen] = EOI;
2830N/A bp = -1;
2830N/A scanChar();
2830N/A }
2830N/A
2830N/A /** Report an error at the given position using the provided arguments.
2830N/A */
2830N/A private void lexError(int pos, String key, Object... args) {
2830N/A log.error(pos, key, args);
2830N/A token = ERROR;
2830N/A errPos = pos;
2830N/A }
2830N/A
2830N/A /** Report an error at the current token position using the provided
2830N/A * arguments.
2830N/A */
2830N/A private void lexError(String key, Object... args) {
2830N/A lexError(pos, key, args);
2830N/A }
2830N/A
2830N/A /** Convert an ASCII digit from its base (8, 10, or 16)
2830N/A * to its value.
2830N/A */
2830N/A private int digit(int base) {
3478N/A char c = ch;
2830N/A int result = Character.digit(c, base);
2830N/A if (result >= 0 && c > 0x7f) {
2830N/A lexError(pos+1, "illegal.nonascii.digit");
2830N/A ch = "0123456789abcdef".charAt(result);
2830N/A }
2830N/A return result;
2830N/A }
2830N/A
2830N/A /** Convert unicode escape; bp points to initial '\' character
2830N/A * (Spec 3.3).
2830N/A */
2830N/A private void convertUnicode() {
2830N/A if (ch == '\\' && unicodeConversionBp != bp) {
2830N/A bp++; ch = buf[bp];
2830N/A if (ch == 'u') {
2830N/A do {
2830N/A bp++; ch = buf[bp];
2830N/A } while (ch == 'u');
2830N/A int limit = bp + 3;
2830N/A if (limit < buflen) {
2830N/A int d = digit(16);
2830N/A int code = d;
2830N/A while (bp < limit && d >= 0) {
2830N/A bp++; ch = buf[bp];
2830N/A d = digit(16);
2830N/A code = (code << 4) + d;
2830N/A }
2830N/A if (d >= 0) {
2830N/A ch = (char)code;
2830N/A unicodeConversionBp = bp;
2830N/A return;
2830N/A }
2830N/A }
2830N/A lexError(bp, "illegal.unicode.esc");
2830N/A } else {
2830N/A bp--;
2830N/A ch = '\\';
2830N/A }
2830N/A }
2830N/A }
2830N/A
2830N/A /** Read next character.
2830N/A */
2830N/A private void scanChar() {
2830N/A ch = buf[++bp];
2830N/A if (ch == '\\') {
2830N/A convertUnicode();
2830N/A }
2830N/A }
2830N/A
2830N/A /** Read next character in comment, skipping over double '\' characters.
2830N/A */
2830N/A private void scanCommentChar() {
3478N/A scanChar();
3478N/A if (ch == '\\') {
3478N/A if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
2830N/A bp++;
2830N/A } else {
2830N/A convertUnicode();
3478N/A }
2830N/A }
3478N/A }
3478N/A
2830N/A /** Append a character to sbuf.
2830N/A */
2830N/A private void putChar(char ch) {
2830N/A if (sp == sbuf.length) {
2830N/A char[] newsbuf = new char[sbuf.length * 2];
2830N/A System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
2830N/A sbuf = newsbuf;
2830N/A }
2830N/A sbuf[sp++] = ch;
2830N/A }
2830N/A
2830N/A /** For debugging purposes: print character.
3478N/A */
3478N/A private void dch() {
3478N/A System.err.print(ch); System.out.flush();
3478N/A }
3478N/A
2830N/A /** Read next character in character or string literal and copy into sbuf.
2830N/A */
2830N/A private void scanLitChar(boolean forBytecodeName) {
2830N/A if (ch == '\\') {
2830N/A if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
2830N/A bp++;
2830N/A putChar('\\');
2830N/A scanChar();
2830N/A } else {
2830N/A scanChar();
2830N/A switch (ch) {
2830N/A case '0': case '1': case '2': case '3':
2830N/A case '4': case '5': case '6': case '7':
3478N/A char leadch = ch;
3478N/A int oct = digit(8);
2830N/A scanChar();
2830N/A if ('0' <= ch && ch <= '7') {
2830N/A oct = oct * 8 + digit(8);
2830N/A scanChar();
3478N/A if (leadch <= '3' && '0' <= ch && ch <= '7') {
3478N/A oct = oct * 8 + digit(8);
3478N/A scanChar();
3478N/A }
3478N/A }
3478N/A putChar((char)oct);
2830N/A break;
2830N/A case 'b':
2830N/A putChar('\b'); scanChar(); break;
3478N/A case 't':
2830N/A putChar('\t'); scanChar(); break;
2830N/A case 'n':
2830N/A putChar('\n'); scanChar(); break;
3478N/A case 'f':
2830N/A putChar('\f'); scanChar(); break;
2830N/A case 'r':
2830N/A putChar('\r'); scanChar(); break;
2830N/A case '\'':
2830N/A putChar('\''); scanChar(); break;
2830N/A case '\"':
2830N/A putChar('\"'); scanChar(); break;
2830N/A case '\\':
2830N/A putChar('\\'); scanChar(); break;
2830N/A case '|': case ',': case '?': case '%':
2830N/A case '^': case '_': case '{': case '}':
2830N/A case '!': case '-': case '=':
2830N/A if (forBytecodeName) {
2830N/A // Accept escape sequences for dangerous bytecode chars.
2830N/A // This is illegal in normal Java string or character literals.
2830N/A // Note that the escape sequence itself is passed through.
2830N/A putChar('\\'); putChar(ch); scanChar();
2830N/A } else {
2830N/A lexError(bp, "illegal.esc.char");
3478N/A }
3478N/A break;
2830N/A default:
3478N/A lexError(bp, "illegal.esc.char");
3478N/A }
3478N/A }
3478N/A } else if (bp != buflen) {
3478N/A putChar(ch); scanChar();
2830N/A }
2830N/A }
2830N/A private void scanLitChar() {
2830N/A scanLitChar(false);
3478N/A }
2830N/A
3478N/A /** Read next character in an exotic name #"foo"
2830N/A */
2830N/A private void scanBytecodeNameChar() {
2830N/A switch (ch) {
2830N/A // reject any "dangerous" char which is illegal somewhere in the JVM spec
2830N/A // cf. http://blogs.sun.com/jrose/entry/symbolic_freedom_in_the_vm
2830N/A case '/': case '.': case ';': // illegal everywhere
2830N/A case '<': case '>': // illegal in methods, dangerous in classes
2830N/A case '[': // illegal in classes
2830N/A lexError(bp, "illegal.bytecode.ident.char", String.valueOf((int)ch));
2830N/A break;
2830N/A }
2830N/A scanLitChar(true);
2830N/A }
2830N/A
2830N/A private void scanDigits(int digitRadix) {
2830N/A char saveCh;
2830N/A int savePos;
2830N/A do {
2830N/A if (ch != '_') {
2830N/A putChar(ch);
3478N/A } else {
2830N/A if (!allowUnderscoresInLiterals) {
2830N/A lexError("unsupported.underscore.lit", source.name);
2830N/A allowUnderscoresInLiterals = true;
2830N/A }
2830N/A }
2830N/A saveCh = ch;
2830N/A savePos = bp;
2830N/A scanChar();
2830N/A } while (digit(digitRadix) >= 0 || ch == '_');
2830N/A if (saveCh == '_')
2830N/A lexError(savePos, "illegal.underscore");
2830N/A }
2830N/A
2830N/A /** Read fractional part of hexadecimal floating point number.
2830N/A */
2830N/A private void scanHexExponentAndSuffix() {
2830N/A if (ch == 'p' || ch == 'P') {
2830N/A putChar(ch);
2830N/A scanChar();
2830N/A skipIllegalUnderscores();
2830N/A if (ch == '+' || ch == '-') {
2830N/A putChar(ch);
2830N/A scanChar();
2830N/A }
2830N/A skipIllegalUnderscores();
3478N/A if ('0' <= ch && ch <= '9') {
3060N/A scanDigits(10);
2830N/A if (!allowHexFloats) {
2830N/A lexError("unsupported.fp.lit", source.name);
2830N/A allowHexFloats = true;
3478N/A }
2830N/A else if (!hexFloatsWork)
2830N/A lexError("unsupported.cross.fp.lit");
3478N/A } else
2830N/A lexError("malformed.fp.lit");
2830N/A } else {
2830N/A lexError("malformed.fp.lit");
2830N/A }
3478N/A if (ch == 'f' || ch == 'F') {
2830N/A putChar(ch);
2830N/A scanChar();
2830N/A token = FLOATLITERAL;
2830N/A } else {
2830N/A if (ch == 'd' || ch == 'D') {
2830N/A putChar(ch);
2830N/A scanChar();
2830N/A }
2830N/A token = DOUBLELITERAL;
2830N/A }
2830N/A }
2830N/A
2830N/A /** Read fractional part of floating point number.
3478N/A */
3478N/A private void scanFraction() {
3478N/A skipIllegalUnderscores();
2830N/A if ('0' <= ch && ch <= '9') {
2830N/A scanDigits(10);
2830N/A }
2830N/A int sp1 = sp;
2830N/A if (ch == 'e' || ch == 'E') {
2830N/A putChar(ch);
2830N/A scanChar();
2830N/A skipIllegalUnderscores();
2830N/A if (ch == '+' || ch == '-') {
2830N/A putChar(ch);
2830N/A scanChar();
2830N/A }
2830N/A skipIllegalUnderscores();
2830N/A if ('0' <= ch && ch <= '9') {
2830N/A scanDigits(10);
2830N/A return;
2830N/A }
2830N/A lexError("malformed.fp.lit");
2830N/A sp = sp1;
2830N/A }
2830N/A }
2830N/A
2830N/A /** Read fractional part and 'd' or 'f' suffix of floating point number.
2830N/A */
3478N/A private void scanFractionAndSuffix() {
3478N/A this.radix = 10;
3478N/A scanFraction();
3478N/A if (ch == 'f' || ch == 'F') {
3478N/A putChar(ch);
3478N/A scanChar();
3478N/A token = FLOATLITERAL;
3478N/A } else {
3478N/A if (ch == 'd' || ch == 'D') {
3478N/A putChar(ch);
3478N/A scanChar();
2830N/A }
2830N/A token = DOUBLELITERAL;
2830N/A }
2830N/A }
2830N/A
2830N/A /** Read fractional part and 'd' or 'f' suffix of floating point number.
2830N/A */
2830N/A private void scanHexFractionAndSuffix(boolean seendigit) {
2830N/A this.radix = 16;
2830N/A assert ch == '.';
2830N/A putChar(ch);
2830N/A scanChar();
2830N/A skipIllegalUnderscores();
2830N/A if (digit(16) >= 0) {
2830N/A seendigit = true;
2830N/A scanDigits(16);
2830N/A }
2830N/A if (!seendigit)
2830N/A lexError("invalid.hex.number");
2830N/A else
2830N/A scanHexExponentAndSuffix();
2830N/A }
2830N/A
2830N/A private void skipIllegalUnderscores() {
2830N/A if (ch == '_') {
2830N/A lexError(bp, "illegal.underscore");
2830N/A while (ch == '_')
2830N/A scanChar();
2830N/A }
2830N/A }
2830N/A
2830N/A /** Read a number.
2830N/A * @param radix The radix of the number; one of 2, j8, 10, 16.
3478N/A */
3478N/A private void scanNumber(int radix) {
3478N/A this.radix = radix;
3478N/A // for octal, allow base-10 digit in case it's a float literal
3478N/A int digitRadix = (radix == 8 ? 10 : radix);
3478N/A boolean seendigit = false;
3478N/A if (digit(digitRadix) >= 0) {
2830N/A seendigit = true;
2830N/A scanDigits(digitRadix);
2830N/A }
2830N/A if (radix == 16 && ch == '.') {
2830N/A scanHexFractionAndSuffix(seendigit);
2830N/A } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {
2830N/A scanHexExponentAndSuffix();
2830N/A } else if (digitRadix == 10 && ch == '.') {
2830N/A putChar(ch);
2830N/A scanChar();
2830N/A scanFractionAndSuffix();
2830N/A } else if (digitRadix == 10 &&
2830N/A (ch == 'e' || ch == 'E' ||
2830N/A ch == 'f' || ch == 'F' ||
2830N/A ch == 'd' || ch == 'D')) {
2830N/A scanFractionAndSuffix();
2830N/A } else {
3478N/A if (ch == 'l' || ch == 'L') {
3478N/A scanChar();
3478N/A token = LONGLITERAL;
2830N/A } else {
2830N/A token = INTLITERAL;
2830N/A }
2830N/A }
2830N/A }
2830N/A
2830N/A /** Read an identifier.
2830N/A */
2830N/A private void scanIdent() {
2830N/A boolean isJavaIdentifierPart;
2830N/A char high;
2830N/A do {
2830N/A if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;
3478N/A // optimization, was: putChar(ch);
3478N/A
3478N/A scanChar();
2830N/A switch (ch) {
2830N/A case 'A': case 'B': case 'C': case 'D': case 'E':
2830N/A case 'F': case 'G': case 'H': case 'I': case 'J':
2830N/A case 'K': case 'L': case 'M': case 'N': case 'O':
2830N/A case 'P': case 'Q': case 'R': case 'S': case 'T':
2830N/A case 'U': case 'V': case 'W': case 'X': case 'Y':
2830N/A case 'Z':
2830N/A case 'a': case 'b': case 'c': case 'd': case 'e':
2830N/A case 'f': case 'g': case 'h': case 'i': case 'j':
2830N/A case 'k': case 'l': case 'm': case 'n': case 'o':
2830N/A case 'p': case 'q': case 'r': case 's': case 't':
2830N/A case 'u': case 'v': case 'w': case 'x': case 'y':
2830N/A case 'z':
3478N/A case '$': case '_':
3478N/A case '0': case '1': case '2': case '3': case '4':
3478N/A case '5': case '6': case '7': case '8': case '9':
2830N/A case '\u0000': case '\u0001': case '\u0002': case '\u0003':
3478N/A case '\u0004': case '\u0005': case '\u0006': case '\u0007':
3478N/A case '\u0008': case '\u000E': case '\u000F': case '\u0010':
3478N/A case '\u0011': case '\u0012': case '\u0013': case '\u0014':
3478N/A case '\u0015': case '\u0016': case '\u0017':
3478N/A case '\u0018': case '\u0019': case '\u001B':
3478N/A case '\u007F':
3478N/A break;
3478N/A case '\u001A': // EOI is also a legal identifier part
3478N/A if (bp >= buflen) {
2830N/A name = names.fromChars(sbuf, 0, sp);
2830N/A token = keywords.key(name);
2830N/A return;
3478N/A }
3478N/A break;
3478N/A default:
3478N/A if (ch < '\u0080') {
3478N/A // all ASCII range chars already handled, above
3478N/A isJavaIdentifierPart = false;
3478N/A } else {
3478N/A high = scanSurrogates();
3478N/A if (high != 0) {
3478N/A if (sp == sbuf.length) {
3478N/A putChar(high);
2830N/A } else {
2830N/A sbuf[sp++] = high;
2830N/A }
2830N/A isJavaIdentifierPart = Character.isJavaIdentifierPart(
2830N/A Character.toCodePoint(high, ch));
2830N/A } else {
2830N/A isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);
2830N/A }
3478N/A }
2830N/A if (!isJavaIdentifierPart) {
2830N/A name = names.fromChars(sbuf, 0, sp);
2830N/A token = keywords.key(name);
2830N/A return;
}
}
} while (true);
}
/** True if the running platform's Character class provides the
 *  surrogate query methods.
 */
final static boolean surrogatesSupported = surrogatesSupported();

/** Probes for Character.isHighSurrogate and reports whether the call
 *  could be made.
 */
private static boolean surrogatesSupported() {
    boolean available;
    try {
        Character.isHighSurrogate('a');
        available = true;
    } catch (NoSuchMethodError ex) {
        available = false;
    }
    return available;
}
/** Scan surrogate pairs. If 'ch' is a high surrogate and the next
 *  character is a low surrogate, then leave the low surrogate in 'ch'
 *  and return the high surrogate; otherwise return 0.
 */
private char scanSurrogates() {
    if (!surrogatesSupported || !Character.isHighSurrogate(ch)) {
        return 0;
    }
    final char lead = ch;
    scanChar();
    if (Character.isLowSurrogate(ch)) {
        return lead;
    }
    // NOTE(review): 'ch' is restored here but bp stays advanced by the
    // scanChar() above - confirm that this is intended.
    ch = lead;
    return 0;
}
/** Return true if ch can be part of an operator, i.e. it is one of
 *  ! % & * ? + - : < = > ^ | ~ @
 */
private boolean isSpecial(char ch) {
    return "!%&*?+-:<=>^|~@".indexOf(ch) >= 0;
}
/** Read the longest possible sequence of special characters that is
 *  known to the keyword table and convert it to a token. Scanning
 *  stops before the first character that would turn the sequence into
 *  something the table only knows as IDENTIFIER.
 */
private void scanOperator() {
    do {
        putChar(ch);
        final Name candidate = names.fromChars(sbuf, 0, sp);
        if (keywords.key(candidate) == IDENTIFIER) {
            // The longer sequence is not an operator: drop the character
            // just appended and keep the previous name and token.
            sp--;
            return;
        }
        name = candidate;
        token = keywords.key(candidate);
        scanChar();
    } while (isSpecial(ch));
}
/**
 * Scan a documentation comment; determine if a deprecated tag is present.
 * Called once the initial /, * have been skipped, positioned at the second *
 * (which is treated as the beginning of the first line).
 * Stops positioned at the closing '/', or at the end of the input when
 * the comment is not terminated.
 *
 * Sets deprecatedFlag when a line, after optional leading whitespace and
 * stars, starts with "@deprecated" followed by whitespace or by the end
 * of the comment. The flag is never cleared here.
 */
@SuppressWarnings("fallthrough")
private void scanDocComment() {
boolean deprecatedPrefix = false;
forEachLine:
while (bp < buflen) {
// Skip optional WhiteSpace at beginning of line
while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
scanCommentChar();
}
// Skip optional consecutive Stars
while (bp < buflen && ch == '*') {
scanCommentChar();
if (ch == '/') {
return;
}
}
// Skip optional WhiteSpace after Stars
while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
scanCommentChar();
}
deprecatedPrefix = false;
// At beginning of line in the JavaDoc sense.
// The tag is only looked for while deprecatedFlag is still unset.
// Match "@deprecated" one character at a time.
if (bp < buflen && ch == '@' && !deprecatedFlag) {
scanCommentChar();
if (bp < buflen && ch == 'd') {
scanCommentChar();
if (bp < buflen && ch == 'e') {
scanCommentChar();
if (bp < buflen && ch == 'p') {
scanCommentChar();
if (bp < buflen && ch == 'r') {
scanCommentChar();
if (bp < buflen && ch == 'e') {
scanCommentChar();
if (bp < buflen && ch == 'c') {
scanCommentChar();
if (bp < buflen && ch == 'a') {
scanCommentChar();
if (bp < buflen && ch == 't') {
scanCommentChar();
if (bp < buflen && ch == 'e') {
scanCommentChar();
if (bp < buflen && ch == 'd') {
deprecatedPrefix = true;
scanCommentChar();
}}}}}}}}}}}
// The tag only counts when followed by whitespace or by the end of the
// comment, so a longer word starting with "@deprecated" is ignored.
if (deprecatedPrefix && bp < buflen) {
if (Character.isWhitespace(ch)) {
deprecatedFlag = true;
} else if (ch == '*') {
scanCommentChar();
if (ch == '/') {
deprecatedFlag = true;
return;
}
}
}
// Skip rest of line
while (bp < buflen) {
switch (ch) {
case '*':
scanCommentChar();
if (ch == '/') {
return;
}
break;
case CR: // (Spec 3.4)
scanCommentChar();
if (ch != LF) {
continue forEachLine;
}
/* fall through to LF case */
case LF: // (Spec 3.4)
scanCommentChar();
continue forEachLine;
default:
scanCommentChar();
}
} // rest of line
} // forEachLine
return;
}
/** The value of a literal token, recorded as a string: a fresh String
 *  holding the characters currently stored in sbuf.
 *  For integers, leading 0x and 'l' suffixes are suppressed.
 */
public String stringVal() {
    return String.valueOf(sbuf, 0, sp);
}
/** Read the next token.
 *
 * Whitespace, line terminators and comments are consumed and reported
 * through processWhiteSpace(), processLineTerminator() and
 * processComment(); scanning then continues until a token or a lexical
 * error has been found. On return, 'token' holds the token kind, 'pos'
 * its start offset and 'endPos' the offset just past it; 'prevEndPos'
 * holds the value 'endPos' had when this method was called.
 */
public void nextToken() {
try {
prevEndPos = endPos;
// Start with an empty literal/identifier buffer.
sp = 0;
while (true) {
pos = bp;
switch (ch) {
case ' ': // (Spec 3.6)
case '\t': // (Spec 3.6)
case FF: // (Spec 3.6)
do {
scanChar();
} while (ch == ' ' || ch == '\t' || ch == FF);
endPos = bp;
processWhiteSpace();
break;
case LF: // (Spec 3.4)
scanChar();
endPos = bp;
processLineTerminator();
break;
case CR: // (Spec 3.4)
scanChar();
if (ch == LF) {
scanChar();
}
endPos = bp;
processLineTerminator();
break;
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F': case 'G': case 'H': case 'I': case 'J':
case 'K': case 'L': case 'M': case 'N': case 'O':
case 'P': case 'Q': case 'R': case 'S': case 'T':
case 'U': case 'V': case 'W': case 'X': case 'Y':
case 'Z':
case 'a': case 'b': case 'c': case 'd': case 'e':
case 'f': case 'g': case 'h': case 'i': case 'j':
case 'k': case 'l': case 'm': case 'n': case 'o':
case 'p': case 'q': case 'r': case 's': case 't':
case 'u': case 'v': case 'w': case 'x': case 'y':
case 'z':
case '$': case '_':
scanIdent();
return;
// A leading '0' starts a hex (0x) or binary (0b) literal, or otherwise a
// number scanned with radix 8.
case '0':
scanChar();
if (ch == 'x' || ch == 'X') {
scanChar();
skipIllegalUnderscores();
if (ch == '.') {
scanHexFractionAndSuffix(false);
} else if (digit(16) < 0) {
lexError("invalid.hex.number");
} else {
scanNumber(16);
}
} else if (ch == 'b' || ch == 'B') {
if (!allowBinaryLiterals) {
lexError("unsupported.binary.lit", source.name);
// Report the unsupported feature only once.
allowBinaryLiterals = true;
}
scanChar();
skipIllegalUnderscores();
if (digit(2) < 0) {
lexError("invalid.binary.number");
} else {
scanNumber(2);
}
} else {
// The leading '0' is kept in the buffer; the 0x/0b prefixes above are not.
putChar('0');
if (ch == '_') {
int savePos = bp;
do {
scanChar();
} while (ch == '_');
// Underscores after the leading zero must be followed by a digit.
if (digit(10) < 0) {
lexError(savePos, "illegal.underscore");
}
}
scanNumber(8);
}
return;
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
scanNumber(10);
return;
// '.' starts a fraction (".5"), an ellipsis ("..."), or is a plain DOT.
case '.':
scanChar();
if ('0' <= ch && ch <= '9') {
putChar('.');
scanFractionAndSuffix();
} else if (ch == '.') {
putChar('.'); putChar('.');
scanChar();
if (ch == '.') {
scanChar();
putChar('.');
token = ELLIPSIS;
} else {
lexError("malformed.fp.lit");
}
} else {
token = DOT;
}
return;
case ',':
scanChar(); token = COMMA; return;
case ';':
scanChar(); token = SEMI; return;
case '(':
scanChar(); token = LPAREN; return;
case ')':
scanChar(); token = RPAREN; return;
case '[':
scanChar(); token = LBRACKET; return;
case ']':
scanChar(); token = RBRACKET; return;
case '{':
scanChar(); token = LBRACE; return;
case '}':
scanChar(); token = RBRACE; return;
// '/' starts a line comment, a block or doc comment, "/=" or is a plain SLASH.
case '/':
scanChar();
if (ch == '/') {
do {
scanCommentChar();
} while (ch != CR && ch != LF && bp < buflen);
// NOTE(review): a line comment that runs up to the end of the input is
// not reported to processComment().
if (bp < buflen) {
endPos = bp;
processComment(CommentStyle.LINE);
}
break;
} else if (ch == '*') {
scanChar();
CommentStyle style;
if (ch == '*') {
style = CommentStyle.JAVADOC;
scanDocComment();
} else {
style = CommentStyle.BLOCK;
while (bp < buflen) {
if (ch == '*') {
scanChar();
if (ch == '/') break;
} else {
scanCommentChar();
}
}
}
// Both scans above stop on the closing '/' when the comment is terminated.
if (ch == '/') {
scanChar();
endPos = bp;
processComment(style);
break;
} else {
lexError("unclosed.comment");
return;
}
} else if (ch == '=') {
name = names.slashequals;
token = SLASHEQ;
scanChar();
} else {
name = names.slash;
token = SLASH;
}
return;
case '\'':
scanChar();
if (ch == '\'') {
lexError("empty.char.lit");
} else {
if (ch == CR || ch == LF)
lexError(pos, "illegal.line.end.in.char.lit");
scanLitChar();
if (ch == '\'') {
scanChar();
token = CHARLITERAL;
} else {
lexError(pos, "unclosed.char.lit");
}
}
return;
case '\"':
scanChar();
// A string literal ends at the closing quote; a line end or the end of
// the input before that is an error.
while (ch != '\"' && ch != CR && ch != LF && bp < buflen)
scanLitChar();
if (ch == '\"') {
token = STRINGLITERAL;
scanChar();
} else {
lexError(pos, "unclosed.str.lit");
}
return;
// '#' is only legal as the start of an exotic identifier of the form #"name".
case '#':
scanChar();
if (ch == '\"') {
scanChar();
if (ch == '\"')
lexError(pos, "empty.bytecode.ident");
while (ch != '\"' && ch != CR && ch != LF && bp < buflen) {
scanBytecodeNameChar();
}
if (ch == '\"') {
name = names.fromChars(sbuf, 0, sp);
token = IDENTIFIER; // even if #"int" or #"do"
scanChar();
} else {
lexError(pos, "unclosed.bytecode.ident");
}
} else {
lexError("illegal.char", String.valueOf((int)'#'));
}
return;
// Everything else: operators, non-ASCII identifier starts, end of input,
// or an illegal character.
default:
if (isSpecial(ch)) {
scanOperator();
} else {
boolean isJavaIdentifierStart;
if (ch < '\u0080') {
// all ASCII range chars already handled, above
isJavaIdentifierStart = false;
} else {
char high = scanSurrogates();
if (high != 0) {
// Store the high surrogate; scanIdent() stores the low surrogate left in ch.
if (sp == sbuf.length) {
putChar(high);
} else {
sbuf[sp++] = high;
}
isJavaIdentifierStart = Character.isJavaIdentifierStart(
Character.toCodePoint(high, ch));
} else {
isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);
}
}
if (isJavaIdentifierStart) {
scanIdent();
} else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5
token = EOF;
// Report the end of input at the original input length (eofPos).
pos = bp = eofPos;
} else {
lexError("illegal.char", String.valueOf((int)ch));
scanChar();
}
}
return;
}
}
} finally {
// Runs on every exit path, so endPos always reflects the final bp.
endPos = bp;
if (scannerDebug)
System.out.println("nextToken(" + pos
+ "," + endPos + ")=|" +
new String(getRawCharacters(pos, endPos))
+ "|");
}
}
/** Returns the current token: the one set by the last nextToken() or
 *  token(Token) call.
 */
public Token token() {
    return this.token;
}
/** Replaces the current token with the given one.
 */
public void token(Token token) {
    final Token replacement = token;
    this.token = replacement;
}
/** Returns the start position of the current token: a 0-based
 *  offset from the beginning of the raw input stream
 *  (before unicode translation).
 */
public int pos() {
    return this.pos;
}
/** Returns the end position recorded for the current token.
 */
public int endPos() {
    return this.endPos;
}
/** Returns the end position recorded for the previous token.
 */
public int prevEndPos() {
    return this.prevEndPos;
}
/** Returns the position where a lexical error occurred.
 */
public int errPos() {
    return this.errPos;
}
/** Sets the position where a lexical error occurred.
 */
public void errPos(int pos) {
    this.errPos = pos;
}
/** Returns the name of an identifier or token for the current token.
 */
public Name name() {
    return this.name;
}
/** Returns the radix of a numeric literal token.
 */
public int radix() {
    return this.radix;
}
/** Reports whether a @deprecated tag has been encountered in a doc
 *  comment since the flag was last reset.
 *  The client has to reset the flag with resetDeprecatedFlag.
 */
public boolean deprecatedFlag() {
    return this.deprecatedFlag;
}
/** Clears the flag reported by deprecatedFlag().
 */
public void resetDeprecatedFlag() {
    this.deprecatedFlag = false;
}
/**
 * Returns the documentation string of the current token.
 * This implementation does not record doc comments and always
 * returns {@code null}.
 */
public String docComment() {
    final String none = null;
    return none;
}
/**
 * Returns a copy of the first {@code buflen} characters of the input
 * buffer. Unicode escape sequences are not translated.
 */
public char[] getRawCharacters() {
    final char[] copy = new char[buflen];
    System.arraycopy(buf, 0, copy, 0, copy.length);
    return copy;
}
/**
 * Returns a copy of a character array subset of the input buffer.
 * The returned array begins at the <code>beginIndex</code> and
 * extends to the character at index <code>endIndex - 1</code>.
 * Thus the length of the returned array is
 * <code>endIndex-beginIndex</code>, as with
 * <code>String.substring(beginIndex, endIndex)</code>.
 * Unicode escape sequences are not translated.
 *
 * @param beginIndex the beginning index, inclusive.
 * @param endIndex the ending index, exclusive.
 * @throws IndexOutOfBoundsException if either offset is outside of the
 * array bounds
 * @throws NegativeArraySizeException if endIndex is less than beginIndex
 */
public char[] getRawCharacters(int beginIndex, int endIndex) {
    final int count = endIndex - beginIndex;
    final char[] copy = new char[count];
    System.arraycopy(buf, beginIndex, copy, 0, count);
    return copy;
}
/** The comment kinds passed to processComment(). */
public enum CommentStyle {
    /** A comment introduced by two slashes. */
    LINE,
    /** A comment introduced by a slash and a single star. */
    BLOCK,
    /** A comment introduced by a slash and two stars. */
    JAVADOC,
}
/**
 * Called when a complete comment has been scanned. pos and endPos
 * will mark the comment boundary. This implementation only prints a
 * trace line when scanner debugging is enabled.
 */
protected void processComment(CommentStyle style) {
    if (!scannerDebug) {
        return;
    }
    final String raw = new String(getRawCharacters(pos, endPos));
    System.out.println("processComment(" + pos
                       + "," + endPos + "," + style + ")=|"
                       + raw
                       + "|");
}
/**
 * Called when a complete whitespace run has been scanned. pos and endPos
 * will mark the whitespace boundary. This implementation only prints a
 * trace line when scanner debugging is enabled.
 */
protected void processWhiteSpace() {
    if (!scannerDebug) {
        return;
    }
    final String raw = new String(getRawCharacters(pos, endPos));
    System.out.println("processWhitespace(" + pos
                       + "," + endPos + ")=|"
                       + raw
                       + "|");
}
/**
 * Called when a line terminator has been processed. This implementation
 * only prints a trace line when scanner debugging is enabled.
 */
protected void processLineTerminator() {
    if (!scannerDebug) {
        return;
    }
    final String raw = new String(getRawCharacters(pos, endPos));
    System.out.println("processTerminator(" + pos
                       + "," + endPos + ")=|"
                       + raw
                       + "|");
}
/** Build a map for translating between line numbers and
 * positions in the input, by passing the input buffer and its length
 * to Position.makeLineMap.
 *
 * @return a LineMap */
public Position.LineMap getLineMap() {
    final Position.LineMap lineMap = Position.makeLineMap(buf, buflen, false);
    return lineMap;
}
}