Scanner.java revision 408
3863N/A/*
4168N/A * Copyright 1999-2008 Sun Microsystems, Inc. All Rights Reserved.
3863N/A * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
3863N/A *
3863N/A * This code is free software; you can redistribute it and/or modify it
3863N/A * under the terms of the GNU General Public License version 2 only, as
3863N/A * published by the Free Software Foundation. Sun designates this
3863N/A * particular file as subject to the "Classpath" exception as provided
3863N/A * by Sun in the LICENSE file that accompanied this code.
3863N/A *
3863N/A * This code is distributed in the hope that it will be useful, but WITHOUT
3863N/A * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
3863N/A * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
3863N/A * version 2 for more details (a copy is included in the LICENSE file that
3863N/A * accompanied this code).
3863N/A *
3863N/A * You should have received a copy of the GNU General Public License version
3863N/A * 2 along with this work; if not, write to the Free Software Foundation,
3863N/A * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
3863N/A *
3863N/A * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
3863N/A * CA 95054 USA or visit www.sun.com if you need additional information or
3863N/A * have any questions.
3863N/A */
3863N/A
3863N/Apackage com.sun.tools.javac.parser;
3863N/A
3863N/Aimport java.nio.*;
3863N/A
3863N/Aimport com.sun.tools.javac.code.Source;
3863N/Aimport com.sun.tools.javac.file.JavacFileManager;
3863N/Aimport com.sun.tools.javac.util.*;
3863N/A
3863N/A
3863N/Aimport static com.sun.tools.javac.parser.Token.*;
3863N/Aimport static com.sun.tools.javac.util.LayoutCharacters.*;
3863N/A
3863N/A/** The lexical analyzer maps an input stream consisting of
3863N/A * ASCII characters and Unicode escapes into a token sequence.
3863N/A *
4141N/A * <p><b>This is NOT part of any API supported by Sun Microsystems. If
3863N/A * you write code that depends on this, you do so at your own risk.
4064N/A * This code and its internal interfaces are subject to change or
4186N/A * deletion without notice.</b>
3863N/A */
3863N/Apublic class Scanner implements Lexer {
3863N/A
3863N/A private static boolean scannerDebug = false;
3863N/A
3863N/A /** A factory for creating scanners. */
3863N/A public static class Factory {
3863N/A /** The context key for the scanner factory. */
3863N/A public static final Context.Key<Scanner.Factory> scannerFactoryKey =
3863N/A new Context.Key<Scanner.Factory>();
3863N/A
3863N/A /** Get the Factory instance for this context. */
3863N/A public static Factory instance(Context context) {
3863N/A Factory instance = context.get(scannerFactoryKey);
3863N/A if (instance == null)
4064N/A instance = new Factory(context);
3863N/A return instance;
3863N/A }
3863N/A
3863N/A final Log log;
3863N/A final Names names;
3863N/A final Source source;
3863N/A final Keywords keywords;
3863N/A
3863N/A /** Create a new scanner factory. */
3863N/A protected Factory(Context context) {
3863N/A context.put(scannerFactoryKey, this);
3863N/A this.log = Log.instance(context);
3863N/A this.names = Names.instance(context);
3863N/A this.source = Source.instance(context);
3863N/A this.keywords = Keywords.instance(context);
3863N/A }
3863N/A
4064N/A public Scanner newScanner(CharSequence input) {
4064N/A if (input instanceof CharBuffer) {
4064N/A return new Scanner(this, (CharBuffer)input);
4064N/A } else {
4064N/A char[] array = input.toString().toCharArray();
3863N/A return newScanner(array, array.length);
3863N/A }
3863N/A }
3863N/A
3863N/A public Scanner newScanner(char[] input, int inputLength) {
3863N/A return new Scanner(this, input, inputLength);
3863N/A }
3863N/A }
3863N/A
3863N/A /* Output variables; set by nextToken():
3863N/A */
3863N/A
3863N/A /** The token, set by nextToken().
4064N/A */
3863N/A private Token token;
3863N/A
3863N/A /** Allow hex floating-point literals.
3863N/A */
3863N/A private boolean allowHexFloats;
3863N/A
3863N/A /** Allow binary literals.
3863N/A */
3863N/A private boolean allowBinaryLiterals;
4064N/A
3863N/A /** Allow underscores in literals.
3863N/A */
3863N/A private boolean allowUnderscoresInLiterals;
3863N/A
3863N/A /** The source language setting.
4064N/A */
4064N/A private Source source;
3863N/A
3863N/A /** The token's position, 0-based offset from beginning of text.
4064N/A */
4064N/A private int pos;
4064N/A
4066N/A /** Character position just after the last character of the token.
4066N/A */
4066N/A private int endPos;
4066N/A
4066N/A /** The last character position of the previous token.
4066N/A */
4066N/A private int prevEndPos;
4066N/A
4066N/A /** The position where a lexical error occurred;
4066N/A */
4066N/A private int errPos = Position.NOPOS;
4066N/A
4066N/A /** The name of an identifier or token:
4066N/A */
4066N/A private Name name;
4066N/A
4066N/A /** The radix of a numeric literal token.
4066N/A */
4066N/A private int radix;
3863N/A
3863N/A /** Has a @deprecated been encountered in last doc comment?
4064N/A * this needs to be reset by client.
3863N/A */
3863N/A protected boolean deprecatedFlag = false;
3863N/A
3863N/A /** A character buffer for literals.
3863N/A */
3863N/A private char[] sbuf = new char[128];
3863N/A private int sp;
3863N/A
    /** The input buffer, index of next character to be read,
     *  index of one past last character in buffer.
     */
3863N/A private char[] buf;
3863N/A private int bp;
3863N/A private int buflen;
3863N/A private int eofPos;
4064N/A
4064N/A /** The current character.
3863N/A */
4064N/A private char ch;
4064N/A
4064N/A /** The buffer index of the last converted unicode character
3863N/A */
4064N/A private int unicodeConversionBp = -1;
3863N/A
3863N/A /** The log to be used for error reporting.
4064N/A */
4064N/A private final Log log;
4064N/A
4064N/A /** The name table. */
4064N/A private final Names names;
4064N/A
4064N/A /** The keyword table. */
3863N/A private final Keywords keywords;
3863N/A
/**
 * Common code for constructors: copies the log, name table, keyword table
 * and source level from the factory, then derives the language-feature
 * switches from that source level.
 */
private Scanner(Factory fac) {
    log = fac.log;
    names = fac.names;
    keywords = fac.keywords;
    source = fac.source;
    allowBinaryLiterals = source.allowBinaryLiterals();
    allowHexFloats = source.allowHexFloats();
    // Previously initialised from allowBinaryLiterals() (copy-paste slip);
    // each switch must consult its own feature predicate.
    allowUnderscoresInLiterals = source.allowUnderscoresInLiterals();
}
3863N/A
4064N/A private static final boolean hexFloatsWork = hexFloatsWork();
3863N/A private static boolean hexFloatsWork() {
3863N/A try {
3863N/A Float.valueOf("0x1.0p1");
4064N/A return true;
3863N/A } catch (NumberFormatException ex) {
3863N/A return false;
3863N/A }
3863N/A }
3863N/A
/**
 * Create a scanner from the input buffer. The buffer is converted to a
 * char array with {@code JavacFileManager.toArray} and its
 * {@code limit()} is used as the input length.
 * As documented for this constructor, the buffer must implement
 * {@code array()} and {@code compact()}, and {@code remaining()} must be
 * less than {@code limit()}.
 *
 * @param fac    the factory which created this Scanner
 * @param buffer the input characters
 */
protected Scanner(Factory fac, CharBuffer buffer) {
    this(fac, JavacFileManager.toArray(buffer), buffer.limit());
}
4064N/A
3863N/A /**
4064N/A * Create a scanner from the input array. This method might
4066N/A * modify the array. To avoid copying the input array, ensure
3863N/A * that {@code inputLength < input.length} or
4064N/A * {@code input[input.length -1]} is a white space character.
4064N/A *
4064N/A * @param fac the factory which created this Scanner
4064N/A * @param input the input, might be modified
4064N/A * @param inputLength the size of the input.
4064N/A * Must be positive and less than or equal to input.length.
3863N/A */
3863N/A protected Scanner(Factory fac, char[] input, int inputLength) {
4064N/A this(fac);
3863N/A eofPos = inputLength;
4064N/A if (inputLength == input.length) {
3863N/A if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
3863N/A inputLength--;
4064N/A } else {
3863N/A char[] newInput = new char[inputLength + 1];
3863N/A System.arraycopy(input, 0, newInput, 0, input.length);
4064N/A input = newInput;
4064N/A }
4064N/A }
4064N/A buf = input;
4064N/A buflen = inputLength;
4064N/A buf[buflen] = EOI;
4064N/A bp = -1;
4064N/A scanChar();
4064N/A }
4064N/A
3863N/A /** Report an error at the given position using the provided arguments.
3863N/A */
3863N/A private void lexError(int pos, String key, Object... args) {
3863N/A log.error(pos, key, args);
3863N/A token = ERROR;
3863N/A errPos = pos;
3863N/A }
3863N/A
3863N/A /** Report an error at the current token position using the provided
3863N/A * arguments.
4064N/A */
4064N/A private void lexError(String key, Object... args) {
4064N/A lexError(pos, key, args);
4064N/A }
3863N/A
4064N/A /** Convert an ASCII digit from its base (8, 10, or 16)
4064N/A * to its value.
4064N/A */
4064N/A private int digit(int base) {
4064N/A char c = ch;
4064N/A int result = Character.digit(c, base);
4064N/A if (result >= 0 && c > 0x7f) {
4064N/A lexError(pos+1, "illegal.nonascii.digit");
4064N/A ch = "0123456789abcdef".charAt(result);
4064N/A }
4064N/A return result;
3863N/A }
3863N/A
4064N/A /** Convert unicode escape; bp points to initial '\' character
3863N/A * (Spec 3.3).
3863N/A */
3863N/A private void convertUnicode() {
3863N/A if (ch == '\\' && unicodeConversionBp != bp) {
3863N/A bp++; ch = buf[bp];
3863N/A if (ch == 'u') {
4064N/A do {
4064N/A bp++; ch = buf[bp];
4064N/A } while (ch == 'u');
4064N/A int limit = bp + 3;
4064N/A if (limit < buflen) {
4064N/A int d = digit(16);
4064N/A int code = d;
4064N/A while (bp < limit && d >= 0) {
4064N/A bp++; ch = buf[bp];
4064N/A d = digit(16);
4064N/A code = (code << 4) + d;
4064N/A }
4064N/A if (d >= 0) {
4064N/A ch = (char)code;
4064N/A unicodeConversionBp = bp;
4064N/A return;
4064N/A }
3863N/A }
3863N/A lexError(bp, "illegal.unicode.esc");
4064N/A } else {
4064N/A bp--;
4064N/A ch = '\\';
4064N/A }
4064N/A }
3863N/A }
4064N/A
4064N/A /** Read next character.
3863N/A */
3863N/A private void scanChar() {
4064N/A ch = buf[++bp];
4064N/A if (ch == '\\') {
4064N/A convertUnicode();
4064N/A }
4064N/A }
4064N/A
4064N/A /** Read next character in comment, skipping over double '\' characters.
4064N/A */
4064N/A private void scanCommentChar() {
4064N/A scanChar();
4064N/A if (ch == '\\') {
4064N/A if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
4064N/A bp++;
3863N/A } else {
4064N/A convertUnicode();
4064N/A }
4064N/A }
4064N/A }
4064N/A
4064N/A /** Append a character to sbuf.
4064N/A */
4064N/A private void putChar(char ch) {
4064N/A if (sp == sbuf.length) {
4064N/A char[] newsbuf = new char[sbuf.length * 2];
4064N/A System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
4064N/A sbuf = newsbuf;
4064N/A }
4064N/A sbuf[sp++] = ch;
4064N/A }
4064N/A
4064N/A /** For debugging purposes: print character.
4064N/A */
4064N/A private void dch() {
4064N/A System.err.print(ch); System.out.flush();
4064N/A }
4064N/A
4064N/A /** Read next character in character or string literal and copy into sbuf.
4064N/A */
4064N/A private void scanLitChar(boolean forBytecodeName) {
4064N/A if (ch == '\\') {
4064N/A if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
4064N/A bp++;
4064N/A putChar('\\');
4064N/A scanChar();
4064N/A } else {
4064N/A scanChar();
3863N/A switch (ch) {
3863N/A case '0': case '1': case '2': case '3':
3863N/A case '4': case '5': case '6': case '7':
3863N/A char leadch = ch;
3863N/A int oct = digit(8);
3863N/A scanChar();
3863N/A if ('0' <= ch && ch <= '7') {
3863N/A oct = oct * 8 + digit(8);
3863N/A scanChar();
3863N/A if (leadch <= '3' && '0' <= ch && ch <= '7') {
4168N/A oct = oct * 8 + digit(8);
3863N/A scanChar();
3863N/A }
3863N/A }
3863N/A putChar((char)oct);
3863N/A break;
3863N/A case 'b':
3863N/A putChar('\b'); scanChar(); break;
3863N/A case 't':
3863N/A putChar('\t'); scanChar(); break;
3863N/A case 'n':
3863N/A putChar('\n'); scanChar(); break;
3863N/A case 'f':
3863N/A putChar('\f'); scanChar(); break;
3863N/A case 'r':
3863N/A putChar('\r'); scanChar(); break;
3863N/A case '\'':
3863N/A putChar('\''); scanChar(); break;
3863N/A case '\"':
3863N/A putChar('\"'); scanChar(); break;
3863N/A case '\\':
3863N/A putChar('\\'); scanChar(); break;
3863N/A case '|': case ',': case '?': case '%':
3863N/A case '^': case '_': case '{': case '}':
3863N/A case '!': case '-': case '=':
3863N/A if (forBytecodeName) {
4064N/A // Accept escape sequences for dangerous bytecode chars.
3863N/A // This is illegal in normal Java string or character literals.
3863N/A // Note that the escape sequence itself is passed through.
3863N/A putChar('\\'); putChar(ch); scanChar();
3863N/A } else {
3863N/A lexError(bp, "illegal.esc.char");
3863N/A }
3863N/A break;
3863N/A default:
3863N/A lexError(bp, "illegal.esc.char");
3863N/A }
3863N/A }
3863N/A } else if (bp != buflen) {
3863N/A putChar(ch); scanChar();
3863N/A }
3863N/A }
3863N/A private void scanLitChar() {
3863N/A scanLitChar(false);
3863N/A }
3863N/A
3863N/A /** Read next character in an exotic name #"foo"
3863N/A */
3863N/A private void scanBytecodeNameChar() {
3863N/A switch (ch) {
3863N/A // reject any "dangerous" char which is illegal somewhere in the JVM spec
3863N/A // cf. http://blogs.sun.com/jrose/entry/symbolic_freedom_in_the_vm
3863N/A case '/': case '.': case ';': // illegal everywhere
3863N/A case '<': case '>': // illegal in methods, dangerous in classes
3863N/A case '[': // illegal in classes
3863N/A lexError(bp, "illegal.bytecode.ident.char", String.valueOf((int)ch));
3863N/A break;
3863N/A }
3863N/A scanLitChar(true);
3863N/A }
3863N/A
3863N/A private void scanDigits(int digitRadix) {
3863N/A char saveCh;
3863N/A int savePos;
3863N/A do {
3863N/A if (ch != '_') {
3863N/A putChar(ch);
3863N/A } else {
3863N/A if (!allowUnderscoresInLiterals) {
3863N/A lexError("unsupported.underscore", source.name);
3863N/A allowUnderscoresInLiterals = true;
3863N/A }
3863N/A }
3863N/A saveCh = ch;
3863N/A savePos = bp;
3863N/A scanChar();
3863N/A } while (digit(digitRadix) >= 0 || ch == '_');
3863N/A if (saveCh == '_')
3863N/A lexError(savePos, "illegal.underscore");
3863N/A }
3863N/A
3863N/A /** Read fractional part of hexadecimal floating point number.
3863N/A */
3863N/A private void scanHexExponentAndSuffix() {
3863N/A if (ch == 'p' || ch == 'P') {
3863N/A putChar(ch);
3863N/A scanChar();
3863N/A skipIllegalUnderscores();
3863N/A if (ch == '+' || ch == '-') {
3863N/A putChar(ch);
3863N/A scanChar();
3863N/A }
3863N/A skipIllegalUnderscores();
3863N/A if ('0' <= ch && ch <= '9') {
3863N/A scanDigits(10);
3863N/A if (!allowHexFloats) {
3863N/A lexError("unsupported.fp.lit", source.name);
3863N/A allowHexFloats = true;
3863N/A }
3863N/A else if (!hexFloatsWork)
3863N/A lexError("unsupported.cross.fp.lit");
3863N/A } else
3863N/A lexError("malformed.fp.lit");
3863N/A } else {
3863N/A lexError("malformed.fp.lit");
3863N/A }
3863N/A if (ch == 'f' || ch == 'F') {
3863N/A putChar(ch);
3863N/A scanChar();
3863N/A token = FLOATLITERAL;
3863N/A } else {
3863N/A if (ch == 'd' || ch == 'D') {
3863N/A putChar(ch);
3863N/A scanChar();
3863N/A }
3863N/A token = DOUBLELITERAL;
3863N/A }
}
/**
 * Read fractional part of floating point number: optional digits,
 * followed by an optional exponent ('e' or 'E', optional sign, digits).
 * If the exponent has no digits, "malformed.fp.lit" is reported and the
 * exponent characters already copied are dropped from sbuf.
 */
private void scanFraction() {
    skipIllegalUnderscores();
    if (ch >= '0' && ch <= '9') {
        scanDigits(10);
    }
    if (ch != 'e' && ch != 'E') {
        return;
    }
    final int mantissaEnd = sp;
    putChar(ch);
    scanChar();
    skipIllegalUnderscores();
    if (ch == '+' || ch == '-') {
        putChar(ch);
        scanChar();
    }
    skipIllegalUnderscores();
    if (ch >= '0' && ch <= '9') {
        scanDigits(10);
    } else {
        lexError("malformed.fp.lit");
        sp = mantissaEnd;
    }
}
/**
 * Read fractional part and 'd' or 'f' suffix of floating point number.
 * Sets the radix to 10 and the token to FLOATLITERAL for an 'f' suffix,
 * DOUBLELITERAL otherwise.
 */
private void scanFractionAndSuffix() {
    this.radix = 10;
    scanFraction();
    switch (ch) {
    case 'f': case 'F':
        putChar(ch);
        scanChar();
        token = FLOATLITERAL;
        break;
    case 'd': case 'D':
        putChar(ch);
        scanChar();
        token = DOUBLELITERAL;
        break;
    default:
        token = DOUBLELITERAL;
        break;
    }
}
/**
 * Read the fractional part of a hexadecimal floating point number,
 * starting at the '.' character, then hand over to
 * scanHexExponentAndSuffix for the exponent and suffix.
 * Reports "invalid.hex.number" if there is no hex digit on either side
 * of the point.
 *
 * @param seendigit whether a hex digit was already seen before the '.'
 */
private void scanHexFractionAndSuffix(boolean seendigit) {
    this.radix = 16;
    assert ch == '.';
    putChar(ch);
    scanChar();
    skipIllegalUnderscores();

    boolean haveDigit = seendigit;
    if (digit(16) >= 0) {
        haveDigit = true;
        scanDigits(16);
    }

    if (haveDigit) {
        scanHexExponentAndSuffix();
    } else {
        lexError("invalid.hex.number");
    }
}
/**
 * If the current character is an underscore, report "illegal.underscore"
 * once at the current position and skip the whole run of underscores.
 */
private void skipIllegalUnderscores() {
    if (ch != '_') {
        return;
    }
    lexError(bp, "illegal.underscore");
    do {
        scanChar();
    } while (ch == '_');
}
/**
 * Read a number. Depending on what follows the leading digits, the
 * result is an integer literal (INTLITERAL, or LONGLITERAL with an 'l'
 * suffix) or a floating point literal scanned by one of the fraction
 * helpers.
 *
 * @param radix The radix of the number; one of 2, 8, 10, 16.
 */
private void scanNumber(int radix) {
    this.radix = radix;
    // for octal, allow base-10 digit in case it's a float literal
    final int digitRadix = (radix == 8) ? 10 : radix;
    final boolean seendigit = digit(digitRadix) >= 0;
    if (seendigit) {
        scanDigits(digitRadix);
    }

    final boolean hex = (radix == 16);
    final boolean decimalDigits = (digitRadix == 10);

    if (hex && ch == '.') {
        scanHexFractionAndSuffix(seendigit);
        return;
    }
    if (seendigit && hex && (ch == 'p' || ch == 'P')) {
        scanHexExponentAndSuffix();
        return;
    }
    if (decimalDigits && ch == '.') {
        putChar(ch);
        scanChar();
        scanFractionAndSuffix();
        return;
    }
    if (decimalDigits &&
        (ch == 'e' || ch == 'E' ||
         ch == 'f' || ch == 'F' ||
         ch == 'd' || ch == 'D')) {
        scanFractionAndSuffix();
        return;
    }

    // Integer literal; the 'l' suffix is consumed but not copied to sbuf.
    if (ch == 'l' || ch == 'L') {
        scanChar();
        token = LONGLITERAL;
    } else {
        token = INTLITERAL;
    }
}
/** Read an identifier.
 * The current character is appended to sbuf and following characters are
 * consumed for as long as they are identifier parts. When the identifier
 * ends, name is set from the collected characters and token is set to
 * whatever the keyword table returns for that name.
 */
private void scanIdent() {
boolean isJavaIdentifierPart;
char high;
do {
if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;
// optimization, was: putChar(ch);
scanChar();
// ASCII characters that are identifier parts are listed explicitly;
// they simply continue the loop.
switch (ch) {
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F': case 'G': case 'H': case 'I': case 'J':
case 'K': case 'L': case 'M': case 'N': case 'O':
case 'P': case 'Q': case 'R': case 'S': case 'T':
case 'U': case 'V': case 'W': case 'X': case 'Y':
case 'Z':
case 'a': case 'b': case 'c': case 'd': case 'e':
case 'f': case 'g': case 'h': case 'i': case 'j':
case 'k': case 'l': case 'm': case 'n': case 'o':
case 'p': case 'q': case 'r': case 's': case 't':
case 'u': case 'v': case 'w': case 'x': case 'y':
case 'z':
case '$': case '_':
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
case '\u0000': case '\u0001': case '\u0002': case '\u0003':
case '\u0004': case '\u0005': case '\u0006': case '\u0007':
case '\u0008': case '\u000E': case '\u000F': case '\u0010':
case '\u0011': case '\u0012': case '\u0013': case '\u0014':
case '\u0015': case '\u0016': case '\u0017':
case '\u0018': case '\u0019': case '\u001B':
case '\u007F':
break;
case '\u001A': // EOI is also a legal identifier part
// At or past buflen this character is the end-of-input sentinel, so
// the identifier ends here; otherwise it is part of the identifier.
if (bp >= buflen) {
name = names.fromChars(sbuf, 0, sp);
token = keywords.key(name);
return;
}
break;
default:
if (ch < '\u0080') {
// all ASCII range chars already handled, above
isJavaIdentifierPart = false;
} else {
// Non-ASCII: a surrogate pair is tested as one code point, with the
// high surrogate copied to sbuf here and the low surrogate left in ch.
high = scanSurrogates();
if (high != 0) {
if (sp == sbuf.length) {
putChar(high);
} else {
sbuf[sp++] = high;
}
isJavaIdentifierPart = Character.isJavaIdentifierPart(
Character.toCodePoint(high, ch));
} else {
isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);
}
}
if (!isJavaIdentifierPart) {
name = names.fromChars(sbuf, 0, sp);
token = keywords.key(name);
return;
}
}
} while (true);
}
/**
 * Are surrogates supported? Determined once, at class initialisation, by
 * probing for {@code Character.isHighSurrogate}.
 */
final static boolean surrogatesSupported = surrogatesSupported();

/** Probe whether {@code Character.isHighSurrogate} can be called. */
private static boolean surrogatesSupported() {
    boolean available;
    try {
        Character.isHighSurrogate('a');
        available = true;
    } catch (NoSuchMethodError ex) {
        available = false;
    }
    return available;
}
/**
 * Scan surrogate pairs. If 'ch' is a high surrogate and
 * the next character is a low surrogate, then put the low
 * surrogate in 'ch', and return the high surrogate.
 * Otherwise, just return 0; if a high surrogate was not followed by a
 * low surrogate, 'ch' is set back to the high surrogate.
 */
private char scanSurrogates() {
    if (!surrogatesSupported || !Character.isHighSurrogate(ch)) {
        return 0;
    }
    final char lead = ch;
    scanChar();
    if (Character.isLowSurrogate(ch)) {
        return lead;
    }
    ch = lead;
    return 0;
}
/**
 * Return true if ch can be part of an operator, i.e. it is one of
 * {@code ! % & * ? + - : < = > ^ | ~ @}.
 */
private boolean isSpecial(char ch) {
    return "!%&*?+-:<=>^|~@".indexOf(ch) >= 0;
}
/**
 * Read longest possible sequence of special characters and convert
 * to token. Characters are added one at a time; as soon as the extended
 * sequence is no longer known to the keyword table (it maps to
 * IDENTIFIER), the last character is dropped again and scanning stops
 * with the previously recorded name and token.
 */
private void scanOperator() {
    do {
        putChar(ch);
        Name candidate = names.fromChars(sbuf, 0, sp);
        if (keywords.key(candidate) == IDENTIFIER) {
            sp--;
            return;
        }
        name = candidate;
        token = keywords.key(candidate);
        scanChar();
    } while (isSpecial(ch));
}
/**
 * Scan a documentation comment; determine if a deprecated tag is present.
 * Called once the initial /, * have been skipped, positioned at the second *
 * (which is treated as the beginning of the first line).
 * Stops positioned at the closing '/'.
 * Sets deprecatedFlag when a line starts (after optional whitespace and
 * stars) with "@deprecated" followed by whitespace or by the end of the
 * comment. Also returns when the end of the input is reached.
 */
@SuppressWarnings("fallthrough")
private void scanDocComment() {
boolean deprecatedPrefix = false;
forEachLine:
while (bp < buflen) {
// Skip optional WhiteSpace at beginning of line
while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
scanCommentChar();
}
// Skip optional consecutive Stars
while (bp < buflen && ch == '*') {
scanCommentChar();
if (ch == '/') {
return;
}
}
// Skip optional WhiteSpace after Stars
while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
scanCommentChar();
}
deprecatedPrefix = false;
// At beginning of line in the JavaDoc sense.
// Match "@deprecated" one character at a time; the tag is not looked
// for again once deprecatedFlag is already set.
if (bp < buflen && ch == '@' && !deprecatedFlag) {
scanCommentChar();
if (bp < buflen && ch == 'd') {
scanCommentChar();
if (bp < buflen && ch == 'e') {
scanCommentChar();
if (bp < buflen && ch == 'p') {
scanCommentChar();
if (bp < buflen && ch == 'r') {
scanCommentChar();
if (bp < buflen && ch == 'e') {
scanCommentChar();
if (bp < buflen && ch == 'c') {
scanCommentChar();
if (bp < buflen && ch == 'a') {
scanCommentChar();
if (bp < buflen && ch == 't') {
scanCommentChar();
if (bp < buflen && ch == 'e') {
scanCommentChar();
if (bp < buflen && ch == 'd') {
deprecatedPrefix = true;
scanCommentChar();
}}}}}}}}}}}
// The tag only counts if the word ends here: either whitespace follows
// or the comment is closed immediately after it.
if (deprecatedPrefix && bp < buflen) {
if (Character.isWhitespace(ch)) {
deprecatedFlag = true;
} else if (ch == '*') {
scanCommentChar();
if (ch == '/') {
deprecatedFlag = true;
return;
}
}
}
// Skip rest of line
while (bp < buflen) {
switch (ch) {
case '*':
scanCommentChar();
if (ch == '/') {
return;
}
break;
case CR: // (Spec 3.4)
scanCommentChar();
if (ch != LF) {
continue forEachLine;
}
/* fall through to LF case */
case LF: // (Spec 3.4)
scanCommentChar();
continue forEachLine;
default:
scanCommentChar();
}
} // rest of line
} // forEachLine
return;
}
/**
 * The value of a literal token, recorded as a string: the characters
 * currently collected in sbuf.
 * For integers, leading 0x and 'l' suffixes are suppressed.
 */
public String stringVal() {
    return String.valueOf(sbuf, 0, sp);
}
/** Read token.
 * Starting at the current character, skips whitespace, line terminators
 * and comments (reporting each through processWhiteSpace,
 * processLineTerminator or processComment) until one token has been
 * recognised. On return, token holds the token kind and pos its start
 * position; endPos is always set to the current buffer position, and
 * prevEndPos to the previous value of endPos. Depending on the token,
 * name, radix and the literal buffer read by stringVal() are set as well.
 */
public void nextToken() {
try {
prevEndPos = endPos;
sp = 0;
// Each iteration either returns with a token or, after whitespace,
// a line terminator or a comment, breaks out of the switch and retries.
while (true) {
pos = bp;
switch (ch) {
case ' ': // (Spec 3.6)
case '\t': // (Spec 3.6)
case FF: // (Spec 3.6)
do {
scanChar();
} while (ch == ' ' || ch == '\t' || ch == FF);
endPos = bp;
processWhiteSpace();
break;
case LF: // (Spec 3.4)
scanChar();
endPos = bp;
processLineTerminator();
break;
case CR: // (Spec 3.4)
scanChar();
if (ch == LF) {
scanChar();
}
endPos = bp;
processLineTerminator();
break;
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F': case 'G': case 'H': case 'I': case 'J':
case 'K': case 'L': case 'M': case 'N': case 'O':
case 'P': case 'Q': case 'R': case 'S': case 'T':
case 'U': case 'V': case 'W': case 'X': case 'Y':
case 'Z':
case 'a': case 'b': case 'c': case 'd': case 'e':
case 'f': case 'g': case 'h': case 'i': case 'j':
case 'k': case 'l': case 'm': case 'n': case 'o':
case 'p': case 'q': case 'r': case 's': case 't':
case 'u': case 'v': case 'w': case 'x': case 'y':
case 'z':
case '$': case '_':
scanIdent();
return;
case '0':
// A leading zero introduces a hex (0x), binary (0b) or octal/decimal-float
// literal; the prefix itself is not copied into the literal buffer.
scanChar();
if (ch == 'x' || ch == 'X') {
scanChar();
skipIllegalUnderscores();
if (ch == '.') {
scanHexFractionAndSuffix(false);
} else if (digit(16) < 0) {
lexError("invalid.hex.number");
} else {
scanNumber(16);
}
} else if (ch == 'b' || ch == 'B') {
if (!allowBinaryLiterals) {
lexError("unsupported.binary.lit", source.name);
allowBinaryLiterals = true;
}
scanChar();
skipIllegalUnderscores();
scanNumber(2);
} else {
putChar('0');
if (ch == '_') {
int savePos = bp;
do {
scanChar();
} while (ch == '_');
if (digit(10) < 0) {
lexError(savePos, "illegal.underscore");
}
}
scanNumber(8);
}
return;
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
scanNumber(10);
return;
case '.':
// Either the start of a floating point literal, an ellipsis, or a plain dot.
scanChar();
if ('0' <= ch && ch <= '9') {
putChar('.');
scanFractionAndSuffix();
} else if (ch == '.') {
putChar('.'); putChar('.');
scanChar();
if (ch == '.') {
scanChar();
putChar('.');
token = ELLIPSIS;
} else {
lexError("malformed.fp.lit");
}
} else {
token = DOT;
}
return;
case ',':
scanChar(); token = COMMA; return;
case ';':
scanChar(); token = SEMI; return;
case '(':
scanChar(); token = LPAREN; return;
case ')':
scanChar(); token = RPAREN; return;
case '[':
scanChar(); token = LBRACKET; return;
case ']':
scanChar(); token = RBRACKET; return;
case '{':
scanChar(); token = LBRACE; return;
case '}':
scanChar(); token = RBRACE; return;
case '/':
// Line comment, block/doc comment, "/=" or "/".
scanChar();
if (ch == '/') {
do {
scanCommentChar();
} while (ch != CR && ch != LF && bp < buflen);
// NOTE(review): a line comment that runs to the end of the input is
// not reported to processComment -- confirm this is intended.
if (bp < buflen) {
endPos = bp;
processComment(CommentStyle.LINE);
}
break;
} else if (ch == '*') {
scanChar();
CommentStyle style;
if (ch == '*') {
style = CommentStyle.JAVADOC;
scanDocComment();
} else {
style = CommentStyle.BLOCK;
while (bp < buflen) {
if (ch == '*') {
scanChar();
if (ch == '/') break;
} else {
scanCommentChar();
}
}
}
// Both comment scanners stop on the closing '/'; anything else here
// means the input ended inside the comment.
if (ch == '/') {
scanChar();
endPos = bp;
processComment(style);
break;
} else {
lexError("unclosed.comment");
return;
}
} else if (ch == '=') {
name = names.slashequals;
token = SLASHEQ;
scanChar();
} else {
name = names.slash;
token = SLASH;
}
return;
case '\'':
scanChar();
if (ch == '\'') {
lexError("empty.char.lit");
} else {
if (ch == CR || ch == LF)
lexError(pos, "illegal.line.end.in.char.lit");
scanLitChar();
if (ch == '\'') {
scanChar();
token = CHARLITERAL;
} else {
lexError(pos, "unclosed.char.lit");
}
}
return;
case '\"':
scanChar();
while (ch != '\"' && ch != CR && ch != LF && bp < buflen)
scanLitChar();
if (ch == '\"') {
token = STRINGLITERAL;
scanChar();
} else {
lexError(pos, "unclosed.str.lit");
}
return;
case '#':
// Exotic identifier of the form #"...".
scanChar();
if (ch == '\"') {
scanChar();
if (ch == '\"')
lexError(pos, "empty.bytecode.ident");
while (ch != '\"' && ch != CR && ch != LF && bp < buflen) {
scanBytecodeNameChar();
}
if (ch == '\"') {
name = names.fromChars(sbuf, 0, sp);
token = IDENTIFIER; // even if #"int" or #"do"
scanChar();
} else {
lexError(pos, "unclosed.bytecode.ident");
}
} else {
lexError("illegal.char", String.valueOf((int)'#'));
}
return;
default:
if (isSpecial(ch)) {
scanOperator();
} else {
boolean isJavaIdentifierStart;
if (ch < '\u0080') {
// all ASCII range chars already handled, above
isJavaIdentifierStart = false;
} else {
char high = scanSurrogates();
if (high != 0) {
if (sp == sbuf.length) {
putChar(high);
} else {
sbuf[sp++] = high;
}
isJavaIdentifierStart = Character.isJavaIdentifierStart(
Character.toCodePoint(high, ch));
} else {
isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);
}
}
if (isJavaIdentifierStart) {
scanIdent();
} else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5
token = EOF;
pos = bp = eofPos;
} else {
lexError("illegal.char", String.valueOf((int)ch));
scanChar();
}
}
return;
}
}
} finally {
// Runs on every exit path, so endPos is always the position just
// after the token (or after the text consumed before an error).
endPos = bp;
if (scannerDebug)
System.out.println("nextToken(" + pos
+ "," + endPos + ")=|" +
new String(getRawCharacters(pos, endPos))
+ "|");
}
}
/** Return the current token, set by nextToken().
 */
public Token token() {
    return this.token;
}

/** Sets the current token.
 */
public void token(Token token) {
    this.token = token;
}

/** Return the current token's position: a 0-based
 *  offset from beginning of the raw input stream
 *  (before unicode translation)
 */
public int pos() {
    return this.pos;
}

/** Return the end position of the current token, as recorded by
 *  nextToken().
 */
public int endPos() {
    return this.endPos;
}

/** Return the end position of the previous token, i.e. the value
 *  endPos had before the current token was read.
 */
public int prevEndPos() {
    return this.prevEndPos;
}

/** Return the position where a lexical error occurred.
 */
public int errPos() {
    return this.errPos;
}

/** Set the position where a lexical error occurred.
 */
public void errPos(int pos) {
    this.errPos = pos;
}

/** Return the name of an identifier or token for the current token.
 */
public Name name() {
    return this.name;
}

/** Return the radix of a numeric literal token.
 */
public int radix() {
    return this.radix;
}

/** Has a @deprecated been encountered in last doc comment?
 *  This needs to be reset by client with resetDeprecatedFlag.
 */
public boolean deprecatedFlag() {
    return this.deprecatedFlag;
}

/** Clear the flag reported by deprecatedFlag().
 */
public void resetDeprecatedFlag() {
    this.deprecatedFlag = false;
}
/**
 * Returns the documentation string of the current token.
 * This implementation does not record documentation comments and always
 * returns {@code null}.
 */
public String docComment() {
    return null;
}
/**
 * Returns a copy of the input buffer, up to its inputLength.
 * Unicode escape sequences are not translated.
 */
public char[] getRawCharacters() {
    final char[] copy = new char[buflen];
    System.arraycopy(buf, 0, copy, 0, copy.length);
    return copy;
}
/**
 * Returns a copy of a character array subset of the input buffer.
 * The returned array begins at the <code>beginIndex</code> and
 * extends to the character at index <code>endIndex - 1</code>.
 * Thus the length of the substring is <code>endIndex-beginIndex</code>.
 * This behavior is like
 * <code>String.substring(beginIndex, endIndex)</code>.
 * Unicode escape sequences are not translated.
 *
 * @param beginIndex the beginning index, inclusive.
 * @param endIndex the ending index, exclusive.
 * @throws IndexOutOfBoundsException if either offset is outside of the
 * array bounds
 * @throws NegativeArraySizeException if <code>endIndex</code> is less
 * than <code>beginIndex</code>
 */
public char[] getRawCharacters(int beginIndex, int endIndex) {
    final int count = endIndex - beginIndex;
    final char[] copy = new char[count];
    System.arraycopy(buf, beginIndex, copy, 0, count);
    return copy;
}
/** The kinds of comment the scanner reports to processComment. */
public enum CommentStyle {
    /** A comment introduced by two slashes. */
    LINE,
    /** A comment introduced by a slash and a single star. */
    BLOCK,
    /** A comment introduced by a slash and two stars. */
    JAVADOC
}
/**
 * Called when a complete comment has been scanned. pos and endPos
 * will mark the comment boundary.
 * This implementation only prints the comment when scanner debugging is
 * switched on.
 */
protected void processComment(CommentStyle style) {
    if (!scannerDebug) {
        return;
    }
    String raw = new String(getRawCharacters(pos, endPos));
    System.out.println("processComment(" + pos
                       + "," + endPos + "," + style + ")=|"
                       + raw
                       + "|");
}
/**
 * Called when a complete whitespace run has been scanned. pos and endPos
 * will mark the whitespace boundary.
 * This implementation only prints the run when scanner debugging is
 * switched on.
 */
protected void processWhiteSpace() {
    if (!scannerDebug) {
        return;
    }
    String raw = new String(getRawCharacters(pos, endPos));
    System.out.println("processWhitespace(" + pos
                       + "," + endPos + ")=|"
                       + raw
                       + "|");
}
/**
 * Called when a line terminator has been processed.
 * This implementation only prints the terminator when scanner debugging
 * is switched on.
 */
protected void processLineTerminator() {
    if (!scannerDebug) {
        return;
    }
    String raw = new String(getRawCharacters(pos, endPos));
    System.out.println("processTerminator(" + pos
                       + "," + endPos + ")=|"
                       + raw
                       + "|");
}
/**
 * Build a map for translating between line numbers and
 * positions in the input, from the scanner's input buffer.
 *
 * @return a LineMap
 */
public Position.LineMap getLineMap() {
    final Position.LineMap lineMap = Position.makeLineMap(buf, buflen, false);
    return lineMap;
}
}