1153N/A/*
1153N/A * CDDL HEADER START
1153N/A *
1153N/A * The contents of this file are subject to the terms of the
1153N/A * Common Development and Distribution License (the "License").
1153N/A * You may not use this file except in compliance with the License.
1153N/A *
1153N/A * See LICENSE.txt included in this distribution for the specific
1153N/A * language governing permissions and limitations under the License.
1153N/A *
1153N/A * When distributing Covered Code, include this CDDL HEADER in each
1153N/A * file and include the License file at LICENSE.txt.
1153N/A * If applicable, add the following below this CDDL HEADER, with the
1153N/A * fields enclosed by brackets "[]" replaced with your own identifying
1153N/A * information: Portions Copyright [yyyy] [name of copyright owner]
1153N/A *
1153N/A * CDDL HEADER END
1153N/A */
1153N/A
1153N/A/*
1153N/A * Gets Php symbols - ignores comments, strings, keywords
1153N/A */
1153N/A
1153N/Apackage org.opensolaris.opengrok.analysis.php;
1153N/Aimport org.opensolaris.opengrok.analysis.JFlexTokenizer;
1392N/Aimport java.util.*;
1153N/A
1153N/A%%
1153N/A%public
1153N/A%class PhpSymbolTokenizer
1153N/A%extends JFlexTokenizer
1153N/A%unicode
1369N/A%init{
1369N/Asuper(in);
1369N/A%init}
1153N/A%type boolean
1153N/A%eofval{
1416N/Athis.finalOffset=zzEndRead;
1153N/Areturn false;
1153N/A%eofval}
1153N/A%char
1392N/A%ignorecase
1392N/A%{
/**
 * Lower-case names that may appear as a type in a doc comment tag but are
 * not user-defined symbols; the doc-comment type rules skip any identifier
 * whose lower-cased text is in this set.
 */
private static final Set<String> PSEUDO_TYPES = new HashSet<String>(Arrays.asList(
    "string", "integer", "int", "boolean", "bool", "float", "double",
    "object", "mixed", "array", "resource", "void", "null", "callback",
    "false", "true", "self", "callable"));

/**
 * Labels of the heredoc/nowdoc blocks that have been opened and not yet
 * terminated; the opener rule pushes a label and the terminator rule pops
 * it once a line matching the top label is seen.
 */
private Stack<String> docLabels = new Stack<String>();
1392N/A
/**
 * Tells whether the character at offset {@code i} of the current match
 * is a horizontal tab or a space.
 */
private boolean isTabOrSpace(int i) {
    switch (yycharat(i)) {
        case '\t':
        case ' ':
            return true;
        default:
            return false;
    }
}
1394N/A
/**
 * Tells whether {@code state} is YYINITIAL, the state the scanner is in
 * before an opening PHP tag and returns to after a closing tag.
 */
private static boolean isHtmlState(int state) {
    return YYINITIAL == state;
}
1392N/A%}
1392N/A
1392N/AWhiteSpace = [ \t]+
1392N/AEOL = \r|\n|\r\n
1382N/AIdentifier = [a-zA-Z_\u007F-\u10FFFF] [a-zA-Z0-9_\u007F-\u10FFFF]*
1153N/A
1392N/AURIChar = [\?\+\%\&\:\/\.\@\_\;\=\$\,\-\!\~\*\\]
1392N/AFNameChar = [a-zA-Z0-9_\-\.]
1392N/AFile = [a-zA-Z]{FNameChar}* "." ("php"|"php3"|"php4"|"phps"|"phtml"|"inc"|"diff"|"patch")
1392N/APath = "/"? [a-zA-Z]{FNameChar}* ("/" [a-zA-Z]{FNameChar}*[a-zA-Z0-9])+
1392N/A
/* Numeric literals. Inside a character class '|' is an ordinary character,
 * so the binary prefix class lists only the two letters. A decimal literal
 * is one non-zero digit followed by any number of digits. */
BinaryNumber = 0[bB][01]+
OctalNumber = 0[0-7]+
DecimalNumber = [1-9][0-9]*
HexadecimalNumber = 0[xX][0-9a-fA-F]+
FloatNumber = (([0-9]* "." [0-9]+) | ([0-9]+ "." [0-9]*) | [0-9]+)([eE][+-]?[0-9]+)?
Number = [+-]?({BinaryNumber}|{OctalNumber}|{DecimalNumber}|{HexadecimalNumber}|{FloatNumber})
1392N/A
1392N/AOpeningTag = ("<?" "php"?) | "<?="
1392N/AClosingTag = "?>"
1392N/A
1392N/ACastTypes = "int"|"integer"|"real"|"double"|"float"|"string"|"binary"|"array"
1392N/A |"object"|"bool"|"boolean"|"unset"
1392N/A
1392N/ADoubleQuoteEscapeSequences = \\ (([nrtfve\\$]) | ([xX] [0-9a-fA-F]{1,2}) | ([0-7]{1,3}))
1392N/ASingleQuoteEscapeSequences = \\ [\\\']
1392N/A
1392N/ADocPreviousChar = "*" | {WhiteSpace}
1392N/A
1392N/ADocParamWithType = "return" | "throws" | "throw" | "var" | "see" //"see" can take a URL
1392N/ADocParamWithTypeAndName = "param" | "global" | "property" | "property-read"
1392N/A | "property-write"
1396N/ADocParamWithName = "uses"
1392N/A//method needs special treatment
1392N/A
1392N/A%state IN_SCRIPT STRING SCOMMENT HEREDOC NOWDOC COMMENT QSTRING BACKQUOTE STRINGEXPR STRINGVAR
1392N/A%state DOCCOMMENT DOCCOM_TYPE_THEN_NAME DOCCOM_NAME DOCCOM_TYPE
1153N/A
1153N/A%%
1153N/A
1153N/A<YYINITIAL> {
1392N/A {OpeningTag} { yypush(IN_SCRIPT); }
1382N/A}
1153N/A
1392N/A
1392N/A<IN_SCRIPT> {
"$" {Identifier} {
    // A '$'-prefixed identifier is reported without any keyword check;
    // the symbol text and its start offset both skip the '$' itself.
    final String name = yytext().substring(1);
    setAttribs(name, yychar + 1, yychar + yylength());
    return true;
}
1392N/A
{Identifier} {
    // Bare identifiers are symbols unless they are listed in Consts.kwd;
    // when they are, nothing is returned and scanning simply continues.
    final String word = yytext();
    final boolean keyword = Consts.kwd.contains(word);
    if (!keyword) {
        setAttribs(word, yychar, yychar + yylength());
        return true;
    }
}
1392N/A
1392N/A \( {WhiteSpace}* {CastTypes} {WhiteSpace}* \) { }
1392N/A
1392N/A b? \" { yypush(STRING); }
1392N/A
1392N/A b? \' { yypush(QSTRING); }
1392N/A
1392N/A ` { yypush(BACKQUOTE); }
1392N/A
b? "<<<" {WhiteSpace}* ({Identifier} | (\'{Identifier}\') | (\"{Identifier}\")){EOL} {
    /* Opening line of a heredoc or nowdoc: remember its label and enter
     * the matching state until the terminator line is found. */

    // The optional binary prefix is present exactly when the match does not
    // start with '<'. Comparing against 'b' alone would miss 'B', which
    // %ignorecase also lets the pattern accept.
    int labelStart = yycharat(0) == '<' ? 3 : 4;
    int labelEnd = yylength() - 1;
    while (isTabOrSpace(labelStart)) { labelStart++; }
    while (yycharat(labelEnd) == '\n' || yycharat(labelEnd) == '\r') { labelEnd--; }

    final boolean singleQuoted = yycharat(labelStart) == '\'';
    final boolean doubleQuoted = yycharat(labelStart) == '"';

    // Only <<<'LABEL' is a nowdoc; <<<"LABEL" is a heredoc with a quoted
    // label and still interpolates variables.
    yypush(singleQuoted ? NOWDOC : HEREDOC);

    // For a quoted label, labelStart/labelEnd sit on the quotes themselves,
    // so both are excluded from the stored label.
    final String label = (singleQuoted || doubleQuoted)
            ? yytext().substring(labelStart + 1, labelEnd)
            : yytext().substring(labelStart, labelEnd + 1);
    this.docLabels.push(label);
}
1392N/A
1392N/A {Number} { }
1392N/A
1392N/A "#"|"//" { yypush(SCOMMENT); }
1392N/A "/**" { yypush(DOCCOMMENT); }
1392N/A "/*" { yypush(COMMENT); }
1392N/A
1392N/A \{ { yypush(IN_SCRIPT); }
\} {
    // Only unwind when there is a saved state and it is not the HTML
    // (YYINITIAL) state; otherwise the brace is just skipped.
    final boolean canPop = !this.stack.empty() && !isHtmlState(this.stack.peek());
    if (canPop) {
        yypop(); //may pop STRINGEXPR/HEREDOC/BACKQUOTE
    }
}
1392N/A
1394N/A {ClosingTag} { while (!isHtmlState(yystate())) yypop(); }
1392N/A} //end of IN_SCRIPT
1392N/A
1153N/A<STRING> {
1392N/A \\\" { }
1392N/A \" { yypop(); }
1392N/A}
1392N/A
1392N/A<BACKQUOTE> {
1392N/A "\\`" { }
1392N/A "`" { yypop(); }
1392N/A}
1392N/A
1392N/A<STRING, BACKQUOTE, HEREDOC> {
1392N/A "\\{" { }
1392N/A
1392N/A {DoubleQuoteEscapeSequences} {}
1392N/A
1392N/A "$" { yypush(STRINGVAR); }
1392N/A
1392N/A "${" { yypush(STRINGEXPR); }
1392N/A
/* ${ is different from {$ -- for instance {$foo->bar[1]} is valid
 * but ${foo->bar[1]} is not. ${ only enters the full blown scripting state
 * when {Identifier}[ is found (see the PHP scanner). The parser seems to
 * put more restrictions on the {$ scripting mode than on the
 * "${" {Identifier} "[" scripting mode, but that's not relevant here */
1392N/A "{$" {
1392N/A yypushback(1);
1392N/A yypush(IN_SCRIPT);
1392N/A }
1153N/A}
1153N/A
1153N/A<QSTRING> {
1392N/A {SingleQuoteEscapeSequences} { }
1392N/A \' { yypop(); }
1392N/A}
1392N/A
<HEREDOC, NOWDOC>^{Identifier} ";"? {EOL} {
    /* A line made of an identifier, an optional ';' and a line break may
     * terminate the current heredoc/nowdoc. It does so only when the
     * identifier equals the label on top of docLabels; otherwise the line
     * is ordinary content and the state is left unchanged. */
    int end = yylength() - 1;
    // step back over the line break and the optional ';'
    while (yycharat(end) == '\n' || yycharat(end) == '\r') { end--; }
    if (yycharat(end) == ';') { end--; }
    if (yytext().substring(0, end + 1).equals(this.docLabels.peek())) {
        this.docLabels.pop();
        yypop();
    }
}
1382N/A
1392N/A<STRING, QSTRING, BACKQUOTE, HEREDOC, NOWDOC>{WhiteSpace}* {EOL} { }
1382N/A
<STRINGVAR> {
    /* Entered after a bare "$" inside STRING, BACKQUOTE or HEREDOC. The
     * variable name is reported first; at most one simple subscript or
     * property access is then consumed before returning to the string. */

    {Identifier} {
        final String name = yytext();
        setAttribs(name, yychar, yychar + yylength());
        return true;
    }

    \[ {Number} \] {
        yypop(); //because "$arr[0][1]" is the same as $arr[0] . "[1]"
    }

    \[ {Identifier} \] {
        //then the identifier is actually a string!
        yypop();
    }

    \[ "$" {Identifier} \] {
        // strip the leading "[$" and the trailing "]" from the match
        final String matched = yytext();
        final String name = matched.substring(2, matched.length() - 1);
        setAttribs(name, yychar + 2, yychar + yylength() - 1);
        yypop();
        return true;
    }

    "->" {Identifier} {
        final String property = yytext().substring(2);
        setAttribs(property, yychar + 2, yychar + yylength());
        yypop(); //because "$arr->a[0]" is the same as $arr->a . "[0]"
        return true;
    }

    . | \n {
        // anything else belongs to the enclosing string: give it back
        yypushback(1);
        yypop();
    }
}
1382N/A
1382N/A<STRINGEXPR> {
1392N/A {Identifier} {
1392N/A setAttribs(yytext(), yychar, yychar + yylength());
1392N/A return true;
1392N/A }
1392N/A \} { yypop(); }
1392N/A \[ { yybegin(IN_SCRIPT); } /* don't push. when we find '}'
1392N/A * and we pop we want to go to
1392N/A * STRING/HEREDOC, not back to
1392N/A * STRINGEXPR */
1153N/A}
1153N/A
1153N/A<SCOMMENT> {
1392N/A {ClosingTag} {
1394N/A while (!isHtmlState(yystate())) yypop();
1392N/A }
1392N/A {WhiteSpace}* {EOL} {
1392N/A yypop();
1392N/A }
1392N/A}
1392N/A
1392N/A<DOCCOMMENT> {
1392N/A /* change relatively to xref -- we also consume the whitespace after */
1392N/A {DocPreviousChar} "@" {DocParamWithType} {WhiteSpace}+ {
1392N/A yybegin(DOCCOM_TYPE);
1392N/A }
1392N/A
1392N/A {DocPreviousChar} "@" {DocParamWithTypeAndName} {WhiteSpace}+ {
1392N/A yybegin(DOCCOM_TYPE_THEN_NAME);
1392N/A }
1396N/A
1396N/A {DocPreviousChar} "@" {DocParamWithName} {WhiteSpace}+ {
1396N/A yybegin(DOCCOM_NAME);
1396N/A }
1153N/A}
1153N/A
1392N/A<DOCCOM_TYPE_THEN_NAME, DOCCOM_TYPE> {
1392N/A /* The rules here had to be substantially changed because we cannot find
1392N/A * several symbols in one match. This is substantially more lax than
1392N/A * the xref rules */
1392N/A
1392N/A [\[\]\|\(\)] { }
1392N/A
{WhiteSpace} {
    // Whitespace ends the type part: tags that also carry a name move on
    // to DOCCOM_NAME, the others go back to the plain doc comment.
    if (yystate() == DOCCOM_TYPE_THEN_NAME) {
        yybegin(DOCCOM_NAME);
    } else {
        yybegin(DOCCOMMENT);
    }
}
1392N/A
{Identifier} {
    // Type names in a doc comment are symbols unless they are one of the
    // PSEUDO_TYPES. The set holds lower-case ASCII names, so the lookup
    // key is lower-cased with Locale.ROOT: the default-locale variant
    // would turn "INT" into a dotless-i string under a Turkish locale
    // and the pseudo type would be reported as a symbol.
    if (!PSEUDO_TYPES.contains(yytext().toLowerCase(Locale.ROOT))) {
        setAttribs(yytext(), yychar, yychar + yylength());
        return true;
    }
}
1392N/A
1392N/A .|\n { yybegin(DOCCOMMENT); yypushback(1); }
1382N/A}
1382N/A
<DOCCOM_NAME> {
    /* Expects the "$name" part of a doc comment tag; whatever is found,
     * the scanner goes back to DOCCOMMENT afterwards. */

    "$" {Identifier} {
        // report the name without its '$'
        final String name = yytext().substring(1);
        setAttribs(name, yychar + 1, yychar + yylength());
        yybegin(DOCCOMMENT);
        return true;
    }

    .|\n {
        // not a variable: hand the character back to DOCCOMMENT
        yybegin(DOCCOMMENT);
        yypushback(1);
    }
}
1392N/A
1392N/A<COMMENT, DOCCOMMENT> {
1392N/A {WhiteSpace}* {EOL} { }
1392N/A "*/" { yypop(); }
1153N/A}
1392N/A
1392N/A<YYINITIAL, IN_SCRIPT, STRING, QSTRING, BACKQUOTE, HEREDOC, NOWDOC, SCOMMENT, COMMENT, DOCCOMMENT, STRINGEXPR, STRINGVAR> {
1392N/A {WhiteSpace}* {EOL} { }
1392N/A {WhiteSpace} { }
1392N/A [!-~] { }
1392N/A . { }
1392N/A}
1392N/A
1392N/A<YYINITIAL, SCOMMENT, COMMENT, DOCCOMMENT, STRING, QSTRING, BACKQUOTE, HEREDOC, NOWDOC> {
1392N/A {Path} { }
1392N/A
1392N/A {File} { }
1392N/A
1392N/A ("http" | "https" | "ftp" ) "://" ({FNameChar}|{URIChar})+[a-zA-Z0-9/]
1392N/A { }
1392N/A
1392N/A {FNameChar}+ "@" {FNameChar}+ "." {FNameChar}+
1392N/A { }
1392N/A}