antlr.g revision 4fd606d1f5abe38e1f42c38de1d2e895166bd0f4
* Parse an antlr input grammar and build a syntax-diagram. * Written in itself (needs at least 1.06 to work) * We reserve no LEGAL rights to the Purdue Compiler Construction Tool * Set (PCCTS) -- PCCTS is in the public domain. An individual or * company may do whatever they wish with source code distributed with * PCCTS or the code generated by PCCTS, including the incorporation of * PCCTS, or its output, into commerical software. * We encourage users to develop software with PCCTS. However, we do ask * that credit is given to us for developing PCCTS. By "credit", * we mean that if you incorporate our source code into one of your * programs (commercial product, research project, or otherwise) that you * acknowledge this fact somewhere in the documentation, research report, * etc... If you like PCCTS and have developed a nice tool with the * output, please mention that you developed it using PCCTS. In * addition, we ask that this header remain intact in our source code. * As long as these guidelines are kept, we expect to continue enhancing * this system and expect to make other tools available as they are * Parr Research Corporation * with Purdue University and AHPCRC, University of Minnesota /* MR1 10-Apr-97 MR1 Replace #if logic with #include "pcctscfg.h" */ /* MR20 G. Hobbelt For Borland C++ 4.x & 5.x compiling with ALL warnings enabled */ static void chkToken(
char *,
char *,
char *,
int);
/* MR20 G. Hobbelt extern definitions moved to antlr.h */ /* maintained, but not used for now */ /* MR1 10-Apr-97 MR1 Previously unable to put right shift operator */ warn(
"eoln found in string");
warn(
"eoln found in string (in user action)");
warn(
"eoln found in char literal (in user action)");
* This lexical class accepts actions of type [..] and <<..>> * It translates the following special items for C: * $j --> "zzaArg(current zztasp, j)" * $i.j --> "zzaArg(zztaspi, j)" * $i.nondigit --> "zzaArg(current zztasp, i).nondigit" * $alnum --> "alnum" (used to ref parameters) * $retval --> "_retv.retval" if > 1 return values else "_retv" * $[token, text] --> "zzconstr_attr(token, text)" * $[] --> "zzempty_attr()" * It translates the following special items for C++: * (attributes are now stored with 'Token' and $i's are only * pointers to the Tokens. Rules don't have attributes now.) * $j --> "_tbj" where b is the block level * $j->nondigit --> "_tbj->nondigit" * $alnum --> "alnum" (used to ref parameters) * $retval --> "_retv.retval" if > 1 return values else "_retv" * $[token, text] --> invalid * #[args] --> "zzmk_ast(zzastnew(), args)" * #( root, child1, ..., childn ) * --> "zztmake(root, child1, ...., childn, NULL)" * #i --> "_astbi" where b is the block level * #alnum --> "alnum_ast" (used to ref #label) * #[args] --> "new AST(args)" * #( root, child1, ..., childn ) * --> "AST::tmake(root, child1, ...., childn, NULL)" * A stack is used to nest action terminators because they can be nested * like crazy: << #[$[..],..] >> /* MR1 10-Apr-97 MR1 Previously unable to put right shift operator */ /* MR1 Doesn't matter what kind of action it is - reset*/ #
token Pred "\>\>?" <<
/* these do not nest */ /* terminate $[..] and #[..] */ else if (
topint() ==
'|' ) {
/* end of simple [...] */ #
token "consumeUntil\( [\ \t]* \{~[\}]+\} [\ \t]* \)"#
token "consumeUntil\( ~[\)]+ \)" else err(
"$$ use invalid in C++ mode"); >>
else err(
"$[] use invalid in C++ mode"); >>
else err(
"$[..] use invalid in C++ mode");
fatal(
"$i attrib ref too big");
err(
"cannot mix old-style $i with new-style labels");
fatal(
"$i.field attrib ref too big");
err(
"cannot mix old-style $i with new-style labels");
#
token "$[0-9]+.[0-9]+" <<{
static char i[
20], j[
20];
fatalFL(
"i of $i.j attrib ref too big",
for (p++, q= &j[
0]; *p!=
'\0'; p++) {
fatalFL(
"j of $i.j attrib ref too big",
err(
"cannot mix old-style $i with new-style labels");
#
token "$[_a-zA-Z][_a-zA-Z0-9]*" "$retval attrib ref too big");
{
err(
"$-variables in actions outside of rules are not allowed"); }
/* MR10 */ /* element labels might exist without an elem when */ /* MR10 */ /* it is a forward reference (to a rule) */ /* MR10 */ {
err(
eMsg1(
"There are no token ptrs for rule references: '$%s'",&
zzbegexpr[
1])); }
/* MR10 */ err(
"You can no longer use attributes returned by rules when also using ASTs");
/* MR10 */ err(
" Use upward inheritance (\"rule >[Attrib a] : ... <<$a=...\>\>\")");
/* MR10 */ /* keep track of <<... $label ...>> for semantic predicates in guess mode */ /* MR10 */ /* element labels contain pointer to the owners node */ fatal(
"#i AST ref too big");
/* MR14 Arpad Beszedes 26-May-98 Add support for #line directives when antlr source is pre-processed #
token "#line[\ \t]* [0-9]+ {[\ \t]* \"~[\"]+\" ([\ \t]* [0-9]*)* } (\n|\r|\r\n)"#
token "#line ~[\n\r]* (\n|\r|\r\n)"/* MR14 end of a block to support #line in antlr source code */ #
token "#[_a-zA-Z][_a-zA-Z0-9]*" pushint(
'|');
/* look for '|' to terminate simple [...] */ /* MR1 10-Apr-97 MR1 Previously unable to put right shift operator */ #
token "\\~[\]\)>$#]" <<
zzmore(); >>
/* escaped char, always ignore */#
token "\*/" <<
warn(
"Missing /*; found dangling */ in action");
zzmore(); >>
/* MR14 Arpad Beszedes 26-May-98 Add support for #line directives when antlr source is pre-processed #
token "#line[\ \t]* [0-9]+ {[\ \t]* \"~[\"]+\" ([\ \t]* [0-9]*)* } (\n|\r|\r\n)"#
token "#line ~[\n\r]* (\n|\r|\r\n)"/* MR14 end of a block to support #line in antlr source code */ /* 8-Apr-97 Regularize escape sequence for ">>" */ /* appearing in string literals */ #
token "\>\>" <<
warn(
"Missing <<; found dangling \>\>");
zzskip(); >>
/* MR1 */ <<
/* L o o k F o r A n o t h e r F i l e */ zzskip();
/* Skip the Eof (@) char i.e continue */ #
errclass "meta-symbol" {
"\}" "!" ";" "\|" "\~" "^" "\)" }
* Get a grammar -- Build a list of rules like: (
"{\\}#header" Action /* MR13 */ else warn(
"additional #header statement ignored");
warn(
"additional #first statement ignored");
warn(
"#parser meta-op incompatible with -CC; ignored");
warn(
"#parser meta-op incompatible with '-gp prefix'; '-gp' ignored");
else warn(
"additional #parser statement ignored");
warn(
"missing class definition for trailing '}'");
/* MR21a */ /* Avoid use of a malformed graph when CannotContinue */ /* MR21a */ /* is already set */ /* MR21a */ g =
Or(g, $
1);
warn(
"missing class definition for trailing '}'");
warn(
"missing class definition for trailing '}'");
err(
"only one grammar class allowed in this release");
<<
if ( !
GenCC ) {
err(
"class meta-op used without C++ option"); }>>
* Build -o-->o-R-o-->o- where -o-R-o- is the block from rule 'block'. * Construct the RuleBlk front and EndRule node on the end of the * block. This is used to add FOLLOW pointers to the rule end. Add the * new rule name to the Rname hash table and sets its rulenum. * Store the parameter definitions if any are found. * Note that locks are required on the RuleBlk and EndRule nodes to thwart * Return the left graph pointer == NULL to indicate error/dupl rule def. /* We want a new element label hash table for each rule */ require(a!=
NULL,
"rule rule: cannot allocate error action");
* pragma : "{\\}#pragma" "dup\-labeled\-tokens" * <<Pragma_DupLabeledTokens=1;>> require(a!=
NULL,
"rule laction: cannot allocate action");
/* MR1 11-Apr-97 Provide mechanism for inserting code into DLG class */ /* MR1 via #lexmember <<....>> & #lexprefix <<...>> */ /* MR1 */ "{\\}#lexmember" /* MR1 */ err(
"Use #lexmember only in C++ mode (to insert code in DLG class header");
/* MR1 */ require(a!=
NULL,
"rule lmember: cannot allocate action");
/* MR1 */ "{\\}#lexprefix" /* MR1 */ err(
"Use #lexprefix only in C++ mode (to insert code in DLG class header");
/* MR1 */ require(a!=
NULL,
"rule lprefix: cannot allocate action");
* #pred upper <<isupper()>>? predicate literal * #pred lower <<islower()>>? predicate literal * #pred up_or_low upper || lower predicate expression * concealed interdependence * #pred up_or_low_2 <<isletter()>>? A || B predicate literal equals predicate expr * analyze using lower||upper * generate using isLetter() /* used to allow NonTerminal but it caused problems when a rule name immediately followed a #pred statement */ /* don't free - referenced in predicates */ * predExpr may be NULL due to syntax errors * or simply omitted by the user warn(
eMsg1(
"errclass name conflicts with regular expression '%s'",t));
"hash table mechanism is broken");
p->
classname =
1;
/* entry is errclass name, not token */ warn(
eMsg1(
"redefinition of errclass or conflict w/token or tokclass '%s'; ignored",t));
"hash table mechanism is broken");
p->
classname =
1;
/* entry is class name, not token */ p->
tclass = e;
/* save ptr to this tclass def */ warn(
eMsg1(
"redefinition of tokclass or conflict w/token '%s'; ignored",t));
/* MR23 */ warnFL(
eMsg2(
"this #tokclass statment conflicts with a previous #tokclass %s(\"%s\") statement",
err(
"implicit token definition not allowed with #tokdefs");
err(
"implicit token definition not allowed with #tokdefs");
err(
"implicit token definition not allowed with #tokdefs");
)+
// MR15 Manfred Kogler - forbid empty #tokclass sets (was "+")/* MR1 10-Apr-97 MR1 Allow shift right operator in DLG actions */ /* MR1 Danger when parser feedback to lexer */ {
"=" "[0-9]+" /* define the token type number */ require(a!=
NULL,
"rule token: cannot allocate action");
warnFL(
eMsg2(
"this #token statment conflicts with a previous #token %s(\"%s\") statement",
/* MR12c */ errFL(
"<<nohoist>> appears as init-action - use <<>> <<nohoist>>",
/* MR7 ***** eg->altID = makeAltID(CurBlockID,CurAltNum); *****/ /* MR7 ***** CurAltStart->exception_label = eg->altID; *****/ /* MR7 ***** eg->altID = makeAltID(CurBlockID,CurAltNum); *****/ /* MR7 ***** CurAltStart->exception_label = eg->altID; *****/ {
"\@" /* handle MismatchedToken signals with default handler */ ( <<;>>
/* MR9 Removed unreferenced variable "tok" */ /* record record number of all rule and token refs */ /* We want to reduce number of LT(i) calls and the number of * local attribute variables in C++ mode (for moment, later we'll * do for C also). However, if trees are being built, they * require most of the attrib variables to create the tree nodes * with; therefore, we gen a token ptr for each token ref in C++ /* This now frees the temp set -ATG 5/6/95 */ err(
"one or more $i in action(s) refer to non-token elements");
err(
eMsg1(
"label definition clashes with rule definition: '%s'",
lab));
/* we don't clash with anybody else */ if ( l==
NULL ) {
/* ok to add new element label */ /* add to list of element labels for this rule */ /* MR7 */ leAdd(l);
/* list of labels waiting for exception group definitions */ err(
eMsg1(
"label definitions must be unique per rule: '%s'",
lab));
err(
"implicit token definition not allowed with #tokdefs");
err(
"implicit token definition not allowed with #tokdefs");
| <<
if ( $
old_not )
warn(
"~ WILDCARD is an undefined operation (implies 'nothing')");>>
| <<
if ( $
old_not )
warn(
"~ NONTERMINAL is an undefined operation");>>
require(a!=
NULL,
"rule element: cannot allocate assignment");
| <<
if ( $
old_not )
warn(
"~ ACTION is an undefined operation");>>
| <<
if ( $
old_not )
warn(
"~ SEMANTIC-PREDICATE is an undefined operation");>>
require(a!=
NULL,
"rule element: cannot allocate predicate fail action");
| <<
if ( $
old_not )
warn(
"~ BLOCK is an undefined operation");>>
|
"LL\(1\)" <<
approx =
1;>>
/* MR20 */ |
"LL\(2\)" <<
approx =
2;>>
/* MR20 *//* MR21 */ "cannot allocate first set name");
/* MR21 */ "cannot allocate first set name");
Pred /* generalized predicate */ /* first make into a predicate */ require(a!=
NULL,
"rule element: cannot allocate predicate fail action");
/* for now, just snag context */ if ( !
predMsgDone)
err(
"invalid or missing context guard");
/* MR10 */ /* MR13 */ errFL(
"in guarded predicates all tokens in the guard must be at the same height",
/* MR11 */ errFL(
"without \"-prc on\" (guard)? && <<pred>>? ... doesn't make sense",
/* MR10 */ warn(
eMsgd2(
"length of guard (%d) does not match the length of semantic predicate (%d)",
err(
"(...)? predicate must be first element of production");
/* Error catching alternatives */ /* rule default_exception_handler */ /* rule exception_group */ p[
strlen(p)-
1] =
'\0';
/* kill trailing space */ /* Record ex group in sym tab for this label */ /* Label the exception group itself */ /* Make the labelled element pt to the exception also */ /* MR6 */ err(
eMsg1(
"reference in exception handler to undefined label '%s'",
label->
str));
/* MR6 */ };
/* end test on label->elem */ }
/* end test on label->ex_group */ }
/* end test on exception label *//* MR7 */ /* You may want to remove this exc from the rule list */ /* MR7 */ /* and handle at the labeled element site. */ /* rule exception_handler */ : <<;>>
/* MR9 Removed unreferenced variable "a" */ {
"#define" ID /* ignore if it smells like a gate */ /* First #define after the first #ifndef (if any) is ignored */ : <<
int v;
int maxt=(-
1);
char *t;>>
/* MR3 *//* fprintf(stderr, "#token %s=%d\n", t, v);*/ /* MR2 Andreas Magnusson (Andreas.Magnusson@mailbox.swipnet.se) */ /* MR2 Fix to bug introduced by 1.33MR1 for #tokdefs */ /* MR2 Don't let #tokdefs be confused by */ /* MR2 DLGminToken and DLGmaxToken */ : <<
int v=
0;
int maxt=(-
1);
char *t;>>
/* MR3 *//* fprintf(stderr, "#token %s=%d\n", t, v);*/ /* MR2 Andreas Magnusson (Andreas.Magnusson@mailbox.swipnet.se) */ /* MR2 Fix to bug introduced by 1.33MR1 for #tokdefs */ /* MR2 Don't let #tokdefs be confused by */ /* MR2 DLGminToken and DLGmaxToken */ /* fprintf(stderr, "#token %s=%d\n", t, v);*/ /* MR14 Arpad Beszedes 26-May-98 Add support for #line directives when antlr source is pre-processed /* go back until the last / or \ */ /* copy the string after " / or \ into toStr */ for (i=k+
1;
fromStr[i] !=
'\"'; i++) {
/* MR14 end of a block to support #line in antlr source code */ /* MR2 Andreas Magnusson (Andreas.Magnusson@mailbox.swipnet.se) */ /* MR2 Fix to bug introduced by 1.33MR1 for #tokdefs */ /* MR2 Don't let #tokdefs be confused by */ /* MR2 DLGminToken and DLGmaxToken */ /* semantic check on DLGminToken and DLGmaxToken in #tokdefs */ /* semantics of #token */ /* check to see that they don't try to redefine a token as a token class */ err(
eMsg1(
"redefinition of #tokclass '%s' to #token not allowed; ignored",t));
if ( t==
NULL && e==
NULL ) {
/* none found */ err(
"#token requires at least token name or rexpr");
else if ( t!=
NULL && e!=
NULL ) {
/* both found */ err(
eMsg1(
"new token definition '%s' not allowed - only #token with name already defined by #tokdefs file allowed",t));
err(
eMsg1(
"redefinition of action for %s; ignored",e));
else if ( t!=
NULL ) {
/* only one found */ err(
eMsg1(
"new token definition '%s' not allowed - only #token with name already defined by #tokdefs file allowed",t));
err(
eMsg1(
"redefinition of token %s; ignored",t));
err(
eMsg1(
"action cannot be attached to a token name (%s); ignored",t));
err(
eMsg1(
"redefinition of action for expr %s; ignored",e));
err(
eMsg1(
"redefinition of expr %s; ignored",e));
/* if a token type number was specified, then add the token ID and 'tnum' * pair to the ForcedTokens list. (only applies if an id was given) err(
eMsgd(
"a token has already been forced to token number %d; ignored",
tnum));
if ( !(*s>=
'A' && *s<=
'Z') )
return 0;
while ( (*s>=
'a' && *s<=
'z') ||
if ( *s!=
' ' && *s!=
'}' )
return 0;
if ( *s==
'\n' || *s==
'\r' )
/* MR13 */ warn(
"eoln found in regular expression");
* Walk a string "{ A .. Z }" where A..Z is a space separated list * of token references (either labels or reg exprs). Return a * string "inlineX_set" for some unique integer X. Basically, * we pretend as if we had seen "#tokclass inlineX { A .. Z }" * on the input stream outside of an action. while ( *s==
' ' || *s==
'\t' || *s==
'\n' || *s==
'\r' ) {s++;}
err(
"malformed consumeUntil( {...} ); missing '{'");
while ( *s==
' ' || *s==
'\t' || *s==
'\n' || *s==
'\r' ) {s++;}
err(
"invalid element in consumeUntil( {...} )");
while ( *s==
' ' || *s==
'\t' || *s==
'\n' || *s==
'\r' ) {s++;}
/* ANTLR-specific syntax error message generator * (define USER_ZZSYN when compiling so don't get 2 definitions)