fset.c revision 4fd606d1f5abe38e1f42c38de1d2e895166bd0f4
/*
* fset.c
*
* Compute FIRST and FOLLOW sets.
*
* SOFTWARE RIGHTS
*
* We reserve no LEGAL rights to the Purdue Compiler Construction Tool
* Set (PCCTS) -- PCCTS is in the public domain. An individual or
* company may do whatever they wish with source code distributed with
* PCCTS or the code generated by PCCTS, including the incorporation of
* PCCTS, or its output, into commerical software.
*
* We encourage users to develop software with PCCTS. However, we do ask
* that credit is given to us for developing PCCTS. By "credit",
* we mean that if you incorporate our source code into one of your
* programs (commercial product, research project, or otherwise) that you
* acknowledge this fact somewhere in the documentation, research report,
* etc... If you like PCCTS and have developed a nice tool with the
* output, please mention that you developed it using PCCTS. In
* addition, we ask that this header remain intact in our source code.
* As long as these guidelines are kept, we expect to continue enhancing
* this system and expect to make other tools available as they are
* completed.
*
* ANTLR 1.33
* Terence Parr
* Parr Research Corporation
* with Purdue University and AHPCRC, University of Minnesota
* 1989-2001
*/
#include <stdio.h>
#include <stdlib.h>
#include "pcctscfg.h"
#include "set.h"
#include "syn.h"
#include "hash.h"
#include "generic.h"
#include "dlgdef.h"
#include "limits.h"
#ifdef __USE_PROTOS
#else
static void ensure_predicates_cover_ambiguous_lookahead_sequences();
#endif
/*
* What tokens are k tokens away from junction q?
*
* Follow both p1 and p2 paths (unless RuleBlk) to collect the tokens k away from this
* node.
* We lock the junction according to k--the lookahead. If we have been at this
* junction before looking for the same, k, number of lookahead tokens, we will
* do it again and again...until we blow up the stack. Locks are only used on aLoopBlk,
* FIRST and FOLLOW calcs.
*
* If p->jtype == EndRule we are going to attempt a FOLLOW. (FOLLOWs are really defined
* in terms of FIRST's, however). To proceed with the FOLLOW, p->halt cannot be
* set. p->halt is set to indicate that a reference to the current rule is in progress
* and the FOLLOW is not desirable.
*
* If we attempt a FOLLOW and find that there is no FOLLOW or REACHing beyond the EndRule
* junction yields an empty set, replace the empty set with EOF. No FOLLOW means that
* only EOF can follow the current rule. This normally occurs only on the start symbol
* since all other rules are referenced by another rule somewhere.
*
* Normally, both p1 and p2 are followed. However, checking p2 on a RuleBlk node is
* the same as checking the next rule which is clearly incorrect.
*
* Cycles in the FOLLOW sense are possible. e.g. Fo(c) requires Fo(b) which requires
* Fo(c). Both Fo(b) and Fo(c) are defined to be Fo(b) union Fo(c). Let's say
* Fo(c) is attempted first. It finds all of the FOLLOW symbols and then attempts
* to do Fo(b) which finds all of its FOLLOW symbols. So, we have:
*
*                  Fo(c)
*                 /     \
*              a set    Fo(b)
*                      /     \
*                   a set    Fo(c) .....Hmmmm..... Infinite recursion!
*
* The 2nd Fo(c) is not attempted and Fo(b) is left deficient, but Fo(c) is now
* correctly Fo(c) union Fo(b). We wish to pick up where we left off, so the fact
* that Fo(b) terminated early means that we lack Fo(c) in the Fo(b) set already
* laying around. SOOOOoooo, we track FOLLOW cycles. All FOLLOW computations are
* cached in a hash table. After the sequence of FOLLOWs finish, we reconcile all
* cycles --> correct all Fo(rule) sets in the cache.
*
* Confused? Good! Read my MS thesis [Purdue Technical Report TR90-30].
* TJP 8/93 -- can now read PhD thesis from Purdue.
*
* Only FIRST sets, for which the FOLLOW is not included, are stored.
*
* SPECIAL CASE of (...)+ blocks:
* I added an optional alt so that the alts could see what
* was behind the (...)+ block--thus using enough lookahead
* to branch out rather than just enough to distinguish
* between alts in the (...)+. However, when the FIRST("(...)+") is
* needed, must not use this last "optional" alt. This routine
* turns off this path by setting a new 'ignore' flag for
* the alt and then resetting it afterwards.
*/
#ifdef __USE_PROTOS
#else
Junction *p;
int k;
#endif
{
set a, b;
#ifdef DBG_LL1
#endif
/* if this is one of the added optional alts for (...)+ then return */
/* no need to pop backtrace - hasn't been pushed */
/* MR14 */ warnFL(
/* MR14 */ "not possible to compute follow set for alpha in an \"(alpha)? beta\" block. ",
/* MR14 */ MR_alphaBetaTraceReport();
/* MR14 */ };
/* MR14 */ if (p->alpha_beta_guess_end) {
/* MR14 */ return empty;
/* MR14 */ }
/* locks are valid for aLoopBlk,aPlusBlk,RuleBlk,EndRule junctions only */
{
if ( p->lock[k] )
{
{
#ifdef DBG_LL1
#endif
}
return empty;
}
! MR_AmbSourceSearch) /* check for FIRST cache */
{
if ( q != NULL )
{
}
}
!p->halt && /* MR11 was using cache even when halt set */
! MR_AmbSourceSearch) /* FOLLOW set cached already? */
{
if ( q != NULL )
{
#ifdef DBG_LL1
#endif
if ( !q->incomplete )
{
}
}
}
}
a = b = empty;
{
{
return empty;
}
if ( p->p1 == NULL ) set_orel((TokenInd!=NULL?TokenInd[EofToken]:EofToken), &a);/* if no FOLLOW assume EOF */
#ifdef DBG_LL1
#endif
}
/* MR14 */ if (p->guess) {
/* MR14 */ }
/* MR14 */ }
} else {
}
}
/* C a c h e R e s u l t s */
{
/*fprintf(stderr, "Caching %s FIRST %d\n", p->rname, k);*/
}
!p->halt && /* MR11 was using cache even with halt set */
! MR_AmbSourceSearch) /* just completed FOLLOW? */
{
/* Cache Follow set */
if ( q==NULL )
{
}
/*fprintf(stderr, "Caching %s FOLLOW %d\n", p->rname, k);*/
if ( set_nil(a) && !q->incomplete )
{
/* Don't ever save a nil set as complete.
* Turn it into an eof set.
*/
}
FoPop( k );
#ifdef DBG_LL1
#endif
}
}
set_orin(&a, b);
set_free(b);
return a;
}
#ifdef __USE_PROTOS
#else
RuleRefNode *p;
int k;
#endif
{
Junction *r;
int k2;
int save_halt;
#ifdef DBG_LL1
#endif
if ( q == NULL )
{
return a;
}
/* MR9 Problems with rule references in guarded predicates */
/* MR9 Perhaps can use hash table to find rule ? */
/* MR9 */ };
if ( r->lock[k] )
{
return empty;
}
set_orin(&a, b);
set_free(b);
}
return a;
}
/*
* Return FIRST sub k ( token_node )
*
* TJP 10/11/93 modified this so that token nodes that are actually
* ranges (T1..T2) work.
*/
#ifdef __USE_PROTOS
#else
TokNode *p;
int k;
#endif
{
set a;
#ifdef DBG_LL1
ExprString(p->token));
#endif
if (MR_AmbSourceSearch && (k-1) == 0) {
if (! set_nil(intersection)) {
};
} else {
}
};
};
if ( k-1 == 0 ) {
} else {
};
}
return a;
}
#ifdef __USE_PROTOS
#else
ActionNode *p;
int k;
#endif
{
set a;
/* MR11 */ if (k <= pred->k) {
/* MR11 */ return a;
/* MR11 */ };
/* MR11 */ };
/* it might be a good idea when doing an MR_AmbSourceSearch
to *not* look behind predicates under some circumstances
we'll look into that later
*/
return a;
}
/* A m b i g u i t y R e s o l u t i o n */
void
#ifdef __USE_PROTOS
#else
FILE *f;
int want_nls;
#endif
{
int i;
else fprintf(f, " ");
for (i=1; i<=CLL_k; i++)
{
if ( i>1 )
{
}
{
int e,m;
fprintf(f, "{");
for (m=1; m<=3; m++)
{
}
fprintf(f, " ... }");
}
}
fprintf(f, "\n");
}
static void
#ifdef __USE_PROTOS
#else
#endif
{
{
return;
}
((predicate->k > 1 &&
( predicate->k == 1 &&
{
/* MR9 Suppress annoying messages caused by our own clever(?) fix */
}
}
/*
* If delta is the set of ambiguous lookahead sequences, then make sure that
* the predicate(s) for productions alt1,alt2 cover the sequences in delta.
*
* For example,
* a : <<PRED1>>? (A B|A C)
* | b
* ;
* b : <<PRED2>>? A B
* | A C
* ;
*
* This should give a warning that (A C) predicts both productions and alt2
* does not have a predicate in the production that generates (A C).
*
* The warning detection is simple. Let delta = LOOK(alt1) intersection LOOK(alt2).
* Now, if ( delta set-difference context(predicates-for-alt1) ) != empty, then
* alt1 does not "cover" all ambiguous sequences.
*
* If ambig is nonempty, then ambig in LL(k) sense -> use tree info; else use fset
* info. Actually, sets are used only if k=1 for this grammar.
*/
static void
#ifdef __USE_PROTOS
#else
char *sub;
#endif
{
if ( !ParseWithPredicates ) return;
{
{
{
}
{
}
}
non_covered = NULL;
{
{
}
{
}
}
}
{
{
{
}
}
set_free( non_covered );
{
{
}
}
set_free( non_covered );
}
else fatal_internal("productions have no lookahead in predicate checking routine");
}
#ifdef __USE_PROTOS
#else
int inGuessBlock;
int jtype;
char *sub;
#endif
{
/* let antlr give the usual error message */
sub,
" These alts have ambig lookahead sequences resolved by a predicate for\n",
" the second choice. The second choice may not be reachable.\n",
" You may want to use a complementary predicate or rearrange the alts\n"
);
return;
};
/* first do the easy comparison. then do the hard one */
/* I'm not sure this code is reachable.
Predicates following a (...)+ or (...)* block are probably
considered validation predicates and therefore not
participate in the predication expression
*/
sub,
" are identical and have no resolving power\n");
} else {
"the predicates used to disambiguate",
" are identical and have no resolving power\n");
};
} else {
sub,
" are identical when compared without context and may have no\n",
" resolving power for some lookahead sequences.\n");
} else {
"the predicates used to disambiguate",
" are identical when compared without context and may have no\n",
" resolving power for some lookahead sequences.\n");
};
if (InfoP) {
fprintf(output," The predicate for choice 1 after expansion (but without context information):\n\n");
};
fprintf(output," The predicate for choice 2 after expansion (but without context information):\n\n");
};
};
sub,
" appears to \"cover\" the second predicate when compared without context.\n",
" The second predicate may have no resolving power for some lookahead sequences.\n");
} else {
"the predicate used to disambiguate the first choice of",
" appears to \"cover\" the second predicate when compared without context.\n",
" The second predicate may have no resolving power for some lookahead sequences.\n");
};
if (InfoP) {
fprintf(output," The predicate for choice 1 after expansion (but without context information):\n\n");
};
fprintf(output," The predicate for choice 2 after expansion (but without context information):\n\n");
};
};
};
};
}
static int totalOverflow=0; /* MR9 */
void
#ifdef __USE_PROTOS
#else
int jtype;
#endif
{
unsigned **ftbl;
char *sub = "";
long n;
int thisOverflow=0; /* MR9 */
long set_deg_value; /* MR10 */
long threshhold; /* MR10 */
/* These sets are used to constrain LL_k set, but are made CLL_k long anyway */
/* create constraint table and count number of possible ambiguities (use<=LL_k) */
{
/* MR10 */ if (n > 0) {
/* MR10 */ n *= set_deg_value;
/* MR10 */ } else {
/* MR10 */ n=LONG_MAX;
/* MR9 */ if (totalOverflow == 0) {
#if 0
/* MR10 comment this out because it just makes users worry */
#endif
/* MR9 */ };
/* MR9 */ thisOverflow++;
/* MR9 */ totalOverflow++;
/* MR9 */ };
/* MR10 */ } else {
/* MR10 */ n *= set_deg_value;
/* MR9 */ };
set_free(b);
}
switch ( jtype )
{
default : sub = ""; break;
}
/* If the block is marked as a compressed lookahead only block, then
* simply return; ambiguity warning is given only at warning level 2.
*/
{
if ( ParseWithPredicates )
{
{
}
&& WarningLevel>1 )
}
if ( WarningLevel>1 )
{
else
}
return;
}
/* if all sets have degree 1 for k<LL_k, then must be ambig upon >=1 permutation;
* don't bother doing full LL(k) analysis.
* (This "if" block handles the LL(1) case)
*/
n2 = 0;
/* here STARTS the special case in which the lookahead sets for alt1 and alt2
all have degree 1 for k<LL_k (including LL_k=1)
*/
{
/* TJP: added to fix the case where LL(1) and syntactic predicates didn't
* work. It now recognizes syntactic predicates, but does not like combo:
*/
{
if ( WarningLevel==1 )
{
return;
}
else
}
if ( ParseWithPredicates )
{
{
}
if ( WarningLevel == 1 &&
{
return;
}
}
/* end TJP (10/24/93) */
else
{
return;
};
/* because this is a special case in which both alt1 and alt2 have
lookahead sets of degree 1 for k<LL_k (including k=1) the linear
lookahead style search is adequate
*/
return;
}
/* here ENDS the special case in which the lookahead sets for alt1 and alt2
all have degree 1 for k<LL_k (including LL_k=1)
*/
/* in case tree construction runs out of memory, set info to make good err msg */
CurAmbigbtype = sub;
/* Don't do full LL(n) analysis if (...)? block because the block,
by definition, defies LL(n) analysis.
If guess (...)? block and ambiguous then don't remove anything from
2nd alt to resolve ambig.
Want to predict with LL sup 1 ( n ) decision not LL(n) if guess block
since it is much cheaper than LL(n). LL sup 1 ( n ) "covers" the LL(n)
lookahead information.
Note: LL(n) context cannot be computed for semantic predicates when
followed by (..)?.
If (..)? then we scream "AAAHHHH! No LL(n) analysis will help"
Is 'ambig' always defined if we enter this if? I hope so
because the 'ensure...()' func references it. TJP Nov 1993.
*/
/* THM MR30: Instead of using first_item_is_guess_block we use
first_item_is_guess_block_extra which will look inside a
loop block for a guess block. In other words ( (...)? )*.
If there is an ambiguity in this circumstance then we suppress
the normal methods of resolving ambiguities.
*/
{
if ( ParseWithPredicates )
{
{
}
if ( WarningLevel==1 &&
{
return;
}
}
if ( WarningLevel>1 )
{
else
}
return;
}
/* Not resolved with (..)? block. Do full LL(n) analysis */
/* ambig is the set of k-tuples truly in common between alt 1 and alt 2 */
/* MR11 VerifyAmbig once used fset destructively */
/* are all things in intersection really ambigs? */
{
Tree *v;
/* remove ambig permutation from 2nd alternative to resolve ambig;
* We want to compute the set of artificial tuples, arising from
* LL sup 1 (n) compression, that collide with real tuples from the
* 2nd alternative. This is the set of "special case" tuples that
* the LL sup 1 (n) decision template maps incorrectly.
*/
/* when generating code in genExpr() it does
*
* if ( genExprSets(j->fset) && !genExprTree(j->ftree)) {...
*
* Sooooo the j->ftree is the tree of alt2
* after removal of conflicts, not alt1 !
*/
{
/* at the top of ambig is an ALT node */
{
u = trm_perm(u, v); /* remove v FROM u */
}
/* fprintf(stderr, "after rm alt2:"); preorder(u); fprintf(stderr, "\n");*/
}
Tfree( t );
}
{
return;
}
/* TJP:
* At this point, we surely have an LL(k) ambiguity. Check for predicates
*/
if ( ParseWithPredicates )
{
{
}
if ( WarningLevel==1 &&
{
/* We found at least one pred for at least one of the alts;
* If warnings are low, just return.
*/
return;
}
/* else we're gonna give a warning */
}
/* end TJP addition */
else
if ( elevel == 3 )
{
} else {
};
}
/*
* analysis_point
*
* Called during analysis when the program needs to know what follows a
* guess clause (see the MR6 notes in the body).
*
* The alpha block of "(alpha)? beta" is not analyzed:
*   - for "(alpha)? beta" the result is the 1st node of the beta block;
*   - for a bare "(alpha)?" (which means "(alpha)? alpha") no second copy
*     of alpha is stored, so the guess block itself is handed back and its
*     junction tree is reused.
*
* Known limitation (from the MR6 notes): the special case "(alpha)? ()"
* has no rule reference or token node, so it is treated like a bare
* "(alpha)?" even though it should probably yield a pointer to "()".
*
* j -- the junction to examine.
*
* Returns the 1st node of the beta block if present, else j.
*/
Junction *
#ifdef __USE_PROTOS
analysis_point( Junction *j )
#else
analysis_point( j )
Junction *j;
#endif
{
the guess block became invisible at the analysis_point.
first_item_is_guess_block accepts any kind of node,
despite the fact that the formal is a junction. But
I don't want to have to change it all over the place
until I know it works.
*/
{
Junction *p;
{
{
continue;
}
{
}
{
return j;
}
/* MR6 */
/* MR6 A guess block is of the form "(alpha)? beta" or "(alpha)?". */
/* MR6 When beta is omitted (second form) this means "(alpha)? alpha". */
/* MR6 The program does not store another copy of alpha in this case. */
/* MR6 During analysis when the program needs to know what follows the */
/* MR6 guess clause. It calls this routine. */
/* MR6 */
/* MR6 If it is of the form "(alpha)? beta" it returns a pointer to beta.*/
/* MR6 */
/* MR6 If it is of the form "(alpha)?" it returns a pointer to the guess */
/* MR6 block itself thereby reusing the junction tree. */
/* MR6 */
/* MR6 It works by searching the "next in sequence" chain (skipping actions) */
/* MR6 searching for a RuleRef or Token node. (Those are the only 4 kinds */
/* MR6 of nodes: Junctions, RuleRef, Token, and Action.) */
/* MR6 */
/* MR6 This won't work for the special case "(alpha)? ()" because it has no */
/* MR6 rule references or token nodes. It eventually encounters a */
/* MR6 junction of type EndBlk or EndRule and says to its caller: nothing */
/* MR6 more here to analyze - must be of the form "(alpha)?". */
/* MR6 */
/* MR6 In the case of "(alpha)? ()" it should return a pointer to "()" */
/* MR6 */
/* MR6 I think. */
/* MR6 */
}; /* MR6 */
}
}
return j;
}
#ifdef __USE_PROTOS
#else
Junction *j;
int k;
int jtype;
int *max_k;
#endif
{
int savek;
/* C o m p u t e F I R S T s e t w i t h k l o o k a h e a d */
{
#if 0
if (p != p1junction) {
}
#endif
}
/* D e t e c t A m b i g u i t i e s */
*max_k = 1;
{
{
savek = k;
while ( !set_nil(a) )
{
/* if we have hit the max k requested, just give warning */
if ( j->approx==k ) {
}
if ( k==CLL_k )
{
#ifdef NOT_USED
*** int save_CLL_k = CLL_k;
*** /* Get new LL_k from interactive feature if enabled */
*** if ( AImode )
#endif
if (AlphaBetaTrace) MR_MaintainBackTrace=0;
break;
}
else
{
k++; /* attempt ambig alts again with more lookahead */
set_free(a);
}
}
set_free(a);
k = savek;
}
}
return fCurBlk;
}