/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1986-2009 AT&T Intellectual Property *
* and is licensed under the *
* Common Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* Glenn Fowler
* AT&T Research
*
* preprocessor and proto lexical analyzer fsm
* define PROTOMAIN for standalone proto
*/
#include "pplib.h"
#include "ppfsm.h"
/*
* lexical FSM encoding
* derived from a standalone ansi cpp by Dennis Ritchie
* modified for libpp by Glenn Fowler
*
* fsm[] is initialized from fsminit[]. The encoding is blown out into
* fsm[] for time efficiency. When in state `state', and one of the
* characters in `ch' arrives, enter `nextstate'. States >= TERMINAL are
* either final, or at least require special action. In fsminit[] there
* is a line for each <state,charset,nextstate>. Early entries are
* overwritten by later ones. C_XXX is the universal set and should
* always be first. Some of the fsminit[] entries are templates for
* groups of states. The OP entries trigger the state copies. States
* above TERMINAL are represented in fsm[] as negative values. S_TOK and
* S_TOKB encode the resulting token type in the upper bits. These actions
* differ in that S_TOKB has a lookahead char.
*
* fsm[] has three start states:
*
* PROTO proto (ANSI -> K&R,C++,ANSI)
* QUICK standalone ppcpp()
* TOKEN tokenizing pplex()
*
* If the next state remains the same then the fsm[] transition value is 0.
* MAX+1 is a power of 2 so that fsm[state][EOF==MAX+1] actually accesses
* fsm[state+1][0] which is ~S_EOB for all states. This preserves the
* power of 2 fsm[] row size for efficient array indexing. Thanks to
* D. G. Korn for the last two observations. The pseudo non-terminal state
* fsm[TERMINAL][state+1] is used to differentiate EOB from EOF.
*
* The bit layout is:
*
* TERM arg SPLICE next
* 15 14-8 7 6-0
*/
/*
* NOTE: these must be `control' characters for all native codesets;
* currently ok for {ascii,ebcdic1,ebcdic2,ebcdic3}
*/
/*
 * NOTE(review): END is not referenced by any code visible in this view;
 * presumably it terminates a list in the fsminit[] entries -- confirm.
 */
#define END 0
/*
 * copy(t,f): replicate state row f into state row t.
 *
 * Copies MAX+1 shorts starting at column 1 of fsm[f] to column 1 of
 * fsm[t] (column 0 is not part of the memcpy), then copies the
 * per-state slot fsm[TERMINAL][(f)+1] to fsm[TERMINAL][(t)+1].
 * The value of the expression is the value of that last assignment.
 *
 * t and f are each evaluated twice, so arguments with side effects
 * must not be passed.
 */
#define copy(t,f) (memcpy(&fsm[t][1],&fsm[f][1],(MAX+1)*sizeof(short)),fsm[TERMINAL][(t)+1]=fsm[TERMINAL][(f)+1])
/*
 * NOTE(review): the declarators and the entries of the two brace groups
 * below are not visible in this view; only the braces, the preprocessor
 * guards and the section comments remain. Presumably the first group is
 * the entry type and the second is the fsminit[] table of
 * <state,charset,nextstate> lines described in the file header comment
 * -- confirm against the full file before editing.
 *
 * The section comments are kept in table order. The file header comment
 * says that early entries are overwritten by later ones, so the order of
 * the sections is significant.
 */
{
};
{
/* proto start state */
/* proto {do,else,extern,for,if,inline,return,static,typedef,va_start,void,while,NoN} */
/* proto reserved {va_start} */
/* proto reserved {return} */
/* proto reserved {if} */
/* proto reserved {while} */
/* proto reserved {else} */
/* proto reserved {inline} */
/* proto reserved {do,for,void} */
/* proto reserved {static} */
/* proto reserved {extern} */
/* proto reserved {typedef} */
/* saw /, perhaps start of comment */
#if PROTOMAIN
#endif
/* saw / *, start of comment */
/* saw the * possibly ending a comment */
/* saw / in / * comment, possible malformed nest */
/* saw / /, start of comment */
/* saw / in / / comment, possible malformed nest */
/* saw * in / /, possible malformed nest */
/* normal identifier -- always a macro candidate */
/* saw ., operator or dbl constant */
/* saw .., possible ... */
/* saw L (possible start of normal wide literal) */
/* saw " or ' beginning literal */
/* saw \ in literal */
/* eat malformed numeric constant */
/* eat malformed numeric fraction|exponent */
/* saw white space, eat it up */
#if !PROTOMAIN
/* quick template */
/* copy QUICK to QUICK+1 through MAC0+1 */
/* quick start state */
/* grab non-macro tokens */
/* grab numeric and invalid tokens */
/* grab exponent token */
/* saw *, grab possible bad comment terminator */
/* saw L (possible start of wide string or first macro char) */
/* macro candidate template */
/* copy MAC0+1 to MAC0+2 through MACN */
/* saw L (possible start of wide string or macro L) */
/* macro hit template */
/* copy HIT0+1 to HIT0+2 through HITN */
/* saw L (possible start of wide literal) */
/* (!PROTOMAIN COM1) saw /, perhaps start of comment or /= */
/* normal start state */
/* saw 0, possible oct|hex|dec|dbl constant */
/* saw 0<oct>, oct constant */
/* oct constant qualifier */
/* saw 0 [xX], hex constant */
/* hex constant qualifier */
/* hex [eE][-+] botch */
/* hex dbl fraction */
/* optional hex dbl exponent sign */
/* mandatory hex dbl exponent first digit */
/* hex dbl exponent digits */
/* hex dbl constant qualifier */
/* saw <dec>, dec constant */
/* dec constant qualifier */
/* saw ., operator or dbl constant */
/* dbl fraction */
/* optional dbl exponent sign */
/* mandatory dbl exponent first digit */
/* dbl exponent digits */
/* dbl constant qualifier */
/* saw < starting include header */
/* saw <binop><space> expecting = */
/* 2-char ops */
/* 3-char ops */
#endif
/* end */
};
#if PROTOMAIN
#else
#endif
/*
* runtime FSM modifications
* ppfsm(FSM_INIT,0) must be called first
*/
/*
 * Dispatch on op to initialize or adjust the lexer transition table.
 *
 * NOTE(review): the declarator line (function name and parameter list)
 * is missing from this view, as are several conditions and loop bodies.
 * The visible body switches on `op' and walks `s' as a 0-terminated
 * character list -- confirm the signature against the full file.
 * Comments below describe only what the visible lines establish.
 */
void
{
/* c: current character, also used as a table column index */
register int c;
/* n: transition value / next state being installed */
register int n;
/* i: state (row) index */
register int i;
/* rp: row pointer; NOTE(review): its assignment is not visible here */
register short* rp;
#if !PROTOMAIN
/* t: fixed character list used by FSM_PLUSPLUS */
char* t;
/* x: saved character restored into *s by FSM_MACRO, or -1 for none */
int x;
#endif
switch (op)
{
#if !PROTOMAIN
case FSM_IDADD:
/*
 * for each character of s that ppisid() does not already accept:
 * mark it with setid() and visit every state below TERMINAL
 * (the per-state statement is not visible in this view)
 */
while (c = *s++)
if (!ppisid(c))
{
{
setid(c);
for (i = 0; i < TERMINAL; i++)
}
}
break;
case FSM_IDDEL:
/*
 * inverse of FSM_IDADD: for each character of s that ppisid()
 * accepts, clear it with clrid() and visit every state below
 * TERMINAL (the per-state statement is not visible in this view)
 */
while (c = *s++)
if (ppisid(c))
{
clrid(c);
for (i = 0; i < TERMINAL; i++)
}
break;
#endif
case FSM_INIT:
/*
 * NOTE(review): the loop header and the statements that load
 * i, n, c and rp are not visible in this view
 */
{
{
#if !PROTOMAIN
switch (n)
{
case COPY:
/* copy(t,f) with t=i, f=c: row c is replicated into row i */
copy(i, c);
continue;
default:
break;
}
#endif
break;
}
{
switch (c)
{
case C_XXX:
/* universal set: every column 0..MAX of this row gets n */
for (c = 0; c <= MAX; c++)
rp[c] = n;
/*FALLTHROUGH*/
case C_EOF:
/* nothing (more) to install for this entry */
continue;
case C_LET:
/* character class: expand via the let[] list below */
s = let;
break;
case C_HEX:
/* character class: expand via the hex[] list below */
s = hex;
break;
case C_DEC:
/* character class: expand via the dec[] list below */
s = dec;
break;
case C_OCT:
/* character class: expand via the oct[] list below */
s = oct;
break;
default:
/* plain character: install the single transition */
rp[c] = n;
continue;
}
/* install n for every character of the selected 0-terminated list */
while (c = *s++)
rp[c] = n;
}
}
/*
* install splice special cases
* and same non-terminal transitions
*/
for (i = 0; i < TERMINAL; i++)
{
/* NOTE(review): the body of the spl[] loop is not visible in this view */
s = spl;
while (c = *s++)
{
}
/*
 * a transition whose value equals the current state index i is
 * stored as 0 (the file header comment says 0 means the next state
 * remains the same); NOTE(review): rp presumably addresses row i,
 * but that assignment is not visible -- confirm
 */
for (c = 0; c <= MAX; c++)
if (rp[c] == i)
rp[c] = 0;
}
#if !PROTOMAIN
/*
* default character types
*/
/* every character of let[] is an identifier character */
s = let;
while (c = *s++)
setid(c);
/* every character of dec[] is a digit */
s = dec;
while (c = *s++)
setdig(c);
/*
 * the loop test comes after the call, so setsplice() is also applied
 * to the terminating 0 of spl[]
 */
s = spl;
do setsplice(c = *s++); while (c);
/*
* trigraph map
*/
/* NOTE(review): the trigraph map code itself is not visible in this view */
#endif
break;
#if !PROTOMAIN
case FSM_PLUSPLUS:
/*
 * NOTE(review): the condition selecting between the two branches
 * below is not visible in this view
 */
{
/* characters given special handling in this branch */
t = "%<:";
for (i = 0; i < TERMINAL; i++)
{
{
/* per-character table update for state i (body not visible) */
s = t;
while (c = *s++)
{
}
}
}
/* each character of t is also marked with setsplice() */
s = t;
while (c = *s++) setsplice(c);
}
else
{
}
break;
#if COMPATIBLE
case FSM_COMPATIBILITY:
/* NOTE(review): condition and both branch bodies are not visible in this view */
{
}
else
{
}
break;
#endif
case FSM_QUOTADD:
/*
 * visit every state below TERMINAL for each character of s
 * NOTE(review): the per-state statement is not visible in this view
 */
while (c = *s++)
for (i = 0; i < TERMINAL; i++)
break;
case FSM_QUOTDEL:
/*
 * visit every state below TERMINAL for each character of s
 * NOTE(review): the per-state statement is not visible in this view
 */
while (c = *s++)
for (i = 0; i < TERMINAL; i++)
break;
case FSM_OPSPACE:
/* NOTE(review): no statements for this case are visible in this view */
break;
case FSM_MACRO:
/*
 * walk the characters of s, installing transitions through the
 * MAC0..MACN / HIT0..HITN state ranges
 * NOTE(review): the condition and the branch that saves a character
 * into x are not visible in this view
 */
{
}
/* nothing saved: the restore step near the end is skipped */
else x = -1;
/* first character: start in MAC0 for 'L', MAC0+1 for anything else */
i = MAC0 + ((c = *s++) != 'L');
{
n = i;
/* single-character list: offset the target state by NMAC */
if (!*s) n += NMAC;
}
if (c = *s++)
{
for (;;)
{
if ((i = n) < HIT0)
{
/* state below HIT0: step to the next state, capped at MACN */
if (n < MACN) n++;
if (!*s)
{
/* last character: offset the target by NMAC and stop */
n += NMAC;
break;
}
fsm[i][c] = n;
}
else
{
/* state at or above HIT0: step to the next state, capped at HITN */
if (n < HITN) n++;
/* last character: stop with n as the final target */
if (!*s) break;
{
/* more characters follow: drop back by NMAC */
n -= NMAC;
fsm[i][c] = n;
}
}
c = *s++;
}
if (x >= 0)
{
/* put the saved character back into the caller's buffer */
*s = x;
if (ppisidig(n))
n = HITN;
}
/* only raise the installed transition, never lower it */
if (fsm[i][c] < n)
fsm[i][c] = n;
}
break;
#endif
}
}
#if !PROTOMAIN
/*
* file buffer refill
* c is current input char
*/
/*
 * Refill the current input buffer; on entry c is the current input
 * character.
 *
 * NOTE(review): most of the body (branch conditions, the read itself
 * and the buffer bookkeeping) is missing from this view. The comments
 * below describe only what the remaining lines establish.
 */
void
refill(register int c)
{
/* NOTE(review): the condition guarding these two branches is not visible */
{
c = 0;
}
else
{
/* NOTE(review): the right-hand side of this assignment is not visible */
c =
#if PROTOTYPE
#endif
}
if (c > 0)
{
#if PROTOTYPE
#endif
{
}
}
else
{
/* a negative c is clamped to 0 */
if (c < 0)
{
c = 0;
}
/* NOTE(review): the condition guarding this block is not visible */
{
c = 2;
}
}
#if CHECKPOINT
#endif
/*
 * trace: prints the file name, c, and at most 32 characters of
 * pp.in->nextchr (c bounds the printed length), followed by "..." when
 * c exceeds 32
 */
debug((-7, "refill(\"%s\") = %d = \"%-.*s%s\"", error_info.file, c, (c > 32 ? 32 : c), pp.in->nextchr, c > 32 ? "..." : ""));
/*
 * dump: writes all of pp.in->nextchr to sfstderr via %s, so the buffer
 * must be 0-terminated at this point
 * NOTE(review): any guard around this line is not visible in this view
 */
sfprintf(sfstderr, "===== refill(\"%s\") = %d =====\n%s\n===== eob(\"%s\") =====\n", error_info.file, c, pp.in->nextchr, error_info.file);
}
#endif