/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1986-2009 AT&T Intellectual Property *
* and is licensed under the *
* Common Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* http://www.opensource.org/licenses/cpl1.0.txt *
* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* Glenn Fowler
* AT&T Research
*
* preprocessor and proto lexical analyzer fsm
* define PROTOMAIN for standalone proto
*/
#include "pplib.h"
#include "ppfsm.h"
/*
* lexical FSM encoding
* derived from a standalone ansi cpp by Dennis Ritchie
* modified for libpp by Glenn Fowler
*
* fsm[] is initialized from fsminit[]. The encoding is blown out into
* fsm[] for time efficiency. When in state state, and one of the
* characters in ch arrives, enter nextstate. States >= TERMINAL are
* either final, or at least require special action. In fsminit[] there
* is a line for each <state,charset,nextstate>. Early entries are
* overwritten by later ones. C_XXX is the universal set and should
* always be first. Some of the fsminit[] entries are templates for
* groups of states. The OP entries trigger the state copies. States
* above TERMINAL are represented in fsm[] as negative values. S_TOK and
* S_TOKB encode the resulting token type in the upper bits. These actions
* differ in that S_TOKB has a lookahead char.
*
* fsm[] has three start states:
*
* PROTO proto (ANSI -> K&R,C++,ANSI)
* QUICK standalone ppcpp()
* TOKEN tokenizing pplex()
*
* If the next state remains the same then the fsm[] transition value is 0.
* MAX+1 is a power of 2 so that fsm[state][EOF==MAX+1] actually accesses
* fsm[state+1][0] which is ~S_EOB for all states. This preserves the
* power of 2 fsm[] row size for efficient array indexing. Thanks to
* D. G. Korn for the last two observations. The pseudo non-terminal state
* fsm[TERMINAL][state+1] is used to differentiate EOB from EOF.
*
* The bit layout is:
*
* TERM arg SPLICE next
* 15 14-8 7 6-0
*/
/*
* NOTE: these must be `control' characters for all native codesets
* currently ok for {ascii,ebcdic1,ebcdic2,ebcdic3}
*/
/*
 * character set codes for fsminit[].ch[]
 * FSM_INIT expands each code into the transitions for a set of chars
 * instead of treating the code as a literal input char
 */
#define C_DEC 001 /* each char in dec[] */
#define C_EOF 002 /* sets the fsm[TERMINAL][state+1] entry only */
#define C_HEX 003 /* each char in hex[] */
#define C_LET 021 /* each char in let[] */
#define C_OCT 022 /* each char in oct[] */
#define C_XXX 023 /* every char 0..MAX, then the C_EOF action */
/*
 * a row with state==OP is an operation rather than a transition
 * nextstate selects the operation
 */
#define OP (-1)
#define END 0 /* stop processing fsminit[] */
#define COPY 1 /* copy state ch[0] to states ch[1] through ch[2] */
/*
 * copy the transitions of state f to state t
 * MAX+1 shorts are copied starting at column 1 so the copy covers columns
 * 1..MAX of the row plus column 0 of the following row
 * the matching fsm[TERMINAL][] entry is copied too
 * NOTE: t and f are not parenthesized in the memcpy() operands
 */
#define copy(t,f) (memcpy(&fsm[t][1],&fsm[f][1],(MAX+1)*sizeof(short)),fsm[TERMINAL][(t)+1]=fsm[TERMINAL][(f)+1])
/*
 * one <state,charset,nextstate> row of fsminit[]
 *
 * ch[] lists up to 4 input chars or C_* set codes; a 0 entry ends the
 * list early. when state==OP the row is an operation: nextstate is
 * the operation (END or COPY) and for COPY ch[] holds state numbers
 * { from, first-to, last-to }
 */
struct fsminit /* fsm initialization row */
{
int state; /* if in this state */
unsigned char ch[4]; /* and see one of these */
int nextstate; /* enter this state; values >=TERMINAL are stored as ~nextstate in fsm[] */
};
/*
 * fsm[] initialization rows, consumed in order by ppfsm(FSM_INIT,0)
 *
 * row order is significant:
 *	later rows overwrite earlier rows for the same <state,char>,
 *	so the C_XXX row for a state comes first
 *	an OP/COPY row copies the source state as built so far,
 *	rows that follow the COPY then specialize individual states
 *	the OP/END row must be last
 *
 * rows between #if !PROTOMAIN and #endif are omitted from standalone proto
 */
static struct fsminit fsminit[] =
{
/* proto start state */
{ PROTO, { C_XXX }, S_CHR, },
{ PROTO, { C_EOF }, S_EOF, },
{ PROTO, { C_DEC }, BAD1, },
{ PROTO, { '.' }, DOT, },
{ PROTO, { C_LET }, NID, },
{ PROTO, { 'L' }, LIT, },
{ PROTO, { 'd', 'e', 'f', 'i' }, RES1, },
{ PROTO, { 'r', 's', 't', 'v' }, RES1, },
{ PROTO, { 'w', 'N' }, RES1, },
{ PROTO, { '"', '\'' }, S_LITBEG, },
{ PROTO, { '/' }, COM1, },
{ PROTO, { '\n' }, S_NL, },
{ PROTO, { ' ','\t','\f','\v' }, WS1, },
/* proto {do,else,extern,for,if,inline,return,static,typedef,va_start,void,while,NoN} */
{ RES1, { C_XXX }, S_MACRO, },
{ RES1, { C_LET, C_DEC }, NID, },
{ RES1, { 'a' }, RES1a, },
{ RES1, { 'e' }, RES1e, },
{ RES1, { 'f' }, RES1f, },
{ RES1, { 'h' }, RES1h, },
{ RES1, { 'l' }, RES1l, },
{ RES1, { 'n' }, RES1n, },
{ RES1, { 'o' }, RES1o, },
{ RES1, { 't' }, RES1t, },
{ RES1, { 'x' }, RES1x, },
{ RES1, { 'y' }, RES1y, },
/* proto reserved {va_start} */
{ RES1a, { C_XXX }, S_RESERVED, },
{ RES1a, { C_LET, C_DEC }, NID, },
{ RES1a, { '_','s','t','a' }, RES1a, },
{ RES1a, { 'r' }, RES1a, },
/* proto reserved {return} */
{ RES1e, { C_XXX }, S_RESERVED, },
{ RES1e, { C_LET, C_DEC }, NID, },
{ RES1e, { 't','u','r','n' }, RES1e, },
/* proto reserved {if} */
{ RES1f, { C_XXX }, S_RESERVED, },
{ RES1f, { C_LET, C_DEC }, NID, },
/* proto reserved {while} */
{ RES1h, { C_XXX }, S_RESERVED, },
{ RES1h, { C_LET, C_DEC }, NID, },
{ RES1h, { 'i','l','e' }, RES1h, },
/* proto reserved {else} */
{ RES1l, { C_XXX }, S_RESERVED, },
{ RES1l, { C_LET, C_DEC }, NID, },
{ RES1l, { 's','e' }, RES1l, },
/* proto reserved {inline} */
{ RES1n, { C_XXX }, S_RESERVED, },
{ RES1n, { C_LET, C_DEC }, NID, },
{ RES1n, { 'l','i','n','e' }, RES1n, },
/* proto reserved {do,for,void} */
{ RES1o, { C_XXX }, S_RESERVED, },
{ RES1o, { C_LET, C_DEC }, NID, },
{ RES1o, { 'r','i','d','N' }, RES1o, },
/* proto reserved {static} */
{ RES1t, { C_XXX }, S_RESERVED, },
{ RES1t, { C_LET, C_DEC }, NID, },
{ RES1t, { 'a','t','i','c' }, RES1t, },
/* proto reserved {extern} */
{ RES1x, { C_XXX }, S_RESERVED, },
{ RES1x, { C_LET, C_DEC }, NID, },
{ RES1x, { 't','e','r','n' }, RES1x, },
/* proto reserved {typedef} */
{ RES1y, { C_XXX }, S_RESERVED, },
{ RES1y, { C_LET, C_DEC }, NID, },
{ RES1y, { 'p','e','d','f' }, RES1y, },
/* saw /, perhaps start of comment */
{ COM1, { C_XXX }, S_CHRB, },
{ COM1, { '*' }, COM2, },
#if PROTOMAIN
{ COM1, { '/' }, COM5, },
#endif
/* saw / *, start of comment */
{ COM2, { C_XXX }, COM2, },
{ COM2, { '\n', C_EOF }, S_COMMENT, },
{ COM2, { '/' }, COM4, },
{ COM2, { '*' }, COM3, },
{ COM2, { '#', ';', ')' }, QUAL(COM2), },
/* saw the * possibly ending a comment */
{ COM3, { C_XXX }, COM2, },
{ COM3, { '\n', C_EOF }, S_COMMENT, },
{ COM3, { '#', ';', ')' }, QUAL(COM2), },
{ COM3, { '*' }, COM3, },
{ COM3, { '/' }, S_COMMENT, },
/* saw / in / * comment, possible malformed nest */
{ COM4, { C_XXX }, COM2, },
{ COM4, { '*', '\n', C_EOF }, S_COMMENT, },
{ COM4, { '/' }, COM4, },
/* saw / /, start of comment */
{ COM5, { C_XXX }, COM5, },
{ COM5, { '\n', C_EOF }, S_COMMENT, },
{ COM5, { '/' }, COM6, },
{ COM5, { '*' }, COM7, },
/* saw / in / / comment, possible malformed nest */
{ COM6, { C_XXX }, COM5, },
{ COM6, { '*', '\n', C_EOF }, S_COMMENT, },
{ COM6, { '/' }, COM6, },
/* saw * in / /, possible malformed nest */
{ COM7, { C_XXX }, COM5, },
{ COM7, { '\n', C_EOF }, S_COMMENT, },
{ COM7, { '*' }, COM7, },
{ COM7, { '/' }, S_COMMENT, },
/* normal identifier -- always a macro candidate */
{ NID, { C_XXX }, S_MACRO, },
{ NID, { C_LET, C_DEC }, NID, },
/* saw ., operator or dbl constant */
{ DOT, { C_XXX }, S_CHRB, },
{ DOT, { '.' }, DOT2, },
{ DOT, { C_DEC }, BAD1, },
/* saw .., possible ... */
{ DOT2, { C_XXX }, BACK(T_INVALID), },
{ DOT2, { '.' }, KEEP(T_VARIADIC), },
/* saw L (possible start of normal wide literal) */
{ LIT, { C_XXX }, S_MACRO, },
{ LIT, { C_LET, C_DEC }, NID, },
{ LIT, { '"', '\'' }, QUAL(LIT1), },
/* saw " or ' beginning literal */
{ LIT1, { C_XXX }, LIT1, },
{ LIT1, { '"', '\'' }, S_LITEND, },
{ LIT1, { '\n', C_EOF }, S_LITEND, },
{ LIT1, { '\\' }, LIT2, },
/* saw \ in literal */
{ LIT2, { C_XXX }, S_LITESC, },
{ LIT2, { '\n', C_EOF }, S_LITEND, },
/* eat malformed numeric constant */
{ BAD1, { C_XXX }, BACK(T_INVALID), },
{ BAD1, { C_LET, C_DEC, '.' }, BAD1, },
{ BAD1, { 'e', 'E' }, BAD2, },
/* eat malformed numeric fraction|exponent */
{ BAD2, { C_XXX }, BACK(T_INVALID), },
{ BAD2, { C_LET, C_DEC, '.' }, BAD1, },
{ BAD2, { '+', '-' }, BAD1, },
/* saw white space, eat it up */
{ WS1, { C_XXX }, S_WS, },
{ WS1, { ' ', '\t' }, WS1, },
{ WS1, { '\f', '\v' }, S_VS, },
#if !PROTOMAIN
/* quick template */
{ QUICK, { C_XXX }, QTOK, },
{ QUICK, { C_EOF, MARK }, S_CHRB, },
{ QUICK, { C_LET, C_DEC }, QID, },
{ QUICK, { 'L' }, LIT0, },
{ QUICK, { '"', '\'' }, S_LITBEG, },
{ QUICK, { '/' }, S_CHRB, },
{ QUICK, { '*' }, QCOM, },
{ QUICK, { '#' }, SHARP1, },
{ QUICK, { '\n' }, S_NL, },
{ QUICK, { '\f', '\v' }, S_VS, },
/* copy QUICK to QUICK+1 through MAC0+1 */
{ OP, {QUICK,QUICK+1,MAC0+1}, COPY, },
/* quick start state */
{ QUICK, { C_EOF }, S_EOF, },
{ QUICK, { C_DEC }, QNUM, },
{ QUICK, { MARK }, QTOK, },
{ QUICK, { '/' }, COM1, },
{ QUICK, { ' ', '\t' }, QUICK, },
/* grab non-macro tokens */
{ QTOK, { C_DEC }, QNUM, },
/* grab numeric and invalid tokens */
{ QNUM, { C_LET, C_DEC, '.' }, QNUM, },
{ QNUM, { 'e', 'E' }, QEXP, },
/* grab exponent token */
{ QEXP, { C_LET, C_DEC, '.' }, QNUM, },
{ QEXP, { '+', '-' }, QNUM, },
/* saw *, grab possible bad comment terminator */
{ QCOM, { C_DEC }, QNUM, },
{ QCOM, { '/' }, S_COMMENT, },
/* saw L (possible start of wide string or first macro char) */
{ MAC0, { 'L' }, QID, },
{ MAC0, { '"', '\'' }, QUAL(LIT1), },
/* macro candidate template */
{ MAC0+1, { 'L' }, QID, },
/* copy MAC0+1 to MAC0+2 through MACN */
{ OP, {MAC0+1,MAC0+2,MACN}, COPY },
/* saw L (possible start of wide string or macro L) */
{ HIT0, { C_XXX }, S_MACRO, },
{ HIT0, { C_LET, C_DEC }, QID, },
{ HIT0, { '"', '\'' }, QUAL(LIT1), },
/* macro hit template */
{ HIT0+1, { C_XXX }, S_MACRO, },
{ HIT0+1, { C_LET, C_DEC }, QID, },
/* copy HIT0+1 to HIT0+2 through HITN */
{ OP, {HIT0+1,HIT0+2,HITN}, COPY },
/* saw L (possible start of wide literal) */
{ LIT0, { C_XXX }, S_MACRO, },
{ LIT0, { C_LET, C_DEC }, QID, },
{ LIT0, { '"', '\'' }, QUAL(LIT1), },
/* (!PROTOMAIN COM1) saw /, perhaps start of comment or /= */
{ COM1, { '=' }, KEEP(T_DIVEQ), },
/* normal start state */
{ TOKEN, { C_XXX }, S_HUH, },
{ TOKEN, { C_EOF }, S_EOF, },
{ TOKEN, { C_DEC }, DEC1, },
{ TOKEN, { '0' }, OCT1, },
{ TOKEN, { '.' }, DOT1, },
{ TOKEN, { C_LET }, NID, },
{ TOKEN, { 'L' }, LIT, },
{ TOKEN, { '"', '\'', '<' }, S_LITBEG, },
{ TOKEN, { '/' }, COM1, },
{ TOKEN, { '\n' }, S_NL, },
{ TOKEN, { ' ', '\t' }, WS1, },
{ TOKEN, { '\f', '\v' }, S_VS, },
{ TOKEN, { '#' }, SHARP1, },
{ TOKEN, { ':' }, COLON1, },
{ TOKEN, { '%' }, PCT1, },
{ TOKEN, { '&' }, AND1, },
{ TOKEN, { '*' }, STAR1, },
{ TOKEN, { '+' }, PLUS1, },
{ TOKEN, { '-' }, MINUS1, },
{ TOKEN, { '=' }, EQ1, },
{ TOKEN, { '!' }, NOT1, },
{ TOKEN, { '>' }, GT1, },
{ TOKEN, { '^' }, CIRC1, },
{ TOKEN, { '|' }, OR1, },
{ TOKEN, { '(', ')', '[', ']' }, S_CHR, },
{ TOKEN, { '{', '}', ',', ';' }, S_CHR, },
{ TOKEN, { '~', '?' }, S_CHR, },
/* saw 0, possible oct|hex|dec|dbl constant */
{ OCT1, { C_XXX }, BACK(T_DECIMAL), },
{ OCT1, { C_LET, C_DEC }, BAD1, },
{ OCT1, { C_OCT }, OCT2, },
{ OCT1, { 'e', 'E' }, DBL2, },
{ OCT1, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), },
{ OCT1, { 'x', 'X' }, HEX1, },
{ OCT1, { '.' }, DBL1, },
/* saw 0<oct>, oct constant */
{ OCT2, { C_XXX }, BACK(T_OCTAL), },
{ OCT2, { C_LET, C_DEC }, BAD1, },
{ OCT2, { C_OCT }, OCT2, },
{ OCT2, { 'e', 'E' }, DBL2, },
{ OCT2, { 'l', 'L', 'u', 'U' }, QUAL(OCT3), },
{ OCT2, { '.' }, DBL1, },
/* oct constant qualifier */
{ OCT3, { C_XXX }, BACK(T_OCTAL), },
{ OCT3, { C_LET, C_DEC, '.' }, BAD1, },
{ OCT3, { 'l', 'L', 'u', 'U' }, QUAL(OCT3), },
/* saw 0 [xX], hex constant */
{ HEX1, { C_XXX }, BACK(T_HEXADECIMAL), },
{ HEX1, { C_LET }, BAD1, },
{ HEX1, { C_HEX }, HEX1, },
{ HEX1, { 'e', 'E' }, HEX3, },
{ HEX1, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), },
{ HEX1, { '.' }, HEX4, },
{ HEX1, { 'p', 'P' }, HEX5, },
/* hex constant qualifier */
{ HEX2, { C_XXX }, BACK(T_HEXADECIMAL), },
{ HEX2, { C_LET, C_DEC, '.' }, BAD1, },
{ HEX2, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), },
/* hex [eE][-+] botch */
{ HEX3, { C_XXX }, BACK(T_HEXADECIMAL), },
{ HEX3, { C_LET, '.', '-', '+'},BAD1, },
{ HEX3, { C_HEX }, HEX1, },
{ HEX3, { 'e', 'E' }, HEX3, },
{ HEX3, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), },
/* hex dbl fraction */
{ HEX4, { C_XXX }, BACK(T_HEXDOUBLE), },
{ HEX4, { C_LET, '.' }, BAD1, },
{ HEX4, { C_HEX }, HEX4, },
{ HEX4, { 'p', 'P' }, HEX5, },
{ HEX4, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), },
/* optional hex dbl exponent sign */
{ HEX5, { C_XXX }, BACK(T_INVALID), },
{ HEX5, { C_LET, '.' }, BAD1, },
{ HEX5, { '+', '-' }, HEX6, },
{ HEX5, { C_DEC }, HEX7, },
/* mandatory hex dbl exponent first digit */
{ HEX6, { C_XXX }, BACK(T_INVALID), },
{ HEX6, { C_LET, '.' }, BAD1, },
{ HEX6, { C_DEC }, HEX7, },
/* hex dbl exponent digits */
{ HEX7, { C_XXX }, BACK(T_HEXDOUBLE), },
{ HEX7, { C_LET, '.' }, BAD1, },
{ HEX7, { C_DEC }, HEX7, },
{ HEX7, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), },
/* hex dbl constant qualifier */
{ HEX8, { C_XXX }, BACK(T_HEXDOUBLE), },
{ HEX8, { C_LET, '.' }, BAD1, },
{ HEX8, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), },
/* saw <dec>, dec constant */
{ DEC1, { C_XXX }, BACK(T_DECIMAL), },
{ DEC1, { C_LET }, BAD1, },
{ DEC1, { C_DEC }, DEC1, },
{ DEC1, { 'e', 'E' }, DBL2, },
{ DEC1, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), },
{ DEC1, { '.' }, DBL1, },
/* dec constant qualifier */
{ DEC2, { C_XXX }, BACK(T_DECIMAL), },
{ DEC2, { C_LET, C_DEC }, BAD1, },
{ DEC2, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), },
/* saw ., operator or dbl constant */
{ DOT1, { C_XXX }, S_CHRB, },
{ DOT1, { '.' }, DOT2, },
{ DOT1, { C_DEC }, DBL1, },
/* dbl fraction */
{ DBL1, { C_XXX }, BACK(T_DOUBLE), },
{ DBL1, { C_LET, '.' }, BAD1, },
{ DBL1, { C_DEC }, DBL1, },
{ DBL1, { 'e', 'E' }, DBL2, },
{ DBL1, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), },
/* optional dbl exponent sign */
{ DBL2, { C_XXX }, BACK(T_INVALID), },
{ DBL2, { C_LET, '.' }, BAD1, },
{ DBL2, { '+', '-' }, DBL3, },
{ DBL2, { C_DEC }, DBL4, },
/* mandatory dbl exponent first digit */
{ DBL3, { C_XXX }, BACK(T_INVALID), },
{ DBL3, { C_LET, '.' }, BAD1, },
{ DBL3, { C_DEC }, DBL4, },
/* dbl exponent digits */
{ DBL4, { C_XXX }, BACK(T_DOUBLE), },
{ DBL4, { C_LET, '.' }, BAD1, },
{ DBL4, { C_DEC }, DBL4, },
{ DBL4, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), },
/* dbl constant qualifier */
{ DBL5, { C_XXX }, BACK(T_DOUBLE), },
{ DBL5, { C_LET, '.' }, BAD1, },
{ DBL5, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), },
/* saw < starting include header */
{ HDR1, { C_XXX }, HDR1, },
{ HDR1, { '>', '\n', C_EOF }, S_LITEND, },
/* saw <binop><space> expecting = */
{ BIN1, { C_XXX }, S_HUH, },
{ BIN1, { ' ', '\t' }, BIN1, },
/* 2-char ops */
{ SHARP1, { C_XXX }, S_SHARP, },
{ PCT1, { C_XXX }, S_CHRB, },
{ PCT1, { '=' }, KEEP(T_MODEQ), },
{ AND1, { C_XXX }, S_CHRB, },
{ AND1, { '=' }, KEEP(T_ANDEQ), },
{ AND1, { '&' }, KEEP(T_ANDAND), },
{ STAR1, { C_XXX }, S_CHRB, },
{ STAR1, { '=' }, KEEP(T_MPYEQ), },
{ STAR1, { '/' }, S_COMMENT, },
{ PLUS1, { C_XXX }, S_CHRB, },
{ PLUS1, { '=' }, KEEP(T_ADDEQ), },
{ PLUS1, { '+' }, KEEP(T_ADDADD), },
{ MINUS1, { C_XXX }, S_CHRB, },
{ MINUS1, { '=' }, KEEP(T_SUBEQ), },
{ MINUS1, { '-' }, KEEP(T_SUBSUB), },
{ MINUS1, { '>' }, KEEP(T_PTRMEM), },
{ COLON1, { C_XXX }, S_CHRB, },
{ COLON1, { '=', '>' }, S_HUH, },
{ LT1, { C_XXX }, S_CHRB, },
{ LT1, { '=' }, KEEP(T_LE), },
{ LT1, { '<' }, LSH1, },
{ EQ1, { C_XXX }, S_CHRB, },
{ EQ1, { '=' }, KEEP(T_EQ), },
{ NOT1, { C_XXX }, S_CHRB, },
{ NOT1, { '=' }, KEEP(T_NE), },
{ GT1, { C_XXX }, S_CHRB, },
{ GT1, { '=' }, KEEP(T_GE), },
{ GT1, { '>' }, RSH1, },
{ CIRC1, { C_XXX }, S_CHRB, },
{ CIRC1, { '=' }, KEEP(T_XOREQ), },
{ OR1, { C_XXX }, S_CHRB, },
{ OR1, { '=' }, KEEP(T_OREQ), },
{ OR1, { '|' }, KEEP(T_OROR), },
/* 3-char ops */
{ ARROW1, { C_XXX }, BACK(T_PTRMEM), },
{ ARROW1, { '*' }, KEEP(T_PTRMEMREF), },
{ LSH1, { C_XXX }, BACK(T_LSHIFT), },
{ LSH1, { '=' }, KEEP(T_LSHIFTEQ), },
{ RSH1, { C_XXX }, BACK(T_RSHIFT), },
{ RSH1, { '=' }, KEEP(T_RSHIFTEQ), },
#endif
/* end */
{ OP, { 0 }, END, }
};
/*
 * the transition table: fsm[state][char]
 * row TERMINAL is the pseudo state described in the encoding notes
 * filled in by ppfsm(FSM_INIT,0)
 */
short fsm[TERMINAL+1][MAX+1];
/*
 * trigraph map indexed by char, entries are assigned by
 * ppfsm(FSM_INIT,0) when !PROTOMAIN
 */
char trigraph[MAX+1];
/*
 * spl: chars that get the splice special case in every state
 * aln: digits, then letters and the other default identifier chars
 *	(standalone proto also accepts $ and @)
 */
#if PROTOMAIN
static char spl[] = { '\\', '\r', 0 };
static char aln[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_$@";
#else
static char spl[] = { MARK, '?', '\\', '\r', CC_sub, 0 };
static char aln[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_";
#endif
/* C_LET expansion: aln[] with the 10 leading digits skipped */
static char* let = &aln[10];
/* C_HEX expansion; the decimal and octal digit sets are suffixes of it */
static char hex[] = "fedcbaFEDCBA9876543210";
/* C_DEC expansion: "9876543210" */
static char* dec = &hex[12];
/* C_OCT expansion: "76543210" */
static char* oct = &hex[14];
/*
* runtime FSM modifications
* ppfsm(FSM_INIT,0) must be called first
*/
void
ppfsm(int op, register char* s)
{
register int c;
register int n;
register int i;
register short* rp;
register struct fsminit* fp;
#if !PROTOMAIN
char* t;
int x;
#endif
switch (op)
{
#if !PROTOMAIN
case FSM_IDADD:
while (c = *s++)
if (!ppisid(c))
{
if (fsm[TOKEN][c] == ~S_HUH)
{
setid(c);
for (i = 0; i < TERMINAL; i++)
fsm[i][c] = IDSTATE(fsm[i]['_']);
}
else error(2, "%c: cannot add to identifier set", c);
}
break;
case FSM_IDDEL:
while (c = *s++)
if (ppisid(c))
{
clrid(c);
for (i = 0; i < TERMINAL; i++)
fsm[i][c] = ~S_HUH;
}
break;
#endif
case FSM_INIT:
for (fp = fsminit;; fp++)
{
if ((n = fp->nextstate) >= TERMINAL) n = ~n;
if (fp->state == OP)
{
#if !PROTOMAIN
switch (n)
{
case COPY:
c = fp->ch[0];
n = fp->ch[2];
for (i = fp->ch[1]; i <= n; i++)
copy(i, c);
continue;
default:
break;
}
#endif
break;
}
rp = fsm[fp->state];
for (i = 0; i < sizeof(fp->ch) && (c = fp->ch[i]); i++)
{
switch (c)
{
case C_XXX:
for (c = 0; c <= MAX; c++)
rp[c] = n;
/*FALLTHROUGH*/
case C_EOF:
fsm[TERMINAL][fp->state+1] = n < 0 ? ~n : n;
continue;
case C_LET:
s = let;
break;
case C_HEX:
s = hex;
break;
case C_DEC:
s = dec;
break;
case C_OCT:
s = oct;
break;
default:
rp[c] = n;
continue;
}
while (c = *s++)
rp[c] = n;
}
}
/*
* install splice special cases
* and same non-terminal transitions
*/
for (i = 0; i < TERMINAL; i++)
{
rp = fsm[i];
s = spl;
while (c = *s++)
if (c != MARK || !INCOMMENT(rp))
{
if (rp[c] >= 0) rp[c] = ~rp[c];
rp[c] &= ~SPLICE;
}
rp[EOB] = ~S_EOB;
for (c = 0; c <= MAX; c++)
if (rp[c] == i)
rp[c] = 0;
}
fsm[TERMINAL][0] = ~S_EOB;
#if !PROTOMAIN
/*
* default character types
*/
s = let;
while (c = *s++)
setid(c);
s = dec;
while (c = *s++)
setdig(c);
s = spl;
do setsplice(c = *s++); while (c);
/*
* trigraph map
*/
trigraph['='] = '#';
trigraph['('] = '[';
trigraph['/'] = '\\';
trigraph[')'] = ']';
trigraph['\''] = '^';
trigraph['<'] = '{';
trigraph['!'] = '|';
trigraph['>'] = '}';
trigraph['-'] = '~';
#endif
break;
#if !PROTOMAIN
case FSM_PLUSPLUS:
if (pp.option & PLUSPLUS)
{
fsm[COLON1][':'] = ~KEEP(T_SCOPE);
fsm[DOT1]['*'] = ~KEEP(T_DOTREF);
fsm[MINUS1]['>'] = ARROW1;
fsm[COM1]['/'] = COM5;
t = "%<:";
for (i = 0; i < TERMINAL; i++)
{
rp = fsm[i];
if (!INCOMMENT(rp) && !INQUOTE(rp))
{
s = t;
while (c = *s++)
{
if (rp[c] > 0) rp[c] = ~rp[c];
else if (!rp[c]) rp[c] = ~i;
rp[c] &= ~SPLICE;
}
}
}
s = t;
while (c = *s++) setsplice(c);
}
else
{
fsm[COLON1][':'] = ~S_CHRB;
fsm[DOT1]['*'] = ~S_CHRB;
fsm[MINUS1]['>'] = ~KEEP(T_PTRMEM);
fsm[COM1]['/'] = (pp.option & PLUSCOMMENT) ? COM5 : ~S_CHRB;
}
break;
#if COMPATIBLE
case FSM_COMPATIBILITY:
if (pp.state & COMPATIBILITY)
{
fsm[HEX1]['e'] = HEX1;
fsm[HEX1]['E'] = HEX1;
fsm[QNUM]['e'] = QNUM;
fsm[QNUM]['E'] = QNUM;
fsm[QNUM]['u'] = ~QUAL(QNUM);
fsm[QNUM]['U'] = ~QUAL(QNUM);
}
else
{
fsm[HEX1]['e'] = HEX3;
fsm[HEX1]['E'] = HEX3;
fsm[QNUM]['e'] = QEXP;
fsm[QNUM]['E'] = QEXP;
fsm[QNUM]['u'] = QNUM;
fsm[QNUM]['U'] = QNUM;
}
break;
#endif
case FSM_QUOTADD:
while (c = *s++)
if (fsm[TOKEN][c] == ~S_HUH)
for (i = 0; i < TERMINAL; i++)
fsm[i][c] = fsm[i]['"'];
else error(2, "%c: cannot add to quote set", c);
break;
case FSM_QUOTDEL:
while (c = *s++)
if (c != '"' && fsm[TOKEN][c] == fsm[TOKEN]['"'])
for (i = 0; i < TERMINAL; i++)
fsm[i][c] = fsm[i]['_'];
break;
case FSM_OPSPACE:
n = s ? BIN1 : ~S_CHRB;
fsm[COM1][' '] = fsm[COM1]['\t'] = n;
fsm[AND1][' '] = fsm[AND1]['\t'] = n;
fsm[STAR1][' '] = fsm[STAR1]['\t'] = n;
fsm[PCT1][' '] = fsm[PCT1]['\t'] = n;
fsm[PLUS1][' '] = fsm[PLUS1]['\t'] = n;
fsm[MINUS1][' '] = fsm[MINUS1]['\t'] = n;
fsm[CIRC1][' '] = fsm[CIRC1]['\t'] = n;
fsm[OR1][' '] = fsm[OR1]['\t'] = n;
fsm[LSH1][' '] = fsm[LSH1]['\t'] = s ? BIN1 : ~BACK(T_LSHIFT);
fsm[RSH1][' '] = fsm[RSH1]['\t'] = s ? BIN1 : ~BACK(T_RSHIFT);
break;
case FSM_MACRO:
if (pp.truncate && strlen(s) >= pp.truncate)
{
x = s[pp.truncate];
s[pp.truncate] = 0;
}
else x = -1;
i = MAC0 + ((c = *s++) != 'L');
if ((n = fsm[QUICK][c]) != (i + NMAC))
{
n = i;
if (!*s) n += NMAC;
}
if (fsm[QUICK][c] != n)
fsm[QUICK][c] = fsm[QCOM][c] = fsm[QTOK][c] = n;
if (c = *s++)
{
for (;;)
{
if ((i = n) < HIT0)
{
if (n < MACN) n++;
if (!*s)
{
n += NMAC;
break;
}
if (fsm[i][c] < HIT0)
fsm[i][c] = n;
if (fsm[i + NMAC][c] < HIT0)
fsm[i + NMAC][c] = n;
}
else
{
if (n < HITN) n++;
if (!*s) break;
if (fsm[i][c] < HIT0)
{
n -= NMAC;
fsm[i][c] = n;
}
}
c = *s++;
}
if (x >= 0)
{
*s = x;
for (n = CHAR_MIN; n <= CHAR_MAX; n++)
if (ppisidig(n))
fsm[HITN][n] = HITN;
n = HITN;
}
if (fsm[i][c] < n)
fsm[i][c] = n;
if (i < HIT0 && fsm[i + NMAC][c] < n)
fsm[i + NMAC][c] = n;
}
break;
#endif
}
}
#if !PROTOMAIN
/*
 * file buffer refill
 * c is current input char
 *
 * unless IN_eof is already set, c is saved in the byte just before the
 * buffer data and up to PPBUFSIZ bytes are read in after it
 * on return pp.in->nextchr points to the new data, which is 0 terminated
 *
 * NOTE(review): if error level 3 does not return, the c = 0 recovery
 * after the "read error" message is never reached -- confirm
 */
void
refill(register int c)
{
	/* substitute input used to close a pending IN_c block at eof */
	static char ket[] = { 0, '}', '\n', 0 };

	if (!(pp.in->flags & IN_eof))
	{
		pp.in->nextchr = pp.in->buffer + PPBAKSIZ;
		pp.in->nextchr[-1] = c;
#if PROTOTYPE
		if (pp.in->flags & IN_prototype)
			c = pppread(pp.in->nextchr);
		else
#endif
		c = read(pp.in->fd, pp.in->nextchr, PPBUFSIZ);
	}
	else
	{
		/* already at eof: back up one char and report an empty buffer */
		pp.in->nextchr--;
		c = 0;
	}
	if (c <= 0)
	{
		if (c < 0)
		{
			error(ERROR_SYSTEM|3, "read error");
			c = 0;
		}
		else if ((pp.in->flags ^ pp.in->prev->flags) & IN_c)
		{
			/* IN_c differs from the previous input level */
			pp.in->flags ^= IN_c;
			pp.in->nextchr = ket + 1;
			c = 2;
		}
		pp.in->flags |= IN_eof;
	}
	else
	{
		/* remember whether the data ends on a line boundary */
		if (pp.in->nextchr[c - 1] != '\n')
			pp.in->flags &= ~IN_newline;
		else
			pp.in->flags |= IN_newline;
#if PROTOTYPE
		if (!(pp.in->flags & IN_prototype))
#endif
		/* a short read on a regular file means this was the last buffer */
		if ((pp.in->flags & IN_regular) && c < PPBUFSIZ)
		{
			pp.in->flags |= IN_eof;
			close(pp.in->fd);
			pp.in->fd = -1;
		}
	}
#if CHECKPOINT
	pp.in->buflen = c;
#endif
	pp.in->nextchr[c] = 0;
	debug((-7, "refill(\"%s\") = %d = \"%-.*s%s\"", error_info.file, c, (c > 32 ? 32 : c), pp.in->nextchr, c > 32 ? "..." : ""));
	if (pp.test & 0x0080)
		sfprintf(sfstderr, "===== refill(\"%s\") = %d =====\n%s\n===== eob(\"%s\") =====\n", error_info.file, c, pp.in->nextchr, error_info.file);
}
#endif