da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin/***********************************************************************
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* This software is part of the ast package *
34f9b3eef6fdadbda0a846aa4d68691ac40eace5Roland Mainz* Copyright (c) 1986-2009 AT&T Intellectual Property *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* and is licensed under the *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Common Public License, Version 1.0 *
7c2fbfb345896881c631598ee3852ce9ce33fb07April Chin* by AT&T Intellectual Property *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* A copy of the License is available at *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Information and Software Systems Research *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* AT&T Research *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Florham Park NJ *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Glenn Fowler <gsf@research.att.com> *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin***********************************************************************/
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Glenn Fowler
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * AT&T Research
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * preprocessor and proto lexical analyzer fsm
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * define PROTOMAIN for standalone proto
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * lexical FSM encoding
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * derived from a standalone ansi cpp by Dennis Ritchie
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * modified for libpp by Glenn Fowler
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * fsm[] is initialized from fsminit[]. The encoding is blown out into
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * fsm[] for time efficiency. When in state `state', and one of the
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * characters in `ch' arrives, enter `nextstate'. States >= TERMINAL are
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * either final, or at least require special action. In fsminit[] there
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * is a line for each <state,charset,nextstate>. Early entries are
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * overwritten by later ones. C_XXX is the universal set and should
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * always be first. Some of the fsminit[] entries are templates for
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * groups of states. The OP entries trigger the state copies. States
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * above TERMINAL are represented in fsm[] as negative values. S_TOK and
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * S_TOKB encode the resulting token type in the upper bits. These actions
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * differ in that S_TOKB has a lookahead char.
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * fsm[] has three start states:
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * PROTO proto (ANSI -> K&R,C++,ANSI)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * QUICK standalone ppcpp()
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * TOKEN tokenizing pplex()
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * If the next state remains the same then the fsm[] transition value is 0.
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * MAX+1 is a power of 2 so that fsm[state][EOF==MAX+1] actually accesses
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * fsm[state+1][0] which is ~S_EOB for all states. This preserves the
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * power of 2 fsm[] row size for efficient array indexing. Thanks to
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * D. G. Korn for the last two observations. The pseudo non-terminal state
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * fsm[TERMINAL][state+1] is used to differentiate EOB from EOF.
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * The bit layout is:
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * TERM arg SPLICE next
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * 15 14-8 7 6-0
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * NOTE: these must be `control' characters for all native codesets
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * currently ok for {ascii,ebcdic1,ebcdic2,ebcdic3}
/*
 * copy(t,f): duplicate fsm[] row f into row t -- memcpy's (MAX+1) shorts
 * starting at column 1 of each row, then copies f's fsm[TERMINAL][state+1]
 * slot into t's
 * NOTE: t and f are each expanded twice, so pass side-effect-free arguments
 */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#define copy(t,f) (memcpy(&fsm[t][1],&fsm[f][1],(MAX+1)*sizeof(short)),fsm[TERMINAL][(t)+1]=fsm[TERMINAL][(f)+1])
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* proto start state */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin/* proto {do,else,extern,for,if,inline,return,static,typedef,va_start,void,while,NoN} */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* proto reserved {va_start} */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* proto reserved {return} */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* proto reserved {if} */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* proto reserved {while} */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* proto reserved {else} */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* proto reserved {inline} */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* proto reserved {do,for,void} */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* proto reserved {static} */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* proto reserved {extern} */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* proto reserved {typedef} */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* saw /, perhaps start of comment */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* saw / *, start of comment */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* saw the * possibly ending a comment */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* saw / in / * comment, possible malformed nest */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* saw / /, start of comment */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* saw / in / / comment, possible malformed nest */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* saw * in / /, possible malformed nest */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* normal identifier -- always a macro candidate */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* saw ., operator or dbl constant */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* saw .., possible ... */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* saw L (possible start of normal wide literal) */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* saw " or ' beginning literal */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* saw \ in literal */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* eat malformed numeric constant */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* eat malformed numeric fraction|exponent */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* saw white space, eat it up */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* quick template */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* copy QUICK to QUICK+1 through MAC0+1 */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* quick start state */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* grab non-macro tokens */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* grab numeric and invalid tokens */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* grab exponent token */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* saw *, grab possible bad comment terminator */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* saw L (possible start of wide string or first macro char) */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* macro candidate template */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* copy MAC0+1 to MAC0+2 through MACN */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* saw L (possible start of wide string or macro L) */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* macro hit template */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* copy HIT0+1 to HIT0+2 through HITN */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* saw L (possible start of wide literal) */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* (!PROTOMAIN COM1) saw /, perhaps start of comment or /= */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* normal start state */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* saw 0, possible oct|hex|dec|dbl constant */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* saw 0<oct>, oct constant */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* oct constant qualifier */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* saw 0 [xX], hex constant */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* hex constant qualifier */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* hex [eE][-+] botch */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* hex dbl fraction */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* optional hex dbl exponent sign */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* mandatory hex dbl exponent first digit */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* hex dbl exponent digits */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* hex dbl constant qualifier */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* saw <dec>, dec constant */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* dec constant qualifier */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* saw ., operator or dbl constant */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* dbl fraction */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* optional dbl exponent sign */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* mandatory dbl exponent first digit */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* dbl exponent digits */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* dbl constant qualifier */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* saw < starting include header */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* saw <binop><space> expecting = */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* 2-char ops */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* 3-char ops */
/*
 * character set strings
 * aln: digits, upper and lower case letters and '_' -- the first
 *      definition additionally contains '$' and '@'
 * spl: MARK, '?', '\\', '\r', CC_sub, 0 terminated
 * NOTE(review): aln is defined twice here; presumably the two definitions
 * are alternatives selected by a preprocessor conditional that is not
 * visible in this excerpt -- confirm
 */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinstatic char aln[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_$@";
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinstatic char spl[] = { MARK, '?', '\\', '\r', CC_sub, 0 };
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinstatic char aln[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_";
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * runtime FSM modifications
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * ppfsm(FSM_INIT,0) must be called first
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register int c;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register int n;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register int i;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin while (c = *s++)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin for (i = 0; i < TERMINAL; i++)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin while (c = *s++)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin for (i = 0; i < TERMINAL; i++)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin switch (n)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin for (i = 0; i < sizeof(fp->ch) && (c = fp->ch[i]); i++)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin switch (c)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin for (c = 0; c <= MAX; c++)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /*FALLTHROUGH*/
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin while (c = *s++)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * install splice special cases
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * and same-state non-terminal transitions (next state == current state)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin for (i = 0; i < TERMINAL; i++)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin while (c = *s++)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin for (c = 0; c <= MAX; c++)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if (rp[c] == i)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * default character types
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin while (c = *s++)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin while (c = *s++)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin do setsplice(c = *s++); while (c);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * trigraph map
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin t = "%<:";
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin for (i = 0; i < TERMINAL; i++)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin while (c = *s++)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin while (c = *s++) setsplice(c);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin fsm[COM1]['/'] = (pp.option & PLUSCOMMENT) ? COM5 : ~S_CHRB;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin while (c = *s++)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin for (i = 0; i < TERMINAL; i++)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin while (c = *s++)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin for (i = 0; i < TERMINAL; i++)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin fsm[LSH1][' '] = fsm[LSH1]['\t'] = s ? BIN1 : ~BACK(T_LSHIFT);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin fsm[RSH1][' '] = fsm[RSH1]['\t'] = s ? BIN1 : ~BACK(T_RSHIFT);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin else x = -1;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if (!*s) n += NMAC;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if (c = *s++)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if ((i = n) < HIT0)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if (n < MACN) n++;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin fsm[i][c] = n;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if (n < HITN) n++;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if (!*s) break;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin fsm[i][c] = n;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if (x >= 0)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if (fsm[i][c] < n)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin fsm[i][c] = n;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * file buffer refill
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * c is current input char
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin *((pp.in->nextchr = pp.in->buffer + PPBAKSIZ) - 1) = c;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin (pp.in->flags & IN_prototype) ? pppread(pp.in->nextchr) :
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if (c > 0)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if (pp.in->nextchr[c - 1] == '\n') pp.in->flags |= IN_newline;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if (c < 0)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin debug((-7, "refill(\"%s\") = %d = \"%-.*s%s\"", error_info.file, c, (c > 32 ? 32 : c), pp.in->nextchr, c > 32 ? "..." : ""));
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin sfprintf(sfstderr, "===== refill(\"%s\") = %d =====\n%s\n===== eob(\"%s\") =====\n", error_info.file, c, pp.in->nextchr, error_info.file);