ppfsm.c revision 1
1N/A/***********************************************************************
1N/A* *
1N/A* This software is part of the ast package *
1N/A* Copyright (c) 1986-2010 AT&T Intellectual Property *
1N/A* and is licensed under the *
1N/A* Common Public License, Version 1.0 *
1N/A* by AT&T Intellectual Property *
1N/A* *
1N/A* A copy of the License is available at *
1N/A* http://www.opensource.org/licenses/cpl1.0.txt *
1N/A* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
1N/A* *
1N/A* Information and Software Systems Research *
1N/A* AT&T Research *
1N/A* Florham Park NJ *
1N/A* *
1N/A* Glenn Fowler <gsf@research.att.com> *
1N/A* *
1N/A***********************************************************************/
1N/A#pragma prototyped
1N/A/*
1N/A * Glenn Fowler
1N/A * AT&T Research
1N/A *
1N/A * preprocessor and proto lexical analyzer fsm
1N/A * define PROTOMAIN for standalone proto
1N/A */
1N/A
1N/A#include "pplib.h"
1N/A#include "ppfsm.h"
1N/A
1N/A/*
1N/A * lexical FSM encoding
1N/A * derived from a standalone ansi cpp by Dennis Ritchie
1N/A * modified for libpp by Glenn Fowler
1N/A *
1N/A * fsm[] is initialized from fsminit[]. The encoding is blown out into
1N/A * fsm[] for time efficiency. When in state state, and one of the
1N/A * characters in ch arrives, enter nextstate. States >= TERMINAL are
1N/A * either final, or at least require special action. In fsminit[] there
1N/A * is a line for each <state,charset,nextstate>. Early entries are
1N/A * overwritten by later ones. C_XXX is the universal set and should
1N/A * always be first. Some of the fsminit[] entries are templates for
1N/A * groups of states. The OP entries trigger the state copies. States
1N/A * >= TERMINAL are stored in fsm[] complemented (~state), so they appear
1N/A * there as negative values. S_TOK and
1N/A * S_TOKB encode the resulting token type in the upper bits. These actions
1N/A * differ in that S_TOKB has a lookahead char.
1N/A *
1N/A * fsm[] has three start states:
1N/A *
1N/A * PROTO proto (ANSI -> K&R,C++,ANSI)
1N/A * QUICK standalone ppcpp()
1N/A * TOKEN tokenizing pplex()
1N/A *
1N/A * If the next state remains the same then the fsm[] transition value is 0.
1N/A * MAX+1 is a power of 2 so that fsm[state][EOF==MAX+1] actually accesses
1N/A * fsm[state+1][0] which is ~S_EOB for all states. This preserves the
1N/A * power of 2 fsm[] row size for efficient array indexing. Thanks to
1N/A * D. G. Korn for the last two observations. The pseudo non-terminal state
1N/A * fsm[TERMINAL][state+1] is used to differentiate EOB from EOF.
1N/A *
1N/A * The bit layout is:
1N/A *
1N/A * TERM arg SPLICE next
1N/A * 15 14-8 7 6-0
1N/A */
1N/A
1N/A/*
1N/A * NOTE: these must be `control' characters for all native codesets
1N/A * currently ok for {ascii,ebcdic1,ebcdic2,ebcdic3}
1N/A *
1N/A * The C_* values are character class placeholders for fsminit[].ch[].
1N/A * ppfsm(FSM_INIT) expands C_LET, C_HEX, C_DEC and C_OCT into the
1N/A * characters of let, hex, dec and oct, expands C_XXX into every
1N/A * character 0..MAX, and maps C_EOF onto fsm[TERMINAL][state+1].
1N/A *
1N/A * OP in fsminit[].state marks an operation row; its nextstate field
1N/A * selects the operation. COPY replicates a template row; any other
1N/A * value (END is the one used) stops the initialization scan.
1N/A *
1N/A * copy(t,f) copies MAX+1 shorts from &fsm[f][1] to &fsm[t][1] and then
1N/A * copies the matching fsm[TERMINAL][..+1] entry.
1N/A * NOTE(review): rows hold MAX+1 shorts, so starting at column 1 the
1N/A * memcpy also reads fsm[f+1][0] and writes fsm[t+1][0] -- confirm this
1N/A * is intended (column 0 is documented above as ~S_EOB in every row).
1N/A */
1N/A
1N/A#define C_DEC 001 /* class: characters of dec */
1N/A#define C_EOF 002 /* pseudo char: end of file */
1N/A#define C_HEX 003 /* class: characters of hex */
1N/A#define C_LET 021 /* class: characters of let */
1N/A#define C_OCT 022 /* class: characters of oct */
1N/A#define C_XXX 023 /* class: every character 0..MAX */
1N/A
1N/A#define OP (-1) /* fsminit.state value marking an operation row */
1N/A#define END 0 /* operation: end of fsminit[] */
1N/A#define COPY 1 /* operation: copy row ch[0] to rows ch[1]..ch[2] */
1N/A
1N/A#define copy(t,f) (memcpy(&fsm[t][1],&fsm[f][1],(MAX+1)*sizeof(short)),fsm[TERMINAL][(t)+1]=fsm[TERMINAL][(f)+1])
1N/A
1N/Astruct fsminit /* fsm initialization row: one <state,charset,nextstate> entry, or an OP row */
1N/A{
1N/A int state; /* if in this state (fsm[] row index), or OP for an operation row */
1N/A unsigned char ch[4]; /* and see one of these: chars or C_* classes, a 0 ends the list early; for COPY rows: { from, first, last } */
1N/A int nextstate; /* enter this state if <TERMINAL; values >= TERMINAL are stored as ~value by ppfsm(); for OP rows: COPY or END */
1N/A};
1N/A
1N/Astatic struct fsminit fsminit[] = /* ordered rows consumed by ppfsm(FSM_INIT); a later row overrides an earlier one for the same <state,char> */
1N/A{
1N/A /* proto start state */
1N/A { PROTO, { C_XXX }, S_CHR, },
1N/A { PROTO, { C_EOF }, S_EOF, },
1N/A { PROTO, { C_DEC }, BAD1, },
1N/A { PROTO, { '.' }, DOT, },
1N/A { PROTO, { C_LET }, NID, },
1N/A { PROTO, { 'L' }, LIT, },
1N/A { PROTO, { 'd', 'e', 'f', 'i' }, RES1, },
1N/A { PROTO, { 'r', 's', 't', 'v' }, RES1, },
1N/A { PROTO, { 'w', 'N' }, RES1, },
1N/A { PROTO, { '"', '\'' }, S_LITBEG, },
1N/A { PROTO, { '/' }, COM1, },
1N/A { PROTO, { '\n' }, S_NL, },
1N/A { PROTO, { ' ','\t','\f','\v' }, WS1, },
1N/A
1N/A/* proto {do,else,extern,for,if,inline,return,static,typedef,va_start,void,while,NoN} */
1N/A { RES1, { C_XXX }, S_MACRO, },
1N/A { RES1, { C_LET, C_DEC }, NID, },
1N/A { RES1, { 'a' }, RES1a, },
1N/A { RES1, { 'e' }, RES1e, },
1N/A { RES1, { 'f' }, RES1f, },
1N/A { RES1, { 'h' }, RES1h, },
1N/A { RES1, { 'l' }, RES1l, },
1N/A { RES1, { 'n' }, RES1n, },
1N/A { RES1, { 'o' }, RES1o, },
1N/A { RES1, { 't' }, RES1t, },
1N/A { RES1, { 'x' }, RES1x, },
1N/A { RES1, { 'y' }, RES1y, },
1N/A
1N/A /* proto reserved {va_start} */
1N/A { RES1a, { C_XXX }, S_RESERVED, },
1N/A { RES1a, { C_LET, C_DEC }, NID, },
1N/A { RES1a, { '_','s','t','a' }, RES1a, },
1N/A { RES1a, { 'r' }, RES1a, },
1N/A
1N/A /* proto reserved {return} */
1N/A { RES1e, { C_XXX }, S_RESERVED, },
1N/A { RES1e, { C_LET, C_DEC }, NID, },
1N/A { RES1e, { 't','u','r','n' }, RES1e, },
1N/A
1N/A /* proto reserved {if} */
1N/A { RES1f, { C_XXX }, S_RESERVED, },
1N/A { RES1f, { C_LET, C_DEC }, NID, },
1N/A
1N/A /* proto reserved {while} */
1N/A { RES1h, { C_XXX }, S_RESERVED, },
1N/A { RES1h, { C_LET, C_DEC }, NID, },
1N/A { RES1h, { 'i','l','e' }, RES1h, },
1N/A
1N/A /* proto reserved {else} */
1N/A { RES1l, { C_XXX }, S_RESERVED, },
1N/A { RES1l, { C_LET, C_DEC }, NID, },
1N/A { RES1l, { 's','e' }, RES1l, },
1N/A
1N/A /* proto reserved {inline} */
1N/A { RES1n, { C_XXX }, S_RESERVED, },
1N/A { RES1n, { C_LET, C_DEC }, NID, },
1N/A { RES1n, { 'l','i','n','e' }, RES1n, },
1N/A
1N/A /* proto reserved {do,for,void} */
1N/A { RES1o, { C_XXX }, S_RESERVED, },
1N/A { RES1o, { C_LET, C_DEC }, NID, },
1N/A { RES1o, { 'r','i','d','N' }, RES1o, },
1N/A
1N/A /* proto reserved {static} */
1N/A { RES1t, { C_XXX }, S_RESERVED, },
1N/A { RES1t, { C_LET, C_DEC }, NID, },
1N/A { RES1t, { 'a','t','i','c' }, RES1t, },
1N/A
1N/A /* proto reserved {extern} */
1N/A { RES1x, { C_XXX }, S_RESERVED, },
1N/A { RES1x, { C_LET, C_DEC }, NID, },
1N/A { RES1x, { 't','e','r','n' }, RES1x, },
1N/A
1N/A /* proto reserved {typedef} */
1N/A { RES1y, { C_XXX }, S_RESERVED, },
1N/A { RES1y, { C_LET, C_DEC }, NID, },
1N/A { RES1y, { 'p','e','d','f' }, RES1y, },
1N/A
1N/A /* saw /, perhaps start of comment */
1N/A { COM1, { C_XXX }, S_CHRB, },
1N/A { COM1, { '*' }, COM2, },
1N/A#if PROTOMAIN
1N/A { COM1, { '/' }, COM5, },
1N/A#endif
1N/A
1N/A /* saw / *, start of comment */
1N/A { COM2, { C_XXX }, COM2, },
1N/A { COM2, { '\n', C_EOF }, S_COMMENT, },
1N/A { COM2, { '/' }, COM4, },
1N/A { COM2, { '*' }, COM3, },
1N/A { COM2, { '#', ';', ')' }, QUAL(COM2), },
1N/A
1N/A /* saw the * possibly ending a comment */
1N/A { COM3, { C_XXX }, COM2, },
1N/A { COM3, { '\n', C_EOF }, S_COMMENT, },
1N/A { COM3, { '#', ';', ')' }, QUAL(COM2), },
1N/A { COM3, { '*' }, COM3, },
1N/A { COM3, { '/' }, S_COMMENT, },
1N/A
1N/A /* saw / in / * comment, possible malformed nest */
1N/A { COM4, { C_XXX }, COM2, },
1N/A { COM4, { '*', '\n', C_EOF }, S_COMMENT, },
1N/A { COM4, { '/' }, COM4, },
1N/A
1N/A /* saw / /, start of comment */
1N/A { COM5, { C_XXX }, COM5, },
1N/A { COM5, { '\n', C_EOF }, S_COMMENT, },
1N/A { COM5, { '/' }, COM6, },
1N/A { COM5, { '*' }, COM7, },
1N/A
1N/A /* saw / in / / comment, possible malformed nest */
1N/A { COM6, { C_XXX }, COM5, },
1N/A { COM6, { '*', '\n', C_EOF }, S_COMMENT, },
1N/A { COM6, { '/' }, COM6, },
1N/A
1N/A /* saw * in / /, possible malformed nest */
1N/A { COM7, { C_XXX }, COM5, },
1N/A { COM7, { '\n', C_EOF }, S_COMMENT, },
1N/A { COM7, { '*' }, COM7, },
1N/A { COM7, { '/' }, S_COMMENT, },
1N/A
1N/A /* normal identifier -- always a macro candidate */
1N/A { NID, { C_XXX }, S_MACRO, },
1N/A { NID, { C_LET, C_DEC }, NID, },
1N/A
1N/A /* saw ., operator or dbl constant */
1N/A { DOT, { C_XXX }, S_CHRB, },
1N/A { DOT, { '.' }, DOT2, },
1N/A { DOT, { C_DEC }, BAD1, },
1N/A
1N/A /* saw .., possible ... */
1N/A { DOT2, { C_XXX }, BACK(T_INVALID), },
1N/A { DOT2, { '.' }, KEEP(T_VARIADIC), },
1N/A
1N/A /* saw L (possible start of normal wide literal) */
1N/A { LIT, { C_XXX }, S_MACRO, },
1N/A { LIT, { C_LET, C_DEC }, NID, },
1N/A { LIT, { '"', '\'' }, QUAL(LIT1), },
1N/A
1N/A /* saw " or ' beginning literal */
1N/A { LIT1, { C_XXX }, LIT1, },
1N/A { LIT1, { '"', '\'' }, S_LITEND, },
1N/A { LIT1, { '\n', C_EOF }, S_LITEND, },
1N/A { LIT1, { '\\' }, LIT2, },
1N/A
1N/A /* saw \ in literal */
1N/A { LIT2, { C_XXX }, S_LITESC, },
1N/A { LIT2, { '\n', C_EOF }, S_LITEND, },
1N/A
1N/A /* eat malformed numeric constant */
1N/A { BAD1, { C_XXX }, BACK(T_INVALID), },
1N/A { BAD1, { C_LET, C_DEC, '.' }, BAD1, },
1N/A { BAD1, { 'e', 'E' }, BAD2, },
1N/A
1N/A /* eat malformed numeric fraction|exponent */
1N/A { BAD2, { C_XXX }, BACK(T_INVALID), },
1N/A { BAD2, { C_LET, C_DEC, '.' }, BAD1, },
1N/A { BAD2, { '+', '-' }, BAD1, },
1N/A
1N/A /* saw white space, eat it up */
1N/A { WS1, { C_XXX }, S_WS, },
1N/A { WS1, { ' ', '\t' }, WS1, },
1N/A { WS1, { '\f', '\v' }, S_VS, },
1N/A
1N/A#if !PROTOMAIN
1N/A
1N/A /* quick template */
1N/A { QUICK, { C_XXX }, QTOK, },
1N/A { QUICK, { C_EOF, MARK }, S_CHRB, },
1N/A { QUICK, { C_LET, C_DEC }, QID, },
1N/A { QUICK, { 'L' }, LIT0, },
1N/A { QUICK, { '"', '\'' }, S_LITBEG, },
1N/A { QUICK, { '/' }, S_CHRB, },
1N/A { QUICK, { '*' }, QCOM, },
1N/A { QUICK, { '#' }, SHARP1, },
1N/A { QUICK, { '\n' }, S_NL, },
1N/A { QUICK, { '\f', '\v' }, S_VS, },
1N/A
1N/A /* copy QUICK to QUICK+1 through MAC0+1 */
1N/A { OP, {QUICK,QUICK+1,MAC0+1}, COPY, }, /* ch[] = { from, first, last }; rows below then specialize the copies */
1N/A
1N/A /* quick start state */
1N/A { QUICK, { C_EOF }, S_EOF, },
1N/A { QUICK, { C_DEC }, QNUM, },
1N/A { QUICK, { MARK }, QTOK, },
1N/A { QUICK, { '/' }, COM1, },
1N/A { QUICK, { ' ', '\t' }, QUICK, },
1N/A
1N/A /* grab non-macro tokens */
1N/A { QTOK, { C_DEC }, QNUM, },
1N/A
1N/A /* grab numeric and invalid tokens */
1N/A { QNUM, { C_LET, C_DEC, '.' }, QNUM, },
1N/A { QNUM, { 'e', 'E' }, QEXP, },
1N/A
1N/A /* grab exponent token */
1N/A { QEXP, { C_LET, C_DEC, '.' }, QNUM, },
1N/A { QEXP, { '+', '-' }, QNUM, },
1N/A
1N/A /* saw *, grab possible bad comment terminator */
1N/A { QCOM, { C_DEC }, QNUM, },
1N/A { QCOM, { '/' }, S_COMMENT, },
1N/A
1N/A /* saw L (possible start of wide string or first macro char) */
1N/A { MAC0, { 'L' }, QID, },
1N/A { MAC0, { '"', '\'' }, QUAL(LIT1), },
1N/A
1N/A /* macro candidate template */
1N/A { MAC0+1, { 'L' }, QID, },
1N/A
1N/A /* copy MAC0+1 to MAC0+2 through MACN */
1N/A { OP, {MAC0+1,MAC0+2,MACN}, COPY },
1N/A
1N/A /* saw L (possible start of wide string or macro L) */
1N/A { HIT0, { C_XXX }, S_MACRO, },
1N/A { HIT0, { C_LET, C_DEC }, QID, },
1N/A { HIT0, { '"', '\'' }, QUAL(LIT1), },
1N/A
1N/A /* macro hit template */
1N/A { HIT0+1, { C_XXX }, S_MACRO, },
1N/A { HIT0+1, { C_LET, C_DEC }, QID, },
1N/A
1N/A /* copy HIT0+1 to HIT0+2 through HITN */
1N/A { OP, {HIT0+1,HIT0+2,HITN}, COPY },
1N/A
1N/A /* saw L (possible start of wide literal) */
1N/A { LIT0, { C_XXX }, S_MACRO, },
1N/A { LIT0, { C_LET, C_DEC }, QID, },
1N/A { LIT0, { '"', '\'' }, QUAL(LIT1), },
1N/A
1N/A /* (!PROTOMAIN COM1) saw /, perhaps start of comment or /= */
1N/A { COM1, { '=' }, KEEP(T_DIVEQ), },
1N/A
1N/A /* normal start state */
1N/A { TOKEN, { C_XXX }, S_HUH, },
1N/A { TOKEN, { C_EOF }, S_EOF, },
1N/A { TOKEN, { C_DEC }, DEC1, },
1N/A { TOKEN, { '0' }, OCT1, },
1N/A { TOKEN, { '.' }, DOT1, },
1N/A { TOKEN, { C_LET }, NID, },
1N/A { TOKEN, { 'L' }, LIT, },
1N/A { TOKEN, { '"', '\'', '<' }, S_LITBEG, },
1N/A { TOKEN, { '/' }, COM1, },
1N/A { TOKEN, { '\n' }, S_NL, },
1N/A { TOKEN, { ' ', '\t' }, WS1, },
1N/A { TOKEN, { '\f', '\v' }, S_VS, },
1N/A { TOKEN, { '#' }, SHARP1, },
1N/A { TOKEN, { ':' }, COLON1, },
1N/A { TOKEN, { '%' }, PCT1, },
1N/A { TOKEN, { '&' }, AND1, },
1N/A { TOKEN, { '*' }, STAR1, },
1N/A { TOKEN, { '+' }, PLUS1, },
1N/A { TOKEN, { '-' }, MINUS1, },
1N/A { TOKEN, { '=' }, EQ1, },
1N/A { TOKEN, { '!' }, NOT1, },
1N/A { TOKEN, { '>' }, GT1, },
1N/A { TOKEN, { '^' }, CIRC1, },
1N/A { TOKEN, { '|' }, OR1, },
1N/A { TOKEN, { '(', ')', '[', ']' }, S_CHR, },
1N/A { TOKEN, { '{', '}', ',', ';' }, S_CHR, },
1N/A { TOKEN, { '~', '?' }, S_CHR, },
1N/A
1N/A /* saw 0, possible oct|hex|dec|dbl constant */
1N/A { OCT1, { C_XXX }, BACK(T_DECIMAL), },
1N/A { OCT1, { C_LET, C_DEC }, BAD1, },
1N/A { OCT1, { C_OCT }, OCT2, },
1N/A { OCT1, { 'e', 'E' }, DBL2, },
1N/A { OCT1, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), },
1N/A { OCT1, { 'x', 'X' }, HEX1, },
1N/A { OCT1, { '.' }, DBL1, },
1N/A
1N/A /* saw 0<oct>, oct constant */
1N/A { OCT2, { C_XXX }, BACK(T_OCTAL), },
1N/A { OCT2, { C_LET, C_DEC }, BAD1, },
1N/A { OCT2, { C_OCT }, OCT2, },
1N/A { OCT2, { 'e', 'E' }, DBL2, },
1N/A { OCT2, { 'l', 'L', 'u', 'U' }, QUAL(OCT3), },
1N/A { OCT2, { '.' }, DBL1, },
1N/A
1N/A /* oct constant qualifier */
1N/A { OCT3, { C_XXX }, BACK(T_OCTAL), },
1N/A { OCT3, { C_LET, C_DEC, '.' }, BAD1, },
1N/A { OCT3, { 'l', 'L', 'u', 'U' }, QUAL(OCT3), },
1N/A
1N/A /* saw 0 [xX], hex constant */
1N/A { HEX1, { C_XXX }, BACK(T_HEXADECIMAL), },
1N/A { HEX1, { C_LET }, BAD1, },
1N/A { HEX1, { C_HEX }, HEX1, },
1N/A { HEX1, { 'e', 'E' }, HEX3, },
1N/A { HEX1, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), },
1N/A { HEX1, { '.' }, HEX4, },
1N/A { HEX1, { 'p', 'P' }, HEX5, },
1N/A
1N/A /* hex constant qualifier */
1N/A { HEX2, { C_XXX }, BACK(T_HEXADECIMAL), },
1N/A { HEX2, { C_LET, C_DEC, '.' }, BAD1, },
1N/A { HEX2, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), },
1N/A
1N/A /* hex [eE][-+] botch */
1N/A { HEX3, { C_XXX }, BACK(T_HEXADECIMAL), },
1N/A { HEX3, { C_LET, '.', '-', '+'},BAD1, }, /* all 4 ch[] slots used: no 0 terminator, the sizeof(ch) bound in ppfsm() ends the scan */
1N/A { HEX3, { C_HEX }, HEX1, },
1N/A { HEX3, { 'e', 'E' }, HEX3, },
1N/A { HEX3, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), },
1N/A
1N/A /* hex dbl fraction */
1N/A { HEX4, { C_XXX }, BACK(T_HEXDOUBLE), },
1N/A { HEX4, { C_LET, '.' }, BAD1, },
1N/A { HEX4, { C_HEX }, HEX4, },
1N/A { HEX4, { 'p', 'P' }, HEX5, },
1N/A { HEX4, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), },
1N/A
1N/A /* optional hex dbl exponent sign */
1N/A { HEX5, { C_XXX }, BACK(T_INVALID), },
1N/A { HEX5, { C_LET, '.' }, BAD1, },
1N/A { HEX5, { '+', '-' }, HEX6, },
1N/A { HEX5, { C_DEC }, HEX7, },
1N/A
1N/A /* mandatory hex dbl exponent first digit */
1N/A { HEX6, { C_XXX }, BACK(T_INVALID), },
1N/A { HEX6, { C_LET, '.' }, BAD1, },
1N/A { HEX6, { C_DEC }, HEX7, },
1N/A
1N/A /* hex dbl exponent digits */
1N/A { HEX7, { C_XXX }, BACK(T_HEXDOUBLE), },
1N/A { HEX7, { C_LET, '.' }, BAD1, },
1N/A { HEX7, { C_DEC }, HEX7, },
1N/A { HEX7, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), },
1N/A
1N/A /* hex dbl constant qualifier */
1N/A { HEX8, { C_XXX }, BACK(T_HEXDOUBLE), },
1N/A { HEX8, { C_LET, '.' }, BAD1, },
1N/A { HEX8, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), },
1N/A
1N/A /* saw <dec>, dec constant */
1N/A { DEC1, { C_XXX }, BACK(T_DECIMAL), },
1N/A { DEC1, { C_LET }, BAD1, },
1N/A { DEC1, { C_DEC }, DEC1, },
1N/A { DEC1, { 'e', 'E' }, DBL2, },
1N/A { DEC1, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), },
1N/A { DEC1, { '.' }, DBL1, },
1N/A
1N/A /* dec constant qualifier */
1N/A { DEC2, { C_XXX }, BACK(T_DECIMAL), },
1N/A { DEC2, { C_LET, C_DEC }, BAD1, },
1N/A { DEC2, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), },
1N/A
1N/A /* saw ., operator or dbl constant */
1N/A { DOT1, { C_XXX }, S_CHRB, },
1N/A { DOT1, { '.' }, DOT2, },
1N/A { DOT1, { C_DEC }, DBL1, },
1N/A
1N/A /* dbl fraction */
1N/A { DBL1, { C_XXX }, BACK(T_DOUBLE), },
1N/A { DBL1, { C_LET, '.' }, BAD1, },
1N/A { DBL1, { C_DEC }, DBL1, },
1N/A { DBL1, { 'e', 'E' }, DBL2, },
1N/A { DBL1, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), },
1N/A
1N/A /* optional dbl exponent sign */
1N/A { DBL2, { C_XXX }, BACK(T_INVALID), },
1N/A { DBL2, { C_LET, '.' }, BAD1, },
1N/A { DBL2, { '+', '-' }, DBL3, },
1N/A { DBL2, { C_DEC }, DBL4, },
1N/A
1N/A /* mandatory dbl exponent first digit */
1N/A { DBL3, { C_XXX }, BACK(T_INVALID), },
1N/A { DBL3, { C_LET, '.' }, BAD1, },
1N/A { DBL3, { C_DEC }, DBL4, },
1N/A
1N/A /* dbl exponent digits */
1N/A { DBL4, { C_XXX }, BACK(T_DOUBLE), },
1N/A { DBL4, { C_LET, '.' }, BAD1, },
1N/A { DBL4, { C_DEC }, DBL4, },
1N/A { DBL4, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), },
1N/A
1N/A /* dbl constant qualifier */
1N/A { DBL5, { C_XXX }, BACK(T_DOUBLE), },
1N/A { DBL5, { C_LET, '.' }, BAD1, },
1N/A { DBL5, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), },
1N/A
1N/A /* saw < starting include header */
1N/A { HDR1, { C_XXX }, HDR1, },
1N/A { HDR1, { '>', '\n', C_EOF }, S_LITEND, },
1N/A
1N/A /* saw <binop><space> expecting = */
1N/A { BIN1, { C_XXX }, S_HUH, },
1N/A { BIN1, { ' ', '\t' }, BIN1, },
1N/A
1N/A /* 2-char ops */
1N/A
1N/A { SHARP1, { C_XXX }, S_SHARP, },
1N/A
1N/A { PCT1, { C_XXX }, S_CHRB, },
1N/A { PCT1, { '=' }, KEEP(T_MODEQ), },
1N/A
1N/A { AND1, { C_XXX }, S_CHRB, },
1N/A { AND1, { '=' }, KEEP(T_ANDEQ), },
1N/A { AND1, { '&' }, KEEP(T_ANDAND), },
1N/A
1N/A { STAR1, { C_XXX }, S_CHRB, },
1N/A { STAR1, { '=' }, KEEP(T_MPYEQ), },
1N/A { STAR1, { '/' }, S_COMMENT, },
1N/A
1N/A { PLUS1, { C_XXX }, S_CHRB, },
1N/A { PLUS1, { '=' }, KEEP(T_ADDEQ), },
1N/A { PLUS1, { '+' }, KEEP(T_ADDADD), },
1N/A
1N/A { MINUS1, { C_XXX }, S_CHRB, },
1N/A { MINUS1, { '=' }, KEEP(T_SUBEQ), },
1N/A { MINUS1, { '-' }, KEEP(T_SUBSUB), },
1N/A { MINUS1, { '>' }, KEEP(T_PTRMEM), },
1N/A
1N/A { COLON1, { C_XXX }, S_CHRB, },
1N/A { COLON1, { '=', '>' }, S_HUH, },
1N/A
1N/A { LT1, { C_XXX }, S_CHRB, },
1N/A { LT1, { '=' }, KEEP(T_LE), },
1N/A { LT1, { '<' }, LSH1, },
1N/A
1N/A { EQ1, { C_XXX }, S_CHRB, },
1N/A { EQ1, { '=' }, KEEP(T_EQ), },
1N/A
1N/A { NOT1, { C_XXX }, S_CHRB, },
1N/A { NOT1, { '=' }, KEEP(T_NE), },
1N/A
1N/A { GT1, { C_XXX }, S_CHRB, },
1N/A { GT1, { '=' }, KEEP(T_GE), },
1N/A { GT1, { '>' }, RSH1, },
1N/A
1N/A { CIRC1, { C_XXX }, S_CHRB, },
1N/A { CIRC1, { '=' }, KEEP(T_XOREQ), },
1N/A
1N/A { OR1, { C_XXX }, S_CHRB, },
1N/A { OR1, { '=' }, KEEP(T_OREQ), },
1N/A { OR1, { '|' }, KEEP(T_OROR), },
1N/A
1N/A /* 3-char ops */
1N/A
1N/A { ARROW1, { C_XXX }, BACK(T_PTRMEM), },
1N/A { ARROW1, { '*' }, KEEP(T_PTRMEMREF), },
1N/A
1N/A { LSH1, { C_XXX }, BACK(T_LSHIFT), },
1N/A { LSH1, { '=' }, KEEP(T_LSHIFTEQ), },
1N/A
1N/A { RSH1, { C_XXX }, BACK(T_RSHIFT), },
1N/A { RSH1, { '=' }, KEEP(T_RSHIFTEQ), },
1N/A
1N/A#endif
1N/A
1N/A /* end */
1N/A { OP, { 0 }, END, } /* non-COPY OP row: ppfsm(FSM_INIT) stops scanning here */
1N/A};
1N/A
1N/Ashort fsm[TERMINAL+1][MAX+1]; /* transition table indexed [state][char]; row TERMINAL holds the per-state EOF actions at [state+1] */
1N/A
1N/Achar trigraph[MAX+1]; /* replacement char indexed by a trigraph selector char (e.g. '=' -> '#'); filled by ppfsm(FSM_INIT) when !PROTOMAIN */
1N/A
1N/A#if PROTOMAIN
1N/Astatic char spl[] = { '\\', '\r', 0 }; /* chars that get the splice marking in every non-terminal state */
1N/Astatic char aln[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_$@"; /* digits first, then the C_LET members (here including $ and @) */
1N/A#else
1N/Astatic char spl[] = { MARK, '?', '\\', '\r', CC_sub, 0 }; /* chars that get the splice marking; MARK is skipped for INCOMMENT rows */
1N/Astatic char aln[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_"; /* digits first, then the C_LET members */
1N/A#endif
1N/Astatic char* let = &aln[10]; /* C_LET expansion: aln past its 10 leading digits */
1N/Astatic char hex[] = "fedcbaFEDCBA9876543210"; /* C_HEX expansion; ordered so dec and oct are suffixes */
1N/Astatic char* dec = &hex[12]; /* C_DEC expansion: "9876543210" */
1N/Astatic char* oct = &hex[14]; /* C_OCT expansion: "76543210" */
1N/A
1N/A/*
1N/A * runtime FSM modifications
1N/A * ppfsm(FSM_INIT,0) must be called first
1N/A *
1N/A * op selects the modification and s is an op specific argument.
1N/A * Only FSM_INIT exists when PROTOMAIN is set.
1N/A *
1N/A * FSM_INIT: build fsm[] from fsminit[], mark the splice chars, install
1N/A * the EOB entries and (when !PROTOMAIN) set the default character
1N/A * types and the trigraph map; s is not read (it is reused as scratch)
1N/A * FSM_IDADD: for each char of s not already an identifier char, add it
1N/A * to the identifier set if TOKEN currently maps it to S_HUH, else
1N/A * report an error
1N/A * FSM_IDDEL: for each identifier char of s, remove it from the
1N/A * identifier set and map it to S_HUH in every non-terminal state
1N/A * FSM_PLUSPLUS: switch the C++ specific transitions on or off
1N/A * according to pp.option; s is not read
1N/A * FSM_COMPATIBILITY: (only if COMPATIBLE) switch the e/E/u/U handling
1N/A * in HEX1 and QNUM according to pp.state; s is not read
1N/A * FSM_QUOTADD: make each char of s behave like '"' if TOKEN currently
1N/A * maps it to S_HUH, else report an error
1N/A * FSM_QUOTDEL: make each char of s (other than '"') that TOKEN maps
1N/A * like '"' behave like '_' in every non-terminal state
1N/A * FSM_OPSPACE: s is used only as a flag; non-null routes space/tab
1N/A * after the listed operator states to BIN1, null restores the
1N/A * default action
1N/A * FSM_MACRO: thread the chars of s (presumably a macro name) through
1N/A * the QUICK/MAC0../HIT0.. rows; if pp.truncate is set and s is at
1N/A * least that long, s is temporarily cut at pp.truncate chars
1N/A *
1N/A * NOTE(review): in FSM_MACRO the byte saved in x is restored only when
1N/A * x >= 0 and only inside the `if (c = *s++)' branch, so a saved char
1N/A * that is negative as an int would leave s truncated -- confirm
1N/A * NOTE(review): the chars of s are used as fsm[][] column indices via
1N/A * int c; this presumably relies on them being in 0..MAX -- confirm
1N/A */
1N/A
1N/Avoid
1N/Appfsm(int op, register char* s)
1N/A{
1N/A register int c; /* current char, also scratch */
1N/A register int n; /* transition value being installed */
1N/A register int i; /* state (fsm[] row) index */
1N/A register short* rp; /* current fsm[] row */
1N/A register struct fsminit* fp; /* current fsminit[] entry */
1N/A#if !PROTOMAIN
1N/A char* t; /* FSM_PLUSPLUS: extra chars to splice mark */
1N/A int x; /* FSM_MACRO: char saved at the truncation point, -1 if none */
1N/A#endif
1N/A
1N/A switch (op)
1N/A {
1N/A
1N/A#if !PROTOMAIN
1N/A
1N/A case FSM_IDADD:
1N/A while (c = *s++)
1N/A if (!ppisid(c))
1N/A {
1N/A if (fsm[TOKEN][c] == ~S_HUH) /* only chars with no other token meaning may be added */
1N/A {
1N/A setid(c);
1N/A for (i = 0; i < TERMINAL; i++)
1N/A fsm[i][c] = IDSTATE(fsm[i]['_']); /* derive the transition from what '_' does in this state */
1N/A }
1N/A else error(2, "%c: cannot add to identifier set", c);
1N/A }
1N/A break;
1N/A
1N/A case FSM_IDDEL:
1N/A while (c = *s++)
1N/A if (ppisid(c))
1N/A {
1N/A clrid(c);
1N/A for (i = 0; i < TERMINAL; i++)
1N/A fsm[i][c] = ~S_HUH; /* NOTE(review): unconditional in every state, whatever the entry held before -- confirm */
1N/A }
1N/A break;
1N/A
1N/A#endif
1N/A
1N/A case FSM_INIT:
1N/A for (fp = fsminit;; fp++) /* ends at the first OP row that is not a COPY */
1N/A {
1N/A if ((n = fp->nextstate) >= TERMINAL) n = ~n; /* states >= TERMINAL are stored complemented */
1N/A if (fp->state == OP)
1N/A {
1N/A#if !PROTOMAIN
1N/A switch (n)
1N/A {
1N/A case COPY:
1N/A c = fp->ch[0]; /* source row */
1N/A n = fp->ch[2]; /* last destination row */
1N/A for (i = fp->ch[1]; i <= n; i++)
1N/A copy(i, c);
1N/A continue;
1N/A default:
1N/A break;
1N/A }
1N/A#endif
1N/A break; /* leaves the fsminit[] scan */
1N/A }
1N/A rp = fsm[fp->state];
1N/A for (i = 0; i < sizeof(fp->ch) && (c = fp->ch[i]); i++) /* a 0 entry ends the list early */
1N/A {
1N/A switch (c)
1N/A {
1N/A case C_XXX:
1N/A for (c = 0; c <= MAX; c++)
1N/A rp[c] = n;
1N/A /*FALLTHROUGH*/
1N/A
1N/A case C_EOF:
1N/A fsm[TERMINAL][fp->state+1] = n < 0 ? ~n : n; /* EOF action kept uncomplemented in the TERMINAL row */
1N/A continue;
1N/A
1N/A case C_LET:
1N/A s = let;
1N/A break;
1N/A
1N/A case C_HEX:
1N/A s = hex;
1N/A break;
1N/A
1N/A case C_DEC:
1N/A s = dec;
1N/A break;
1N/A
1N/A case C_OCT:
1N/A s = oct;
1N/A break;
1N/A
1N/A default:
1N/A rp[c] = n;
1N/A continue;
1N/A }
1N/A while (c = *s++) /* expand the selected class */
1N/A rp[c] = n;
1N/A }
1N/A }
1N/A
1N/A /*
1N/A * install splice special cases
1N/A * and same non-terminal transitions
1N/A */
1N/A
1N/A for (i = 0; i < TERMINAL; i++)
1N/A {
1N/A rp = fsm[i];
1N/A s = spl;
1N/A while (c = *s++)
1N/A if (c != MARK || !INCOMMENT(rp))
1N/A {
1N/A if (rp[c] >= 0) rp[c] = ~rp[c]; /* force the complemented (negative) form */
1N/A rp[c] &= ~SPLICE; /* clears SPLICE in the stored value, i.e. sets it in ~rp[c] */
1N/A }
1N/A rp[EOB] = ~S_EOB;
1N/A for (c = 0; c <= MAX; c++)
1N/A if (rp[c] == i)
1N/A rp[c] = 0; /* staying in the same state is encoded as 0 */
1N/A }
1N/A fsm[TERMINAL][0] = ~S_EOB;
1N/A
1N/A#if !PROTOMAIN
1N/A
1N/A /*
1N/A * default character types
1N/A */
1N/A
1N/A s = let;
1N/A while (c = *s++)
1N/A setid(c);
1N/A s = dec;
1N/A while (c = *s++)
1N/A setdig(c);
1N/A s = spl;
1N/A do setsplice(c = *s++); while (c); /* do/while: the terminating 0 is passed to setsplice() too */
1N/A
1N/A /*
1N/A * trigraph map
1N/A */
1N/A
1N/A trigraph['='] = '#';
1N/A trigraph['('] = '[';
1N/A trigraph['/'] = '\\';
1N/A trigraph[')'] = ']';
1N/A trigraph['\''] = '^';
1N/A trigraph['<'] = '{';
1N/A trigraph['!'] = '|';
1N/A trigraph['>'] = '}';
1N/A trigraph['-'] = '~';
1N/A#endif
1N/A break;
1N/A
1N/A#if !PROTOMAIN
1N/A
1N/A case FSM_PLUSPLUS:
1N/A if (pp.option & PLUSPLUS)
1N/A {
1N/A fsm[COLON1][':'] = ~KEEP(T_SCOPE);
1N/A fsm[DOT1]['*'] = ~KEEP(T_DOTREF);
1N/A fsm[MINUS1]['>'] = ARROW1;
1N/A fsm[COM1]['/'] = COM5;
1N/A t = "%<:"; /* additional chars to splice mark outside comments and quotes */
1N/A for (i = 0; i < TERMINAL; i++)
1N/A {
1N/A rp = fsm[i];
1N/A if (!INCOMMENT(rp) && !INQUOTE(rp))
1N/A {
1N/A s = t;
1N/A while (c = *s++)
1N/A {
1N/A if (rp[c] > 0) rp[c] = ~rp[c];
1N/A else if (!rp[c]) rp[c] = ~i; /* 0 means "same state": make the state explicit before complementing */
1N/A rp[c] &= ~SPLICE;
1N/A }
1N/A }
1N/A }
1N/A s = t;
1N/A while (c = *s++) setsplice(c);
1N/A }
1N/A else
1N/A {
1N/A fsm[COLON1][':'] = ~S_CHRB;
1N/A fsm[DOT1]['*'] = ~S_CHRB;
1N/A fsm[MINUS1]['>'] = ~KEEP(T_PTRMEM);
1N/A fsm[COM1]['/'] = (pp.option & PLUSCOMMENT) ? COM5 : ~S_CHRB; /* COM5 is the state entered after the second '/' */
1N/A }
1N/A break;
1N/A
1N/A#if COMPATIBLE
1N/A
1N/A case FSM_COMPATIBILITY:
1N/A if (pp.state & COMPATIBILITY)
1N/A {
1N/A fsm[HEX1]['e'] = HEX1;
1N/A fsm[HEX1]['E'] = HEX1;
1N/A fsm[QNUM]['e'] = QNUM;
1N/A fsm[QNUM]['E'] = QNUM;
1N/A fsm[QNUM]['u'] = ~QUAL(QNUM);
1N/A fsm[QNUM]['U'] = ~QUAL(QNUM);
1N/A }
1N/A else
1N/A {
1N/A fsm[HEX1]['e'] = HEX3;
1N/A fsm[HEX1]['E'] = HEX3;
1N/A fsm[QNUM]['e'] = QEXP;
1N/A fsm[QNUM]['E'] = QEXP;
1N/A fsm[QNUM]['u'] = QNUM;
1N/A fsm[QNUM]['U'] = QNUM;
1N/A }
1N/A break;
1N/A
1N/A#endif
1N/A
1N/A case FSM_QUOTADD:
1N/A while (c = *s++)
1N/A if (fsm[TOKEN][c] == ~S_HUH) /* only chars with no other token meaning may be added */
1N/A for (i = 0; i < TERMINAL; i++)
1N/A fsm[i][c] = fsm[i]['"'];
1N/A else error(2, "%c: cannot add to quote set", c);
1N/A break;
1N/A
1N/A case FSM_QUOTDEL:
1N/A while (c = *s++)
1N/A if (c != '"' && fsm[TOKEN][c] == fsm[TOKEN]['"'])
1N/A for (i = 0; i < TERMINAL; i++)
1N/A fsm[i][c] = fsm[i]['_']; /* NOTE(review): unconditional in every state -- confirm */
1N/A break;
1N/A
1N/A case FSM_OPSPACE:
1N/A n = s ? BIN1 : ~S_CHRB; /* s is only tested for null here */
1N/A fsm[COM1][' '] = fsm[COM1]['\t'] = n;
1N/A fsm[AND1][' '] = fsm[AND1]['\t'] = n;
1N/A fsm[STAR1][' '] = fsm[STAR1]['\t'] = n;
1N/A fsm[PCT1][' '] = fsm[PCT1]['\t'] = n;
1N/A fsm[PLUS1][' '] = fsm[PLUS1]['\t'] = n;
1N/A fsm[MINUS1][' '] = fsm[MINUS1]['\t'] = n;
1N/A fsm[CIRC1][' '] = fsm[CIRC1]['\t'] = n;
1N/A fsm[OR1][' '] = fsm[OR1]['\t'] = n;
1N/A fsm[LSH1][' '] = fsm[LSH1]['\t'] = s ? BIN1 : ~BACK(T_LSHIFT);
1N/A fsm[RSH1][' '] = fsm[RSH1]['\t'] = s ? BIN1 : ~BACK(T_RSHIFT);
1N/A break;
1N/A
1N/A case FSM_MACRO:
1N/A if (pp.truncate && strlen(s) >= pp.truncate)
1N/A {
1N/A x = s[pp.truncate]; /* remember the char overwritten by the temporary terminator */
1N/A s[pp.truncate] = 0;
1N/A }
1N/A else x = -1;
1N/A i = MAC0 + ((c = *s++) != 'L'); /* first char 'L' starts at MAC0, any other at MAC0+1 */
1N/A if ((n = fsm[QUICK][c]) != (i + NMAC))
1N/A {
1N/A n = i;
1N/A if (!*s) n += NMAC; /* single char name: use the row NMAC above */
1N/A }
1N/A if (fsm[QUICK][c] != n)
1N/A fsm[QUICK][c] = fsm[QCOM][c] = fsm[QTOK][c] = n;
1N/A if (c = *s++) /* name has a second char */
1N/A {
1N/A for (;;)
1N/A {
1N/A if ((i = n) < HIT0)
1N/A {
1N/A if (n < MACN) n++; /* advance one row, saturating at MACN */
1N/A if (!*s)
1N/A {
1N/A n += NMAC;
1N/A break;
1N/A }
1N/A if (fsm[i][c] < HIT0)
1N/A fsm[i][c] = n;
1N/A if (fsm[i + NMAC][c] < HIT0)
1N/A fsm[i + NMAC][c] = n;
1N/A }
1N/A else
1N/A {
1N/A if (n < HITN) n++; /* advance one row, saturating at HITN */
1N/A if (!*s) break;
1N/A if (fsm[i][c] < HIT0)
1N/A {
1N/A n -= NMAC;
1N/A fsm[i][c] = n;
1N/A }
1N/A }
1N/A c = *s++;
1N/A }
1N/A if (x >= 0)
1N/A {
1N/A *s = x; /* s points at the temporary terminator here: undo the truncation */
1N/A for (n = CHAR_MIN; n <= CHAR_MAX; n++)
1N/A if (ppisidig(n))
1N/A fsm[HITN][n] = HITN;
1N/A n = HITN;
1N/A }
1N/A if (fsm[i][c] < n)
1N/A fsm[i][c] = n;
1N/A if (i < HIT0 && fsm[i + NMAC][c] < n)
1N/A fsm[i + NMAC][c] = n;
1N/A }
1N/A break;
1N/A
1N/A#endif
1N/A
1N/A }
1N/A}
1N/A
1N/A#if !PROTOMAIN
1N/A
1N/A/*
1N/A * file buffer refill
1N/A * c is current input char
1N/A *
1N/A * If IN_eof is already set on pp.in no read is done: nextchr is backed
1N/A * up by one and the buffer is reported empty. Otherwise nextchr is reset
1N/A * to buffer + PPBAKSIZ, c is stored in the byte just before it, and up
1N/A * to PPBUFSIZ bytes are read at nextchr with read() (or pppread() for
1N/A * IN_prototype input when PROTOTYPE is enabled).
1N/A *
1N/A * After a successful read IN_newline records whether the data ends in
1N/A * '\n'; a short read on an IN_regular input sets IN_eof and closes fd.
1N/A * A read error is reported through error(). A zero length read on an
1N/A * input whose IN_c flag differs from pp.in->prev toggles IN_c and
1N/A * substitutes the static text "}\n". Both of these cases set IN_eof.
1N/A *
1N/A * The data at nextchr is always 0 terminated before returning.
1N/A * NOTE(review): nextchr[c] = 0 presumably relies on the buffer having
1N/A * room for one byte past PPBUFSIZ -- confirm against the allocation.
1N/A */
1N/A
1N/Avoid
1N/Arefill(register int c)
1N/A{
1N/A if (pp.in->flags & IN_eof)
1N/A {
1N/A pp.in->nextchr--;
1N/A c = 0; /* from here on c is the byte count */
1N/A }
1N/A else
1N/A {
1N/A *((pp.in->nextchr = pp.in->buffer + PPBAKSIZ) - 1) = c; /* reset nextchr and keep the current char just before it */
1N/A c =
1N/A#if PROTOTYPE
1N/A (pp.in->flags & IN_prototype) ? pppread(pp.in->nextchr) :
1N/A#endif
1N/A read(pp.in->fd, pp.in->nextchr, PPBUFSIZ);
1N/A }
1N/A if (c > 0)
1N/A {
1N/A if (pp.in->nextchr[c - 1] == '\n') pp.in->flags |= IN_newline;
1N/A else pp.in->flags &= ~IN_newline;
1N/A#if PROTOTYPE
1N/A if (!(pp.in->flags & IN_prototype)) /* guards the short read test below */
1N/A#endif
1N/A if (c < PPBUFSIZ && (pp.in->flags & IN_regular)) /* short read on a regular input is taken as end of file */
1N/A {
1N/A pp.in->flags |= IN_eof;
1N/A close(pp.in->fd);
1N/A pp.in->fd = -1;
1N/A }
1N/A }
1N/A else
1N/A {
1N/A if (c < 0)
1N/A {
1N/A error(ERROR_SYSTEM|3, "read error");
1N/A c = 0;
1N/A }
1N/A else if ((pp.in->flags ^ pp.in->prev->flags) & IN_c) /* IN_c differs between this input and the previous one */
1N/A {
1N/A static char ket[] = { 0, '}', '\n', 0 }; /* "}\n" with a 0 byte on each side */
1N/A
1N/A pp.in->flags ^= IN_c;
1N/A pp.in->nextchr = ket + 1;
1N/A c = 2;
1N/A }
1N/A pp.in->flags |= IN_eof;
1N/A }
1N/A#if CHECKPOINT
1N/A pp.in->buflen = c;
1N/A#endif
1N/A pp.in->nextchr[c] = 0; /* terminate the data */
1N/A debug((-7, "refill(\"%s\") = %d = \"%-.*s%s\"", error_info.file, c, (c > 32 ? 32 : c), pp.in->nextchr, c > 32 ? "..." : ""));
1N/A if (pp.test & 0x0080) /* test flag: dump the whole refilled buffer */
1N/A sfprintf(sfstderr, "===== refill(\"%s\") = %d =====\n%s\n===== eob(\"%s\") =====\n", error_info.file, c, pp.in->nextchr, error_info.file);
1N/A}
1N/A
1N/A#endif