/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1982-2012 AT&T Intellectual Property *
* and is licensed under the *
* Eclipse Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* David Korn <dgk@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* KornShell lexical analyzer
*
* Written by David Korn
* AT&T Labs
*
*/
#include <ast.h>
#include <stak.h>
#include <fcin.h>
#include <nval.h>
#if KSHELL
# include "defs.h"
#else
# include <shell.h>
#endif /* KSHELL */
#include "argnod.h"
#include "test.h"
#include "lexstates.h"
#include "io.h"
#if _lib_iswblank < 0 /* set in lexstates.h to enable this code */
int
{
static int initialized;
if (!initialized)
{
initialized = 1;
}
}
#endif
/*
* This structure allows for arbitrary depth nesting of (...), {...}, [...]
*/
struct lexstate
{
/* NOTE(review): no members are visible in this view; ISO C requires at least one member, so confirm the definition against the full file */
};
/*
 * Private bookkeeping for the lexical analyzer.
 * Code in this file reaches it as lp->lexd (e.g. lp->lexd.first), so it is
 * presumably embedded in Lex_t through _SHLEX_PRIVATE -- TODO confirm in shlex.h.
 * Member order and types are part of that layout; do not reorder.
 * Notes marked "presumably" are inferred from names or nearby comments only.
 */
struct lexdata
{
char nocopy;		/* when off, the current token is saved on the stack on a buffer-boundary read */
char paren;
char dolparen;
char nest;
char docword;
char nested_tilde;
char *docend;
char noarg;
char balance;
char warn;
char message;
char arith;
char *first;		/* base pointer that the SHOPT_KIA code adds offsets to; presumably start of current input -- confirm */
int level;
int lastc;
int lex_max;		/* presumably capacity of lex_match, enlarged by stack_grow() -- confirm */
int *lex_match;		/* presumably the stack for nested constructs -- confirm */
int lex_state;
int docextra;
#if SHOPT_KIA
/* NOTE(review): SHOPT_KIA-only members (e.g. the kiaoff used via lp->lexd.kiaoff) are not visible here */
#endif
};
#define _SHLEX_PRIVATE \
#include "shlex.h"
#ifdef SF_BUFCONST
#else
#endif
static void nested_here(Lex_t*);
static int stack_grow(Lex_t*);
#if SHOPT_KIA
{
unsigned long r;
{
r=kiaentity(lp,lp->lexd.first+lp->lexd.kiaoff+type,off-lp->lexd.kiaoff,'v',-1,-1,lp->current,'v',0,"");
}
else
{
{
/* variable starts on stak, copy remainder */
}
else
{
/* variable in data buffer */
}
}
}
#endif /* SHOPT_KIA */
/*
* This routine gets called when reading across a buffer boundary
* If lexd.nocopy is off, then current token is saved on the stack
*/
{
#if KSHELL
/* write to history file and to stderr if necessary */
{
if(sh_isstate(SH_VERBOSE))
}
#endif
return;
{
else
}
{
#if SHOPT_KIA
#endif /* SHOPT_KIA */
}
{
}
}
/*
* fill up another input buffer
* preserves lexical state
*/
{
register int c;
c = fcfill();
if(ap)
if(fcfile() || c)
{
}
return(c);
}
/*
* mode=1 for reinitialization
*/
{
if(!lp)
{
}
lp->comp_assign = 0;
if(!mode)
{
}
return(lp);
}
#ifdef DBUG
{
register int flag;
{
macro = "macro:";
expand = "expand:";
if(flag&ARG_QUOTED)
quoted = "quoted:";
}
return(tok);
}
#endif
/*
* Get the next word and put it on the top of the stak
* A pointer to the current word is stored in lp->arg
* Returns the token type
*/
{
register const char *state;
int epatchar=0;
#if SHOPT_MULTIBYTE
LEN=1;
#endif /* SHOPT_MULTIBYTE */
{
}
{
if(c=='[')
}
else
{
}
{
{
}
else if(c=='<')
{
}
else if(c>0)
}
{
else
}
while(1)
{
/* skip over characters in the current state */
switch(n)
{
case S_BREAK:
goto breakloop;
case S_EOF:
{
fcseek(-1);
continue;
}
/* check for zero byte in file */
if(n==0 && fcfile())
{
if(shp->readscript)
{
}
else
{
}
}
/* end-of-file */
{
{
case '$':
{
c = '\'';
break;
}
continue;
case RBRACT:
c = LBRACT;
break;
case 1: /* for ((...)) */
case RPAREN:
c = LPAREN;
break;
default:
c = LBRACE;
break;
case '"': case '`': case '\'':
break;
}
{
}
}
goto breakloop;
case S_COM:
/* skip one or more comment line(s) */
do
{
while(fcgetc(c)>0 && c!='\n');
{
break;
}
fcseek(1);
fcseek(1);
}
while(c=='#');
if(c<0)
n = S_NLTOK;
/* FALL THRU */
case S_NLTOK:
/* check for here-document */
{
{
}
}
/* FALL THRU */
case S_NL:
/* skip over new-lines */
if(n==S_NLTOK)
{
lp->comp_assign = 0;
}
case S_BLNK:
continue;
/* implicit RPAREN for =~ test operator */
{
c = RPAREN;
goto do_pop;
}
continue;
case S_OP:
/* return operator token */
if(c=='<' || c=='>')
{
else
{
}
}
else
{
if(c==RPAREN)
{
}
}
if(fcgetc(n)>0)
{
if(n==c)
{
if(c=='<')
else if(n==LPAREN)
{
return(c);
fcseek(1);
}
c |= SYMREP;
}
else if(c=='(' || c==')')
else if(c=='&')
{
{
{
}
c = '>';
}
else if(n=='|')
c |= SYMPIPE;
else
n = 0;
}
else if(n=='&')
c |= SYMAMP;
else if(c!='<' && c!='>')
n = 0;
else if(n==LPAREN)
{
c |= SYMLPAR;
}
else if(n=='|')
c |= SYMPIPE;
else if(c=='<' && n=='>')
{
c = IORDWRSYM;
fcgetc(n);
if(fcgetc(n)==';')
{
}
else if(n>0)
n= 0;
}
else if(n=='#' && (c=='<'||c=='>'))
c |= SYMSHARP;
else if(n==';' && c=='>')
{
c |= SYMSEMI;
{
}
}
else
n = 0;
if(n)
{
fcseek(1);
}
else
{
}
}
else
lp->comp_assign = 0;
case S_ESC:
/* check for \<new-line> */
fcgetc(n);
c=2;
#if SHOPT_CRNL
if(n=='\r')
{
if(fcgetc(n)=='\n')
c=3;
else
{
n='\r';
}
}
#endif /* SHOPT_CRNL */
if(n=='\n')
{
/* synchronize */
fcclose();
if(sp)
else
/* remove \new-line */
if(n<=ARGVAL)
{
mode = 0;
}
continue;
}
wordflags |= ARG_QUOTED;
goto err;
#ifndef STR_MAXIMAL
)
#endif /* STR_MAXIMAL */
break;
case S_NAME:
/* FALL THRU */
case S_TILDE:
{
{
goto tilde;
}
continue;
}
case S_RES:
if(c=='.')
if(n!=S_TILDE)
continue;
fcgetc(n);
if(n>0)
{
if(c=='~' && n==LPAREN)
{
}
{
continue;
}
}
if(n==LPAREN)
goto epat;
continue;
case S_REG:
{
/* skip new-line joining */
{
fcseek(1);
continue;
}
{
continue;
}
}
continue;
case S_LIT:
{
{
fcseek(2);
}
continue;
}
wordflags |= ARG_QUOTED;
{
goto err;
{
}
}
{
continue;
}
/* check for multi-line single-quoted string */
break;
case S_ESC2:
/* \ inside '' */
{
fcgetc(n);
if(n=='\n')
}
continue;
case S_GRAVE:
/* FALL THRU */
case S_QUOTE:
{
continue;
}
if(n==S_QUOTE)
{
{
}
ingrave ^= (c=='`');
continue;
}
{
}
else if(c=='"' && n==RBRACE)
break;
case S_DOL:
/* don't check syntax inside `` */
continue;
#if SHOPT_KIA
else
#endif /* SHOPT_KIA */
continue;
case S_PAR:
continue;
case S_RBRA:
goto err;
{
}
break;
case S_EDOL:
/* end $identifier */
#if SHOPT_KIA
#endif /* SHOPT_KIA */
break;
case S_DOT:
/* make sure next character is alpha */
if(fcgetc(n)>0)
{
if(n=='.')
fcgetc(n);
if(n>0)
}
continue;
{
if(n=='=')
continue;
break;
}
else if(n==RBRACE)
continue;
if(isastchar(n))
continue;
goto err;
case S_SPC1:
{
continue;
}
/* FALL THRU */
case S_ALP:
goto err;
case S_SPC2:
case S_DIG:
{
case '$':
if(n==S_ALP) /* $identifier */
mode = ST_DOLNAME;
else
{
}
break;
#if SHOPT_TYPEDEF
case '@':
#endif /* SHOPT_TYPEDEF */
case '!':
if(n!=S_ALP)
goto dolerr;
case '#':
if(c=='#')
n = S_ALP;
case RBRACE:
if(n==S_ALP)
{
if(c=='.')
}
else
{
if(fcgetc(c)>0)
goto err;
if(n==S_DIG)
else
}
break;
case '0':
if(n==S_DIG)
break;
default:
goto dolerr;
}
break;
case S_ERR:
goto err;
{
/* see whether inside `...` */
goto err;
}
else
continue;
case S_MOD1:
{
/* allow ' inside "${...}" */
if(c==':' && fcgetc(n)>0)
{
n = state[n];
}
if(n==S_MOD1)
{
continue;
}
}
/* FALL THRU */
case S_MOD2:
#if SHOPT_KIA
#endif /* SHOPT_KIA */
if(c!=':' && fcgetc(n)>0)
{
if(n!=c)
c = 0;
if(!c || (fcgetc(n)>0))
{
if(n==LPAREN)
{
if(c!='%')
{
}
}
}
}
continue;
case S_LBRA:
{
if(fcgetc(c)>0)
continue;
{
c = LBRACE;
goto do_comsub;
}
}
err:
if(iswalpha(c))
continue;
if(n!='$')
{
}
else
{
wordflags |= ARG_MESSAGE;
}
continue;
case S_META:
continue;
case S_PUSH:
fcgetc(n);
if(n==RPAREN)
continue;
else
continue;
case S_POP:
{
continue;
}
break;
{
goto breakloop;
}
continue;
{
{
if(c!=RPAREN)
continue;
}
if(n>0)
n = RPAREN;
}
if(c==RBRACE)
if(c==';' && n!=';')
{
continue;
}
{
continue;
}
if(epatchar!='~')
epatchar = '@';
/* quotes in subscript need expansion */
/* check for ((...)) */
if(n==1 && c==RPAREN)
{
{
goto breakloop;
}
/* backward compatibility */
{
else
{
fcseek(n);
}
}
}
return(0);
if(c!=n)
{
}
goto epat;
continue;
case S_EQ:
/* FALL THRU */
case S_COLON:
if(assignment)
{
if(fcgetc(c)=='~')
assignment = 0;
if(c!=EOF)
}
break;
case S_LABEL:
{
c = fcget();
{
assignment = -1;
goto breakloop;
}
}
break;
case S_BRACT:
/* check for possible subscript */
{
fcgetc(n);
if(n>0 && n==']')
{
continue;
}
else
wordflags |= ARG_QUOTED;
continue;
}
break;
case S_BRACE:
{
int isfirst;
{
{
fcgetc(n);
if(n>0)
else
n = '\n';
}
break;
}
{
continue;
}
{
goto do_reg;
}
fcgetc(n);
/* check for {} */
break;
if(n>0)
break;
/* check for reserved word { or } */
break;
{
}
goto epat;
break;
}
case S_PAT:
/* FALL THRU */
case S_EPAT:
epat:
{
epatchar = c;
{
}
continue;
}
if(n>0)
continue;
break;
}
lp->comp_assign = 0;
return(0);
}
{
return(0);
}
{
}
if(n>0)
/* add balancing character if necessary */
{
}
if(n==1)
{
/* check for numbered redirection */
n = state[0];
{
return(c);
}
if(n==LBRACT)
c = 0;
else if(n=='~')
c = ARG_MAC;
else
n = 1;
}
else if(n>2 && state[0]=='{' && state[n-1]=='}' && !lp->lex.intest && !lp->lex.incase && (c=='<' || c== '>') && sh_isoption(SH_BRACEEXPAND))
{
{
}
c = wordflags;
}
else
c = wordflags;
if(assignment<0)
{
}
c &= ~ARG_EXP;
if((c&ARG_EXP) && (c&ARG_QUOTED))
c |= ARG_MAC;
{
/* eliminate trailing )) */
}
if(c&ARG_MESSAGE)
{
if(sh_isoption(SH_DICTIONARY))
c |= ARG_MAC;
}
{
}
else
if(assignment)
{
{
{
}
{
}
else
{
}
}
switch(c)
{
case TEST_END:
case TEST_SEQ:
/* FALL THRU */
default:
{
if(c&TEST_PATTERN)
else if(c==TEST_REP)
}
case 0:
}
}
{
/* check for {, }, ! */
c = state[0];
if(n==1 && (c=='{' || c=='}' || c=='!'))
{
}
{
}
}
c = 0;
{
{
{
else if(c==ESACSYM)
else
c = 0;
}
{
}
else
if(c==INSYM)
else if(c==TIMESYM)
{
/* yech - POSIX requires time -p */
if(n>0)
if(n=='-')
c=0;
}
}
{
/* check for aliases */
#if KSHELL
#endif /* KSHELL */
{
}
}
}
}
/*
* read to end of command substitution
*/
{
else
if(off<0)
if(off<0)
*cp = c;
{
{
{
fcseek(n);
}
count++;
fcgetc(c);
}
while(1)
{
/* look for case and esac */
n=0;
while(1)
{
fcgetc(c);
/* skip leading white space */
if(n==0 && !sh_lexstates[ST_BEGIN][c])
continue;
if(n==4)
break;
if(sh_lexstates[ST_NAME][c])
goto skip;
word[n++] = c;
}
{
}
skip:
if(c && (c!='#' || n==0))
switch(c)
{
case LBRACE:
{
count++;
}
break;
case RBRACE:
goto done;
if(count==1)
break;
case LPAREN:
count++;
break;
case RPAREN:
goto done;
break;
case EOFSYM:
case IOSEEKSYM:
if(fcgetc(c)!='#' && c>0)
break;
case IODOCSYM:
break;
case 0:
break;
case ';':
do
fcgetc(c);
while(!sh_lexstates[ST_BEGIN][c]);
goto rbrace;
if(c>0)
/* fall through*/
default:
}
}
}
done:
return(messages);
}
/*
* here-doc nested in $(...)
* allocate ionode with delimiter filled in without disturbing stak
*/
{
register int n=0,offset;
char *base;
{
}
if(offset)
else
}
/*
* skip to <close> character
* if <copy> is non-zero, then the characters are copied to the stack
* <state> is the initial lexical state
*/
{
register char *cp;
if(copy)
else
if(copy)
{
}
else
}
#if SHOPT_CRNL
{
int m=0,k;
if(*++next=='\n')
{
{
return(m>0?m:-1);
m += k;
}
}
return(m>0?m:-1);
return(m+k);
}
#endif /* SHOPT_CRNL */
/*
* read in here-document from script
* quoted here documents, and here-documents without special chars are
* noted with the IOQUOTE flag
* returns 1 for complete here-doc, 0 for EOF
*/
{
register const char *state;
register int c,n;
{
if(fcfill()>0)
}
/* check for and strip quoted characters in delimiter string */
{
/* skip over leading tabs in document */
{
{
if(c==' ')
stripcol++;
else
}
}
else
while(fcgetc(c)=='\t');
if(c>0)
}
else
n = S_NL;
while(1)
{
if(n!=S_NL)
{
/* skip over regular characters */
#if SHOPT_MULTIBYTE
do
{
{
n = S_EOF;
break;
}
}
#endif /* SHOPT_MULTIBYTE */
}
{
if(LEN < 0)
else
{
if(n==S_ESC)
c--;
}
#if SHOPT_MULTIBYTE
if(LEN==0)
LEN=1;
if(LEN < 0)
{
n = LEN;
LEN += n;
}
else
#endif /* SHOPT_MULTIBYTE */
if(c<0)
break;
if(n==S_ESC)
{
#if SHOPT_CRNL
#endif /* SHOPT_CRNL */
if(c==NL)
fcseek(1);
{
}
}
}
else
switch(n)
{
case S_NL:
{
{
/* write out line */
}
/* skip over tabs */
if(stripcol)
{
int col=0;
do
{
fcgetc(c);
if(c==' ')
col++;
else
break;
}
while (c==' ' || c=='\t');
}
else while(c=='\t')
fcgetc(c);
if(c<=0)
goto done;
}
break;
nsave = n = 0;
while(1)
{
if(!(c=fcget()))
{
{
}
nsave = n;
{
goto done;
}
}
#if SHOPT_CRNL
{
if(c)
c='\r';
}
#endif /* SHOPT_CRNL */
if(c==NL)
{
{
}
if(c==RPAREN)
goto done;
}
{
/*
* The match for delimiter failed.
* nsave>0 only when a buffer boundary
* was crossed while checking the
* delimiter
*/
{
}
if(c==NL)
break;
}
}
break;
case S_ESC:
n=1;
#if SHOPT_CRNL
if(c=='\r')
{
fcseek(1);
if(c=fcget())
if(c==NL)
n=2;
else
{
special++;
break;
}
}
#endif /* SHOPT_CRNL */
if(c==NL)
{
/* new-line joining */
{
}
}
else
special++;
fcget();
break;
case S_GRAVE:
case S_DOL:
special++;
break;
}
n=0;
}
done:
else if(!special)
return(c);
}
/*
* generates string for given token
*/
{
int n=1;
if(sym < 0)
return((char*)sh_translate(e_lexzerobyte));
if(sym==0)
{
tp++;
}
return((char*)sh_translate(e_endoffile));
return((char*)sh_translate(e_newline));
else
{
{
case SYMAMP:
sym = '&';
break;
case SYMPIPE:
sym = '|';
break;
case SYMGT:
sym = '>';
break;
case SYMLPAR:
break;
case SYMSHARP:
sym = '#';
break;
case SYMSEMI:
if(tok[0]=='<')
tok[n++] = '>';
sym = ';';
break;
default:
sym = 0;
}
}
tok[n] = 0;
return(tok);
}
/*
* print a bad syntax message
*/
{
register char *tokstr;
{
}
else
{
/* clear out any pending input */
while(fcget()>0);
fcclose();
}
else
fcclose();
#if KSHELL
#else
#endif
else
}
{
register char *ep;
while(left--)
return(sp);
}
/*
* Assumes that current word is unfrozen on top of the stak
* If <mode> is zero, gets rid of quoting, considers the argument as a string,
* and returns a pointer to the frozen arg
* If mode==1, just replace $"..." strings with international strings
* The result is left on the stak
* If mode==2, then each $"..." string is printed on standard output
*/
{
register int n;
int bracket=0;
#if SHOPT_MULTIBYTE
if(mbwide())
{
do
{
int len;
{
case -1: /* illegal multi-byte char */
case 0:
case 1:
break;
default:
/*
* None of the state tables contain
* entries for multibyte characters,
* however, they should be treated
* the same as any other alph
* character. Therefore, we'll use
* the state of the 'a' character.
*/
n=state['a'];
}
}
while(n == 0);
}
else
#endif /* SHOPT_MULTIBYTE */
if(mode<0)
inquote = 1;
while(1)
{
switch(n)
{
case S_EOF:
if(mode<=0)
{
}
return(argp);
case S_LIT:
if(!(inquote&1))
{
{
dp--;
if(ep)
{
*dp = 0;
}
ep = 0;
}
}
break;
case S_QUOTE:
break;
if(!inlit)
{
if(mode<=0)
dp--;
if(ep)
{
char *msg;
if(mode==2)
{
ep = 0;
break;
}
*--dp = 0;
#if ERROR_VERSION >= 20000317L
#else
# if ERROR_VERSION >= 20000101L
# else
# endif
#endif
{
}
*dp++ = '"';
}
ep = 0;
}
break;
case S_DOL: /* check for $'...' and $"..." */
if(inlit)
break;
{
inquote <<= 1;
break;
}
if(inquote&1)
break;
{
if(*sp=='"')
inquote |= 1;
else
inlit = 1;
sp++;
{
if(mode==2)
else if(mode==1)
else
}
}
break;
case S_ESC:
#if SHOPT_CRNL
sp++;
#endif /* SHOPT_CRNL */
{
if(mode<0)
{
{
}
*dp++ = '\\';
}
if(ep)
break;
}
n = *sp;
#if SHOPT_DOS
break;
#endif /* SHOPT_DOS */
{
if(n=='\n')
dp--;
else
dp[-1] = n;
sp++;
}
break;
case S_POP:
break;
{
inquote >>= 1;
if(xp)
xp = 0;
inquote = 1;
}
{
{
}
*dp++ = ']';
}
break;
case S_BRACT:
if(mode<0)
{
{
{
}
*dp++ = '[';
}
else if(bracket++==0)
inquote = 0;
}
break;
}
#if SHOPT_MULTIBYTE
if(mbwide())
{
do
{
int len;
{
case -1: /* illegal multi-byte char */
case 0:
case 1:
break;
default:
/*
* None of the state tables contain
* entries for multibyte characters,
* however, they should be treated
* the same as any other alph
* character. Therefore, we'll use
* the state of the 'a' character.
*/
while(len--)
n=state['a'];
}
}
while(n == 0);
}
else
#endif /* SHOPT_MULTIBYTE */
}
}
/*
 * Record used by the alias handling code in this file.
 * Only the two members below are visible in this view; their meaning is
 * inferred from the names and should be confirmed against the full file.
 */
struct alias
{
int nextc;	/* presumably a saved look-ahead character -- confirm */
int line;	/* presumably a saved line number -- confirm */
};
/*
* This code gets called whenever an end of string is found with alias
*/
#ifndef SF_ATEXIT
# define SF_ATEXIT 0
#endif
/*
* This code gets called whenever an end of string is found with alias
*/
#ifdef SF_BUFCONST
#else
#endif
{
return(0);
{
if(type==SF_CLOSING)
{
}
goto done;
}
{
/* if last character is a blank, then next work can be alias */
register int c = fcpeek(-1);
if(isblank(c))
return(1);
}
done:
if(np)
return(0);
}
{
{
#if SHOPT_KIA
{
unsigned long r;
sfprintf(lp->kiatmp,"p;%..64d;p;%..64d;%d;%d;e;\n",lp->current,r,lp->sh->inlineno,lp->sh->inlineno);
}
#endif /* SHOPT_KIA */
}
else
fcclose();
}
/*
* grow storage stack for nested constructs by STACK_ARRAY
*/
{
else
}