cmd/awk/awk.lx.l

	awk.lx.l revision 7c478bd95313f5f23a4c958a745db2134aa03244
%{
/*  Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
%}
%{
/*    All Rights Reserved   */
%}

%{
/*                              */
%}

%{
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/* Copyright (c) 1996, 2001 by Sun Microsystems, Inc.           */
/* All rights reserved.                                         */
%}
%{
/*                              */
%}

%{
#ident  "%Z%%M% %I% %E% SMI"    /* SVr4.0 2.9   */
%}

%Start A str sc reg comment

%{
/*
 * Start conditions declared by the %Start line above, as the rules in
 * this file use them:
 *   A        ordinary awk program text; every token rule is tagged <A>
 *   str      inside a double-quoted string; the body is collected in cbuf
 *   reg      inside a regular expression; the body is collected in cbuf
 *   sc       entered by the closing-brace rule; the code at the head of
 *            the rules section then returns the brace on the next call
 *   comment  declared, but no rule in this file is tagged with it
 */

#include    <sys/types.h>
#include    "awk.h"
#include    "y.tab.h"

/* input() and unput() are defined as functions at the end of this file. */
#undef  input   /* defeat lex */
#undef  unput

extern YYSTYPE  yylval;
extern int  infunc;     /* tested by the func, return and identifier rules */

off_t   lineno  = 1;    /* incremented by every rule that matches a newline */
/* Counts of currently open braces, brackets and parentheses in state A. */
int bracecnt = 0;
int brackcnt  = 0;
int parencnt = 0;
/*
 * DEBUG is defined unconditionally here, so the tracing form of RET is
 * the one compiled in.  It prints the token name and text only when dbg
 * is nonzero; in that case the argument x is evaluated twice.
 */
#define DEBUG
#ifdef  DEBUG
#   define  RET(x)  {if(dbg)printf("lex %s [%s]\n", tokname(x), yytext); return(x); }
#else
#   define  RET(x)  return(x)
#endif

/*
 * CADD appends yytext[0] to cbuf and, once clen has reached RECSIZE-1,
 * reports a syntax error and returns to state A.  It expands to more
 * than one statement, so it must be used inside braces.
 */
#define CADD    cbuf[clen++] = yytext[0]; \
        if (clen >= RECSIZE-1) { \
            ERROR "string/reg expr %.10s... too long", cbuf SYNTAX; \
            BEGIN A; \
        }

uchar   cbuf[RECSIZE];  /* body of the string or regular expression being read */
uchar   *s;             /* scratch: string value built by the closing-quote rule */
int clen, cflag;        /* clen: bytes used in cbuf; cflag is not referenced in this file */
/*
 * Named patterns defined below: A starts an identifier, B continues
 * one, D is a decimal digit, O an octal digit, H a hexadecimal digit
 * and WS a blank or tab.  The name A is also a start condition: in a
 * rule, {A} is the character class and <A> is the start condition.
 */
%}

A   [a-zA-Z_]
B   [a-zA-Z0-9_]
D   [0-9]
O   [0-7]
H   [0-9a-fA-F]
WS  [ \t]

%%
    /*
     * Code placed ahead of the first rule.
     * NOTE(review): lex normally copies such code to the top of yylex,
     * so it runs on every call; confirm for the lex in use.
     * The switch value is presumably the number of the current start
     * condition, recovered from lex internals.
     *   case 0:  no start condition chosen yet; enter A, the only state
     *            with token rules.
     *   case sc: the previous call matched a closing brace and returned
     *            a semicolon in its place; return the brace itself now
     *            and resume in A.
     */
    switch (yybgin-yysvec-1) {  /* witchcraft */
    case 0:
        BEGIN A;
        break;
    case sc:
        BEGIN A;
        RET('}');
    }

<A>\n       { lineno++; RET(NL); }  /* count the line; the newline itself is returned as NL */
<A>#.*      { ; }   /* strip comments */
<A>{WS}+    { ; }   /* blanks and tabs produce no token */
<A>;        { RET(';'); }

<A>"\\"\n   { lineno++; }   /* backslash-newline: count the line, return nothing */
<A>BEGIN    { RET(XBEGIN); }
<A>END      { RET(XEND); }
<A>func(tion)?  { if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); }  /* func or function; FUNC is returned even after the error */
<A>return   { if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); }    /* RETURN is returned even after the error */
<A>"&&"     { RET(AND); }
<A>"||"     { RET(BOR); }
<A>"!"      { RET(NOT); }
<A>"!="     { yylval.i = NE; RET(NE); }     /* relational operators also store their code in yylval.i */
<A>"~"      { yylval.i = MATCH; RET(MATCHOP); }     /* both match operators return MATCHOP; yylval.i tells them apart */
<A>"!~"     { yylval.i = NOTMATCH; RET(MATCHOP); }
<A>"<"      { yylval.i = LT; RET(LT); }
<A>"<="     { yylval.i = LE; RET(LE); }
<A>"=="     { yylval.i = EQ; RET(EQ); }
<A>">="     { yylval.i = GE; RET(GE); }
<A>">"      { yylval.i = GT; RET(GT); }
<A>">>"     { yylval.i = APPEND; RET(APPEND); }
<A>"++"     { yylval.i = INCR; RET(INCR); }
<A>"--"     { yylval.i = DECR; RET(DECR); }
<A>"+="     { yylval.i = ADDEQ; RET(ASGNOP); }      /* every assignment form returns ASGNOP; yylval.i holds the kind */
<A>"-="     { yylval.i = SUBEQ; RET(ASGNOP); }
<A>"*="     { yylval.i = MULTEQ; RET(ASGNOP); }
<A>"/="     { yylval.i = DIVEQ; RET(ASGNOP); }
<A>"%="     { yylval.i = MODEQ; RET(ASGNOP); }
<A>"^="     { yylval.i = POWEQ; RET(ASGNOP); }
<A>"**="    { yylval.i = POWEQ; RET(ASGNOP); }      /* second spelling of the power assignment */
<A>"="      { yylval.i = ASSIGN; RET(ASGNOP); }
<A>"**"     { RET(POWER); }     /* both spellings of the power operator return POWER */
<A>"^"      { RET(POWER); }

<A>"$"{D}+  { yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); }   /* dollar plus digits: the number is converted with atoi and passed to fieldadr */
<A>"$NF"    { unputstr("(NF)"); return(INDIRECT); }  /* push back (NF) and return the dollar alone; plain return, so no RET trace */
<A>"$"{A}{B}*   { int c, n;
          c = input(); unput(c);    /* peek at the character right after the name */
          /*
           * Note that && binds tighter than ||: the test is true for a
           * following parenthesis, a following bracket, or, while infunc
           * is set, a name for which isarg returns a nonnegative value.
           * n is assigned here but not used afterwards in this action.
           */
          if (c == '(' || c == '[' || infunc && (n=isarg(yytext+1)) >= 0) {
            /* return the dollar alone and push the name back for rescanning */
            unputstr(yytext+1);
            return(INDIRECT);
          } else {
            /* dollar plus name is one IVAR token; yylval.cp is what setsymtab returns for the name without the dollar */
            yylval.cp = setsymtab(yytext+1,"",0.0,STR|NUM,symtab);
            RET(IVAR);
          }
        }
<A>"$"      { RET(INDIRECT); }  /* a dollar not matched by the rules above */
<A>NF       { yylval.cp = setsymtab(yytext, "", 0.0, NUM, symtab); RET(VARNF); }    /* NF has its own token, VARNF */

<A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)? {
          /* numeric constant: digits with an optional point and exponent, entered in symtab under its own spelling */
          yylval.cp = setsymtab(yytext, tostring(yytext), atof(yytext), CON|NUM, symtab);
          RET(NUMBER); }

<A>while    { RET(WHILE); }     /* keywords come before the identifier rule: on equal-length matches lex picks the earlier rule */
<A>for      { RET(FOR); }
<A>do       { RET(DO); }
<A>if       { RET(IF); }
<A>else     { RET(ELSE); }
<A>next     { RET(NEXT); }
<A>exit     { RET(EXIT); }
<A>break    { RET(BREAK); }
<A>continue { RET(CONTINUE); }
<A>print    { yylval.i = PRINT; RET(PRINT); }       /* yylval.i repeats the token code, as for printf, sprintf, split, sub and gsub */
<A>printf   { yylval.i = PRINTF; RET(PRINTF); }
<A>sprintf  { yylval.i = SPRINTF; RET(SPRINTF); }
<A>split    { yylval.i = SPLIT; RET(SPLIT); }
<A>substr   { RET(SUBSTR); }
<A>sub      { yylval.i = SUB; RET(SUB); }
<A>gsub     { yylval.i = GSUB; RET(GSUB); }
<A>index    { RET(INDEX); }
<A>match    { RET(MATCHFCN); }  /* the function is MATCHFCN; MATCH is the code stored for the match operator */
<A>in       { RET(IN); }
<A>getline  { RET(GETLINE); }
<A>close    { RET(CLOSE); }
<A>delete   { RET(DELETE); }
<A>length   { yylval.i = FLENGTH; RET(BLTIN); }     /* from here on all names return BLTIN; yylval.i selects the function */
<A>log      { yylval.i = FLOG; RET(BLTIN); }
<A>int      { yylval.i = FINT; RET(BLTIN); }
<A>exp      { yylval.i = FEXP; RET(BLTIN); }
<A>sqrt     { yylval.i = FSQRT; RET(BLTIN); }
<A>sin      { yylval.i = FSIN; RET(BLTIN); }
<A>cos      { yylval.i = FCOS; RET(BLTIN); }
<A>atan2    { yylval.i = FATAN; RET(BLTIN); }
<A>system   { yylval.i = FSYSTEM; RET(BLTIN); }
<A>rand     { yylval.i = FRAND; RET(BLTIN); }
<A>srand    { yylval.i = FSRAND; RET(BLTIN); }
<A>toupper  { yylval.i = FTOUPPER; RET(BLTIN); }
<A>tolower  { yylval.i = FTOLOWER; RET(BLTIN); }

<A>{A}{B}*  { int n, c;
          /* only the character immediately after the name is examined */
          c = input(); unput(c);    /* look for '(' */
          if (c != '(' && infunc && (n=isarg(yytext)) >= 0) {
            /* while infunc is set, a name that isarg recognizes and that is not followed by a parenthesis: ARG carrying the value isarg returned */
            yylval.i = n;
            RET(ARG);
          } else {
            /* any other name goes through setsymtab; a following parenthesis makes it a CALL, anything else a VAR */
            yylval.cp = setsymtab(yytext,"",0.0,STR|NUM,symtab);
            if (c == '(') {
                RET(CALL);
            } else {
                RET(VAR);
            }
          }
        }
<A>\"       { BEGIN str; clen = 0; }    /* opening quote: switch to state str with cbuf empty */

<A>"}"      { if (--bracecnt < 0) ERROR "extra }" SYNTAX; BEGIN sc; RET(';'); }     /* returns a semicolon now; state sc makes the next call return the brace */
<A>"]"      { if (--brackcnt < 0) ERROR "extra ]" SYNTAX; RET(']'); }       /* the token is returned even after the error */
<A>")"      { if (--parencnt < 0) ERROR "extra )" SYNTAX; RET(')'); }

<A>.        { if (yytext[0] == '{') bracecnt++;
          else if (yytext[0] == '[') brackcnt++;
          else if (yytext[0] == '(') parencnt++;
          /* any other single character is its own token and value; the three opening delimiters also bump their counter */
          RET(yylval.i = yytext[0]); /* everything else */ }

<reg>\\.    { /* keep backslash escapes verbatim; both bytes go through CADD so the cbuf bound is always checked */
          CADD;     /* yytext[0] is the backslash */
          if (clen < RECSIZE-1) {   /* CADD found room, so the scanner is still in state reg */
            yytext[0] = yytext[1];  /* stage the escaped character for CADD */
            CADD;
          }
        }
<reg>\n     { ERROR "newline in regular expression %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
<reg>"/"    { BEGIN A;
          cbuf[clen] = 0;
          yylval.s = tostring(cbuf);
          unput('/');   /* the slash is rescanned in state A, where the catch-all rule returns it */
          RET(REGEXPR); }
<reg>.      { CADD; }

<str>\"     { BEGIN A;
          /* s receives a copy of the text; the symbol is entered under the text plus one trailing blank */
          cbuf[clen] = 0; s = tostring(cbuf);
          cbuf[clen] = ' '; cbuf[++clen] = 0;   /* in bounds because every append below goes through CADD */
          yylval.cp = setsymtab(cbuf, s, 0.0, CON|STR, symtab);
          RET(STRING); }
<str>\n     { ERROR "newline in string %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
<str>"\\\"" { /* escape rules stage the decoded character in yytext[0] so that CADD appends it with its bound check */
          yytext[0] = '"'; CADD; }
<str>"\\"n  { yytext[0] = '\n'; CADD; }
<str>"\\"t  { yytext[0] = '\t'; CADD; }
<str>"\\"f  { yytext[0] = '\f'; CADD; }
<str>"\\"r  { yytext[0] = '\r'; CADD; }
<str>"\\"b  { yytext[0] = '\b'; CADD; }
<str>"\\"v  { yytext[0] = '\v'; CADD; }    /* these ANSIisms may not be known by */
<str>"\\"a  { yytext[0] = '\007'; CADD; }  /* your compiler. hence 007 for bell */
<str>"\\\\" { CADD; }   /* yytext[0] is already the backslash to store */
<str>"\\"({O}{O}{O}|{O}{O}|{O}) { unsigned int n;   /* sscanf octal conversion stores an unsigned int */
          sscanf(yytext+1, "%o", &n); yytext[0] = n; CADD; }
<str>"\\"x({H}+) { unsigned int n;   /* ANSI permits any number! */
          sscanf(yytext+2, "%x", &n); yytext[0] = n; CADD; }
<str>"\\".  { yytext[0] = yytext[1]; CADD; }    /* any other escape: keep the character, drop the backslash */
<str>.      { CADD; }

%%

/*
 * startreg: empty the collection buffer and put the scanner into the
 * reg start condition, so that the following input is read by the
 * <reg> rules.  NOTE(review): presumably called from the grammar when a
 * slash opens a regular expression; no caller is visible in this file.
 */
startreg()
{
    clen = 0;
    BEGIN reg;
}

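/* input() and unput() are transcriptions of the standard lex
   macros for reading and pushing back characters, with additions
   that record recent input in ebuf for error message printing.
   They depend on lex internals (yysptr, yysbuf, yytchar, yylineno).
   God help us all if someone changes how lex works.
*/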

/*
 * Circular record of the characters most recently handed out by input().
 * ep is the next slot to fill; input() advances it and wraps at the end
 * of ebuf, and unput() steps it back.
 */
uchar   ebuf[300];
uchar   *ep = &ebuf[0];

/*
 * input: return the next character of the awk program, or 0 at end of
 * input.
 *
 * Characters pushed back with unput() are returned first.  Otherwise the
 * character comes from the command-line program text (lexprog) when there
 * is one, and from pgetc() when there is not.  Each character returned is
 * also recorded in the ebuf ring, and yylineno is advanced on a newline.
 */
input()
{
    register int c;
    extern uchar *lexprog;

    if (yysptr > yysbuf)        /* pushed-back characters come first */
        c = U(*--yysptr);
    else if (lexprog != NULL) { /* awk '...' */
        /*
         * Stay on the terminating NUL: a call made after end of input
         * must keep returning 0 rather than read beyond the string.
         */
        if ((c = *lexprog) != 0)
            lexprog++;
    } else              /* awk -f ... */
        c = pgetc();
    if (c == '\n')
        yylineno++;
    else if (c == EOF)
        c = 0;          /* end of input is reported as 0 */
    if (ep >= ebuf + sizeof ebuf)   /* wrap the ring before storing */
        ep = ebuf;
    return *ep++ = c;
}

/*
 * unput: push character c back so that the next input() returns it.
 *
 * The character is stacked on yysbuf through yysptr, yylineno is wound
 * back when c is a newline, and the ebuf ring pointer is stepped back by
 * one so that the ring stays in step with input().  No bound on yysbuf
 * is checked here.
 */
unput(c)
{
    yytchar = c;
    if (yytchar == '\n')
        yylineno--;
    *yysptr++ = yytchar;
    /*
     * Step ep back with wrap-around.  The test comes before the
     * decrement so that no pointer below the start of ebuf is formed.
     */
    if (ep == ebuf)
        ep = ebuf + sizeof(ebuf) - 1;
    else
        ep--;
}


/*
 * unputstr: push the whole string s back onto the input.
 *
 * The characters are handed to unput() last to first, so that input()
 * afterwards yields them in their original order.  An empty string
 * pushes nothing.
 */
unputstr(s)
    char *s;
{
    char *tail;

    for (tail = s + strlen(s); tail != s; )
        unput(*--tail);
}