compile.c revision 84441f85b19f6b8080883f30109e58e43c893709
/*
* Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1992 Diomidis Spinellis.
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Diomidis Spinellis of Imperial College, University of London.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <libintl.h>
#include <note.h>
#include "defs.h"
#include "extern.h"
/*
 * LHSZ bounds the index loop in uselabel().
 * NOTE(review): presumably the number of slots in the label hash table
 * mentioned in uselabel()'s header comment -- confirm against the full
 * definition of struct labhash.
 */
#define LHSZ 128
/*
 * NOTE(review): only the first member of struct labhash is visible here;
 * the remaining members, the closing brace and the declarator appear to
 * be missing from this view.
 */
static struct labhash {
int lh_ref;
/* Forward declarations of the file-local (static) functions. */
static char *compile_addr(char *, struct s_addr *);
static char *compile_ccl(char **, char *);
static char *compile_delimited(char *, char *, int);
static char *compile_flags(char *, struct s_subst *);
static regex_t *compile_re(char *, int);
static char *compile_subst(char *, struct s_subst *);
static char *compile_text(void);
static char *compile_tr(char *, struct s_tr **);
static struct s_command
**compile_stream(struct s_command **);
static char *duptoeol(char *, const char *);
static void enterlabel(struct s_command *);
static struct s_command
*findlabel(char *);
static void uselabel(void);
/*
 * Command specification. This is used to drive the command parser.
 *
 * NOTE(review): the table rows further down carry three values each
 * (code, address count, argument kind such as ENDGROUP/LABEL/COMMENT),
 * but only two members are visible in this struct -- the third member
 * appears to be missing from this view.
 */
struct s_format {
char code; /* Command code */
int naddr; /* Number of address args */
};
/*
 * NOTE(review): the rows below are the tail of an initializer whose
 * declaration (and earlier rows) are not visible here.
 */
{'}', 0, ENDGROUP},
{':', 0, LABEL},
{'#', 0, COMMENT},
{'\0', 0, COMMENT},
};
/* The compiled program. */
/*
 * NOTE(review): the variable the comment above describes is not visible
 * in this view.
 */
/*
 * Compile the program into prog.
 * Initialise appends.
 *
 * The visible part calls uselabel() and then tests whether appendnum
 * is zero.
 * NOTE(review): the statements before uselabel() and the bodies around
 * the appendnum test are missing here (dangling "if" and "NULL)").
 */
void
compile(void)
{
uselabel();
if (appendnum == 0)
NULL)
}
/*
 * Advance the caller's local pointer `p` past any leading whitespace.
 * Does nothing when p is NULL.  The cast to unsigned char keeps the
 * argument to isspace() in its defined domain.
 */
#define EATSPACE() do { \
	if (p != NULL) { \
		for (; *p != '\0' && isspace((unsigned char)*p); p++) \
			continue; \
	} \
} while (0)
/*
 * Command-parsing loop.
 *
 * NOTE(review): this is presumably the body of compile_stream() (see the
 * prototype near the top of the file); the line carrying the function
 * name and parameter list, several local declarations and many
 * statements are missing from this view, so the comments below describe
 * only what the visible lines show.
 *
 * Visible behaviour:
 *  - a non-zero `stack` on the path that returns `link` is fatal
 *    ("unexpected EOF (pending }'s)");
 *  - input starting with '#' or an empty string is skipped, and a
 *    leading ';' jumps to the `semicolon` label;
 *  - up to two addresses are counted in `naddr`, the second one being
 *    introduced by ',';
 *  - the command is then handled by kind in the case labels below.
 */
static struct s_command **
{
char *p;
int naddr; /* Number of addresses */
stack = 0;
for (;;) {
if (stack != 0)
fatal(_("unexpected EOF (pending }'s)"));
return (link);
}
if (p) {
if (*p == '#' || *p == '\0')
continue;
else if (*p == ';') {
p++;
goto semicolon;
}
}
/* First parse the addresses */
naddr = 0;
/* Valid characters to start an address */
if (addrchar(*p)) {
naddr++;
EATSPACE(); /* EXTENSION */
if (*p == ',') {
p++;
EATSPACE(); /* EXTENSION */
naddr++;
== NULL)
EATSPACE();
} else
} else
nonsel: /* Now parse the command */
if (!*p)
fatal(_("command expected"));
break;
fatal(_("invalid command code %c"), *p);
fatal(_("command %c expects up to %d address(es), "
/* '!' is skipped and the command is re-parsed from the nonsel label. */
case NONSEL: /* ! */
p++;
EATSPACE();
goto nonsel;
/* Anything following '{' on the same line is parsed as a new command. */
case GROUP: /* { */
p++;
EATSPACE();
if (*p)
goto semicolon;
break;
case ENDGROUP:
/*
* Short-circuit command processing, since end of
* group is really just a noop.
*/
if (stack == 0)
fatal(_("unexpected }"));
/*FALLTHROUGH*/
/* Only optional whitespace and a ';' may follow these commands. */
case EMPTY: /* d D g G h H l n N p P q x = \0 */
p++;
EATSPACE();
if (*p == ';') {
p++;
goto semicolon;
}
if (*p)
fatal(_("extra characters at the end of %c "
break;
/* The command letter must be followed by a backslash ending the line. */
case TEXT: /* a c i */
p++;
EATSPACE();
if (*p != '\\')
fatal(_("command %c expects \\ "
p++;
EATSPACE();
if (*p)
fatal(_("extra characters after \\ "
"at the end of %c command"),
cmd->t = compile_text();
break;
case COMMENT: /* \0 # */
break;
case WFILE: /* w */
p++;
EATSPACE();
if (*p == '\0')
fatal(_("filename expected"));
if (aflag)
break;
case RFILE: /* r */
p++;
EATSPACE();
if (*p == '\0')
fatal(_("filename expected"));
else
break;
case BRANCH: /* b t */
p++;
EATSPACE();
if (*p == '\0')
else
break;
case LABEL: /* : */
p++;
EATSPACE();
if (strlen(p) == 0)
fatal(_("empty label"));
break;
/*
 * The pattern is read with compile_delimited(), then the replacement
 * with compile_subst() and the flags with compile_flags().
 */
case SUBST: /* s */
p++;
if (*p == '\0' || *p == '\\')
fatal(_("substitute pattern can not "
"be delimited by newline or backslash"));
NULL)
p = compile_delimited(p, re, 0);
if (p == NULL)
fatal(_("unterminated substitute pattern"));
/* Compile RE with no case sensitivity temporarily */
if (*re == '\0')
else
--p;
p = compile_subst(p, cmd->u.s);
p = compile_flags(p, cmd->u.s);
/* Recompile RE with case sens. from "I" flag if any */
if (*re == '\0')
else
EATSPACE();
if (*p == ';') {
p++;
goto semicolon;
}
break;
case TR: /* y */
p++;
p = compile_tr(p, &cmd->u.y);
EATSPACE();
if (*p == ';') {
p++;
goto semicolon;
}
if (*p)
fatal(_("extra text at the end of a "
"transform command"));
break;
}
}
}
/*
 * Read a delimited string.
 *
 * p points at the delimiter that opens the string and d at the buffer
 * that receives the processed text; is_tr is non-zero when the string
 * belongs to a transform (y) command.
 *
 * While copying:
 *   - a '[' (when '[' is not the delimiter) is handed to compile_ccl();
 *   - "\[" is copied unchanged;
 *   - "\<delimiter>" becomes the bare delimiter;
 *   - "\n" becomes a newline character;
 *   - "\\" is copied unchanged, or reduced to a single backslash when
 *     is_tr is set;
 *   - every other character, including other escapes, is copied as is.
 *
 * Returns a pointer to the first character after the closing delimiter,
 * with d NUL-terminated, or NULL if the string is not terminated.
 */
static char *
compile_delimited(char *p, char *d, int is_tr)
{
	const char delim = *p++;

	if (delim == '\0')
		return (NULL);
	if (delim == '\\')
		fatal(_("\\ can not be used as a string delimiter"));
	else if (delim == '\n')
		fatal(_("newline can not be used as a string delimiter"));

	while (*p != '\0') {
		/* Character after a backslash, or NUL if *p is not one. */
		const char esc = (*p == '\\') ? p[1] : '\0';

		if (*p == '[' && delim != '[') {
			d = compile_ccl(&p, d);
			if (d == NULL)
				fatal(_("unbalanced brackets ([])"));
			continue;
		}

		if (esc == '[') {
			/* Keep the backslash; the '[' is copied below. */
			*d++ = *p++;
		} else if (esc == delim) {
			/* Drop the backslash; the delimiter is copied below. */
			p++;
		} else if (esc == 'n') {
			*d++ = '\n';
			p += 2;
			continue;
		} else if (esc == '\\') {
			if (is_tr)
				p++;
			else
				*d++ = *p++;
		} else if (*p == delim) {
			*d = '\0';
			return (p + 1);
		}
		*d++ = *p++;
	}
	return (NULL);
}
/*
 * compile_ccl: copy a bracket expression verbatim.
 *
 * *sp points at the opening '[' of a bracket expression and t at the
 * destination buffer.  The expression is copied unchanged, including a
 * leading '^', a ']' that appears first in the list, and any nested
 * "[. .]", "[: :]" or "[= =]" element, whose inner ']' does not end the
 * expression.
 *
 * On success *sp is advanced to the character after the closing ']' and
 * the return value points just past the ']' written to the destination,
 * so the caller can keep appending there.  If the expression or a nested
 * element is not terminated, NULL is returned and *sp is left untouched.
 */
static char *
compile_ccl(char **sp, char *t)
{
	int c, d;
	char *s = *sp;

	*t++ = *s++;			/* the opening '[' */
	if (*s == '^')
		*t++ = *s++;
	if (*s == ']')			/* a leading ']' is a literal */
		*t++ = *s++;
	for (; *s && (*t = *s) != ']'; s++, t++) {
		if (*s == '[' &&
		    ((d = *(s+1)) == '.' || d == ':' || d == '=')) {
			/* Copy the opener's second character too. */
			*++t = *++s, t++, s++;
			/*
			 * Copy up to the ']' that directly follows the
			 * matching '.', ':' or '='; c holds the previous
			 * character.
			 */
			for (c = *s; (*t = *s) != ']' || c != d; s++, t++)
				if ((c = *s) == '\0')
					return (NULL);
		}
	}
	if (*s != ']')
		return (NULL);
	/* The closing ']' is already stored at *t; step past it. */
	*sp = s + 1;
	return (t + 1);
}
/*
 * Compiles the regular expression in RE and returns a pointer to the compiled
 * regular expression.
 * Cflags are passed to regcomp.
 *
 * NOTE(review): the line carrying the function name and parameters, the
 * local declarations and the regcomp call itself are missing from this
 * view; only the case_insensitive test and the return of `rep` remain.
 * Presumably this is compile_re(char *, int) from the prototype list --
 * confirm.
 */
static regex_t *
{
if (case_insensitive)
return (rep);
}
/*
 * Compile the substitution string of a regular expression and set res to
 * point to a saved copy of it. Nsub is the number of parenthesized regular
 * expressions.
 *
 * p points at the terminator character that opens the replacement text.
 * Returns NULL when that character is NUL; otherwise returns the position
 * reached after the closing terminator has been seen.
 *
 * NOTE(review): `res` and `Nsub` in the text above do not correspond to
 * any visible parameter; the visible code records back-reference
 * information in s->maxbref.  Several declarations (c, size, sp, sawesc,
 * ref, more) and statements are missing from this view.
 */
static char *
compile_subst(char *p, struct s_subst *s)
{
int asize;
c = *p++; /* Terminator character */
if (c == '\0')
return (NULL);
s->maxbref = 0;
size = 0;
do {
for (; *p; p++) {
if (*p == '\\' || sawesc) {
/*
* If this is a continuation from the last
* buffer, we won't have a character to
* skip over.
*/
if (sawesc)
sawesc = 0;
else
p++;
if (*p == '\0') {
/*
* This escaped character is continued
* in the next part of the line. Note
* this fact, then cause the loop to
* exit w/ normal EOL case and reenter
* above with the new buffer.
*/
sawesc = 1;
p--;
continue;
*sp++ = '\\';
ref = *p - '0';
fatal(_("not defined in "
"the RE: \\%c"), *p);
} else if (*p == '&' || *p == '\\')
*sp++ = '\\';
/* Closing terminator: NUL-terminate the copy and return. */
} else if (*p == c) {
if (*++p == '\0' && more) {
&more))
p = lbuf;
}
*sp++ = '\0';
return (p);
} else if (*p == '\n') {
fatal(_("unescaped newline inside "
"substitute pattern"));
/* NOTREACHED */
}
*sp++ = *p;
}
asize *= 2;
}
fatal(_("unterminated substitute in regular expression"));
return (NULL);
}
/*
 * Compile the flags of the s command
 *
 * Starting at p, reads flag characters into *s until a NUL, newline or
 * ';' is reached and returns a pointer to that character.  Defaults set
 * first: n = 1, p = 0, wfd = -1, icase = 0.
 *
 *   g    sets s->n to 0
 *   p    sets s->p
 *   I    sets s->icase
 *   1-9  start a number that is stored in s->n
 *   w    the rest of the line (up to a newline) is copied as a file name
 *
 * 'g' and a number are mutually exclusive and may each appear only once.
 *
 * NOTE(review): the declarations of `q` and `wfile`, the call that parses
 * the number into nval (and updates p), and the code that uses the w file
 * name are missing from this view; as shown, the overflow fatal() is
 * unconditional.
 */
static char *
compile_flags(char *p, struct s_subst *s)
{
int gn; /* True if we have seen g or n */
unsigned long nval;
s->n = 1; /* Default */
s->p = 0;
s->wfd = -1;
s->icase = 0;
gn = 0;
for (;;) {
EATSPACE(); /* EXTENSION */
switch (*p) {
case 'g':
if (gn)
fatal(_("more than one number or "
"'g' in substitute flags"));
gn = 1;
s->n = 0;
break;
case '\0':
case '\n':
case ';':
return (p);
case 'p':
s->p = 1;
break;
case 'I':
s->icase = 1;
break;
case '1': case '2': case '3':
case '4': case '5': case '6':
case '7': case '8': case '9':
if (gn)
fatal(_("more than one number or "
"'g' in substitute flags"));
gn = 1;
errno = 0;
fatal(_("overflow in the 'N' substitute flag"));
s->n = nval;
/* Step back one; the p++ after the switch re-advances. */
p--;
break;
case 'w':
p++;
#ifdef HISTORIC_PRACTICE
if (*p != ' ') {
fatal(_("space missing before w wfile"));
return (p);
}
#endif
EATSPACE();
q = wfile;
/* Copy the file name up to, but not including, a newline. */
while (*p) {
if (*p == '\n')
break;
*q++ = *p++;
}
*q = '\0';
if (q == wfile)
fatal(_("no wfile specified"));
return (p);
default:
fatal(_("bad flag in substitute command: '%c'"), *p);
break;
}
p++;
}
}
/*
 * Compile a translation set of strings into a lookup table.
 *
 * Returns the position in the input reached after both strings.
 *
 * NOTE(review): the line carrying the function name and parameters
 * (presumably compile_tr(char *, struct s_tr **) from the prototype
 * list), the allocation of `y`, the calls that read the source and
 * target strings, and most of the table-filling code are missing from
 * this view; as shown, both "unterminated" checks test p with nothing
 * in between and the length-mismatch fatal() is unconditional.
 */
static char *
{
struct s_tr *y;
int i;
y->nmultis = 0;
if (*p == '\0' || *p == '\\')
fatal(_("transform pattern can not be delimited by "
"newline or backslash"));
if (p == NULL)
fatal(_("unterminated transform source string"));
if (p == NULL)
fatal(_("unterminated transform target string"));
EATSPACE();
fatal(_("transform strings are not the same length"));
if (MB_CUR_MAX == 1) {
/*
* The single-byte encoding case is easy: generate a
* lookup table.
*/
/* Start from the identity mapping for every byte value. */
for (i = 0; i <= UCHAR_MAX; i++)
y->bytetab[i] = (char)i;
} else {
/*
* Multi-byte encoding case: generate a lookup table as
* above, but only for single-byte characters. The first
* bytes of multi-byte characters have their lookup table
* entries set to 0, which causes do_tr() to search through
* an auxiliary vector of multi-byte mappings.
*/
for (i = 0; i <= UCHAR_MAX; i++)
while (*op != '\0') {
else {
i = y->nmultis++;
}
}
}
return (p);
}
/*
 * Compile the text following an a, c or i command.
 *
 * Reads from lbuf.  Within a line, a backslash followed by another
 * character is dropped and the following character is copied literally;
 * a backslash-escaped newline sets esc_nl.  A line without an escaped
 * newline ends the text.
 *
 * NOTE(review): the declarations of `p`, `s`, `size` and `asize`, the
 * enclosing loop that fetches each line, and the buffer growth code are
 * missing from this view.  As shown the function returns `p`; confirm
 * against the complete source which pointer is meant to be returned.
 */
static char *
compile_text(void)
{
int esc_nl;
size = 0;
p = lbuf;
EATSPACE();
for (esc_nl = 0; *p != '\0'; p++) {
if (*p == '\\' && p[1] != '\0' && *++p == '\n')
esc_nl = 1;
*s++ = *p;
}
if (!esc_nl) {
*s = '\0';
break;
}
asize *= 2;
}
}
return (p);
}
/*
 * Get an address and return a pointer to the first character after
 * it. Fill the structure pointed to according to the address.
 *
 * Address forms handled by the switch on the first character:
 *   \cREc, /RE/  context address, optionally followed by 'I' for a
 *                case-insensitive match; an empty RE stores NULL in
 *                a->u.r
 *   $            last line
 *   +N           relative line number (a->type = AT_RELLINE)
 *   N            line number
 * Anything else is fatal.
 *
 * NOTE(review): the declarations of `re` and `end`, the assignments to
 * a->type for the '/' and '$' cases, the regex compilation and the
 * number parsing are missing from this view.
 */
static char *
compile_addr(char *p, struct s_addr *a)
{
int icase;
icase = 0;
a->type = 0;
switch (*p) {
case '\\': /* Context address */
++p;
/* FALLTHROUGH */
case '/': /* Context address */
p = compile_delimited(p, re, 0);
if (p == NULL)
fatal(_("unterminated regular expression"));
/* Check for case insensitive regexp flag */
if (*p == 'I') {
icase = 1;
p++;
}
if (*re == '\0')
a->u.r = NULL;
else
return (p);
case '$': /* Last line */
return (p + 1);
case '+': /* Relative line number */
a->type = AT_RELLINE;
p++;
/* FALLTHROUGH */
/* Line number */
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
/* a->type is still 0 here unless the '+' case set AT_RELLINE. */
if (a->type == 0)
return (end);
default:
fatal(_("expected context address"));
return (NULL);
}
}
/*
 * duptoeol --
 * Return a copy of all the characters up to \n or \0.
 *
 * NOTE(review): the line carrying the function name and parameters
 * (presumably duptoeol(char *, const char *) from the prototype list),
 * the scanning loop that sets `ws`, the action taken when `ws` is set
 * and the code that makes and returns the copy are missing from this
 * view.
 */
static char *
{
int ws;
char *p, *start;
ws = 0;
*s = '\0';
if (ws)
}
/*
 * Convert goto label names to addresses, and count a and r commands, in
 * the given subset of the script. Free the memory used by labels in b
 * and t commands (but not by :).
 *
 * TODO: Remove } nodes
 *
 * NOTE(review): the line carrying the function name and parameters, the
 * loop over the commands and the switch header are missing from this
 * view; only the case labels and the appendnum increment remain.
 */
static void
{
/* Each a or r command adds one to appendnum. */
case 'a':
case 'r':
appendnum++;
break;
case 'b':
case 't':
/* Resolve branch target. */
break;
}
break;
case '{':
/* Do interior commands. */
break;
}
}
/*
 * Associate the given command label for later lookup.
 *
 * The visible line updates a running hash h as h * 33 + c
 * ((h << 5) + h + c).
 * NOTE(review): the line carrying the function name and parameter
 * (presumably enterlabel(struct s_command *) from the prototype list),
 * the loop that feeds each character c into the hash and the table
 * insertion are missing from this view.
 */
static void
{
uchar_t *p;
uint_t h, c;
h = (h << 5) + h + c;
}
/*
 * Find the label contained in the command l in the command linked
 * list cp. L is excluded from the search. Return NULL if not found.
 *
 * The visible line updates a running hash h as h * 33 + c, the same
 * step that appears in the label-entering routine above.
 * NOTE(review): the line carrying the function name and parameter
 * (presumably findlabel(char *) from the prototype list, which does not
 * match the `l`/`cp` wording above), the hashing loop and the table
 * search are missing from this view.
 */
static struct s_command *
{
uchar_t *p;
uint_t h, c;
h = (h << 5) + h + c;
}
}
return (NULL);
}
/*
 * Warn about any unused labels. As a side effect, release the label hash
 * table space.
 *
 * Iterates over LHSZ slots and reports unused labels with warnx().
 * NOTE(review): the per-slot traversal, the test that decides a label is
 * unused, the warnx() arguments and the code that frees each entry are
 * missing from this view.
 */
static void
uselabel(void)
{
int i;
for (i = 0; i < LHSZ; i++) {
warnx(_("%lu: %s: unused label '%s'"),
}
}
}