sed1.c revision 3f54fd611f536639ec30dd53c48e5ec1897cc7d9
/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1995-2012 AT&T Intellectual Property *
* and is licensed under the *
* Eclipse Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
#include "sed.h"
#include <ctype.h>
#include <limits.h>
unsigned char *succi(unsigned char*);
#if DEBUG
#endif
/* NOTE(review): the declaration that opens this initializer is not visible.
   The table has 256 entries, apparently one per character code; the row
   comments name the characters whose entry differs from 3.  Presumably each
   entry is the largest number of addresses the command character accepts,
   with 3 marking characters that are not commands -- TODO confirm. */
0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, /* <nl> */
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 2, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* !# */
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 3, 1, 3, 3, /* :;= */
3, 3, 3, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 3, /* DGHN */
2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* P */
3, 1, 2, 2, 2, 3, 3, 2, 2, 1, 3, 3, 2, 3, 2, 3, /* a-n */
2, 1, 2, 2, 2, 3, 3, 2, 2, 2, 3, 2, 3, 0, 3, 3, /* p-y{} */
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
};
/* The compile loop stores the read pointer here at the top of each pass,
   before a command's addresses are looked at. */
unsigned char *synl; /* current line pointer for syntax errors */
/* COMMAND LAYOUT */
/* Consume one blank (space, tab or carriage return) at the read pointer.
   Returns 1 when a blank was skipped, 0 when the next character is not a
   blank (the pointer is then left alone).
   NOTE(review): the line naming this function and its parameter is not
   visible in this copy of the file. */
int
{
	switch(*t->w) {
	case ' ':
	case '\t':
	case '\r':
		t->w++;
		return 1;
	default:
		return 0;
	}
}
/* instr: return the address of the command word of the instruction at p.
   NOTE(review): q is neither declared nor derived from p in the visible
   text, and whatever decides how far to advance is missing; as written this
   cannot compile -- the lost lines must be restored from the upstream file. */
word *
instr(unsigned char *p) /* get address of command word */
{
q++;
return q;
}
unsigned char *
succi(unsigned char *p)
{
if(code(*q) == '{')
return (unsigned char*)(q+1);
else
return p + (*q & LMASK);
}
/* NOTE(review): fragment -- the return type, name and parameter list are not
   visible, and neither is the declaration of l.  The visible check reports a
   syntax error whenever length and l differ. */
{
if(length != l)
syntax("<command-list> or <text> too long");
}
/* Store the word n at the write pointer of s and advance the pointer by one
   word.
   NOTE(review): the line naming this function and its parameters is not
   visible, and nothing here checks that the buffer has room for the word. */
void
{
	word *slot = (word*)s->w;

	*slot = n;
	s->w += sizeof *slot;
}
/* Read an unsigned decimal number at the read pointer t->w, leaving the
   pointer on the first non-digit, and return its value (0 when no digit is
   present).  "number too big" is reported only when taking one more digit
   could push the value past INT_MAX; previously it was reported for every
   digit.
   NOTE(review): the line naming this function and its parameter is not
   visible in this copy of the file. */
int
{
	unsigned n = 0;
	while(isdigit(*t->w)) {
		/* n*10 + 9 must still fit in the int that is returned */
		if(n > (INT_MAX-9)/10)
			syntax("number too big");
		n = n*10 + *t->w++ - '0';
	}
	return n;
}
/* Parse one address at the read pointer t->w.
   Returns 0 when the next character cannot start an address, 1 otherwise.
   Recognised starts: '$', a regular expression introduced by '/' or by a
   backslash, and a decimal line number (zero is rejected).
   NOTE(review): the line carrying the function name and parameters is not
   visible, the '/' case has no visible body, and the value collected in n is
   never stored, so several lines appear to be missing. */
int
{
word n;
/* lenient mode tolerates blanks in front of the address */
if(reflags & REG_LENIENT)
while(*t->w == ' ' || *t->w == '\t' || *t->w == '\r')
t->w++;
switch(*t->w) {
default:
return 0;
case '$':
t->w++;
n = DOLLAR;
break;
case '\\':
t->w++;
/* falls through: the backslash form shares the '/' handling */
case '/':
break;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
n = number(t);
if(n == 0)
syntax("address is zero");
}
/* lenient mode also tolerates blanks after the address */
if(reflags & REG_LENIENT)
while(*t->w == ' ' || *t->w == '\t' || *t->w == '\r')
t->w++;
return 1;
}
/* NOTE(review): only the return type and an empty body are visible; the
   name, the parameters and the statements that would produce the regex_t
   pointer are missing from this copy of the file. */
regex_t *
{
}
/* LABEL HANDLING */
/* the labels array consists of int values followed by strings.
value -1 means unassigned; other values are relative to the
beginning of the script
on the first pass, every script ref to a label becomes the
integer offset of that label in the labels array, or -1 if
it is a branch to the end of script
on the second pass (fixlabels), the script ref is replaced
by the value from the labels array. */
/* Look up the label name l in the labels array and return a pointer to the
   value word of its entry.  When no entry matches, a new one is appended with
   the value -1 (unassigned) followed by a copy of the name.
   NOTE(review): the name/parameter line and the header of the loop that
   walks the existing entries are not visible (the first closing brace below
   has no opener), and n is computed but not used in the visible text. */
word *
{
unsigned char *p, *q;
word n, m;
/* an entry is a value word followed by the NUL-terminated name */
q = p + sizeof(word);
if(ustrcmp(q, l) == 0)
return (word*)p;
/* step over the name and its terminator to reach the next entry */
q += ustrlen(q) + 1;
p = (unsigned char*)wordp(q);
}
n = ustrlen(l);
/* p is rebuilt from its offset -- presumably because a line that is not
   visible here may move the buffer in between; TODO confirm */
m = (p - labels->s);
p = labels->s + m;
*(word*)p = -1;
q = p + sizeof(word);
ustrcpy(q, l);
q += ustrlen(q) + 1;
return (word*)p;
}
/* find pos in label list; assign value i to label if i>=0 */
/* Returns -1 when no label name is present, otherwise the byte offset of the
   label's entry from the start of the labels array.  A label whose value is
   already set and is given a second value (i != -1) is a "duplicate label".
   NOTE(review): the return type and name/parameter line are not visible; p is
   dereferenced without being assigned in the visible text, and the warning in
   the scan loop is issued for every character, so the lookup call and the
   test guarding the warning appear to be missing. */
{
word *p;
unsigned char *u;
while(blank(t)); /* not exactly posix */
for(u=t->w; *t->w!='\n'; t->w++)
synwarn("invisible character in name");
if(u == t->w)
return -1;
/* terminate the name in place for the duration of the lookup ... */
*t->w = 0;
if(*p == -1)
*p = i;
else if(i != -1)
syntax("duplicate label");
/* ... and put the newline back afterwards */
*t->w = '\n';
return (unsigned char*)p - labels.s;
}
/* NOTE(review): fragment -- the name/parameter line is not visible and the
   error is raised unconditionally here; the statement that obtains the label
   and the test that guards the error appear to be missing. */
void
{
syntax("missing label");
}
/* NOTE(review): fragment -- only an unused local is visible; the
   name/parameter line and every statement are missing. */
void
{
word g;
}
/* Visits 't' and 'b' instructions and inspects the word after the command
   word (q[1]), comparing it with -1.  Per the LABEL HANDLING comment in this
   file, this looks like the second pass that replaces label references by
   label values -- TODO confirm.
   NOTE(review): the name/parameter line, the loop over the script and both
   branch bodies of the if/else are not visible; braces are unbalanced. */
void
{
unsigned char *p;
word *q;
q = instr(p);
switch(code(*q)) {
case 't':
case 'b':
if(q[1] == -1)
else
}
}
}
/* FILES */
/* Scan a file name that runs from the first non-blank character to the end
   of the line.  Warns when no blank precedes the name and fails when the name
   is empty.
   NOTE(review): the name/parameter line is not visible, and nothing happens
   between terminating the name and restoring the newline, so the statement
   that uses the name appears to be missing. */
void
{
unsigned char *u;
if(!blank(t))
synwarn("no space before file name");
while(blank(t)) ;
for(u=t->w; *t->w!='\n'; t->w++) ;
if(u == t->w)
syntax("missing file name");
/* the name is NUL-terminated in place, then the newline is put back */
*t->w = 0;
*t->w = '\n';
}
/* Returns early when the word at p is not -1; otherwise reports an error
   when the same slot, read as an Sfio_t pointer, is null.
   NOTE(review): the name/parameter line is not visible and p is never
   assigned in the visible text; the lookup that yields p and the call that
   opens the file appear to be missing. */
void
{
word *p;
if(*p != -1)
return;
if(*(Sfio_t**)p == 0)
syntax("can't open file for writing");
}
/* BRACKETS */
/* Lc() stacks (in brack) the location of the { command word.
Rc() stuffs into that word the offset of the } sequel
relative to the command word.
fixbrack() modifies the offset to be relative to the
beginning of the instruction, including addresses. */
/* Handler for the { command; the visible part only skips blanks after it.
   NOTE(review): the name/parameter line is not visible, nor is the step that
   the BRACKETS comment in this file attributes to Lc() (recording the
   location of the command word). */
void /* { */
{
while(blank(t));
}
/* Handler for the } command: replaces the LMASK bits of the word at p with l,
   keeping the other bits.
   NOTE(review): the name/parameter line is not visible; l and p are never
   assigned and both errors are raised unconditionally, so the statements
   that compute them and the tests guarding the errors appear to be missing.
   The second message also ends in a stray ')'. */
void /* } */
{
word l;
word *p;
/* self-assignment has no effect; presumably it only marks t as used */
t = t;
syntax("unmatched }");
syntax("{command-list} too long)");
*p = (*p&~LMASK) | l;
}
/* For a '{' instruction starting at p, adds the distance from p to its
   command word to that word, so the stored offset is counted from the start
   of the instruction rather than from the command word.  Does nothing when
   brack.w is zero.
   NOTE(review): the name/parameter line, the test guarding the "unmatched {"
   error and the loop over the script are not visible; braces are unbalanced. */
void
{
unsigned char *p;
word *q;
if(brack.w == 0)
return;
syntax("unmatched {");
q = instr(p);
if(code(*q) == '{')
*q += (unsigned char*)q - p;
}
}
/* EASY COMMANDS */
/* Sets nflag when the second byte of the text is 'n', then moves the read
   pointer forward to the next newline, which is left unconsumed.
   Presumably the handler for '#' comment lines -- TODO confirm.
   NOTE(review): the line naming this function and its parameters is not
   visible in this copy of the file. */
void
{
	if(t->s[1]=='n')
		nflag = 1;
	for(; *t->w != '\n'; t->w++)
		;
}
/* Moves the read pointer back by one character.
   NOTE(review): the name/parameter line is not visible. */
void
{
t->w--;
}
/* Does nothing: the self-assignment has no effect and presumably only marks
   t as used.
   NOTE(review): the name/parameter line is not visible. */
void
{
t = t;
}
/* Reports "unknown command"; presumably the handler installed for characters
   that are not commands -- TODO confirm.
   NOTE(review): the name/parameter line is not visible. */
void
{
/* self-assignment has no effect; presumably it only marks t as used */
t = t;
syntax("unknown command");
}
/* MISCELLANY */
/* Copy a <text> argument from t into script.
   The command must be followed by backslash-newline.  Characters are then
   copied up to and including the first NUL or newline, after which a NUL is
   appended.  A backslash is dropped and the character after it is copied
   as is, so an escaped newline does not end the text.
   NOTE(review): the line naming this function and its parameters is not
   visible, and nothing here makes sure script has room for the copy. */
void
{
	int c;

	if(!(*t->w++ == '\\' && *t->w++ == '\n'))
		syntax("\\<newline> missing after command");
	for(;;) {
		c = *t->w;
		if(c == 0 || c == '\n') {
			/* end of text: keep the terminator, then add a NUL */
			*script->w++ = *t->w;
			*script->w++ = 0;
			return;
		}
		if(c == '\\')
			t->w++;
		*script->w++ = *t->w++;
	}
}
/* Does nothing: the self-assignment has no effect and presumably only marks
   t as used.
   NOTE(review): the name/parameter line is not visible. */
void
{
t = t;
}
/* NOTE(review): fragment -- only two unused locals are visible; the
   name/parameter line and every statement are missing. */
void
{
word n;
int c;
}
/* Builds a character map in script from two delimiter-separated strings
   (presumably the y command -- TODO confirm).
   Two layouts are visible: when mbwide() is true, a table of pointers indexed
   by source character, each pointing at a length byte followed by the
   replacement bytes; otherwise a table of UCHAR_MAX+1 bytes in which unmapped
   characters finally map to themselves.  A backslash may precede the
   delimiter or another backslash, and backslash-n stands for a newline.
   On return the read pointer is placed after the second string.
   NOTE(review): this block is badly damaged.  The name/parameter line, the
   declarations of pb, qb, pc, qc and s, every loop header and most of the
   tests guarding the syntax() calls are not visible, and the braces do not
   balance.  It must be restored from the upstream file before any change. */
void
{
word i, m, x;
int delim;
unsigned char *p, *q, *o, *v, **w;
m = 0;
if(mbwide()) {
pb = t->w;
syntax("missing delimiter");
p = pb;
if(pc=='\n')
syntax("missing delimiter");
if(pc=='\\') {
o = p;
}
/* m tracks the largest source character that took more than one byte */
if((p-o)>1 && pc>m)
m = pc;
}
}
if(m) {
x = 0;
qb = p;
if(pc=='\\') {
o = p;
}
x += (p-o)+1;
}
m++;
/* layout: the count m, then m pointer slots, then x bytes of string data */
w = (unsigned char**)script->w;
*w++ = (unsigned char*)0 + m;
script->w += (m+1)*sizeof(unsigned char*);
v = (unsigned char*)script->w;
script->w += x;
for(i=0; i<m; i++)
w[i] = 0;
p = pb;
q = qb;
if(pc=='\\') {
pb = p;
pc = '\n';
else
p = pb-1;
}
qb = q;
syntax("missing delimiter");
syntax("string lengths differ");
if(qc=='\\') {
qb = q;
*qb = '\n';
q = qb-1;
}
i = (q-qb);
if(w[pc]) {
syntax("ambiguous map");
synwarn("redundant map");
}
else {
/* first mapping for pc: a length byte followed by i replacement bytes */
w[pc] = v;
*v++ = (unsigned char)i;
v += i;
}
}
syntax("string lengths differ");
}
else {
syntax("missing delimiter");
/* layout: a null pointer slot, then a table of UCHAR_MAX+1 bytes */
w = (unsigned char**)script->w;
*w++ = 0;
s = (unsigned char*)w;
for(i=0; i<UCHAR_MAX+1; i++)
s[i] = 0;
if(qc == '\n')
syntax("missing delimiter");
q++;
}
if(pc=='\\') {
if(*p==delim || *p=='\\')
pc = *p++;
else if(*p=='n') {
p++;
pc = '\n';
}
}
if((qc = *q++) == '\n')
syntax("missing delimiter");
syntax("string lengths differ");
if(qc=='\\') {
if(*q==delim || *q=='\\')
qc = *q++;
else if(*q=='n') {
q++;
qc = '\n';
}
}
if(s[pc]) {
syntax("ambiguous map");
synwarn("redundant map");
}
}
if(*q++ != delim)
syntax("string lengths differ");
/* characters that received no mapping map to themselves */
for(i=0; i<UCHAR_MAX+1; i++)
if(s[i] == 0)
s[i] = (unsigned char)i;
}
t->w = q;
}
/* synwarn: report the warning message s about the script being compiled.
   NOTE(review): the body is empty in this copy of the file; the statements
   that emit the message are missing. */
void
synwarn(char *s)
{
}
/* syntax: report the syntax error message s about the script being compiled.
   NOTE(review): the body is empty in this copy of the file, so it cannot be
   told from here whether the call returns to its caller -- TODO confirm
   against the upstream file before relying on either behaviour. */
void
syntax(char *s)
{
}
/* NOTE(review): fragment -- an empty body followed by a dangling else and an
   unmatched closing brace; the name/parameter line and all statements are
   missing from this copy of the file. */
void
{
}
else
}
#if DEBUG
/* Debug dump of the compiled script.
   For each of up to two leading address words, the word is printed in
   decimal when its REGADR bit is clear; instructions whose command code is a
   newline are skipped.
   The two REGADR tests had been corrupted into a non-ASCII character (the
   sequence "&REG" had been decoded as an HTML entity); they are restored here.
   NOTE(review): the name/parameter line, the loop over the script, the tests
   that decide whether an address word is present, the bodies of both else
   branches and the final print are not visible in this copy of the file;
   braces are unbalanced. */
void
{
unsigned char *s;
word *q;
q = (word*)s;
if((*q&REGADR) == 0)
printf("%d", *q);
else
q++;
}
if((*q&REGADR) == 0)
printf(",%d", *q);
else
q += 2;
}
if(code(*q) == '\n')
continue;
}
}
#endif
#if DEBUG & 2
/* debugging code 2; execute stub.
prints the compiled script (without arguments)
then each input line with line numbers */
/* NOTE(review): fragment -- the name/parameter line and the statement
   controlled by the recno test are not visible; only the test for the first
   record remains. */
void
{
if(recno == 1)
}
#endif
};
/* Compile the script text in t into script, one command per pass of the
   outer loop: record the position for diagnostics, skip blanks, read up to
   two addresses, read the command character (possibly preceded by '!'),
   check the address count, then require the command to be followed by end
   of text, ';' or newline (anything may follow '{').
   NOTE(review): the name/parameter line, the calls that read the addresses,
   the handling of '!', the switch header in front of the "case 1:"/"case 0:"
   labels and the dispatch to the command handler are not visible; q is used
   without a visible assignment and "improper !" is raised unconditionally.
   Braces do not balance. */
void
{
int neg; /* ! in effect */
int cmd;
int naddr;
word *q; /* address of instruction word */
t->w = t->s; /* here w is a read pointer */
while(*t->w) {
synl = t->w;
naddr = 0;
while(blank(t)) ;
if(naddr && *t->w ==',') {
t->w++;
if(naddr < 2)
syntax("missing address");
}
/* a two-address command gets one more word, initialised to INACT */
if(naddr == 2)
*q++ = INACT;
/* leave room for the command word itself */
script->w = (unsigned char*)(q+1);
neg = 0;
for(;;) {
while(blank(t));
cmd = *t->w++;
syntax("improper !");
if(cmd != '!')
break;
}
if(!neg) {
case 1:
if(naddr <= 1)
break;
/* falls through to the zero-address check */
case 0:
if(naddr == 0)
break;
syntax("too many addresses");
}
}
while(*t->w == ' ' || *t->w == '\t' || *t->w == '\r')
t->w++;
switch(*t->w) {
case 0:
break;
case ';':
case '\n':
t->w++;
break;
default:
if(cmd == '{')
break;
syntax("junk after command");
}
}
}