/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1985-2011 AT&T Intellectual Property *
* and is licensed under the *
* Common Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* David Korn <dgk@research.att.com> *
* Phong Vo <kpv@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* posix regex compiler
*/
#include "reglib.h"
#if _PACKAGE_ast
#include "lclib.h"
#endif
#if _AST_REGEX_DEBUG
#define DEBUG_INIT() do { char* t; if (!debug) { debug = 0x80000000; if (t = getenv("_AST_regex_comp_debug")) debug |= strtoul(t, NiL, 0); } } while (0)
static unsigned long debug;
static unsigned long debug_flag;
#else
#define DEBUG_INIT()
#define DEBUG_TEST(f,y,n) (n)
#define DEBUG_CODE(f,y,n) do {n} while(0)
#endif
#if _PACKAGE_ast
typedef struct Cchr_s
{
} Cchr_t;
#endif
/*
* determine whether greedy matching will work, i.e. produce
* the best match first. such expressions are "easy", and
* need no backtracking once a complete match is found.
* if an expression has backreferences or alts it's hard
* else if it has only one closure it's easy
* else if all closures are simple (i.e. one-character) it's easy
* else it's hard.
*/
typedef struct Stats_s
{
unsigned long l; /* min length to left of x */
unsigned long k; /* min length to left of y */
unsigned long m; /* min length */
unsigned long n; /* max length */
unsigned short a; /* number of alternations */
unsigned short b; /* number of backrefs */
unsigned short c; /* number of closures */
unsigned short e; /* $ */
unsigned short i; /* number of negations */
unsigned short p; /* number of named subexpressions */
unsigned short s; /* number of simple closures */
unsigned short t; /* number of tries */
unsigned short u; /* number of unnamed subexpressions */
Rex_t* x; /* max length REX_STRING */
Rex_t* y; /* max length REX_TRIE */
} Stats_t;
typedef struct Token_s
{
unsigned long min;
unsigned long max;
short lex;
short len;
short esc;
short att;
short push;
} Token_t;
typedef struct Cenv_s
{
/* paren[i]!=0 if \i defined */
} Cenv_t;
/*
* allocate a new Rex_t node
*/
#if _BLD_DEBUG
#endif
static Rex_t*
{
register Rex_t* e;
DEBUG_TEST(0x0800,(sfprintf(sfstdout, "%s:%d node(%d,%d,%d,%u)\n", file, line, type, lo, hi, sizeof(Rex_t) + extra)),(0));
{
e->marked = 0;
if (extra)
}
return e;
}
#if _BLD_DEBUG
#endif
/*
* free a Trie_node_t node
*/
static void
{
if (e)
{
}
}
/*
* free a Rex_t node
*/
void
{
int i;
Rex_t* x;
do
{
switch (e->type)
{
case REX_ALT:
case REX_CONJ:
break;
case REX_GROUP:
case REX_GROUP_AHEAD:
case REX_GROUP_AHEAD_NOT:
case REX_GROUP_BEHIND:
case REX_GROUP_BEHIND_NOT:
case REX_GROUP_CUT:
case REX_NEG:
case REX_REP:
break;
case REX_TRIE:
for (i = 0; i <= UCHAR_MAX; i++)
break;
}
x = e->next;
} while (e = x);
}
/*
* mark e and descendants minimal
*/
static void
{
if (e && !e->marked)
do
{
e->marked = 1;
if (set)
e->flags |= REG_MINIMAL;
else
e->flags &= ~REG_MINIMAL;
switch (e->type)
{
case REX_ALT:
case REX_CONJ:
case REX_GROUP_COND:
break;
case REX_GROUP:
case REX_GROUP_AHEAD:
case REX_GROUP_AHEAD_NOT:
case REX_GROUP_BEHIND:
case REX_GROUP_BEHIND_NOT:
case REX_GROUP_CUT:
case REX_NEG:
case REX_REP:
case REX_TRIE:
break;
}
} while (e = e->next);
}
/*
* assign subexpression numbers by a preorder tree walk
*/
static int
{
do
{
e->serial = n++;
switch (e->type)
{
case REX_ALT:
case REX_GROUP_COND:
break;
case REX_CONJ:
break;
case REX_GROUP:
case REX_GROUP_AHEAD:
case REX_GROUP_AHEAD_NOT:
case REX_GROUP_BEHIND:
case REX_GROUP_BEHIND_NOT:
case REX_GROUP_CUT:
case REX_NEG:
case REX_REP:
break;
}
} while (e = e->next);
return n;
}
/*
* catenate e and f into a sequence, collapsing them if possible
*/
static Rex_t*
{
Rex_t* g;
if (!f)
{
return 0;
}
{
return f;
}
{
g = f->next;
f->next = 0;
f = g;
}
{
if (m <= RE_DUP_MAX)
{
{
n = RE_DUP_INF;
goto combine;
}
else if (n <= RE_DUP_MAX)
{
e->lo = m;
e->hi = n;
g = f->next;
f->next = 0;
f = g;
}
}
}
e->next = f;
return e;
}
/*
* collect re statistics
*/
static int
{
register unsigned long n;
register unsigned long m;
unsigned long cm;
unsigned long nm;
unsigned long cn;
unsigned long nn;
unsigned long l;
unsigned long k;
unsigned long t;
Rex_t* q;
Rex_t* x;
Rex_t* y;
unsigned char c;
unsigned char b;
do
{
switch (e->type)
{
case REX_ALT:
return 1;
return 1;
return 1;
else
return 1;
else
return 1;
break;
case REX_BACK:
return 1;
break;
case REX_CLASS:
case REX_COLL_CLASS:
case REX_DOT:
case REX_ONECHAR:
return 1;
if (e->hi != RE_DUP_INF)
{
return 1;
}
{
return 1;
return 1;
}
break;
case REX_CONJ:
return 1;
return 1;
else
return 1;
else
return 1;
break;
case REX_END:
break;
case REX_GROUP:
return 1;
return 1;
break;
case REX_GROUP_AHEAD:
case REX_GROUP_AHEAD_NOT:
case REX_GROUP_BEHIND:
case REX_GROUP_BEHIND_NOT:
return 1;
switch (e->type)
{
case REX_GROUP_AHEAD:
case REX_GROUP_BEHIND:
return 1;
break;
}
break;
case REX_GROUP_COND:
return 1;
return 1;
return 1;
{
return 1;
return 1;
}
break;
case REX_GROUP_CUT:
return 1;
return 1;
break;
case REX_NEG:
return 1;
return 1;
break;
case REX_REP:
return 1;
return 1;
return 1;
if (e->lo < 1)
{
}
else
{
return 1;
return 1;
}
break;
case REX_STRING:
if (!e->map)
{
return 1;
return 1;
{
}
}
break;
case REX_TRIE:
return 1;
return 1;
return 1;
{
}
break;
}
} while (e = e->next);
return 0;
}
static int
{
register char* sp;
register int n;
int o = c;
short* mp;
char* ep;
{
if (c >= T_META)
{
switch (c)
{
case T_LEFT:
n = 0;
{
if (n > (INT_MAX / 10))
{
goto bad;
}
}
{
{
goto bad;
}
}
else if (n > RE_DUP_MAX)
{
goto bad;
}
if (*sp == ',')
{
n = 0;
{
if (n > (INT_MAX / 10))
{
goto bad;
}
}
n = RE_DUP_INF;
{
goto bad;
}
}
switch (*sp)
{
case 0:
goto bad;
case '\\':
if (!escaped)
{
goto bad;
}
sp++;
break;
default:
if (escaped)
{
goto bad;
}
break;
}
switch (*sp++)
{
case 0:
goto bad;
case '}':
break;
default:
goto bad;
}
break;
case T_RIGHT:
goto bad;
case T_OPEN:
{
goto group;
}
break;
case T_ESCAPE:
goto bad;
if (c >= T_META)
{
c = C_ESC;
}
return c;
case T_BACK+0:
case T_BACK+1:
case T_BACK+2:
case T_BACK+3:
case T_BACK+4:
case T_BACK+5:
case T_BACK+6:
case T_BACK+7:
{
return n;
}
/*FALLTHROUGH*/
case T_BACK+8:
case T_BACK+9:
{
goto bad;
}
{
c += T_BACK;
else
}
if (c == T_BACK)
c = 0;
break;
case T_BAD:
return c;
goto bad;
}
{
if (c == T_DOT)
c = '.';
else if (c < T_OPEN)
{
if (env->type == KRE && *(env->cursor + env->token.len) == '-' && *(env->cursor + env->token.len + 1) == '(')
{
}
{
switch (c)
{
case T_AT:
break;
case T_PERCENT:
goto group;
case T_TILDE:
goto group;
default:
break;
}
c = T_OPEN;
}
else if (c == T_STAR)
c = T_DOTSTAR;
else if (c == T_QUES)
c = T_DOT;
else
{
c = o;
}
}
else if (c > T_BACK)
{
}
{
if (c == T_AND)
c = '&';
else if (c == T_BAR)
c = '|';
else if (c == T_OPEN)
c = '(';
}
}
}
}
else if (escaped == 2)
{
if (env->type >= SRE && !(env->flags & REG_SHELL_ESCAPED) || (env->flags & REG_ESCAPE) && (c == '[' || c == '-' || c == ']' || env->delimiter && c == env->delimiter))
/*ok*/;
else
{
goto bad;
}
}
{
goto bad;
}
return c;
switch (*sp++)
{
case ')':
break;
case '#':
for (;;)
{
switch (*sp++)
{
case 0:
return T_BAD;
case ')':
break;
default:
continue;
}
break;
}
break;
default:
return T_GROUP;
}
bad:
if (escaped == 2)
return o;
{
if (mp || o == ']')
return o;
}
return T_BAD;
}
static int
{
int c;
int posixkludge;
for (;;)
{
return T_END;
break;
if (c == '#')
{
do
{
return T_END;
} while (c != '\n');
}
else if (!isspace(c))
break;
}
{
{
return T_BAD;
}
return T_BAR;
}
return c;
{
env->posixkludge = 0;
if (c == '*')
return c;
}
if (c == '\\')
{
return c;
{
{
if (c)
{
return c;
}
return '\\';
}
return T_BAD;
}
return '\n';
{
return T_BAD;
}
return c;
}
else if (c == '$')
{
if (env->type == BRE && (*(env->cursor + 1) == 0 || *(env->cursor + 1) == env->delimiter || *(env->cursor + 1) == env->terminator || *(env->cursor + 1) == '\\' && *(env->cursor + 2) == ')') || (env->flags & REG_MULTIPLE) && *(env->cursor + 1) == '\n')
return T_DOLL;
}
else if (c == '^')
{
{
return T_CFLX;
}
}
else if (c == ')')
{
return c;
}
{
return T_SLASHPLUS;
}
}
static Celt_t*
{
register char* s;
register unsigned char* k;
register unsigned char* e;
register int c;
register int cc;
int bt;
int et;
cc = 0;
for (;;)
{
k = key;
if (bw == 1)
{
c = bc;
if (ic)
{
if (isupper(c))
{
c = tolower(c);
cc = -1;
}
else if (islower(c))
{
c = toupper(c);
cc = -1;
}
}
*k++ = c;
}
else if (bw < COLL_KEY_MAX)
{
s = (char*)bp;
if (ic)
{
c = mbchar(s);
if (iswupper(c))
{
c = towlower(c);
cc = 1;
}
else if (iswlower(c))
{
c = towupper(c);
cc = 1;
}
}
if (cc > 0)
{
cc = -1;
k += mbconv((char*)k, c);
}
else
for (e = k + bw; k < e; *k++ = *s++);
}
*k = 0;
if (ep)
{
k = key;
c = mbchar(k);
if (iswupper(c))
bt = COLL_range_uc;
else if (iswlower(c))
bt = COLL_range_lc;
else
bt = COLL_range;
k = key;
if (ew == 1)
{
c = ec;
if (ic)
{
if (isupper(c))
{
c = tolower(c);
cc = -1;
}
else if (islower(c))
{
c = toupper(c);
cc = -1;
}
}
*k++ = c;
}
else if (ew < COLL_KEY_MAX)
{
s = (char*)ep;
if (ic)
{
c = mbchar(s);
if (iswupper(c))
{
c = towlower(c);
cc = 1;
}
else if (iswlower(c))
{
c = towupper(c);
cc = 1;
}
}
if (cc > 0)
{
cc = -1;
k += mbconv((char*)k, c);
}
else
for (e = k + ew; k < e; *k++ = *s++);
}
*k = 0;
k = key;
c = mbchar(k);
if (iswupper(c))
et = COLL_range_uc;
else if (iswlower(c))
et = COLL_range_lc;
else
et = COLL_range;
}
else
ce++;
break;
ic = 0;
}
return ce;
}
static Rex_t*
{
Rex_t* e;
int c;
int i;
int w;
int neg;
int last;
int inrange;
int complicated;
int collate;
int elements;
unsigned char* first;
unsigned char* start;
unsigned char* begin;
unsigned char* s;
regclass_t f;
#if _PACKAGE_ast
int ic;
#endif
return 0;
{
neg = 1;
}
else
neg = 0;
if (*env->cursor == 0 || *(env->cursor + 1) == 0 || *env->cursor == env->terminator || *(env->cursor + 1) == env->terminator || (env->flags & REG_ESCAPE) && (*env->cursor == env->delimiter || *env->cursor != '\\' && *(env->cursor + 1) == env->delimiter))
goto error;
/*
* inrange: 0=no, 1=possibly, 2=definitely
*/
inrange = 0;
for (;;)
{
if (!(c = *env->cursor) || c == env->terminator || c == env->delimiter && (env->flags & REG_ESCAPE))
goto error;
if (c == '\\' && ((env->flags & REG_CLASS_ESCAPE) || *env->cursor == env->delimiter && (env->flags & REG_ESCAPE)))
{
{
{
c = '\n';
}
{
{
{
if (inrange > 1)
{
goto erange;
inrange = 0;
}
for (c = 0; c <= UCHAR_MAX; c++)
if ((*f)(c))
complicated++;
elements++;
continue;
}
{
c = w;
if (c > UCHAR_MAX)
{
goto erange;
c = UCHAR_MAX;
}
}
}
}
}
}
else if (c == ']')
{
{
last = c;
inrange = 1;
continue;
}
if (inrange != 0)
{
elements++;
if (inrange == 2)
{
elements++;
}
}
break;
}
else if (c == '-')
{
{
goto erange;
continue;
}
else if (inrange == 1)
{
inrange = 2;
complicated++;
continue;
}
}
else if (c == '[')
{
{
case 0:
goto error;
case ':':
goto normal;
if (inrange == 1)
{
elements++;
}
{
{
while (*++s && *s != ':');
if (*s == ':' && *(s + 1) == ']' && *(s + 2) == ']')
{
if ((i = (s - start)) == 1)
{
switch (c)
{
case '<':
i = REX_WBEG;
break;
case '>':
i = REX_WEND;
break;
default:
i = 0;
break;
}
if (i)
{
}
}
}
}
goto error;
}
for (c = 0; c <= UCHAR_MAX; c++)
if ((*f)(c))
inrange = 0;
complicated++;
elements++;
continue;
case '=':
goto normal;
if (inrange == 2)
goto erange;
if (inrange == 1)
{
elements++;
}
goto ecollate;
if (c > 1)
collate++;
else
c = buf[0];
inrange = 0;
complicated++;
elements++;
continue;
case '.':
goto normal;
goto ecollate;
if (c > 1)
collate++;
c = buf[0];
complicated++;
break;
default:
goto error;
break;
}
}
else if (w > 1)
complicated++;
if (inrange == 2)
{
if (last <= c)
{
for (i = last; i <= c; i++)
elements += 2;
}
{
elements += 2;
inrange = 1;
}
goto erange;
else
inrange = 0;
}
else if (inrange == 1)
{
elements++;
}
else
inrange = 1;
last = c;
}
#if _PACKAGE_ast
if (complicated && mbcoll())
{
int rw;
int rc;
unsigned char* rp;
unsigned char* pp;
char* wp;
{
{
{
}
}
else
{
if (cc)
return 0;
}
}
if (dt)
{
elements *= 2;
return 0;
inrange = 0;
for (;;)
{
if ((c = *env->cursor) == 0 || c == env->terminator || (env->flags & REG_ESCAPE) && c == env->delimiter)
goto error;
if (c == '\\' && ((env->flags & REG_CLASS_ESCAPE) || *env->cursor == env->delimiter && (env->flags & REG_ESCAPE)))
{
{
{
c = '\n';
}
{
{
{
if (inrange > 1)
{
goto erange;
inrange = 0;
}
ce++;
continue;
}
{
c = w;
}
}
}
}
}
else if (c == ']')
{
{
rw = w;
inrange = 1;
continue;
}
if (inrange != 0)
{
if (inrange == 2)
}
break;
}
else if (c == '-')
{
{
goto erange;
continue;
}
else if (inrange == 1)
{
inrange = 2;
continue;
}
}
else if (c == '[')
{
{
case 0:
goto error;
case ':':
goto complicated_normal;
if (inrange == 1)
{
if (env->cursor == start && (c = *(env->cursor + 1)) && *(env->cursor + 2) == ':' && *(env->cursor + 3) == ']' && *(env->cursor + 4) == ']')
{
switch (c)
{
case '<':
i = REX_WBEG;
break;
case '>':
i = REX_WEND;
break;
default:
i = 0;
break;
}
if (i)
{
}
}
goto error;
}
ce++;
inrange = 0;
continue;
case '=':
goto complicated_normal;
if (inrange == 2)
goto erange;
if (inrange == 1)
goto ecollate;
c = 0;
if (ic)
{
{
c = 'u';
}
c = 'l';
}
i = 1;
for (;;)
{
{
if (i)
{
c = *pp;
goto singleton;
}
goto ecollate;
}
if (c == 'l' || c == 'L' && !(c = 0))
else if (c == 'u' || c == 'U' && !(c = 0))
else
{
if (i)
{
c = *pp;
goto singleton;
}
goto ecollate;
}
ce++;
if (!c)
break;
if (c == 'u')
{
c = 'L';
}
else
{
c = 'U';
}
i = 0;
}
inrange = 0;
c = *pp;
continue;
case '.':
goto complicated_normal;
goto ecollate;
c = *pp;
break;
default:
goto error;
break;
}
}
if (inrange == 2)
{
{
goto erange;
ce++;
}
}
else if (inrange == 1)
else
inrange = 1;
rw = w;
rc = c;
}
return e;
}
}
#endif
if (collate)
goto ecollate;
for (i = 0; i <= UCHAR_MAX; i++)
{
if (isupper(i))
c = tolower(i);
else if (islower(i))
c = toupper(i);
else
continue;
}
if (neg)
{
}
return e;
goto error;
return 0;
}
static Rex_t*
{
int i;
Rex_t* e;
regclass_t f;
{
return 0;
}
if (!mbcoll())
{
return 0;
for (i = 0; i <= UCHAR_MAX; i++)
if ((*f)(i))
}
else
{
return 0;
ce++;
}
return e;
}
static Rex_t*
{
Rex_t* f;
unsigned long m = 0;
unsigned long n = RE_DUP_INF;
if (!e)
return 0;
{
case T_BANG:
{
return 0;
}
return f;
case T_QUES:
n = 1;
break;
case T_STAR:
break;
case T_PLUS:
m = 1;
break;
case T_LEFT:
break;
default:
return e;
}
minimal = 1;
{
case T_QUES:
break;
case T_STAR: /*AST*/
break;
}
switch (e->type)
{
case REX_DOT:
case REX_CLASS:
case REX_COLL_CLASS:
case REX_ONECHAR:
e->lo = m;
e->hi = n;
if (minimal >= 0)
return e;
#if HUH_2002_08_07
case REX_BEG:
#endif
case REX_BEG_STR:
case REX_END_STR:
case REX_FIN_STR:
case REX_WBEG:
case REX_WEND:
case REX_WORD:
case REX_WORD_NOT:
return 0;
}
if (m == 1 && n == 1)
{
if (minimal >= 0)
return e;
}
{
return 0;
}
if (minimal >= 0)
if (m <= n && n)
{
}
return f;
}
static int
{
switch (e->type)
{
case REX_ONECHAR:
case REX_STRING:
return 1;
}
return 0;
}
static Trie_node_t*
{
Trie_node_t* t;
{
memset(t, 0, sizeof(Trie_node_t));
t->c = c;
}
return t;
}
static int
{
unsigned char* s;
unsigned char* e;
Trie_node_t* t;
int len;
switch (f->type)
{
case REX_ONECHAR:
e = s + 1;
break;
case REX_STRING:
break;
default:
return 1;
}
return 1;
for (len = 1;;)
{
if (t->c == *s)
{
if (++s >= e)
break;
return 1;
t = t->son;
len++;
}
else
{
return 1;
t = t->sib;
}
}
t->end = 1;
return 0;
}
/*
* trie() tries to combine nontrivial e and f into a REX_TRIE
* unless 0 is returned, e and f are deleted as far as possible
* also called by regcomb
*/
static Rex_t*
{
Rex_t* g;
return 0;
if (isstring(f))
{
return 0;
goto nospace;
}
return 0;
else
g = f;
goto nospace;
return g;
if (g != f)
return 0;
}
static int
{
unsigned char* p;
int c;
*escaped = 0;
return -1;
if (c == '\\')
{
return c;
{
return c ? c : '\\';
return -1;
}
}
return c;
}
/*
* open the perly gates
*/
static Rex_t*
{
Rex_t* e;
Rex_t* f;
int c;
int i;
int n;
int x;
int esc;
int typ;
int beg;
unsigned char* p;
typ = -1;
switch (c)
{
case '-':
case '+':
case 'a':
case 'g':
case 'i':
case 'l':
case 'm':
case 'p':
case 'r':
case 's':
case 'x':
case 'A':
case 'B':
case 'E':
case 'F':
case 'G':
case 'I':
case 'K':
case 'L':
case 'M': /* glob(3) */
case 'N': /* glob(3) */
case 'O': /* glob(3) */
case 'P':
case 'R': /* pcre */
case 'S':
case 'U': /* pcre */
case 'X': /* pcre */
x = REX_GROUP;
i = 1;
for (;;)
{
switch (c)
{
case ')':
{
return 0;
}
/*FALLTHROUGH*/
case 0:
case T_CLOSE:
x = 0;
goto done;
case ':':
x = 0;
goto done;
case '-':
i = 0;
break;
case '+':
i = 1;
break;
case 'a':
if (i)
else
break;
case 'g':
if (i)
else
break;
case 'i':
if (i)
else
break;
case 'l':
if (i)
else
break;
case 'm':
if (i)
else
break;
case 'p':
if (i)
else
break;
case 'r':
if (i)
else
break;
case 's':
if (i)
else
break;
case 'x':
if (i)
else
break;
case 'X':
break; /* PCRE_EXTRA */
/*FALLTHROUGH*/
case 'A':
break;
case 'B':
case 'G':
break;
case 'E':
break;
case 'F':
case 'L':
break;
case 'K':
break;
case 'M':
/* used by caller to disable glob(3) GLOB_BRACE */
break;
case 'N':
/* used by caller to disable glob(3) GLOB_NOCHECK */
break;
case 'O':
/* used by caller to disable glob(3) GLOB_STARSTAR */
break;
case 'P':
break;
case 'S':
break;
case 'U': /* PCRE_UNGREEDY */
{
if (i)
else
}
break;
default:
return 0;
}
}
done:
break;
case ':':
x = REX_GROUP;
break;
case '=':
x = REX_GROUP_AHEAD;
break;
case '!':
x = REX_GROUP_AHEAD_NOT;
break;
case '<':
{
case '=':
x = REX_GROUP_BEHIND;
break;
case '!':
case T_BANG:
x = REX_GROUP_BEHIND_NOT;
break;
default:
return 0;
}
break;
case '>':
x = REX_GROUP_CUT;
break;
case '%':
case T_PERCENT:
n = 1;
for (;;)
{
{
case -1:
case 0:
return 0;
case 'D':
x = REX_NEST_delimiter;
/*FALLTHROUGH*/
goto invalid;
goto invalid;
continue;
case 'E':
x = REX_NEST_escape;
goto delimiter;
case 'L':
x = REX_NEST_literal;
goto quote;
case 'O':
{
case 'T':
break;
default:
goto invalid;
}
continue;
case 'Q':
x = REX_NEST_quote;
/*FALLTHROUGH*/
goto invalid;
goto invalid;
continue;
case 'S':
x = REX_NEST_separator;
goto delimiter;
case 'T':
x = REX_NEST_terminator;
goto delimiter;
case '|':
case '&':
if (!esc)
goto invalid;
goto nesting;
case '(':
if (!esc)
n++;
goto nesting;
case ')':
if (!esc && !--n)
break;
goto nesting;
default:
if (isalnum(i) || (e->re.nest.type[i] & (REX_NEST_close|REX_NEST_escape|REX_NEST_literal|REX_NEST_quote|REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator)))
goto invalid;
if ((x = chr(env, &esc)) < 0 || (e->re.nest.type[x] & (REX_NEST_close|REX_NEST_escape|REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator)))
goto invalid;
if (!esc)
{
if (x == ')' && !--n)
goto invalid;
else if (x == '(')
n++;
}
continue;
}
break;
}
if (c == T_PERCENT)
for (n = 0; n < 2; n++)
{
{
return 0;
}
e = f;
}
return e;
case '(':
c = 0;
{
f = 0;
do
{
if (c > (INT_MAX / 10))
{
return 0;
}
{
return 0;
}
c = c * 2 - 1;
{
{
return 0;
}
if (c)
c = -1;
}
}
else
{
{
return 0;
}
return 0;
}
{
return 0;
}
{
return 0;
}
{
return 0;
}
case '{':
n = 1;
{
else if (c == '{')
n++;
else if (c == '}' && !--n)
break;
break;
}
if (c != '}')
{
return 0;
}
{
return 0;
}
{
return 0;
}
return 0;
return 0;
else
return e;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
c -= '0';
{
if (c > (INT_MAX / 10))
{
return 0;
}
}
{
{
return 0;
}
}
/*FALLTHROUGH*/
default:
return 0;
}
return 0;
{
return 0;
}
if (typ >= 0)
{
if (beg)
}
if (!x)
return 0;
{
return 0;
}
if (x == REX_GROUP_BEHIND || x == REX_GROUP_BEHIND_NOT)
{
{
return 0;
}
}
switch (x)
{
case REX_GROUP:
case REX_GROUP_CUT:
break;
}
return f;
}
static Rex_t*
{
Rex_t* e;
Rex_t* f;
int c;
int i;
int n;
int x;
int parno;
int type;
unsigned char* s;
unsigned char* p;
unsigned char* t;
unsigned char* u;
for (;;)
{
s = buf;
{
x = c;
if (c >= 0)
{
n = 1;
}
{
c = towupper(c);
break;
if ((n = mbconv((char*)s, c)) < 0)
*s++ = c;
else if (n)
s += n;
else
{
n = 1;
*s++ = 0;
}
}
else
{
for (t = p, u = s + n; s < u; *s++ = *t++);
}
}
if (c == T_BAD)
return 0;
if (s > buf)
switch (c)
{
case T_STAR:
case T_PLUS:
case T_LEFT:
case T_QUES:
case T_BANG:
if ((s -= n) == buf)
e = 0;
else
{
i = s - buf;
return 0;
}
if (x >= 0)
{
{
return 0;
}
}
else
{
return 0;
}
{
return 0;
}
if (e)
return f;
default:
c = s - buf;
return 0;
}
else if (c > T_BACK)
{
c -= T_BACK;
{
return 0;
}
}
else
switch (c)
{
case T_AND:
case T_CLOSE:
case T_BAR:
case T_END:
case T_DOLL:
break;
case T_CFLX:
break;
case T_OPEN:
break;
{
env->error = (*env->cursor == 0 || *env->cursor == env->delimiter || *env->cursor == env->terminator) ? REG_EPAREN : REG_ENULL;
return 0;
}
{
return 0;
}
{
return 0;
}
{
}
return 0;
{
{
return 0;
}
e = f;
}
break;
case T_GROUP:
{
return 0;
continue;
}
break;
case T_BRA:
break;
case T_ALNUM:
case T_ALNUM_NOT:
case T_DIGIT:
case T_DIGIT_NOT:
case T_SPACE:
case T_SPACE_NOT:
break;
case T_LT:
break;
case T_GT:
break;
case T_DOT:
break;
case T_DOTSTAR:
break;
case T_SLASHPLUS:
{
}
break;
case T_WORD:
break;
case T_WORD_NOT:
break;
case T_BEG_STR:
break;
case T_END_STR:
break;
case T_FIN_STR:
break;
default:
return 0;
}
return e;
}
}
static Rex_t*
{
Rex_t* e;
Rex_t* f;
Rex_t* g;
return e;
{
return 0;
}
{
return 0;
}
return g;
}
static Rex_t*
{
Rex_t* e;
Rex_t* f;
Rex_t* g;
return 0;
{
if (!cond)
return e;
f = 0;
goto bad;
}
else
{
{
return 0;
}
goto bad;
return g;
}
{
goto bad;
}
return g;
bad:
return 0;
}
/*
* add v to REX_BM tables
*/
static void
{
int c;
int m;
size_t z;
for (m = 0; m < n; m++)
{
if (!(z = n - m - 1))
z = HIT;
c = v[m];
{
if (isupper(c))
c = tolower(c);
else if (islower(c))
c = toupper(c);
else
continue;
}
}
}
/*
* set up BM table from trie
*/
static int
{
do
{
v[m] = x->c;
if (m >= (n - 1))
{
if (!(b <<= 1))
{
b = 1;
}
else if (x->son)
}
else if (x->son)
} while (x = x->sib);
return b;
}
/*
* rewrite the expression tree for some special cases
* 1. it is a null expression - illegal
* 2. max length fixed string found -- use BM algorithm
* 3. it begins with an unanchored string - use KMP algorithm
* 0 returned on success
*/
static int
{
Rex_t* a;
Rex_t* e;
Rex_t* t;
Rex_t* x;
Rex_t* y;
unsigned char* s;
int* f;
int n;
int m;
int k;
DEBUG_INIT();
{
x = 0;
t = 0;
if (x && t)
{
t = 0;
else
x = 0;
}
if (x || t)
{
Bm_mask_t* h;
unsigned char* v;
size_t* q;
unsigned long l;
int i;
int j;
if (x)
{
y = x;
}
else
{
y = t;
}
return 1;
if (!(a = node(env, REX_BM, 0, 0, n * (sizeof(Bm_mask_t*) + (UCHAR_MAX + 1) * sizeof(Bm_mask_t)) + (UCHAR_MAX + n + 2) * sizeof(size_t))))
{
return 1;
}
a->re.bm.complete = (env->stats.e || y != e && (e->type != REX_GROUP || y != e->re.group.expr.rex) || e->next || ((a->re.bm.left + a->re.bm.right) >= 0)) ? 0 : n;
for (m = 0; m <= UCHAR_MAX; m++)
a->re.bm.skip[0] = a->re.bm.skip[env->mappednewline] = (y->next && y->next->type == REX_END) ? HIT : (n + a->re.bm.left);
for (i = 1; i <= n; i++)
for (m = 0; m < n; m++)
{
mask[m] = h;
h += UCHAR_MAX + 1;
}
if (x)
else
{
v = (unsigned char*)q;
memset(v, 0, n);
m = 1;
for (i = 0; i <= UCHAR_MAX; i++)
}
mask--;
memset(q, 0, n * sizeof(*q));
for (k = (j = n) + 1; j > 0; j--, k--)
{
for (q[j] = k; k <= n; k = q[k])
{
for (m = 0; m <= UCHAR_MAX; m++)
{
goto cut;
}
DEBUG_TEST(0x0010,sfprintf(sfstderr, "BM#2: fail[%d]=%d => %d\n", k, a->re.bm.fail[k], (a->re.bm.fail[k] > n - j) ? (n - j) : a->re.bm.fail[k]), (0));
}
cut: ;
}
for (i = 1; i <= n; i++)
{
DEBUG_TEST(0x0010,sfprintf(sfstderr, "BM#4: fail[%d]=%d => %d\n", i, a->re.bm.fail[i], n + k - i), (0));
}
#if _AST_REGEX_DEBUG
{
sfprintf(sfstderr, "STAT: complete=%d n=%d k=%d l=%d r=%d y=%d:%d e=%d:%d\n", a->re.bm.complete, n, k, a->re.bm.left, a->re.bm.right, y->type, y->next ? y->next->type : 0, e->type, e->next ? e->next->type : 0);
for (m = 0; m < n; m++)
for (i = 1; i <= UCHAR_MAX; i++)
for (i = ' '; i <= UCHAR_MAX; i++)
for (j = 31; j >= 0; j--)
{
next:
for (m = 0; m < n; m++)
{
for (i = 0040; i < 0177; i++)
{
break;
}
if (i >= 0177)
{
if (j > 0)
{
j--;
goto next;
}
}
}
}
for (m = 1; m <= n; m++)
}
#endif
a->next = e;
return 0;
}
switch (e->type)
{
case REX_BEG:
return 0;
break;
case REX_GROUP:
return 0;
return 0;
/*FALLTHROUGH*/
case REX_DOT:
break;
return 0;
case REX_NULL:
break;
return 1;
case REX_STRING:
return 0;
return 1;
f[0] = m = -1;
for (k = 1; k < n; k++)
{
while (m >= 0 && s[m+1] != s[k])
m = f[m];
if (s[m+1] == s[k])
m++;
f[k] = m;
}
e->next = 0;
break;
default:
return 0;
}
}
return 0;
}
int
{
Rex_t* e;
Rex_t* f;
unsigned char* fold;
int i;
if (!p)
return REG_BADPAT;
if (flags & REG_DISCIPLINE)
{
flags &= ~REG_DISCIPLINE;
}
else
if (!disc->re_errorlevel)
p->env = 0;
if (!pattern)
if (!state.initialized)
{
}
{
for (i = 0; i <= UCHAR_MAX; i++)
}
{
for (i = 0; i <= UCHAR_MAX; i++)
{
env.mappednewline = i;
env.mappedslash = i;
}
}
else
{
}
else
{
{
case 0:
case '\\':
case '\n':
case '\r':
goto bad;
}
}
goto bad;
{
goto bad;
}
{
{
regfree(p);
}
}
{
{
regfree(p);
}
e->next = f;
}
{
goto bad;
}
else if (!(env.flags & REG_FIRST) && (env.stats.a || env.stats.c > 1 && env.stats.c != env.stats.s || env.stats.t && (env.stats.t > 1 || env.stats.a || env.stats.c)))
else
{
}
goto bad;
p->re_nsub /= 2;
{
p->re_npat++;
{
goto bad;
}
else
}
return 0;
bad:
regfree(p);
{
flags |= REG_LITERAL;
goto again;
}
}
/*
* regcomp() on sized pattern
* the lazy way around adding and checking env.end
*/
int
{
char* s;
int r;
s[size] = 0;
free(s);
return r;
}
/*
* combine two compiled regular expressions if possible,
* replacing first with the combination and freeing second.
* return 0 on success.
* the only combinations handled are building a Trie
* from String|Kmp|Trie and String|Kmp
*/
int
{
Rex_t* g;
Rex_t* h;
if (!e || !f)
return REG_ESUBREG;
{
e->next = 0;
}
{
f->next = 0;
}
{
e->next = 0;
f->next = 0;
}
{
g->next = 0;
h->next = 0;
}
{
{
regfree(p);
}
}
{
{
{
regfree(p);
}
f->next = e;
}
}
{
regfree(p);
}
{
regfree(p);
}
return 0;
}
/*
* copy a reference of p into q
* p and q may then have separate regsubcomp() instantiations
*/
int
{
if (!p || !q)
return REG_BADPAT;
*q = *p;
q->re_sub = 0;
return 0;
}