regnexec.c revision 7c2fbfb345896881c631598ee3852ce9ce33fb07
/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1985-2008 AT&T Intellectual Property *
* and is licensed under the *
* Common Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* http://www.opensource.org/licenses/cpl1.0.txt *
* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* David Korn <dgk@research.att.com> *
* Phong Vo <kpv@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* posix regex executor
* single sized-record interface
*/
#include "reglib.h"
#if _AST_REGEX_DEBUG
#define DEBUG_TEST(f,y,n) ((debug&(debug_flag=f))?(y):(n))
#define DEBUG_CODE(f,y,n) do if(debug&(f)){y}else{n} while(0)
#define DEBUG_INIT() do { char* t; if (!debug) { debug = 0x80000000; if (t = getenv("_AST_regex_exec_debug")) debug |= strtoul(t, NiL, 0); } } while (0)
static unsigned long debug;
static unsigned long debug_flag;
static const char* rexnames[] =
{
"REX_NULL",
"REX_ALT",
"REX_ALT_CATCH",
"REX_BACK",
"REX_BEG",
"REX_BEG_STR",
"REX_BM",
"REX_CAT",
"REX_CLASS",
"REX_COLL_CLASS",
"REX_CONJ",
"REX_CONJ_LEFT",
"REX_CONJ_RIGHT",
"REX_DONE",
"REX_DOT",
"REX_END",
"REX_END_STR",
"REX_EXEC",
"REX_FIN_STR",
"REX_GROUP",
"REX_GROUP_CATCH",
"REX_GROUP_AHEAD",
"REX_GROUP_AHEAD_CATCH",
"REX_GROUP_AHEAD_NOT",
"REX_GROUP_BEHIND",
"REX_GROUP_BEHIND_CATCH",
"REX_GROUP_BEHIND_NOT",
"REX_GROUP_BEHIND_NOT_CATCH",
"REX_GROUP_COND",
"REX_GROUP_COND_CATCH",
"REX_GROUP_CUT",
"REX_GROUP_CUT_CATCH",
"REX_KMP",
"REX_NEG",
"REX_NEG_CATCH",
"REX_NEST",
"REX_ONECHAR",
"REX_REP",
"REX_REP_CATCH",
"REX_STRING",
"REX_TRIE",
"REX_WBEG",
"REX_WEND",
"REX_WORD",
"REX_WORD_NOT"
};
static const char* rexname(Rex_t* rex)
{
if (!rex)
return "NIL";
if (rex->type >= elementsof(rexnames))
return "ERROR";
return rexnames[rex->type];
}
#else
#define DEBUG_INIT()
#define DEBUG_TEST(f,y,n) (n)
#define DEBUG_CODE(f,y,n) do {n} while(0)
#endif
#define BEG_ALT 1 /* beginning of an alt */
#define BEG_ONE 2 /* beginning of one iteration of a rep */
#define BEG_REP 3 /* beginning of a repetition */
#define BEG_SUB 4 /* beginning of a subexpression */
#define END_ANY 5 /* end of any of above */
/*
* returns from parse()
*/
#define NONE 0 /* no parse found */
#define GOOD 1 /* some parse was found */
#define CUT 2 /* no match and no backtrack */
#define BEST 3 /* an unbeatable parse was found */
#define BAD 4 /* error ocurred */
/*
* REG_SHELL_DOT test
*/
#define LEADING(e,r,s) (*(s)==(e)->leading&&((s)==(e)->beg||*((s)-1)==(r)->explicit))
/*
* Pos_t is for comparing parses. An entry is made in the
* array at the beginning and at the end of each Group_t,
* each iteration in a Group_t, and each Binary_t.
*/
typedef struct
{
unsigned char* p; /* where in string */
size_t length; /* length in string */
short serial; /* preorder subpattern number */
short be; /* which end of pair */
} Pos_t;
/* ===== begin library support ===== */
#define vector(t,v,i) (((i)<(v)->max)?(t*)((v)->vec+(i)*(v)->siz):(t*)vecseek(&(v),i))
static Vector_t*
vecopen(int inc, int siz)
{
Vector_t* v;
Stk_t* sp;
if (inc <= 0)
inc = 16;
if (!(sp = stkopen(STK_SMALL|STK_NULL)))
return 0;
if (!(v = (Vector_t*)stkseek(sp, sizeof(Vector_t) + inc * siz)))
{
stkclose(sp);
return 0;
}
v->stk = sp;
v->vec = (char*)v + sizeof(Vector_t);
v->max = v->inc = inc;
v->siz = siz;
v->cur = 0;
return v;
}
static void*
vecseek(Vector_t** p, int index)
{
Vector_t* v = *p;
if (index >= v->max)
{
while ((v->max += v->inc) <= index);
if (!(v = (Vector_t*)stkseek(v->stk, sizeof(Vector_t) + v->max * v->siz)))
return 0;
*p = v;
v->vec = (char*)v + sizeof(Vector_t);
}
return v->vec + index * v->siz;
}
static void
vecclose(Vector_t* v)
{
if (v)
stkclose(v->stk);
}
typedef struct
{
Stk_pos_t pos;
char data[1];
} Stk_frame_t;
#define stknew(s,p) ((p)->offset=stktell(s),(p)->base=stkfreeze(s,0))
#define stkold(s,p) stkset(s,(p)->base,(p)->offset)
#define stkframe(s) (*((Stk_frame_t**)stktop(s)-1))
#define stkdata(s,t) ((t*)stkframe(s)->data)
#define stkpop(s) stkold(s,&(stkframe(s)->pos))
static void*
stkpush(Stk_t* sp, size_t size)
{
Stk_frame_t* f;
Stk_pos_t p;
stknew(sp, &p);
size = sizeof(Stk_frame_t) + sizeof(size_t) + size - 1;
if (!(f = (Stk_frame_t*)stkalloc(sp, sizeof(Stk_frame_t) + sizeof(Stk_frame_t*) + size - 1)))
return 0;
f->pos = p;
stkframe(sp) = f;
return f->data;
}
/* ===== end library support ===== */
/*
* Match_frame_t is for saving and restoring match records
* around alternate attempts, so that fossils will not be
* left in the match array. These are the only entries in
* the match array that are not otherwise guaranteed to
* have current data in them when they get used.
*/
typedef struct
{
size_t size;
regmatch_t* match;
regmatch_t save[1];
} Match_frame_t;
#define matchframe stkdata(stkstd,Match_frame_t)
#define matchpush(e,x) ((x)->re.group.number?_matchpush(e,x):0)
#define matchcopy(e,x) ((x)->re.group.number?memcpy(matchframe->match,matchframe->save,matchframe->size):(void*)0)
#define matchpop(e,x) ((x)->re.group.number?memcpy(matchframe->match,matchframe->save,matchframe->size),stkpop(stkstd):(void*)0)
#define pospop(e) (--(e)->pos->cur)
/*
* allocate a frame and push a match onto the stack
*/
static int
_matchpush(Env_t* env, Rex_t* rex)
{
Match_frame_t* f;
regmatch_t* m;
regmatch_t* e;
regmatch_t* s;
int num;
if (rex->re.group.number <= 0 || (num = rex->re.group.last - rex->re.group.number + 1) <= 0)
num = 0;
if (!(f = (Match_frame_t*)stkpush(stkstd, sizeof(Match_frame_t) + (num - 1) * sizeof(regmatch_t))))
{
env->error = REG_ESPACE;
return 1;
}
f->size = num * sizeof(regmatch_t);
f->match = m = env->match + rex->re.group.number;
e = m + num;
s = f->save;
while (m < e)
{
*s++ = *m;
*m++ = state.nomatch;
}
return 0;
}
/*
* allocate a frame and push a pos onto the stack
*/
static int
pospush(Env_t* env, Rex_t* rex, unsigned char* p, int be)
{
Pos_t* pos;
if (!(pos = vector(Pos_t, env->pos, env->pos->cur)))
{
env->error = REG_ESPACE;
return 1;
}
pos->serial = rex->serial;
pos->p = p;
pos->be = be;
env->pos->cur++;
return 0;
}
/*
* two matches are known to have the same length
* os is start of old pos array, ns is start of new,
* oend and nend are end+1 pointers to ends of arrays.
* oe and ne are ends (not end+1) of subarrays.
* returns 1 if new is better, -1 if old, else 0.
*/
static int
better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
{
Pos_t* oe;
Pos_t* ne;
int k;
int n;
if (env->error)
return -1;
for (;;)
{
DEBUG_CODE(0x0080,{sfprintf(sfstdout, " %-*.*sold ", (level + 3) * 4, (level + 3) * 4, "");for (oe = os; oe < oend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n %-*.*snew ", (level + 3) * 4, (level + 3) * 4, "");for (oe = ns; oe < nend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n");},{0;});
if (ns >= nend)
return DEBUG_TEST(0x8000,(os < oend),(0));
if (os >= oend)
return DEBUG_TEST(0x8000,(-1),(1));
n = os->serial;
if (ns->serial > n)
return -1;
if (n > ns->serial)
{
env->error = REG_PANIC;
return -1;
}
if (ns->p > os->p)
return 1;
if (os->p > ns->p)
return -1;
oe = os;
k = 0;
for (;;)
if ((++oe)->serial == n)
{
if (oe->be != END_ANY)
k++;
else if (k-- <= 0)
break;
}
ne = ns;
k = 0;
for (;;)
if ((++ne)->serial == n)
{
if (ne->be != END_ANY)
k++;
else if (k-- <= 0)
break;
}
if (ne->p > oe->p)
return 1;
if (oe->p > ne->p)
return -1;
if (k = better(env, os + 1, ns + 1, oe, ne, level + 1))
return k;
os = oe + 1;
ns = ne + 1;
}
}
#if _AST_REGEX_DEBUG
static void
showmatch(regmatch_t* p)
{
sfputc(sfstdout, '(');
if (p->rm_so < 0)
sfputc(sfstdout, '?');
else
sfprintf(sfstdout, "%d", p->rm_so);
sfputc(sfstdout, ',');
if (p->rm_eo < 0)
sfputc(sfstdout, '?');
else
sfprintf(sfstdout, "%d", p->rm_eo);
sfputc(sfstdout, ')');
}
static int
_better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
{
int i;
DEBUG_CODE(0x0040,{sfprintf(sfstdout, "AHA better old ");for (i = 0; i <= env->nsub; i++)showmatch(&env->best[i]);sfprintf(sfstdout, "\n new ");for (i = 0; i <= env->nsub; i++)showmatch(&env->match[i]);sfprintf(sfstdout, "\n");},{0;});
i = better(env, os, ns, oend, nend, 0);
DEBUG_TEST(0x0040,(sfprintf(sfstdout, " %s\n", i <= 0 ? "OLD" : "NEW")),(0));
return i;
}
#define better _better
#endif
#define follow(e,r,c,s) ((r)->next?parse(e,(r)->next,c,s):(c)?parse(e,c,0,s):BEST)
static int parse(Env_t*, Rex_t*, Rex_t*, unsigned char*);
static int
parserep(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s, int n)
{
int i;
int r = NONE;
Rex_t catcher;
DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep %s %d %d %d `%-.*s'\n", __LINE__, debug_flag, rexname(rex->re.group.expr.rex), rex->lo, n, rex->hi, env->end - s, s)),(0));
if ((rex->flags & REG_MINIMAL) && n >= rex->lo && n < rex->hi)
{
if (env->stack && pospush(env, rex, s, END_ANY))
return BAD;
i = follow(env, rex, cont, s);
if (env->stack)
pospop(env);
switch (i)
{
case BAD:
return BAD;
case CUT:
return CUT;
case BEST:
case GOOD:
return BEST;
}
}
if (n < rex->hi)
{
catcher.type = REX_REP_CATCH;
catcher.serial = rex->serial;
catcher.re.rep_catch.ref = rex;
catcher.re.rep_catch.cont = cont;
catcher.re.rep_catch.beg = s;
catcher.re.rep_catch.n = n + 1;
catcher.next = rex->next;
if (n == 0)
rex->re.rep_catch.beg = s;
if (env->stack)
{
DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x PUSH (%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
if (matchpush(env, rex))
return BAD;
if (pospush(env, rex, s, BEG_ONE))
return BAD;
DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x PUSH (%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
}
r = parse(env, rex->re.group.expr.rex, &catcher, s);
DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep parse %d `%-.*s'\n", __LINE__, debug_flag, r, env->end - s, s)),(0));
if (env->stack)
{
DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x POP (%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
pospop(env);
matchpop(env, rex);
DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x POP (%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
}
switch (r)
{
case BAD:
return BAD;
case BEST:
return BEST;
case CUT:
r = NONE;
break;
case GOOD:
if (rex->flags & REG_MINIMAL)
return BEST;
r = GOOD;
break;
}
}
if (n < rex->lo)
return r;
if (!(rex->flags & REG_MINIMAL) || n >= rex->hi)
{
if (env->stack && pospush(env, rex, s, END_ANY))
return BAD;
i = follow(env, rex, cont, s);
if (env->stack)
pospop(env);
switch (i)
{
case BAD:
r = BAD;
break;
case CUT:
r = CUT;
break;
case BEST:
r = BEST;
break;
case GOOD:
r = (rex->flags & REG_MINIMAL) ? BEST : GOOD;
break;
}
}
return r;
}
static int
parsetrie(Env_t* env, Trie_node_t* x, Rex_t* rex, Rex_t* cont, unsigned char* s)
{
unsigned char* p;
int r;
if (p = rex->map)
{
for (;;)
{
if (s >= env->end)
return NONE;
while (x->c != p[*s])
if (!(x = x->sib))
return NONE;
if (x->end)
break;
x = x->son;
s++;
}
}
else
{
for (;;)
{
if (s >= env->end)
return NONE;
while (x->c != *s)
if (!(x = x->sib))
return NONE;
if (x->end)
break;
x = x->son;
s++;
}
}
s++;
if (rex->flags & REG_MINIMAL)
switch (follow(env, rex, cont, s))
{
case BAD:
return BAD;
case CUT:
return CUT;
case BEST:
case GOOD:
return BEST;
}
if (x->son)
switch (parsetrie(env, x->son, rex, cont, s))
{
case BAD:
return BAD;
case CUT:
return CUT;
case BEST:
return BEST;
case GOOD:
if (rex->flags & REG_MINIMAL)
return BEST;
r = GOOD;
break;
default:
r = NONE;
break;
}
else
r = NONE;
if (!(rex->flags & REG_MINIMAL))
switch (follow(env, rex, cont, s))
{
case BAD:
return BAD;
case CUT:
return CUT;
case BEST:
return BEST;
case GOOD:
return GOOD;
}
return r;
}
static int
collelt(register Celt_t* ce, char* key, int c, int x)
{
Ckey_t elt;
mbxfrm(elt, key, COLL_KEY_MAX);
for (;; ce++)
{
switch (ce->typ)
{
case COLL_call:
if (!x && (*ce->fun)(c))
return 1;
continue;
case COLL_char:
if (!strcmp((char*)ce->beg, (char*)elt))
return 1;
continue;
case COLL_range:
if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max)
return 1;
continue;
case COLL_range_lc:
if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswlower(c) || !iswupper(c)))
return 1;
continue;
case COLL_range_uc:
if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswupper(c) || !iswlower(c)))
return 1;
continue;
}
break;
}
return 0;
}
static int
collic(register Celt_t* ce, char* key, register char* nxt, int c, int x)
{
if (!x)
{
if (collelt(ce, key, c, x))
return 1;
if (iswlower(c))
c = towupper(c);
else if (iswupper(c))
c = towlower(c);
else
return 0;
x = wctomb(key, c);
key[x] = 0;
return collelt(ce, key, c, 0);
}
while (*nxt)
{
if (collic(ce, key, nxt + 1, c, x))
return 1;
if (islower(*nxt))
*nxt = toupper(*nxt);
else if (isupper(*nxt))
*nxt = tolower(*nxt);
else
return 0;
nxt++;
}
return collelt(ce, key, c, x);
}
static int
collmatch(Rex_t* rex, unsigned char* s, unsigned char* e, unsigned char** p)
{
unsigned char* t;
wchar_t c;
int w;
int r;
int x;
int ic;
Ckey_t key;
Ckey_t elt;
ic = (rex->flags & REG_ICASE);
if ((w = MBSIZE(s)) > 1)
{
memcpy((char*)key, (char*)s, w);
key[w] = 0;
t = s;
c = mbchar(t);
#if !_lib_wctype
c &= 0xff;
#endif
x = 0;
}
else
{
c = s[0];
if (ic && isupper(c))
c = tolower(c);
key[0] = c;
key[1] = 0;
if (isalpha(c))
{
x = e - s;
if (x > COLL_KEY_MAX)
x = COLL_KEY_MAX;
while (w < x)
{
c = s[w];
if (!isalpha(c))
break;
r = mbxfrm(elt, key, COLL_KEY_MAX);
if (ic && isupper(c))
c = tolower(c);
key[w] = c;
key[w + 1] = 0;
if (mbxfrm(elt, key, COLL_KEY_MAX) != r)
break;
w++;
}
}
key[w] = 0;
c = key[0];
x = w - 1;
}
r = 1;
for (;;)
{
if (ic ? collic(rex->re.collate.elements, (char*)key, (char*)key, c, x) : collelt(rex->re.collate.elements, (char*)key, c, x))
break;
if (!x)
{
r = 0;
break;
}
w = x--;
key[w] = 0;
}
*p = s + w;
return rex->re.collate.invert ? !r : r;
}
static unsigned char*
nestmatch(register unsigned char* s, register unsigned char* e, const unsigned short* type, register int co)
{
register int c;
register int cc;
unsigned int n;
int oc;
if (type[co] & (REX_NEST_literal|REX_NEST_quote))
{
n = (type[co] & REX_NEST_literal) ? REX_NEST_terminator : (REX_NEST_escape|REX_NEST_terminator);
while (s < e)
{
c = *s++;
if (c == co)
return s;
else if (type[c] & n)
{
if (s >= e || (type[c] & REX_NEST_terminator))
break;
s++;
}
}
}
else
{
cc = type[co] >> REX_NEST_SHIFT;
oc = type[co] & (REX_NEST_open|REX_NEST_close);
n = 1;
while (s < e)
{
c = *s++;
switch (type[c] & (REX_NEST_escape|REX_NEST_open|REX_NEST_close|REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
{
case REX_NEST_delimiter:
case REX_NEST_terminator:
return oc ? 0 : s;
case REX_NEST_separator:
if (!oc)
return s;
break;
case REX_NEST_escape:
if (s >= e)
return 0;
s++;
break;
case REX_NEST_open|REX_NEST_close:
if (c == cc)
{
if (!--n)
return s;
}
/*FALLTHROUGH*/
case REX_NEST_open:
if (c == co)
{
if (!++n)
return 0;
}
else if (!(s = nestmatch(s, e, type, c)))
return 0;
break;
case REX_NEST_close:
if (c != cc)
return 0;
if (!--n)
return s;
break;
}
}
return (oc || !(type[UCHAR_MAX+1] & REX_NEST_terminator)) ? 0 : s;
}
return 0;
}
static int
parse(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s)
{
int c;
int d;
int i;
int m;
int n;
int r;
int* f;
unsigned char* p;
unsigned char* t;
unsigned char* b;
unsigned char* e;
char* u;
regmatch_t* o;
Trie_node_t* x;
Rex_t* q;
Rex_t catcher;
Rex_t next;
for (;;)
{
DEBUG_TEST(0x0008,(sfprintf(sfstdout, "AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
switch (rex->type)
{
case REX_ALT:
if (env->stack)
{
if (matchpush(env, rex))
return BAD;
if (pospush(env, rex, s, BEG_ALT))
return BAD;
catcher.type = REX_ALT_CATCH;
catcher.serial = rex->serial;
catcher.re.alt_catch.cont = cont;
catcher.next = rex->next;
r = parse(env, rex->re.group.expr.binary.left, &catcher, s);
if (r < BEST || (rex->flags & REG_MINIMAL))
{
matchcopy(env, rex);
((Pos_t*)env->pos->vec + env->pos->cur - 1)->serial = catcher.serial = rex->re.group.expr.binary.serial;
n = parse(env, rex->re.group.expr.binary.right, &catcher, s);
if (n != NONE)
r = n;
}
pospop(env);
matchpop(env, rex);
}
else
{
if ((r = parse(env, rex->re.group.expr.binary.left, cont, s)) == NONE)
r = parse(env, rex->re.group.expr.binary.right, cont, s);
if (r == GOOD)
r = BEST;
}
return r;
case REX_ALT_CATCH:
if (pospush(env, rex, s, END_ANY))
return BAD;
r = follow(env, rex, rex->re.alt_catch.cont, s);
pospop(env);
return r;
case REX_BACK:
o = &env->match[rex->lo];
if (o->rm_so < 0)
return NONE;
i = o->rm_eo - o->rm_so;
e = s + i;
if (e > env->end)
return NONE;
t = env->beg + o->rm_so;
if (!(p = rex->map))
{
while (s < e)
if (*s++ != *t++)
return NONE;
}
else if (!mbwide())
{
while (s < e)
if (p[*s++] != p[*t++])
return NONE;
}
else
{
while (s < e)
{
c = mbchar(s);
d = mbchar(t);
if (towupper(c) != towupper(d))
return NONE;
}
}
break;
case REX_BEG:
if ((!(rex->flags & REG_NEWLINE) || s <= env->beg || *(s - 1) != '\n') && ((env->flags & REG_NOTBOL) || s != env->beg))
return NONE;
break;
case REX_CLASS:
if (LEADING(env, rex, s))
return NONE;
n = rex->hi;
if (n > env->end - s)
n = env->end - s;
m = rex->lo;
if (m > n)
return NONE;
r = NONE;
if (!(rex->flags & REG_MINIMAL))
{
for (i = 0; i < n; i++)
if (!settst(rex->re.charclass, s[i]))
{
n = i;
break;
}
for (s += n; n-- >= m; s--)
switch (follow(env, rex, cont, s))
{
case BAD:
return BAD;
case CUT:
return CUT;
case BEST:
return BEST;
case GOOD:
r = GOOD;
break;
}
}
else
{
for (e = s + m; s < e; s++)
if (!settst(rex->re.charclass, *s))
return r;
e += n - m;
for (;;)
{
switch (follow(env, rex, cont, s))
{
case BAD:
return BAD;
case CUT:
return CUT;
case BEST:
case GOOD:
return BEST;
}
if (s >= e || !settst(rex->re.charclass, *s))
break;
s++;
}
}
return r;
case REX_COLL_CLASS:
if (LEADING(env, rex, s))
return NONE;
n = rex->hi;
if (n > env->end - s)
n = env->end - s;
m = rex->lo;
if (m > n)
return NONE;
r = NONE;
e = env->end;
if (!(rex->flags & REG_MINIMAL))
{
if (!(b = (unsigned char*)stkpush(stkstd, n)))
{
env->error = REG_ESPACE;
return BAD;
}
for (i = 0; s < e && i < n && collmatch(rex, s, e, &t); i++)
{
b[i] = t - s;
s = t;
}
for (; i-- >= rex->lo; s -= b[i])
switch (follow(env, rex, cont, s))
{
case BAD:
stkpop(stkstd);
return BAD;
case CUT:
stkpop(stkstd);
return CUT;
case BEST:
stkpop(stkstd);
return BEST;
case GOOD:
r = GOOD;
break;
}
stkpop(stkstd);
}
else
{
for (i = 0; i < m && s < e; i++, s = t)
if (!collmatch(rex, s, e, &t))
return r;
while (i++ <= n)
{
switch (follow(env, rex, cont, s))
{
case BAD:
return BAD;
case CUT:
return CUT;
case BEST:
case GOOD:
return BEST;
}
if (s >= e || !collmatch(rex, s, e, &s))
break;
}
}
return r;
case REX_CONJ:
next.type = REX_CONJ_RIGHT;
next.re.conj_right.cont = cont;
next.next = rex->next;
catcher.type = REX_CONJ_LEFT;
catcher.re.conj_left.right = rex->re.group.expr.binary.right;
catcher.re.conj_left.cont = &next;
catcher.re.conj_left.beg = s;
catcher.next = 0;
return parse(env, rex->re.group.expr.binary.left, &catcher, s);
case REX_CONJ_LEFT:
rex->re.conj_left.cont->re.conj_right.end = s;
cont = rex->re.conj_left.cont;
s = rex->re.conj_left.beg;
rex = rex->re.conj_left.right;
continue;
case REX_CONJ_RIGHT:
if (rex->re.conj_right.end != s)
return NONE;
cont = rex->re.conj_right.cont;
break;
case REX_DONE:
if (!env->stack)
return BEST;
n = s - env->beg;
r = env->nsub;
DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%d,%d)(%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
if ((i = env->best[0].rm_eo) >= 0)
{
if (rex->flags & REG_MINIMAL)
{
if (n > i)
return GOOD;
}
else
{
if (n < i)
return GOOD;
}
if (n == i && better(env,
(Pos_t*)env->bestpos->vec,
(Pos_t*)env->pos->vec,
(Pos_t*)env->bestpos->vec+env->bestpos->cur,
(Pos_t*)env->pos->vec+env->pos->cur,
0) <= 0)
return GOOD;
}
env->best[0].rm_eo = n;
memcpy(&env->best[1], &env->match[1], r * sizeof(regmatch_t));
n = env->pos->cur;
if (!vector(Pos_t, env->bestpos, n))
{
env->error = REG_ESPACE;
return BAD;
}
env->bestpos->cur = n;
memcpy(env->bestpos->vec, env->pos->vec, n * sizeof(Pos_t));
DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%d,%d)(%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
return GOOD;
case REX_DOT:
if (LEADING(env, rex, s))
return NONE;
n = rex->hi;
if (n > env->end - s)
n = env->end - s;
m = rex->lo;
if (m > n)
return NONE;
if ((c = rex->explicit) >= 0 && !mbwide())
for (i = 0; i < n; i++)
if (s[i] == c)
{
n = i;
break;
}
r = NONE;
if (!(rex->flags & REG_MINIMAL))
{
if (!mbwide())
{
for (s += n; n-- >= m; s--)
switch (follow(env, rex, cont, s))
{
case BAD:
return BAD;
case CUT:
return CUT;
case BEST:
return BEST;
case GOOD:
r = GOOD;
break;
}
}
else
{
if (!(b = (unsigned char*)stkpush(stkstd, n)))
{
env->error = REG_ESPACE;
return BAD;
}
e = env->end;
for (i = 0; s < e && i < n && *s != c; i++)
s += b[i] = MBSIZE(s);
for (; i-- >= m; s -= b[i])
switch (follow(env, rex, cont, s))
{
case BAD:
stkpop(stkstd);
return BAD;
case CUT:
stkpop(stkstd);
return CUT;
case BEST:
stkpop(stkstd);
return BEST;
case GOOD:
r = GOOD;
break;
}
stkpop(stkstd);
}
}
else
{
if (!mbwide())
{
e = s + n;
for (s += m; s <= e; s++)
switch (follow(env, rex, cont, s))
{
case BAD:
return BAD;
case CUT:
return CUT;
case BEST:
case GOOD:
return BEST;
}
}
else
{
e = env->end;
for (i = 0; s < e && i < m && *s != c; i++)
s += MBSIZE(s);
if (i >= m)
for (; s <= e && i <= n; s += MBSIZE(s), i++)
switch (follow(env, rex, cont, s))
{
case BAD:
return BAD;
case CUT:
return CUT;
case BEST:
case GOOD:
return BEST;
}
}
}
return r;
case REX_END:
if ((!(rex->flags & REG_NEWLINE) || *s != '\n') && ((env->flags & REG_NOTEOL) || s < env->end))
return NONE;
break;
case REX_GROUP:
DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
if (env->stack)
{
if (rex->re.group.number)
env->match[rex->re.group.number].rm_so = s - env->beg;
if (pospush(env, rex, s, BEG_SUB))
return BAD;
catcher.re.group_catch.eo = rex->re.group.number ? &env->match[rex->re.group.number].rm_eo : (regoff_t*)0;
}
catcher.type = REX_GROUP_CATCH;
catcher.serial = rex->serial;
catcher.re.group_catch.cont = cont;
catcher.next = rex->next;
r = parse(env, rex->re.group.expr.rex, &catcher, s);
if (env->stack)
{
pospop(env);
if (rex->re.group.number)
env->match[rex->re.group.number].rm_so = -1;
}
return r;
case REX_GROUP_CATCH:
DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s=>%s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rexname(rex->re.group_catch.cont), env->end - s, s)),(0));
if (env->stack)
{
if (rex->re.group_catch.eo)
*rex->re.group_catch.eo = s - env->beg;
if (pospush(env, rex, s, END_ANY))
return BAD;
}
r = follow(env, rex, rex->re.group_catch.cont, s);
if (env->stack)
{
pospop(env);
if (rex->re.group_catch.eo)
*rex->re.group_catch.eo = -1;
}
return r;
case REX_GROUP_AHEAD:
catcher.type = REX_GROUP_AHEAD_CATCH;
catcher.flags = rex->flags;
catcher.serial = rex->serial;
catcher.re.rep_catch.beg = s;
catcher.re.rep_catch.cont = cont;
catcher.next = rex->next;
return parse(env, rex->re.group.expr.rex, &catcher, s);
case REX_GROUP_AHEAD_CATCH:
return follow(env, rex, rex->re.rep_catch.cont, rex->re.rep_catch.beg);
case REX_GROUP_AHEAD_NOT:
r = parse(env, rex->re.group.expr.rex, NiL, s);
if (r == NONE)
r = follow(env, rex, cont, s);
else if (r != BAD)
r = NONE;
return r;
case REX_GROUP_BEHIND:
if ((s - env->beg) < rex->re.group.size)
return NONE;
catcher.type = REX_GROUP_BEHIND_CATCH;
catcher.flags = rex->flags;
catcher.serial = rex->serial;
catcher.re.behind_catch.beg = s;
catcher.re.behind_catch.end = e = env->end;
catcher.re.behind_catch.cont = cont;
catcher.next = rex->next;
for (t = s - rex->re.group.size; t >= env->beg; t--)
{
env->end = s;
r = parse(env, rex->re.group.expr.rex, &catcher, t);
env->end = e;
if (r != NONE)
return r;
}
return NONE;
case REX_GROUP_BEHIND_CATCH:
if (s != rex->re.behind_catch.beg)
return NONE;
env->end = rex->re.behind_catch.end;
return follow(env, rex, rex->re.behind_catch.cont, rex->re.behind_catch.beg);
case REX_GROUP_BEHIND_NOT:
if ((s - env->beg) < rex->re.group.size)
r = NONE;
else
{
catcher.type = REX_GROUP_BEHIND_NOT_CATCH;
catcher.re.neg_catch.beg = s;
catcher.next = 0;
e = env->end;
env->end = s;
for (t = s - rex->re.group.size; t >= env->beg; t--)
{
r = parse(env, rex->re.group.expr.rex, &catcher, t);
if (r != NONE)
break;
}
env->end = e;
}
if (r == NONE)
r = follow(env, rex, cont, s);
else if (r != BAD)
r = NONE;
return r;
case REX_GROUP_BEHIND_NOT_CATCH:
return s == rex->re.neg_catch.beg ? GOOD : NONE;
case REX_GROUP_COND:
if (q = rex->re.group.expr.binary.right)
{
catcher.re.cond_catch.next[0] = q->re.group.expr.binary.right;
catcher.re.cond_catch.next[1] = q->re.group.expr.binary.left;
}
else
catcher.re.cond_catch.next[0] = catcher.re.cond_catch.next[1] = 0;
if (q = rex->re.group.expr.binary.left)
{
catcher.type = REX_GROUP_COND_CATCH;
catcher.flags = rex->flags;
catcher.serial = rex->serial;
catcher.re.cond_catch.yes = 0;
catcher.re.cond_catch.beg = s;
catcher.re.cond_catch.cont = cont;
catcher.next = rex->next;
r = parse(env, q, &catcher, s);
if (r == BAD || catcher.re.cond_catch.yes)
return r;
}
else if (!rex->re.group.size || rex->re.group.size > 0 && env->match[rex->re.group.size].rm_so >= 0)
r = GOOD;
else
r = NONE;
if (q = catcher.re.cond_catch.next[r != NONE])
{
catcher.type = REX_CAT;
catcher.flags = q->flags;
catcher.serial = q->serial;
catcher.re.group_catch.cont = cont;
catcher.next = rex->next;
return parse(env, q, &catcher, s);
}
return follow(env, rex, cont, s);
case REX_GROUP_COND_CATCH:
rex->re.cond_catch.yes = 1;
catcher.type = REX_CAT;
catcher.flags = rex->flags;
catcher.serial = rex->serial;
catcher.re.group_catch.cont = rex->re.cond_catch.cont;
catcher.next = rex->next;
return parse(env, rex->re.cond_catch.next[1], &catcher, rex->re.cond_catch.beg);
case REX_CAT:
return follow(env, rex, rex->re.group_catch.cont, s);
case REX_GROUP_CUT:
catcher.type = REX_GROUP_CUT_CATCH;
catcher.flags = rex->flags;
catcher.serial = rex->serial;
catcher.re.group_catch.cont = cont;
catcher.next = rex->next;
return parse(env, rex->re.group.expr.rex, &catcher, s);
case REX_GROUP_CUT_CATCH:
switch (r = follow(env, rex, rex->re.group_catch.cont, s))
{
case GOOD:
r = BEST;
break;
case NONE:
r = CUT;
break;
}
return r;
case REX_KMP:
f = rex->re.string.fail;
b = rex->re.string.base;
n = rex->re.string.size;
t = s;
e = env->end;
if (p = rex->map)
{
while (t + n <= e)
{
for (i = -1; t < e; t++)
{
while (i >= 0 && b[i+1] != p[*t])
i = f[i];
if (b[i+1] == p[*t])
i++;
if (i + 1 == n)
{
t++;
if (env->stack)
env->best[0].rm_so = t - s - n;
switch (follow(env, rex, cont, t))
{
case BAD:
return BAD;
case CUT:
return CUT;
case BEST:
case GOOD:
return BEST;
}
t -= n - 1;
break;
}
}
}
}
else
{
while (t + n <= e)
{
for (i = -1; t < e; t++)
{
while (i >= 0 && b[i+1] != *t)
i = f[i];
if (b[i+1] == *t)
i++;
if (i + 1 == n)
{
t++;
if (env->stack)
env->best[0].rm_so = t - s - n;
switch (follow(env, rex, cont, t))
{
case BAD:
return BAD;
case CUT:
return CUT;
case BEST:
case GOOD:
return BEST;
}
t -= n - 1;
break;
}
}
}
}
return NONE;
case REX_NEG:
if (LEADING(env, rex, s))
return NONE;
i = env->end - s;
n = ((i + 7) >> 3) + 1;
catcher.type = REX_NEG_CATCH;
catcher.re.neg_catch.beg = s;
if (!(p = (unsigned char*)stkpush(stkstd, n)))
return BAD;
memset(catcher.re.neg_catch.index = p, 0, n);
catcher.next = rex->next;
if (parse(env, rex->re.group.expr.rex, &catcher, s) == BAD)
r = BAD;
else
{
r = NONE;
for (; i >= 0; i--)
if (!bittst(p, i))
{
switch (follow(env, rex, cont, s + i))
{
case BAD:
r = BAD;
break;
case BEST:
r = BEST;
break;
case CUT:
r = CUT;
break;
case GOOD:
r = GOOD;
/*FALLTHROUGH*/
default:
continue;
}
break;
}
}
stkpop(stkstd);
return r;
case REX_NEG_CATCH:
bitset(rex->re.neg_catch.index, s - rex->re.neg_catch.beg);
return NONE;
case REX_NEST:
if (s >= env->end)
return NONE;
do
{
if ((c = *s++) == rex->re.nest.primary)
{
if (s >= env->end || !(s = nestmatch(s, env->end, rex->re.nest.type, c)))
return NONE;
break;
}
if (rex->re.nest.primary >= 0)
return NONE;
if (rex->re.nest.type[c] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
break;
if (!(s = nestmatch(s, env->end, rex->re.nest.type, c)))
return NONE;
} while (s < env->end && !(rex->re.nest.type[*(s-1)] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator)));
break;
case REX_NULL:
break;
case REX_ONECHAR:
n = rex->hi;
if (n > env->end - s)
n = env->end - s;
m = rex->lo;
if (m > n)
return NONE;
r = NONE;
c = rex->re.onechar;
if (!(rex->flags & REG_MINIMAL))
{
if (!mbwide())
{
if (p = rex->map)
{
for (i = 0; i < n; i++, s++)
if (p[*s] != c)
break;
}
else
{
for (i = 0; i < n; i++, s++)
if (*s != c)
break;
}
for (; i-- >= m; s--)
switch (follow(env, rex, cont, s))
{
case BAD:
return BAD;
case BEST:
return BEST;
case CUT:
return CUT;
case GOOD:
r = GOOD;
break;
}
}
else
{
if (!(b = (unsigned char*)stkpush(stkstd, n)))
{
env->error = REG_ESPACE;
return BAD;
}
e = env->end;
if (!(rex->flags & REG_ICASE))
{
for (i = 0; s < e && i < n; i++, s = t)
{
t = s;
if (mbchar(t) != c)
break;
b[i] = t - s;
}
}
else
{
for (i = 0; s < e && i < n; i++, s = t)
{
t = s;
if (towupper(mbchar(t)) != c)
break;
b[i] = t - s;
}
}
for (; i-- >= m; s -= b[i])
switch (follow(env, rex, cont, s))
{
case BAD:
stkpop(stkstd);
return BAD;
case BEST:
stkpop(stkstd);
return BEST;
case CUT:
stkpop(stkstd);
return CUT;
case GOOD:
r = GOOD;
break;
}
stkpop(stkstd);
}
}
else
{
if (!mbwide())
{
e = s + m;
if (p = rex->map)
{
for (; s < e; s++)
if (p[*s] != c)
return r;
e += n - m;
for (;;)
{
switch (follow(env, rex, cont, s))
{
case BAD:
return BAD;
case CUT:
return CUT;
case BEST:
case GOOD:
return BEST;
}
if (s >= e || p[*s++] != c)
break;
}
}
else
{
for (; s < e; s++)
if (*s != c)
return r;
e += n - m;
for (;;)
{
switch (follow(env, rex, cont, s))
{
case BAD:
return BAD;
case CUT:
return CUT;
case BEST:
case GOOD:
return BEST;
}
if (s >= e || *s++ != c)
break;
}
}
}
else
{
if (!(rex->flags & REG_ICASE))
{
for (i = 0; i < m && s < e; i++, s = t)
{
t = s;
if (mbchar(t) != c)
return r;
}
while (i++ <= n)
{
switch (follow(env, rex, cont, s))
{
case BAD:
return BAD;
case CUT:
return CUT;
case BEST:
case GOOD:
return BEST;
}
if (s >= e || mbchar(s) != c)
break;
}
}
else
{
for (i = 0; i < m && s < e; i++, s = t)
{
t = s;
if (towupper(mbchar(t)) != c)
return r;
}
while (i++ <= n)
{
switch (follow(env, rex, cont, s))
{
case BAD:
return BAD;
case CUT:
return CUT;
case BEST:
case GOOD:
return BEST;
}
if (s >= e || towupper(mbchar(s)) != c)
break;
}
}
}
}
return r;
case REX_REP:
if (env->stack && pospush(env, rex, s, BEG_REP))
return BAD;
r = parserep(env, rex, cont, s, 0);
if (env->stack)
pospop(env);
return r;
case REX_REP_CATCH:
DEBUG_TEST(0x0020,(sfprintf(sfstdout, "AHA#%04d 0x%04x %s n %d len %d s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.rep_catch.n, s - rex->re.rep_catch.beg, env->end - s, s)),(0));
if (env->stack && pospush(env, rex, s, END_ANY))
return BAD;
if (s == rex->re.rep_catch.beg && rex->re.rep_catch.n > rex->re.rep_catch.ref->lo)
{
/*
* optional empty iteration
*/
DEBUG_TEST(0x0002,(sfprintf(sfstdout, "AHA#%04d %p re.group.back=%d re.group.expr.rex=%s\n", __LINE__, rex->re.rep_catch.ref->re.group.expr.rex, rex->re.rep_catch.ref->re.group.expr.rex->re.group.back, rexname(rex->re.rep_catch.ref->re.group.expr.rex))),(0));
if (!env->stack || s != rex->re.rep_catch.ref->re.rep_catch.beg && !rex->re.rep_catch.ref->re.group.expr.rex->re.group.back)
r = NONE;
else if (pospush(env, rex, s, END_ANY))
r = BAD;
else
{
r = follow(env, rex, rex->re.rep_catch.cont, s);
pospop(env);
}
}
else
r = parserep(env, rex->re.rep_catch.ref, rex->re.rep_catch.cont, s, rex->re.rep_catch.n);
if (env->stack)
pospop(env);
return r;
case REX_STRING:
DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s \"%-.*s\" `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.string.size, rex->re.string.base, env->end - s, s)),(0));
if (rex->re.string.size > (env->end - s))
return NONE;
t = rex->re.string.base;
e = t + rex->re.string.size;
if (!(p = rex->map))
{
while (t < e)
if (*s++ != *t++)
return NONE;
}
else if (!mbwide())
{
while (t < e)
if (p[*s++] != *t++)
return NONE;
}
else
{
while (t < e)
{
c = mbchar(s);
d = mbchar(t);
if (towupper(c) != d)
return NONE;
}
}
break;
case REX_TRIE:
if (((s + rex->re.trie.min) > env->end) || !(x = rex->re.trie.root[rex->map ? rex->map[*s] : *s]))
return NONE;
return parsetrie(env, x, rex, cont, s);
case REX_EXEC:
u = 0;
r = (*env->disc->re_execf)(env->regex, rex->re.exec.data, rex->re.exec.text, rex->re.exec.size, (const char*)s, env->end - s, &u, env->disc);
e = (unsigned char*)u;
if (e >= s && e <= env->end)
s = e;
switch (r)
{
case 0:
break;
case REG_NOMATCH:
return NONE;
default:
env->error = r;
return BAD;
}
break;
case REX_WBEG:
if (!isword(*s) || s > env->beg && isword(*(s - 1)))
return NONE;
break;
case REX_WEND:
if (isword(*s) || s > env->beg && !isword(*(s - 1)))
return NONE;
break;
case REX_WORD:
if (s > env->beg && isword(*(s - 1)) == isword(*s))
return NONE;
break;
case REX_WORD_NOT:
if (s == env->beg || isword(*(s - 1)) != isword(*s))
return NONE;
break;
case REX_BEG_STR:
if (s != env->beg)
return NONE;
break;
case REX_END_STR:
for (t = s; t < env->end && *t == '\n'; t++);
if (t < env->end)
return NONE;
break;
case REX_FIN_STR:
if (s < env->end)
return NONE;
break;
}
if (!(rex = rex->next))
{
if (!(rex = cont))
break;
cont = 0;
}
}
return GOOD;
}
#if _AST_REGEX_DEBUG
static void
listnode(Rex_t* e, int level)
{
int i;
if (e)
{
do
{
for (i = 0; i < level; i++)
sfprintf(sfstderr, " ");
sfprintf(sfstderr, "%s\n", rexname(e));
switch (e->type)
{
case REX_ALT:
case REX_CONJ:
listnode(e->re.group.expr.binary.left, level + 1);
listnode(e->re.group.expr.binary.right, level + 1);
break;
case REX_GROUP:
case REX_GROUP_AHEAD:
case REX_GROUP_AHEAD_NOT:
case REX_GROUP_BEHIND:
case REX_GROUP_BEHIND_NOT:
case REX_GROUP_CUT:
case REX_NEG:
case REX_REP:
listnode(e->re.group.expr.rex, level + 1);
break;
}
} while (e = e->next);
}
}
static int
list(Env_t* env, Rex_t* rex)
{
sfprintf(sfstderr, "AHA regex hard=%d stack=%p\n", env->hard, env->stack);
if (rex)
listnode(rex, 1);
return 0;
}
#endif
/*
* returning REG_BADPAT or REG_ESPACE is not explicitly
* countenanced by the standard
*/
int
regnexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags)
{
register int n;
register int i;
int j;
int k;
int m;
int advance;
Env_t* env;
Rex_t* e;
DEBUG_INIT();
DEBUG_TEST(0x0001,(sfprintf(sfstdout, "AHA#%04d 0x%04x regnexec %d 0x%08x `%-.*s'\n", __LINE__, debug_flag, nmatch, flags, len, s)),(0));
if (!p || !(env = p->env))
return REG_BADPAT;
if (!s)
return fatal(env->disc, REG_BADPAT, NiL);
if (len < env->min)
{
DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, env->min)),(0));
return REG_NOMATCH;
}
env->regex = p;
env->beg = (unsigned char*)s;
env->end = env->beg + len;
stknew(stkstd, &env->stk);
env->flags &= ~REG_EXEC;
env->flags |= (flags & REG_EXEC);
advance = 0;
if (env->stack = env->hard || !(env->flags & REG_NOSUB) && nmatch)
{
n = env->nsub;
if (!(env->match = (regmatch_t*)stkpush(stkstd, 2 * (n + 1) * sizeof(regmatch_t))) ||
!env->pos && !(env->pos = vecopen(16, sizeof(Pos_t))) ||
!env->bestpos && !(env->bestpos = vecopen(16, sizeof(Pos_t))))
{
k = REG_ESPACE;
goto done;
}
env->pos->cur = env->bestpos->cur = 0;
env->best = &env->match[n + 1];
env->best[0].rm_so = 0;
env->best[0].rm_eo = -1;
for (i = 0; i <= n; i++)
env->match[i] = state.nomatch;
if (flags & REG_ADVANCE)
advance = 1;
}
DEBUG_TEST(0x1000,(list(env,env->rex)),(0));
k = REG_NOMATCH;
if ((e = env->rex)->type == REX_BM)
{
DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM\n", __LINE__)),(0));
if (len < e->re.bm.right)
{
DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, e->re.bm.right)),(0));
goto done;
}
else if (!(flags & REG_LEFT))
{
register unsigned char* buf = (unsigned char*)s;
register size_t index = e->re.bm.left + e->re.bm.size;
register size_t mid = len - e->re.bm.right;
register size_t* skip = e->re.bm.skip;
register size_t* fail = e->re.bm.fail;
register Bm_mask_t** mask = e->re.bm.mask;
Bm_mask_t m;
size_t x;
DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM len=%d right=%d left=%d size=%d %d %d\n", __LINE__, len, e->re.bm.right, e->re.bm.left, e->re.bm.size, index, mid)),(0));
for (;;)
{
while ((index += skip[buf[index]]) < mid);
if (index < HIT)
{
DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, index, HIT)),(0));
goto done;
}
index -= HIT;
m = mask[n = e->re.bm.size - 1][buf[index]];
do
{
if (!n--)
{
if (e->re.bm.back < 0)
goto possible;
if (advance)
{
i = index - e->re.bm.back;
s += i;
if (env->stack)
env->best[0].rm_so += i;
goto possible;
}
x = index;
if (index < e->re.bm.back)
index = 0;
else
index -= e->re.bm.back;
while (index <= x)
{
if ((i = parse(env, e->next, &env->done, buf + index)) != NONE)
{
if (env->stack)
env->best[0].rm_so = index;
n = env->nsub;
goto hit;
}
index++;
}
index += e->re.bm.size;
break;
}
} while (m &= mask[n][buf[--index]]);
if ((index += fail[n + 1]) >= len)
goto done;
}
}
possible:
n = env->nsub;
e = e->next;
}
j = env->once || (flags & REG_LEFT);
DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d parse once=%d\n", __LINE__, j)),(0));
while ((i = parse(env, e, &env->done, (unsigned char*)s)) == NONE || advance && !env->best[0].rm_eo && !(advance = 0))
{
if (j)
goto done;
i = MBSIZE(s);
s += i;
if ((unsigned char*)s > env->end - env->min)
goto done;
if (env->stack)
env->best[0].rm_so += i;
}
if ((flags & REG_LEFT) && env->stack && env->best[0].rm_so)
goto done;
hit:
if (k = env->error)
goto done;
if (i == CUT)
{
k = env->error = REG_NOMATCH;
goto done;
}
if (!(env->flags & REG_NOSUB))
{
k = (env->flags & (REG_SHELL|REG_AUGMENTED)) == (REG_SHELL|REG_AUGMENTED);
for (i = j = m = 0; j < nmatch; i++)
if (!i || !k || (i & 1))
{
if (i > n)
match[j] = state.nomatch;
else
match[m = j] = env->best[i];
j++;
}
if (k)
{
while (m > 0 && match[m].rm_so == -1 && match[m].rm_eo == -1)
m--;
((regex_t*)p)->re_nsub = m;
}
}
k = 0;
done:
stkold(stkstd, &env->stk);
env->stk.base = 0;
if (k > REG_NOMATCH)
fatal(p->env->disc, k, NiL);
return k;
}
void
regfree(regex_t* p)
{
Env_t* env;
if (p && (env = p->env))
{
#if _REG_subcomp
if (env->sub)
{
regsubfree(p);
p->re_sub = 0;
}
#endif
p->env = 0;
if (--env->refs <= 0 && !(env->disc->re_flags & REG_NOFREE))
{
drop(env->disc, env->rex);
if (env->pos)
vecclose(env->pos);
if (env->bestpos)
vecclose(env->bestpos);
if (env->stk.base)
stkold(stkstd, &env->stk);
alloc(env->disc, env, 0);
}
}
}