/*
* Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright 2012 Milan Jurik. All rights reserved.
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Henry Spencer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* The matching engine and friends. This file is #included by regexec.c
* after suitable #defines of a variety of macros used herein, so that
* different state representations can be used without duplicating masses
* of code.
*/
#ifdef SNAMES
#endif
#ifdef LNAMES
#endif
#ifdef MNAMES
#endif
/* another structure passed up and down to avoid zillions of parameters */
struct match {
struct re_guts *g;
int eflags;
};
/* ========= begin header generated by ./mkh ========= */
#ifdef __cplusplus
extern "C" {
#endif
/* === engine.c === */
sopno);
sopno);
#ifdef REDEBUG
#endif
#ifdef REDEBUG
#endif
#ifdef REDEBUG
#endif
#ifdef __cplusplus
}
#endif
/* ========= end header generated by ./mkh ========= */
#ifdef REDEBUG
#else
#endif
/*
* matcher - the actual matching engine
*/
static int /* 0 success, REG_NOMATCH failure */
const char *string,
regmatch_t pmatch[],
int eflags)
{
const char *endp;
int i;
const char *dp;
const char *start;
const char *stop;
/* Boyer-Moore algorithms variables */
const char *pp;
const char *mustfirst;
const char *mustlast;
int *matchjump;
int *charjump;
/* simplify the situation where possible */
nmatch = 0;
if (eflags®_STARTEND) {
} else {
}
return (REG_EFATAL);
/* prescreening; this does wonders for this rather slow code */
/* Fast skip non-matches */
break;
/* Greedy matcher */
/*
* We depend on not being used for
* for strings of length 1
*/
;
break;
/* Jump to next possible match */
}
return (REG_NOMATCH);
} else {
break;
return (REG_NOMATCH);
}
}
/* match struct setup */
m->g = g;
STATESETUP(m, 4);
/* Adjust start according to moffset, to speed things up */
if (g->moffset > -1)
/* this loop does only one repetition except for backrefs */
for (;;) {
STATETEARDOWN(m);
return (REG_NOMATCH);
}
break; /* no further info needed */
/* where? */
for (;;) {
NOTE("finding start");
break;
}
break; /* no further info needed */
/* oh my, he wants the subexpressions... */
sizeof (regmatch_t));
STATETEARDOWN(m);
return (REG_ESPACE);
}
for (i = 1; i <= m->g->nsub; i++)
/* NB: FreeBSD has REG_BACKR, we do not */
if (!g->backrefs /* && !(m->eflags®_BACKR) */) {
NOTE("dissecting");
} else {
sizeof (const char *));
STATETEARDOWN(m);
return (REG_ESPACE);
}
NOTE("backref dissect");
}
break;
/* uh-oh... we couldn't find a subexpression-level match */
for (;;) {
break; /* defeat */
NOTE("backoff");
break; /* defeat */
/* try it on a shorter possibility */
#ifndef NDEBUG
for (i = 1; i <= m->g->nsub; i++) {
}
#endif
NOTE("backoff dissect");
}
break;
/* despite initial appearances, there is no match here */
NOTE("false alarm");
/* recycle starting later */
}
/* fill in the details if requested */
if (nmatch > 0) {
}
if (nmatch > 1) {
for (i = 1; i < nmatch; i++)
if (i <= m->g->nsub)
else {
}
}
STATETEARDOWN(m);
return (0);
}
/*
* dissect - figure out what matched what, no back references
*/
static const char *
{
int i;
const char *dp;
/* identify end of subRE */
case OPLUS_:
case OQUEST_:
break;
case OCH_:
break;
}
es++;
/* figure out what it matched */
case OEND:
assert(0);
break;
case OCHAR:
break;
case OBOL:
case OEOL:
case OBOW:
case OEOW:
break;
case OANY:
case OANYOF:
break;
case OBACK_:
case O_BACK:
assert(0);
break;
/* cases where length of match is hard to find */
case OQUEST_:
for (;;) {
/* how long could this one be? */
/* could the rest match the rest? */
break; /* yes! */
/* no -- try a shorter match for this one */
}
/* did innards match? */
#if defined(__lint)
(void) dp;
#endif
} else /* no */
break;
case OPLUS_:
for (;;) {
/* how long could this one be? */
/* could the rest match the rest? */
break; /* yes! */
/* no -- try a shorter match for this one */
}
for (;;) { /* find last match of innards */
break; /* failed or matched null */
}
/* last successful match */
}
break;
case OCH_:
for (;;) {
/* how long could this one be? */
/* could the rest match the rest? */
break; /* yes! */
/* no -- try a shorter match for this one */
}
for (;;) { /* find first matching branch */
break; /* it matched all of it */
/* that one missed, try next one */
esub++;
esub--;
else
}
break;
case O_PLUS:
case O_QUEST:
case OOR1:
case OOR2:
case O_CH:
assert(0);
break;
case OLPAREN:
break;
case ORPAREN:
break;
default: /* uh oh */
assert(0);
break;
}
}
return (sp);
}
/*
* backref - figure out what matched what, figuring in back references
*/
static const char *
int rec)
{
int i;
const char *dp;
int hard;
sop s;
/* get as far as we can with easy stuff */
hard = 0;
case OCHAR:
return (NULL);
return (NULL);
break;
case OANY:
return (NULL);
return (NULL);
break;
case OANYOF:
return (NULL);
return (NULL);
break;
case OBOL:
(m->g->cflags®_NEWLINE))) {
break;
}
return (NULL);
case OEOL:
(m->g->cflags®_NEWLINE))) {
break;
}
return (NULL);
case OBOW:
(m->g->cflags®_NEWLINE)) ||
break;
}
return (NULL);
case OEOW:
(m->g->cflags®_NEWLINE)) ||
break;
}
return (NULL);
case O_QUEST:
break;
case OOR1: /* matches null but needs to skip */
ss++;
do {
/* note that the ss++ gets us past the O_CH */
break;
default: /* have to make a choice */
hard = 1;
break;
}
if (!hard) { /* that was it! */
return (NULL);
return (sp);
}
ss--; /* adjust for the for's final increment */
/* the hard stuff */
switch (OP(s)) {
case OBACK_: /* the vilest depths */
i = OPND(s);
return (NULL);
return (NULL);
return (NULL); /* not enough left to match */
return (NULL);
ss++;
case OQUEST_: /* to null or not */
return (dp); /* not */
case OPLUS_:
case O_PLUS:
/* try another pass */
return (dp);
case OCH_: /* find the right one, if any */
for (;;) { /* find first matching branch */
return (dp);
/* that one missed, try next one */
return (NULL); /* there is none */
esub++;
esub--;
else
}
/* NOTREACHED */
break;
case OLPAREN: /* must undo assignment if rest fails */
i = OPND(s);
return (dp);
return (NULL);
case ORPAREN: /* must undo assignment if rest fails */
i = OPND(s);
return (dp);
return (NULL);
default: /* uh oh */
assert(0);
break;
}
/* "can't happen" */
assert(0);
return (NULL);
}
/*
* fast - step through the string at top speed
*/
static const char *
{
const char *p = start;
wint_t c;
int i;
c = OUT;
else {
/*
* XXX Wrong if the previous character was multi-byte.
* Newline never is (in encodings supported by FreeBSD),
* so this only breaks the ISWORD tests below.
*/
}
for (;;) {
/* next character */
lastc = c;
if (p == m->endp) {
clen = 0;
c = OUT;
} else
coldp = p;
flagch = '\0';
i = 0;
i = m->g->nbol;
}
i += m->g->neol;
}
if (i != 0) {
for (; i > 0; i--)
}
/* how about a word boundary? */
}
}
}
/* are we done? */
break; /* NOTE BREAK OUT */
/* no, we must deal with this character */
p += clen;
}
else
return (NULL);
}
/*
* slow - step through the string more deliberately
*/
static const char *
{
const char *p = start;
wint_t c;
int i;
c = OUT;
else {
/*
* XXX Wrong if the previous character was multi-byte.
* Newline never is (in encodings supported by FreeBSD),
* so this only breaks the ISWORD tests below.
*/
}
for (;;) {
/* next character */
lastc = c;
if (p == m->endp) {
c = OUT;
clen = 0;
} else
flagch = '\0';
i = 0;
i = m->g->nbol;
}
i += m->g->neol;
}
if (i != 0) {
for (; i > 0; i--)
}
/* how about a word boundary? */
}
}
}
/* are we done? */
matchp = p;
break; /* NOTE BREAK OUT */
/* no, we must deal with this character */
p += clen;
}
return (matchp);
}
/*
* step - map set of states reachable before char to set reachable after
*/
static states
{
sop s;
int i;
switch (OP(s)) {
case OEND:
break;
case OCHAR:
/* only characters can match */
break;
case OBOL:
break;
case OEOL:
break;
case OBOW:
break;
case OEOW:
break;
case OANY:
break;
case OANYOF:
break;
case OBACK_: /* ignored here */
case O_BACK:
break;
case OPLUS_: /* forward, this is just an empty */
break;
case O_PLUS: /* both forward and back */
/* oho, must reconsider loop body */
}
break;
case OQUEST_: /* two branches, both forward */
break;
case O_QUEST: /* just an empty */
break;
case OLPAREN: /* not significant here */
case ORPAREN:
break;
case OCH_: /* mark the first two branches */
break;
case OOR1: /* done a branch, find the O_CH */
for (look = 1;
}
break;
case OOR2: /* propagate OCH_'s marking */
}
break;
case O_CH: /* just empty */
break;
default: /* ooooops... */
assert(0);
break;
}
}
return (aft);
}
#ifdef REDEBUG
/*
* print - print a set of states
*/
static void
{
struct re_guts *g = m->g;
int i;
return;
if (ch != '\0')
for (i = 0; i < g->nstates; i++)
first = 0;
}
(void) fprintf(d, "\n");
}
/*
* at - print current situation
*/
static void
{
return;
}
#ifndef PCHARDONE
/*
* pchar - make a character printable
*
* Is this identical to regchar() over in debug.c? Well, yes. But a
* duplicate here avoids having a debugging-capable regexec.o tied to
* a matching debug.o, and this is convenient. It all disappears in
* the non-debug compilation anyway, so it doesn't matter much.
*/
static const char *
{
else
return (pbuf);
}
#endif
#endif