port/locale/engine.c

	engine.c revision 84441f85b19f6b8080883f30109e58e43c893709
1N/A/*
1N/A * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
1N/A * Copyright (c) 1992, 1993, 1994 Henry Spencer.
1N/A * Copyright (c) 1992, 1993, 1994
1N/A *  The Regents of the University of California.  All rights reserved.
1N/A *
1N/A * This code is derived from software contributed to Berkeley by
1N/A * Henry Spencer.
1N/A *
1N/A * Redistribution and use in source and binary forms, with or without
1N/A * modification, are permitted provided that the following conditions
1N/A * are met:
1N/A * 1. Redistributions of source code must retain the above copyright
1N/A *    notice, this list of conditions and the following disclaimer.
1N/A * 2. Redistributions in binary form must reproduce the above copyright
1N/A *    notice, this list of conditions and the following disclaimer in the
1N/A *    documentation and/or other materials provided with the distribution.
1N/A * 4. Neither the name of the University nor the names of its contributors
1N/A *    may be used to endorse or promote products derived from this software
1N/A *    without specific prior written permission.
1N/A *
1N/A * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
1N/A * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1N/A * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1N/A * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
1N/A * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1N/A * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
1N/A * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1N/A * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
1N/A * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
1N/A * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
1N/A * SUCH DAMAGE.
1N/A */
1N/A
1N/A/*
1N/A * The matching engine and friends.  This file is #included by regexec.c
1N/A * after suitable #defines of a variety of macros used herein, so that
1N/A * different state representations can be used without duplicating masses
1N/A * of code.
1N/A */
1N/A
1N/A#ifdef SNAMES
1N/A#define matcher smatcher
1N/A#define fast    sfast
1N/A#define slow    sslow
1N/A#define dissect sdissect
1N/A#define backref sbackref
1N/A#define step    sstep
1N/A#define print   sprint
1N/A#define at  sat
1N/A#define match   smat
1N/A#endif
1N/A#ifdef LNAMES
1N/A#define matcher lmatcher
1N/A#define fast    lfast
1N/A#define slow    lslow
1N/A#define dissect ldissect
1N/A#define backref lbackref
1N/A#define step    lstep
1N/A#define print   lprint
1N/A#define at  lat
1N/A#define match   lmat
1N/A#endif
1N/A#ifdef MNAMES
1N/A#define matcher mmatcher
1N/A#define fast    mfast
1N/A#define slow    mslow
1N/A#define dissect mdissect
1N/A#define backref mbackref
1N/A#define step    mstep
1N/A#define print   mprint
1N/A#define at  mat
1N/A#define match   mmat
1N/A#endif
1N/A
1N/A/* another structure passed up and down to avoid zillions of parameters */
1N/Astruct match {
1N/A    struct re_guts *g;
1N/A    int eflags;
1N/A    regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
1N/A    const char *offp;   /* offsets work from here */
1N/A    const char *beginp; /* start of string -- virtual NUL precedes */
1N/A    const char *endp;   /* end of string -- virtual NUL here */
1N/A    const char *coldp;  /* can be no match starting before here */
1N/A    const char **lastpos;   /* [nplus+1] */
1N/A    STATEVARS;
1N/A    states st;      /* current states */
1N/A    states fresh;       /* states for a fresh start */
1N/A    states tmp;     /* temporary */
1N/A    states empty;       /* empty set of states */
1N/A    mbstate_t mbs;      /* multibyte conversion state */
1N/A};
1N/A
1N/A/* ========= begin header generated by ./mkh ========= */
1N/A#ifdef __cplusplus
1N/Aextern "C" {
1N/A#endif
1N/A
1N/A/* === engine.c === */
1N/Astatic int matcher(struct re_guts *, const char *, size_t, regmatch_t[], int);
1N/Astatic const char *dissect(struct match *, const char *, const char *,
1N/A    sopno, sopno);
1N/Astatic const char *backref(struct match *, const char *, const char *, sopno,
1N/A    sopno, sopno, int);
1N/Astatic const char *fast(struct match *, const char *, const char *, sopno,
1N/A    sopno);
1N/Astatic const char *slow(struct match *, const char *, const char *, sopno,
1N/A    sopno);
1N/Astatic states step(struct re_guts *, sopno, sopno, states, wint_t, states);
1N/A#define MAX_RECURSION   100
1N/A#define BOL (OUT-1)
1N/A#define EOL (BOL-1)
1N/A#define BOLEOL  (BOL-2)
1N/A#define NOTHING (BOL-3)
1N/A#define BOW (BOL-4)
1N/A#define EOW (BOL-5)
1N/A#define BADCHAR (BOL-6)
1N/A#define NONCHAR(c)  ((c) <= OUT)
1N/A#ifdef REDEBUG
1N/Astatic void print(struct match *, const char *, states, int, FILE *);
1N/A#endif
1N/A#ifdef REDEBUG
1N/Astatic void at(struct match *, const char *, const char *, const char *,
1N/A    sopno, sopno);
1N/A#endif
1N/A#ifdef REDEBUG
1N/Astatic const char *pchar(int ch);
1N/A#endif
1N/A
1N/A#ifdef __cplusplus
1N/A}
1N/A#endif
1N/A/* ========= end header generated by ./mkh ========= */
1N/A
1N/A#ifdef REDEBUG
1N/A#define SP(t, s, c) print(m, t, s, c, stdout)
1N/A#define AT(t, p1, p2, s1, s2)   at(m, t, p1, p2, s1, s2)
1N/A#define NOTE(str)   { if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
1N/A#else
1N/A#define SP(t, s, c) /* nothing */
1N/A#define AT(t, p1, p2, s1, s2)   /* nothing */
1N/A#define NOTE(s) /* nothing */
1N/A#endif
1N/A
1N/A/*
1N/A * matcher - the actual matching engine
1N/A */
1N/Astatic int          /* 0 success, REG_NOMATCH failure */
1N/Amatcher(struct re_guts *g,
1N/A    const char *string,
1N/A    size_t nmatch,
1N/A    regmatch_t pmatch[],
1N/A    int eflags)
1N/A{
1N/A    const char *endp;
1N/A    int i;
1N/A    struct match mv;
1N/A    struct match *m = &mv;
1N/A    const char *dp;
1N/A    const sopno gf = g->firststate+1;   /* +1 for OEND */
1N/A    const sopno gl = g->laststate;
1N/A    const char *start;
1N/A    const char *stop;
1N/A    /* Boyer-Moore algorithms variables */
1N/A    const char *pp;
1N/A    int cj, mj;
1N/A    const char *mustfirst;
1N/A    const char *mustlast;
1N/A    int *matchjump;
1N/A    int *charjump;
1N/A
1N/A    /* simplify the situation where possible */
1N/A    if (g->cflags&REG_NOSUB)
1N/A        nmatch = 0;
1N/A
1N/A    if (eflags&REG_STARTEND) {
1N/A        start = string + pmatch[0].rm_so;
1N/A        stop = string + pmatch[0].rm_eo;
1N/A    } else {
1N/A        start = string;
1N/A        stop = start + strlen(start);
1N/A    }
1N/A
1N/A    if (stop < start)
1N/A        return (REG_EFATAL);
1N/A
1N/A    /* prescreening; this does wonders for this rather slow code */
1N/A    if (g->must != NULL) {
1N/A        if (g->charjump != NULL && g->matchjump != NULL) {
1N/A            mustfirst = g->must;
1N/A            mustlast = g->must + g->mlen - 1;
1N/A            charjump = g->charjump;
1N/A            matchjump = g->matchjump;
1N/A            pp = mustlast;
1N/A            for (dp = start+g->mlen-1; dp < stop; ) {
1N/A                /* Fast skip non-matches */
1N/A                while (dp < stop && charjump[(int)*dp])
1N/A                    dp += charjump[(int)*dp];
1N/A
1N/A                if (dp >= stop)
1N/A                    break;
1N/A
1N/A                /* Greedy matcher */
1N/A                /*
1N/A                 * We depend on not being used for
1N/A                 * for strings of length 1
1N/A                 */
1N/A                while (*--dp == *--pp && pp != mustfirst)
1N/A                    ;
1N/A
1N/A                if (*dp == *pp)
1N/A                    break;
1N/A
1N/A                /* Jump to next possible match */
1N/A                mj = matchjump[pp - mustfirst];
1N/A                cj = charjump[(int)*dp];
1N/A                dp += (cj < mj ? mj : cj);
1N/A                pp = mustlast;
1N/A            }
1N/A            if (pp != mustfirst)
1N/A                return (REG_NOMATCH);
1N/A        } else {
1N/A            for (dp = start; dp < stop; dp++)
1N/A                if (*dp == g->must[0] &&
1N/A                    stop - dp >= g->mlen &&
1N/A                    memcmp(dp, g->must, (size_t)g->mlen) == 0)
1N/A                    break;
1N/A            if (dp == stop)     /* we didn't find g->must */
1N/A                return (REG_NOMATCH);
1N/A        }
1N/A    }
1N/A
1N/A    /* match struct setup */
1N/A    m->g = g;
1N/A    m->eflags = eflags;
1N/A    m->pmatch = NULL;
1N/A    m->lastpos = NULL;
1N/A    m->offp = string;
1N/A    m->beginp = start;
1N/A    m->endp = stop;
1N/A    STATESETUP(m, 4);
1N/A    SETUP(m->st);
1N/A    SETUP(m->fresh);
1N/A    SETUP(m->tmp);
1N/A    SETUP(m->empty);
1N/A    CLEAR(m->empty);
1N/A    ZAPSTATE(&m->mbs);
1N/A
1N/A    /* Adjust start according to moffset, to speed things up */
1N/A    if (g->moffset > -1)
1N/A        start = ((dp - g->moffset) < start) ? start : dp - g->moffset;
1N/A
1N/A    SP("mloop", m->st, *start);
1N/A
1N/A    /* this loop does only one repetition except for backrefs */
1N/A    for (;;) {
1N/A        endp = fast(m, start, stop, gf, gl);
1N/A        if (endp == NULL) {     /* a miss */
1N/A            if (m->pmatch != NULL)
1N/A                free((char *)m->pmatch);
1N/A            if (m->lastpos != NULL)
1N/A                free((char *)m->lastpos);
1N/A            STATETEARDOWN(m);
1N/A            return (REG_NOMATCH);
1N/A        }
1N/A        if (nmatch == 0 && !g->backrefs)
1N/A            break;      /* no further info needed */
1N/A
1N/A        /* where? */
1N/A        assert(m->coldp != NULL);
1N/A        for (;;) {
1N/A            NOTE("finding start");
1N/A            endp = slow(m, m->coldp, stop, gf, gl);
1N/A            if (endp != NULL)
1N/A                break;
1N/A            assert(m->coldp < m->endp);
1N/A            m->coldp += XMBRTOWC(NULL, m->coldp,
1N/A                m->endp - m->coldp, &m->mbs, 0);
1N/A        }
1N/A        if (nmatch == 1 && !g->backrefs)
1N/A            break;      /* no further info needed */
1N/A
1N/A        /* oh my, he wants the subexpressions... */
1N/A        if (m->pmatch == NULL)
1N/A            m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
1N/A                sizeof (regmatch_t));
1N/A        if (m->pmatch == NULL) {
1N/A            STATETEARDOWN(m);
1N/A            return (REG_ESPACE);
1N/A        }
1N/A        for (i = 1; i <= m->g->nsub; i++)
1N/A            m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
1N/A        /* NB: FreeBSD has REG_BACKR, we do not */
1N/A        if (!g->backrefs /* && !(m->eflags&REG_BACKR) */) {
1N/A            NOTE("dissecting");
1N/A            dp = dissect(m, m->coldp, endp, gf, gl);
1N/A        } else {
1N/A            if (g->nplus > 0 && m->lastpos == NULL)
1N/A                m->lastpos = malloc((g->nplus+1) *
1N/A                    sizeof (const char *));
1N/A            if (g->nplus > 0 && m->lastpos == NULL) {
1N/A                free(m->pmatch);
1N/A                STATETEARDOWN(m);
1N/A                return (REG_ESPACE);
1N/A            }
1N/A            NOTE("backref dissect");
1N/A            dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
1N/A        }
1N/A        if (dp != NULL)
1N/A            break;
1N/A
1N/A        /* uh-oh... we couldn't find a subexpression-level match */
1N/A        assert(g->backrefs);    /* must be back references doing it */
1N/A        assert(g->nplus == 0 || m->lastpos != NULL);
1N/A        for (;;) {
1N/A            if (dp != NULL || endp <= m->coldp)
1N/A                break;      /* defeat */
1N/A            NOTE("backoff");
1N/A            endp = slow(m, m->coldp, endp-1, gf, gl);
1N/A            if (endp == NULL)
1N/A                break;      /* defeat */
1N/A            /* try it on a shorter possibility */
1N/A#ifndef NDEBUG
1N/A            for (i = 1; i <= m->g->nsub; i++) {
1N/A                assert(m->pmatch[i].rm_so == -1);
1N/A                assert(m->pmatch[i].rm_eo == -1);
1N/A            }
1N/A#endif
1N/A            NOTE("backoff dissect");
1N/A            dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
1N/A        }
1N/A        assert(dp == NULL || dp == endp);
1N/A        if (dp != NULL)     /* found a shorter one */
1N/A            break;
1N/A
1N/A        /* despite initial appearances, there is no match here */
1N/A        NOTE("false alarm");
1N/A        /* recycle starting later */
1N/A        start = m->coldp + XMBRTOWC(NULL, m->coldp,
1N/A            stop - m->coldp, &m->mbs, 0);
1N/A        assert(start <= stop);
1N/A    }
1N/A
1N/A    /* fill in the details if requested */
1N/A    if (nmatch > 0) {
1N/A        pmatch[0].rm_so = m->coldp - m->offp;
1N/A        pmatch[0].rm_eo = endp - m->offp;
1N/A    }
1N/A    if (nmatch > 1) {
1N/A        assert(m->pmatch != NULL);
1N/A        for (i = 1; i < nmatch; i++)
1N/A            if (i <= m->g->nsub)
1N/A                pmatch[i] = m->pmatch[i];
1N/A            else {
1N/A                pmatch[i].rm_so = -1;
1N/A                pmatch[i].rm_eo = -1;
1N/A            }
1N/A    }
1N/A
1N/A    if (m->pmatch != NULL)
1N/A        free((char *)m->pmatch);
1N/A    if (m->lastpos != NULL)
1N/A        free((char *)m->lastpos);
1N/A    STATETEARDOWN(m);
1N/A    return (0);
1N/A}
1N/A
1N/A/*
1N/A * dissect - figure out what matched what, no back references
1N/A */
1N/Astatic const char *
1N/Adissect(struct match *m, const char *start, const char *stop, sopno startst,
1N/A    sopno stopst)
1N/A{
1N/A    int i;
1N/A    sopno ss;       /* start sop of current subRE */
1N/A    sopno es;       /* end sop of current subRE */
1N/A    const char *sp;     /* start of string matched by it */
1N/A    const char *stp;    /* string matched by it cannot pass here */
1N/A    const char *rest;   /* start of rest of string */
1N/A    const char *tail;   /* string unmatched by rest of RE */
1N/A    sopno ssub;     /* start sop of subsubRE */
1N/A    sopno esub;     /* end sop of subsubRE */
1N/A    const char *ssp;    /* start of string matched by subsubRE */
1N/A    const char *sep;    /* end of string matched by subsubRE */
1N/A    const char *oldssp; /* previous ssp */
1N/A    const char *dp;
1N/A
1N/A    AT("diss", start, stop, startst, stopst);
1N/A    sp = start;
1N/A    for (ss = startst; ss < stopst; ss = es) {
1N/A        /* identify end of subRE */
1N/A        es = ss;
1N/A        switch (OP(m->g->strip[es])) {
1N/A        case OPLUS_:
1N/A        case OQUEST_:
1N/A            es += OPND(m->g->strip[es]);
1N/A            break;
1N/A        case OCH_:
1N/A            while (OP(m->g->strip[es]) != O_CH)
1N/A                es += OPND(m->g->strip[es]);
1N/A            break;
1N/A        }
1N/A        es++;
1N/A
1N/A        /* figure out what it matched */
1N/A        switch (OP(m->g->strip[ss])) {
1N/A        case OEND:
1N/A            assert(0);
1N/A            break;
1N/A        case OCHAR:
1N/A            sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0);
1N/A            break;
1N/A        case OBOL:
1N/A        case OEOL:
1N/A        case OBOW:
1N/A        case OEOW:
1N/A            break;
1N/A        case OANY:
1N/A        case OANYOF:
1N/A            sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0);
1N/A            break;
1N/A        case OBACK_:
1N/A        case O_BACK:
1N/A            assert(0);
1N/A            break;
1N/A        /* cases where length of match is hard to find */
1N/A        case OQUEST_:
1N/A            stp = stop;
1N/A            for (;;) {
1N/A                /* how long could this one be? */
1N/A                rest = slow(m, sp, stp, ss, es);
1N/A                assert(rest != NULL);   /* it did match */
1N/A                /* could the rest match the rest? */
1N/A                tail = slow(m, rest, stop, es, stopst);
1N/A                if (tail == stop)
1N/A                    break;      /* yes! */
1N/A                /* no -- try a shorter match for this one */
1N/A                stp = rest - 1;
1N/A                assert(stp >= sp);  /* it did work */
1N/A            }
1N/A            ssub = ss + 1;
1N/A            esub = es - 1;
1N/A            /* did innards match? */
1N/A            if (slow(m, sp, rest, ssub, esub) != NULL) {
1N/A                dp = dissect(m, sp, rest, ssub, esub);
1N/A                assert(dp == rest);
1N/A#if defined(__lint)
1N/A                (void) dp;
1N/A#endif
1N/A            } else      /* no */
1N/A                assert(sp == rest);
1N/A            sp = rest;
1N/A            break;
1N/A        case OPLUS_:
1N/A            stp = stop;
1N/A            for (;;) {
1N/A                /* how long could this one be? */
1N/A                rest = slow(m, sp, stp, ss, es);
1N/A                assert(rest != NULL);   /* it did match */
1N/A                /* could the rest match the rest? */
1N/A                tail = slow(m, rest, stop, es, stopst);
1N/A                if (tail == stop)
1N/A                    break;      /* yes! */
1N/A                /* no -- try a shorter match for this one */
1N/A                stp = rest - 1;
1N/A                assert(stp >= sp);  /* it did work */
1N/A            }
1N/A            ssub = ss + 1;
1N/A            esub = es - 1;
1N/A            ssp = sp;
1N/A            oldssp = ssp;
1N/A            for (;;) {  /* find last match of innards */
1N/A                sep = slow(m, ssp, rest, ssub, esub);
1N/A                if (sep == NULL || sep == ssp)
1N/A                    break;  /* failed or matched null */
1N/A                oldssp = ssp;   /* on to next try */
1N/A                ssp = sep;
1N/A            }
1N/A            if (sep == NULL) {
1N/A                /* last successful match */
1N/A                sep = ssp;
1N/A                ssp = oldssp;
1N/A            }
1N/A            assert(sep == rest);    /* must exhaust substring */
1N/A            assert(slow(m, ssp, sep, ssub, esub) == rest);
1N/A            dp = dissect(m, ssp, sep, ssub, esub);
1N/A            assert(dp == sep);
1N/A            sp = rest;
1N/A            break;
1N/A        case OCH_:
1N/A            stp = stop;
1N/A            for (;;) {
1N/A                /* how long could this one be? */
1N/A                rest = slow(m, sp, stp, ss, es);
1N/A                assert(rest != NULL);   /* it did match */
1N/A                /* could the rest match the rest? */
1N/A                tail = slow(m, rest, stop, es, stopst);
1N/A                if (tail == stop)
1N/A                    break;      /* yes! */
1N/A                /* no -- try a shorter match for this one */
1N/A                stp = rest - 1;
1N/A                assert(stp >= sp);  /* it did work */
1N/A            }
1N/A            ssub = ss + 1;
1N/A            esub = ss + OPND(m->g->strip[ss]) - 1;
1N/A            assert(OP(m->g->strip[esub]) == OOR1);
1N/A            for (;;) {  /* find first matching branch */
1N/A                if (slow(m, sp, rest, ssub, esub) == rest)
1N/A                    break;  /* it matched all of it */
1N/A                /* that one missed, try next one */
1N/A                assert(OP(m->g->strip[esub]) == OOR1);
1N/A                esub++;
1N/A                assert(OP(m->g->strip[esub]) == OOR2);
1N/A                ssub = esub + 1;
1N/A                esub += OPND(m->g->strip[esub]);
1N/A                if (OP(m->g->strip[esub]) == OOR2)
1N/A                    esub--;
1N/A                else
1N/A                    assert(OP(m->g->strip[esub]) == O_CH);
1N/A            }
1N/A            dp = dissect(m, sp, rest, ssub, esub);
1N/A            assert(dp == rest);
1N/A            sp = rest;
1N/A            break;
1N/A        case O_PLUS:
1N/A        case O_QUEST:
1N/A        case OOR1:
1N/A        case OOR2:
1N/A        case O_CH:
1N/A            assert(0);
1N/A            break;
1N/A        case OLPAREN:
1N/A            i = OPND(m->g->strip[ss]);
1N/A            assert(0 < i && i <= m->g->nsub);
1N/A            m->pmatch[i].rm_so = sp - m->offp;
1N/A            break;
1N/A        case ORPAREN:
1N/A            i = OPND(m->g->strip[ss]);
1N/A            assert(0 < i && i <= m->g->nsub);
1N/A            m->pmatch[i].rm_eo = sp - m->offp;
1N/A            break;
1N/A        default:        /* uh oh */
1N/A            assert(0);
1N/A            break;
1N/A        }
1N/A    }
1N/A
1N/A    assert(sp == stop);
1N/A    return (sp);
1N/A}
1N/A
1N/A/*
1N/A * backref - figure out what matched what, figuring in back references
1N/A */
1N/Astatic const char *
1N/Abackref(struct match *m, const char *start, const char *stop, sopno startst,
1N/A    sopno stopst, sopno lev,        /* PLUS nesting level */
1N/A    int rec)
1N/A{
1N/A    int i;
1N/A    sopno ss;       /* start sop of current subRE */
1N/A    const char *sp;     /* start of string matched by it */
1N/A    sopno ssub;     /* start sop of subsubRE */
1N/A    sopno esub;     /* end sop of subsubRE */
1N/A    const char *ssp;    /* start of string matched by subsubRE */
1N/A    const char *dp;
1N/A    size_t len;
1N/A    int hard;
1N/A    sop s;
1N/A    regoff_t offsave;
1N/A    cset *cs;
1N/A    wint_t wc;
1N/A
1N/A    AT("back", start, stop, startst, stopst);
1N/A    sp = start;
1N/A
1N/A    /* get as far as we can with easy stuff */
1N/A    hard = 0;
1N/A    for (ss = startst; !hard && ss < stopst; ss++)
1N/A        switch (OP(s = m->g->strip[ss])) {
1N/A        case OCHAR:
1N/A            if (sp == stop)
1N/A                return (NULL);
1N/A            sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
1N/A            if (wc != OPND(s))
1N/A                return (NULL);
1N/A            break;
1N/A        case OANY:
1N/A            if (sp == stop)
1N/A                return (NULL);
1N/A            sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
1N/A            if (wc == BADCHAR)
1N/A                return (NULL);
1N/A            break;
1N/A        case OANYOF:
1N/A            if (sp == stop)
1N/A                return (NULL);
1N/A            cs = &m->g->sets[OPND(s)];
            sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
            if (wc == BADCHAR || !CHIN(cs, wc))
                return (NULL);
            break;
        case OBOL:
            if ((sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
                (sp < m->endp && *(sp-1) == '\n' &&
                (m->g->cflags&REG_NEWLINE))) {
                break;
            }
            else
                return (NULL);
            break;
        case OEOL:
            if ((sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
                (sp < m->endp && *sp == '\n' &&
                (m->g->cflags&REG_NEWLINE))) {
                break;
            }
            else
                return (NULL);
            break;
        case OBOW:
            if (((sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
                (sp < m->endp && *(sp-1) == '\n' &&
                (m->g->cflags&REG_NEWLINE)) ||
                (sp > m->beginp && !ISWORD(*(sp-1)))) &&
                (sp < m->endp && ISWORD(*sp))) {
                break;
            } else
                return (NULL);
            break;
        case OEOW:
            if (((sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
                (sp < m->endp && *sp == '\n' &&
                (m->g->cflags&REG_NEWLINE)) ||
                (sp < m->endp && !ISWORD(*sp))) &&
                (sp > m->beginp && ISWORD(*(sp-1)))) {
                break;
            } else
                return (NULL);
            break;
        case O_QUEST:
            break;
        case OOR1:  /* matches null but needs to skip */
            ss++;
            s = m->g->strip[ss];
            do {
                assert(OP(s) == OOR2);
                ss += OPND(s);
            } while (OP(s = m->g->strip[ss]) != O_CH);
            /* note that the ss++ gets us past the O_CH */
            break;
        default:    /* have to make a choice */
            hard = 1;
            break;
        }
    if (!hard) {        /* that was it! */
        if (sp != stop)
            return (NULL);
        return (sp);
    }
    ss--;           /* adjust for the for's final increment */

    /* the hard stuff */
    AT("hard", sp, stop, ss, stopst);
    s = m->g->strip[ss];
    switch (OP(s)) {
    case OBACK_:        /* the vilest depths */
        i = OPND(s);
        assert(0 < i && i <= m->g->nsub);
        if (m->pmatch[i].rm_eo == -1)
            return (NULL);
        assert(m->pmatch[i].rm_so != -1);
        len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
        if (len == 0 && rec++ > MAX_RECURSION)
            return (NULL);
        assert(stop - m->beginp >= len);
        if (sp > stop - len)
            return (NULL);  /* not enough left to match */
        ssp = m->offp + m->pmatch[i].rm_so;
        if (memcmp(sp, ssp, len) != 0)
            return (NULL);
        while (m->g->strip[ss] != SOP(O_BACK, i))
            ss++;
        return (backref(m, sp+len, stop, ss+1, stopst, lev, rec));
        break;
    case OQUEST_:       /* to null or not */
        dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
        if (dp != NULL)
            return (dp);    /* not */
        return (backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
        break;
    case OPLUS_:
        assert(m->lastpos != NULL);
        assert(lev+1 <= m->g->nplus);
        m->lastpos[lev+1] = sp;
        return (backref(m, sp, stop, ss+1, stopst, lev+1, rec));
        break;
    case O_PLUS:
        if (sp == m->lastpos[lev])  /* last pass matched null */
            return (backref(m, sp, stop, ss+1, stopst, lev-1, rec));
        /* try another pass */
        m->lastpos[lev] = sp;
        dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
        if (dp == NULL)
            return (backref(m, sp, stop, ss+1, stopst, lev-1, rec));
        else
            return (dp);
        break;
    case OCH_:      /* find the right one, if any */
        ssub = ss + 1;
        esub = ss + OPND(s) - 1;
        assert(OP(m->g->strip[esub]) == OOR1);
        for (;;) {  /* find first matching branch */
            dp = backref(m, sp, stop, ssub, esub, lev, rec);
            if (dp != NULL)
                return (dp);
            /* that one missed, try next one */
            if (OP(m->g->strip[esub]) == O_CH)
                return (NULL);  /* there is none */
            esub++;
            assert(OP(m->g->strip[esub]) == OOR2);
            ssub = esub + 1;
            esub += OPND(m->g->strip[esub]);
            if (OP(m->g->strip[esub]) == OOR2)
                esub--;
            else
                assert(OP(m->g->strip[esub]) == O_CH);
        }
        break;
    case OLPAREN:       /* must undo assignment if rest fails */
        i = OPND(s);
        assert(0 < i && i <= m->g->nsub);
        offsave = m->pmatch[i].rm_so;
        m->pmatch[i].rm_so = sp - m->offp;
        dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
        if (dp != NULL)
            return (dp);
        m->pmatch[i].rm_so = offsave;
        return (NULL);
        break;
    case ORPAREN:       /* must undo assignment if rest fails */
        i = OPND(s);
        assert(0 < i && i <= m->g->nsub);
        offsave = m->pmatch[i].rm_eo;
        m->pmatch[i].rm_eo = sp - m->offp;
        dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
        if (dp != NULL)
            return (dp);
        m->pmatch[i].rm_eo = offsave;
        return (NULL);
        break;
    default:        /* uh oh */
        assert(0);
        break;
    }

    /* "can't happen" */
    assert(0);
    return (NULL);
}

/*
 * fast - step through the string at top speed
 */
static const char *
fast(struct match *m, const char *start, const char *stop, sopno startst,
    sopno stopst)
{
    states st = m->st;
    states fresh = m->fresh;
    states tmp = m->tmp;
    const char *p = start;
    wint_t c;
    wint_t lastc;       /* previous c */
    wint_t flagch;
    int i;
    const char *coldp;  /* last p after which no match was underway */
    size_t clen;

    CLEAR(st);
    SET1(st, startst);
    SP("fast", st, *p);
    st = step(m->g, startst, stopst, st, NOTHING, st);
    ASSIGN(fresh, st);
    SP("start", st, *p);
    coldp = NULL;
    if (start == m->beginp)
        c = OUT;
    else {
        /*
         * XXX Wrong if the previous character was multi-byte.
         * Newline never is (in encodings supported by FreeBSD),
         * so this only breaks the ISWORD tests below.
         */
        c = (uch)*(start - 1);
    }
    for (;;) {
        /* next character */
        lastc = c;
        if (p == m->endp) {
            clen = 0;
            c = OUT;
        } else
            clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
        if (EQ(st, fresh))
            coldp = p;

        /* is there an EOL and/or BOL between lastc and c? */
        flagch = '\0';
        i = 0;
        if ((lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
            (lastc == OUT && !(m->eflags&REG_NOTBOL))) {
            flagch = BOL;
            i = m->g->nbol;
        }
        if ((c == '\n' && m->g->cflags&REG_NEWLINE) ||
            (c == OUT && !(m->eflags&REG_NOTEOL))) {
            flagch = (flagch == BOL) ? BOLEOL : EOL;
            i += m->g->neol;
        }
        if (i != 0) {
            for (; i > 0; i--)
                st = step(m->g, startst, stopst, st,
                    flagch, st);
            SP("boleol", st, c);
        }

        /* how about a word boundary? */
        if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
            (c != OUT && ISWORD(c))) {
            flagch = BOW;
        }
        if ((lastc != OUT && ISWORD(lastc)) &&
            (flagch == EOL || (c != OUT && !ISWORD(c)))) {
            flagch = EOW;
        }
        if (flagch == BOW || flagch == EOW) {
            st = step(m->g, startst, stopst, st, flagch, st);
            SP("boweow", st, c);
        }

        /* are we done? */
        if (ISSET(st, stopst) || p == stop || clen > stop - p)
            break;      /* NOTE BREAK OUT */

        /* no, we must deal with this character */
        ASSIGN(tmp, st);
        ASSIGN(st, fresh);
        assert(c != OUT);
        st = step(m->g, startst, stopst, tmp, c, st);
        SP("aft", st, c);
        assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
        p += clen;
    }

    assert(coldp != NULL);
    m->coldp = coldp;
    if (ISSET(st, stopst))
        return (p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0));
    else
        return (NULL);
}

/*
 * slow - step through the string more deliberately
 */
static const char *
slow(struct match *m, const char *start, const char *stop, sopno startst,
    sopno stopst)
{
    states st = m->st;
    states empty = m->empty;
    states tmp = m->tmp;
    const char *p = start;
    wint_t c;
    wint_t lastc;       /* previous c */
    wint_t flagch;
    int i;
    const char *matchp; /* last p at which a match ended */
    size_t clen;

    AT("slow", start, stop, startst, stopst);
    CLEAR(st);
    SET1(st, startst);
    SP("sstart", st, *p);
    st = step(m->g, startst, stopst, st, NOTHING, st);
    matchp = NULL;
    if (start == m->beginp)
        c = OUT;
    else {
        /*
         * XXX Wrong if the previous character was multi-byte.
         * Newline never is (in encodings supported by FreeBSD),
         * so this only breaks the ISWORD tests below.
         */
        c = (uch)*(start - 1);
    }
    for (;;) {
        /* next character */
        lastc = c;
        if (p == m->endp) {
            c = OUT;
            clen = 0;
        } else
            clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);

        /* is there an EOL and/or BOL between lastc and c? */
        flagch = '\0';
        i = 0;
        if ((lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
            (lastc == OUT && !(m->eflags&REG_NOTBOL))) {
            flagch = BOL;
            i = m->g->nbol;
        }
        if ((c == '\n' && m->g->cflags&REG_NEWLINE) ||
            (c == OUT && !(m->eflags&REG_NOTEOL))) {
            flagch = (flagch == BOL) ? BOLEOL : EOL;
            i += m->g->neol;
        }
        if (i != 0) {
            for (; i > 0; i--)
                st = step(m->g, startst, stopst, st,
                    flagch, st);
            SP("sboleol", st, c);
        }

        /* how about a word boundary? */
        if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
            (c != OUT && ISWORD(c))) {
            flagch = BOW;
        }
        if ((lastc != OUT && ISWORD(lastc)) &&
            (flagch == EOL || (c != OUT && !ISWORD(c)))) {
            flagch = EOW;
        }
        if (flagch == BOW || flagch == EOW) {
            st = step(m->g, startst, stopst, st, flagch, st);
            SP("sboweow", st, c);
        }

        /* are we done? */
        if (ISSET(st, stopst))
            matchp = p;
        if (EQ(st, empty) || p == stop || clen > stop - p)
            break;      /* NOTE BREAK OUT */

        /* no, we must deal with this character */
        ASSIGN(tmp, st);
        ASSIGN(st, empty);
        assert(c != OUT);
        st = step(m->g, startst, stopst, tmp, c, st);
        SP("saft", st, c);
        assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
        p += clen;
    }

    return (matchp);
}


/*
 * step - map set of states reachable before char to set reachable after
 */
static states
step(struct re_guts *g,
    sopno start,        /* start state within strip */
    sopno stop,     /* state after stop state within strip */
    states bef,     /* states reachable before */
    wint_t ch,      /* character or NONCHAR code */
    states aft)     /* states already known reachable after */
{
    cset *cs;
    sop s;
    sopno pc;
    onestate here;      /* note, macros know this name */
    sopno look;
    int i;

    for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
        s = g->strip[pc];
        switch (OP(s)) {
        case OEND:
            assert(pc == stop-1);
            break;
        case OCHAR:
            /* only characters can match */
            assert(!NONCHAR(ch) || ch != OPND(s));
            if (ch == OPND(s))
                FWD(aft, bef, 1);
            break;
        case OBOL:
            if (ch == BOL || ch == BOLEOL)
                FWD(aft, bef, 1);
            break;
        case OEOL:
            if (ch == EOL || ch == BOLEOL)
                FWD(aft, bef, 1);
            break;
        case OBOW:
            if (ch == BOW)
                FWD(aft, bef, 1);
            break;
        case OEOW:
            if (ch == EOW)
                FWD(aft, bef, 1);
            break;
        case OANY:
            if (!NONCHAR(ch))
                FWD(aft, bef, 1);
            break;
        case OANYOF:
            cs = &g->sets[OPND(s)];
            if (!NONCHAR(ch) && CHIN(cs, ch))
                FWD(aft, bef, 1);
            break;
        case OBACK_:        /* ignored here */
        case O_BACK:
            FWD(aft, aft, 1);
            break;
        case OPLUS_:        /* forward, this is just an empty */
            FWD(aft, aft, 1);
            break;
        case O_PLUS:        /* both forward and back */
            FWD(aft, aft, 1);
            i = ISSETBACK(aft, OPND(s));
            BACK(aft, aft, OPND(s));
            if (!i && ISSETBACK(aft, OPND(s))) {
                /* oho, must reconsider loop body */
                pc -= OPND(s) + 1;
                INIT(here, pc);
            }
            break;
        case OQUEST_:       /* two branches, both forward */
            FWD(aft, aft, 1);
            FWD(aft, aft, OPND(s));
            break;
        case O_QUEST:       /* just an empty */
            FWD(aft, aft, 1);
            break;
        case OLPAREN:       /* not significant here */
        case ORPAREN:
            FWD(aft, aft, 1);
            break;
        case OCH_:      /* mark the first two branches */
            FWD(aft, aft, 1);
            assert(OP(g->strip[pc+OPND(s)]) == OOR2);
            FWD(aft, aft, OPND(s));
            break;
        case OOR1:      /* done a branch, find the O_CH */
            if (ISSTATEIN(aft, here)) {
                for (look = 1;
                    OP(s = g->strip[pc+look]) != O_CH;
                    look += OPND(s))
                    assert(OP(s) == OOR2);
                FWD(aft, aft, look + 1);
            }
            break;
        case OOR2:      /* propagate OCH_'s marking */
            FWD(aft, aft, 1);
            if (OP(g->strip[pc+OPND(s)]) != O_CH) {
                assert(OP(g->strip[pc+OPND(s)]) == OOR2);
                FWD(aft, aft, OPND(s));
            }
            break;
        case O_CH:      /* just empty */
            FWD(aft, aft, 1);
            break;
        default:        /* ooooops... */
            assert(0);
            break;
        }
    }

    return (aft);
}

#ifdef REDEBUG
/*
 * print - print a set of states
 */
static void
print(struct match *m, const char *caption, states st, int ch, FILE *d)
{
    struct re_guts *g = m->g;
    int i;
    int first = 1;

    if (!(m->eflags&REG_TRACE))
        return;

    (void) fprintf(d, "%s", caption);
    if (ch != '\0')
        (void) fprintf(d, " %s", pchar(ch));
    for (i = 0; i < g->nstates; i++)
        if (ISSET(st, i)) {
            (void) fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
            first = 0;
        }
    (void) fprintf(d, "\n");
}

/*
 * at - print current situation
 */
static void
at(struct match *m, const char *title, const char *start, const char *stop,
    sopno startst, sopno stopst)
{
    if (!(m->eflags&REG_TRACE))
        return;

    (void) printf("%s %s-", title, pchar(*start));
    (void) printf("%s ", pchar(*stop));
    (void) printf("%ld-%ld\n", (long)startst, (long)stopst);
}

#ifndef PCHARDONE
#define PCHARDONE   /* never again */
/*
 * pchar - make a character printable
 *
 * Is this identical to regchar() over in debug.c?  Well, yes.  But a
 * duplicate here avoids having a debugging-capable regexec.o tied to
 * a matching debug.o, and this is convenient.  It all disappears in
 * the non-debug compilation anyway, so it doesn't matter much.
 */
static const char *
pchar(int ch)
{
    static char pbuf[10];

    if (isprint((uch)ch) || ch == ' ')
        (void) sprintf(pbuf, "%c", ch);
    else
        (void) sprintf(pbuf, "\\%o", ch);
    return (pbuf);
}
#endif
#endif

#undef  matcher
#undef  fast
#undef  slow
#undef  dissect
#undef  backref
#undef  step
#undef  print
#undef  at
#undef  match