regrexec.c revision 3e14f97f673e8a630f076077de35afdd43dc1587
/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1985-2010 AT&T Intellectual Property *
* and is licensed under the *
* Common Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* http://www.opensource.org/licenses/cpl1.0.txt *
* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* David Korn <dgk@research.att.com> *
* Phong Vo <kpv@research.att.com> *
* *
***********************************************************************/
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov#pragma prototyped
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov
/*
 * posix regex record executor
 * multiple record sized-buffer interface
 */
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov#include "reglib.h"
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov/*
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov * call regnexec() on records selected by Boyer-Moore
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov */
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankovint
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankovregrexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags, int sep, void* handle, regrecord_t record)
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov{
d0054224e0ba320a05d1c27de508dc5224ff99d1Adam Stevko register unsigned char* buf = (unsigned char*)s;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov register unsigned char* beg;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov register unsigned char* l;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov register unsigned char* r;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov register unsigned char* x;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov register size_t* skip;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov register size_t* fail;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov register Bm_mask_t** mask;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov register size_t index;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov register int n;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov unsigned char* end;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov size_t mid;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov int complete;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov int exactlen;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov int leftlen;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov int rightlen;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov int inv;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov Bm_mask_t m;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov Env_t* env;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov Rex_t* e;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov if (!s || !p || !(env = p->env) || (e = env->rex)->type != REX_BM)
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov return REG_BADPAT;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov inv = (flags & REG_INVERT) != 0;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov buf = beg = (unsigned char*)s;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov end = buf + len;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov mid = (len < e->re.bm.right) ? 0 : (len - e->re.bm.right);
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov skip = e->re.bm.skip;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov fail = e->re.bm.fail;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov mask = e->re.bm.mask;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov complete = e->re.bm.complete && !nmatch;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov exactlen = e->re.bm.size;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov leftlen = e->re.bm.left + exactlen;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov rightlen = exactlen + e->re.bm.right;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov index = leftlen++;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov for (;;)
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov {
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov while ((index += skip[buf[index]]) < mid);
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov if (index < HIT)
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov goto impossible;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov index -= HIT;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov m = mask[n = exactlen - 1][buf[index]];
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov do
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov {
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov if (!n--)
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov goto possible;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov } while (m &= mask[n][buf[--index]]);
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov if ((index += fail[n + 1]) < len)
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov continue;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov impossible:
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov if (inv)
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov {
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov l = r = buf + len;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov goto invert;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov }
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov n = 0;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov goto done;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov possible:
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov r = (l = buf + index) + exactlen;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov while (l > beg)
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov if (*--l == sep)
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov {
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov l++;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov break;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov }
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov if ((r - l) < leftlen)
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov goto spanned;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov while (r < end && *r != sep)
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov r++;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov if ((r - (buf + index)) < rightlen)
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov goto spanned;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov if (complete || (env->rex = ((r - l) > 128) ? e : e->next) && !(n = regnexec(p, (char*)l, r - l, nmatch, match, flags)))
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov {
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov if (inv)
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov {
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov invert:
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov x = beg;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov while (beg < l)
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov {
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov while (x < l && *x != sep)
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov x++;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov if (n = (*record)(handle, (char*)beg, x - beg))
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov goto done;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov beg = ++x;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov }
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov }
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov else if (n = (*record)(handle, (char*)l, r - l))
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov goto done;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov if ((index = (r - buf) + leftlen) >= len)
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov {
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov n = (inv && (++r - buf) < len) ? (*record)(handle, (char*)r, (buf + len) - r): 0;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov goto done;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov }
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov beg = r + 1;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov }
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov else if (n != REG_NOMATCH)
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov goto done;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov else
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov {
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov spanned:
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov if ((index += exactlen) >= mid)
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov goto impossible;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov }
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov }
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov done:
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov env->rex = e;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov return n;
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov}
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov