regcmp.c revision 3f54fd611f536639ec30dd53c48e5ec1897cc7d9
/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 1985-2011 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                 Eclipse Public License, Version 1.0                  *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*          http://www.eclipse.org/org/documents/epl-v10.html           *
*         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                 Glenn Fowler <gsf@research.att.com>                  *
*                  David Korn <dgk@research.att.com>                   *
*                   Phong Vo <kpv@research.att.com>                    *
*                                                                      *
***********************************************************************/
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly#pragma prototyped
/*
 * regcmp implementation
 */
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly#include <ast.h>
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly#include <libgen.h>
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder#include <regex.h>
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder#include <align.h>
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder
/*
 * sizing limits for compiled patterns
 */

#define INC		(2*1024)	/* handle size of the first attempt, and growth per retry */
#define TOT		(16*1024)	/* largest handle size that is tried		*/
#define SUB		10		/* number of )$n slots, $0 through $9		*/
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly
c0833539c8cf577dd3f2497792fbdd818442744cChristian Maedertypedef struct
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly{
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly char* cur;
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder regex_t re;
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly unsigned char sub[SUB];
fa373bc327620e08861294716b4454be8d25669fChristian Maeder int nsub;
036ecbd8f721096321f47cf6a354a9d1bf3d032fChristian Maeder size_t size;
fa373bc327620e08861294716b4454be8d25669fChristian Maeder char buf[ALIGN_BOUND2];
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder} Regex_t;
f21c7417bdd1c0282025cba0f5cb0ff5bc5c98eeLiam O'Reilly
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly__DEFINE__(char*, __loc1, 0);
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reilly
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reillystatic void*
53bd0c89aa4743dc41a6394db5a90717c1ca4517Liam O'Reillyblock(void* handle, void* data, size_t size)
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly{
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly register Regex_t* re = (Regex_t*)handle;
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder if (data || (size = roundof(size, ALIGN_BOUND2)) > (re->buf + re->size - re->cur))
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder return 0;
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder data = (void*)re->cur;
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder re->cur += size;
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder return data;
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder}
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder
648fe1220044aac847acbdfbc4155af5556063ebChristian Maederchar*
648fe1220044aac847acbdfbc4155af5556063ebChristian Maederregcmp(const char* pattern, ...)
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder{
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder register char* s;
4314e26a12954cb1c9be4dea10aa8103edac5bbbChristian Maeder register Regex_t* re;
d5833d2ee7bafcbf2fdd2bdfd9a728c769b100c7Christian Maeder register size_t n;
d5833d2ee7bafcbf2fdd2bdfd9a728c769b100c7Christian Maeder register int c;
9aeda2b3ae8ce0b018955521e4ca835a8ba8a27bLiam O'Reilly register int p;
9aeda2b3ae8ce0b018955521e4ca835a8ba8a27bLiam O'Reilly int b;
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly int e;
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder int i;
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder int j;
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder int nsub;
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder register Sfio_t* sp;
d5833d2ee7bafcbf2fdd2bdfd9a728c769b100c7Christian Maeder unsigned char paren[128];
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder unsigned char sub[SUB];
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly va_list ap;
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly
fa373bc327620e08861294716b4454be8d25669fChristian Maeder va_start(ap, pattern);
fa373bc327620e08861294716b4454be8d25669fChristian Maeder if (pattern || !*pattern || !(sp = sfstropen()))
fa373bc327620e08861294716b4454be8d25669fChristian Maeder e = 1;
fa373bc327620e08861294716b4454be8d25669fChristian Maeder else
fa373bc327620e08861294716b4454be8d25669fChristian Maeder {
fa373bc327620e08861294716b4454be8d25669fChristian Maeder e = 0;
fa373bc327620e08861294716b4454be8d25669fChristian Maeder memset(paren, 0, sizeof(paren));
fa373bc327620e08861294716b4454be8d25669fChristian Maeder n = 0;
fa373bc327620e08861294716b4454be8d25669fChristian Maeder p = -1;
fa373bc327620e08861294716b4454be8d25669fChristian Maeder b = 0;
fa373bc327620e08861294716b4454be8d25669fChristian Maeder nsub = 0;
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder s = (char*)pattern;
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder do
d5833d2ee7bafcbf2fdd2bdfd9a728c769b100c7Christian Maeder {
7857a35e3af533dfbd0f0e18638ebd211e6358a0Christian Maeder while (c = *s++)
7857a35e3af533dfbd0f0e18638ebd211e6358a0Christian Maeder {
7857a35e3af533dfbd0f0e18638ebd211e6358a0Christian Maeder if (c == '\\')
9aeda2b3ae8ce0b018955521e4ca835a8ba8a27bLiam O'Reilly {
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder sfputc(sp, c);
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder if (!(c = *s++))
7857a35e3af533dfbd0f0e18638ebd211e6358a0Christian Maeder break;
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder }
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder else if (b)
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder {
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder if (c == ']')
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder b = 0;
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder }
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder else if (c == '[')
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder {
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder b = 1;
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder if (*s == '^')
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder {
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder sfputc(sp, c);
eb48217dfa67ddb87b8fbd846de293d0636bd578Christian Maeder c = *s++;
eb48217dfa67ddb87b8fbd846de293d0636bd578Christian Maeder }
eb48217dfa67ddb87b8fbd846de293d0636bd578Christian Maeder if (*s == ']')
eb48217dfa67ddb87b8fbd846de293d0636bd578Christian Maeder {
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder sfputc(sp, c);
eb48217dfa67ddb87b8fbd846de293d0636bd578Christian Maeder c = *s++;
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder }
eb48217dfa67ddb87b8fbd846de293d0636bd578Christian Maeder }
eb48217dfa67ddb87b8fbd846de293d0636bd578Christian Maeder else if (c == '(')
eb48217dfa67ddb87b8fbd846de293d0636bd578Christian Maeder {
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder /*
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder * someone explain in one sentence why
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder * a cast is needed to make this work
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder */
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder if (p < (int)(elementsof(paren) - 1))
4314e26a12954cb1c9be4dea10aa8103edac5bbbChristian Maeder p++;
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder paren[p] = ++n;
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder }
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder else if (c == ')' && p >= 0)
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder {
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder for (i = p; i > 0; i--)
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder if (paren[i])
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder break;
4314e26a12954cb1c9be4dea10aa8103edac5bbbChristian Maeder if (*s == '$' && (j = *(s + 1)) >= '0' && j <= '9')
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder {
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder s += 2;
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder j -= '0';
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly if (nsub <= j)
d5833d2ee7bafcbf2fdd2bdfd9a728c769b100c7Christian Maeder {
d5833d2ee7bafcbf2fdd2bdfd9a728c769b100c7Christian Maeder if (!nsub)
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder memset(sub, 0, sizeof(sub));
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder nsub = j + 1;
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder }
e0f1794e365dd347e97b37d7d22b2fce27296fa1Christian Maeder sub[j] = paren[i] + 1;
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder }
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder paren[i] = 0;
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder }
7857a35e3af533dfbd0f0e18638ebd211e6358a0Christian Maeder sfputc(sp, c);
7857a35e3af533dfbd0f0e18638ebd211e6358a0Christian Maeder }
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder } while (s = va_arg(ap, char*));
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder }
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder va_end(ap);
7857a35e3af533dfbd0f0e18638ebd211e6358a0Christian Maeder if (e)
7857a35e3af533dfbd0f0e18638ebd211e6358a0Christian Maeder return 0;
7857a35e3af533dfbd0f0e18638ebd211e6358a0Christian Maeder if (!(s = sfstruse(sp)))
7830e8fa7442fb7452af7ecdba102bc297ae367eChristian Maeder {
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder sfstrclose(sp);
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder return 0;
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder }
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder re = 0;
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder n = 0;
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder do
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder {
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder if ((n += INC) > TOT || !(re = newof(re, Regex_t, 0, n)))
648fe1220044aac847acbdfbc4155af5556063ebChristian Maeder {
bcd914850de931848b86d7728192a149f9c0108bChristian Maeder if (re)
33bdce26495121cdbce30331ef90a1969126a840Liam O'Reilly free(re);
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly sfstrclose(sp);
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly return 0;
935613eb8e67d724f1c4a4d4a37be3324ef6708dChristian Maeder }
f21c7417bdd1c0282025cba0f5cb0ff5bc5c98eeLiam O'Reilly re->cur = re->buf;
f21c7417bdd1c0282025cba0f5cb0ff5bc5c98eeLiam O'Reilly re->size = n + ALIGN_BOUND2 - sizeof(Regex_t);
f21c7417bdd1c0282025cba0f5cb0ff5bc5c98eeLiam O'Reilly regalloc(re, block, REG_NOFREE);
f21c7417bdd1c0282025cba0f5cb0ff5bc5c98eeLiam O'Reilly c = regcomp(&re->re, s, REG_EXTENDED|REG_LENIENT|REG_NULL);
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly regalloc(NiL, NiL, 0);
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly } while (c == REG_ESPACE);
f21c7417bdd1c0282025cba0f5cb0ff5bc5c98eeLiam O'Reilly sfstrclose(sp);
f21c7417bdd1c0282025cba0f5cb0ff5bc5c98eeLiam O'Reilly if (c)
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly {
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly free(re);
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly return 0;
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly }
f21c7417bdd1c0282025cba0f5cb0ff5bc5c98eeLiam O'Reilly if (re->nsub = nsub)
f21c7417bdd1c0282025cba0f5cb0ff5bc5c98eeLiam O'Reilly memcpy(re->sub, sub, (nsub + 1) * sizeof(sub[0]));
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly return (char*)re;
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly}
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reillychar*
935613eb8e67d724f1c4a4d4a37be3324ef6708dChristian Maederregex(const char* handle, const char* subject, ...)
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly{
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly register Regex_t* re;
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly register int n;
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly register int i;
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly register int k;
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly char* sub[SUB + 1];
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly regmatch_t match[SUB + 1];
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly va_list ap;
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly va_start(ap, subject);
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly if (!(re = (Regex_t*)handle) || !subject)
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly k = 1;
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly else
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly {
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly k = 0;
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly for (n = 0; n < re->nsub; n++)
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly sub[n] = va_arg(ap, char*);
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly }
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly va_end(ap);
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly if (k)
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly return 0;
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly if (regexec(&re->re, subject, SUB + 1, match, 0))
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly return 0;
7d96b1ef2b8597330aedee6713615ec15508edcfLiam O'Reilly for (n = 0; n < re->nsub; n++)
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly if (i = re->sub[n])
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly {
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly i--;
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly k = match[i].rm_eo - match[i].rm_so;
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly strlcpy(sub[n], subject + match[i].rm_so, k);
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly *(sub[n] + k) = 0;
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly }
f19dc06364e8d6ea36f7c170e1f7a0677de63184Liam O'Reilly __loc1 = (char*)subject + match[0].rm_so;
2a5b885d9350ec6dd8bc4992ee91d2f68aa592f4Christian Maeder return (char*)subject + match[0].rm_eo;
fa373bc327620e08861294716b4454be8d25669fChristian Maeder}
fa373bc327620e08861294716b4454be8d25669fChristian Maeder