1N/A/***********************************************************************
1N/A* *
1N/A* This software is part of the ast package *
1N/A* Copyright (c) 1985-2011 AT&T Intellectual Property *
1N/A* and is licensed under the *
1N/A* Common Public License, Version 1.0 *
1N/A* by AT&T Intellectual Property *
1N/A* *
1N/A* A copy of the License is available at *
1N/A* http://www.opensource.org/licenses/cpl1.0.txt *
1N/A* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
1N/A* *
1N/A* Information and Software Systems Research *
1N/A* AT&T Research *
1N/A* Florham Park NJ *
1N/A* *
1N/A* Glenn Fowler <gsf@research.att.com> *
1N/A* David Korn <dgk@research.att.com> *
1N/A* Phong Vo <kpv@research.att.com> *
1N/A* *
1N/A***********************************************************************/
1N/A#pragma prototyped
1N/A/*
1N/A * regcmp implementation
1N/A */
1N/A
1N/A#include <ast.h>
1N/A#include <libgen.h>
1N/A#include <regex.h>
1N/A#include <align.h>
1N/A
1N/A#define INC (2*1024)
1N/A#define TOT (16*1024)
1N/A#define SUB 10
1N/A
1N/Atypedef struct
1N/A{
1N/A char* cur;
1N/A regex_t re;
1N/A unsigned char sub[SUB];
1N/A int nsub;
1N/A size_t size;
1N/A char buf[ALIGN_BOUND2];
1N/A} Regex_t;
1N/A
1N/A__DEFINE__(char*, __loc1, 0);
1N/A
1N/Astatic void*
1N/Ablock(void* handle, void* data, size_t size)
1N/A{
1N/A register Regex_t* re = (Regex_t*)handle;
1N/A
1N/A if (data || (size = roundof(size, ALIGN_BOUND2)) > (re->buf + re->size - re->cur))
1N/A return 0;
1N/A data = (void*)re->cur;
1N/A re->cur += size;
1N/A return data;
1N/A}
1N/A
1N/Achar*
1N/Aregcmp(const char* pattern, ...)
1N/A{
1N/A register char* s;
1N/A register Regex_t* re;
1N/A register size_t n;
1N/A register int c;
1N/A register int p;
1N/A int b;
1N/A int i;
1N/A int j;
1N/A int nsub;
1N/A register Sfio_t* sp;
1N/A unsigned char paren[128];
1N/A unsigned char sub[SUB];
1N/A va_list ap;
1N/A
1N/A va_start(ap, pattern);
1N/A if (!pattern || !*pattern || !(sp = sfstropen()))
1N/A return 0;
1N/A memset(paren, 0, sizeof(paren));
1N/A n = 0;
1N/A p = -1;
1N/A b = 0;
1N/A nsub = 0;
1N/A s = (char*)pattern;
1N/A do
1N/A {
1N/A while (c = *s++)
1N/A {
1N/A if (c == '\\')
1N/A {
1N/A sfputc(sp, c);
1N/A if (!(c = *s++))
1N/A break;
1N/A }
1N/A else if (b)
1N/A {
1N/A if (c == ']')
1N/A b = 0;
1N/A }
1N/A else if (c == '[')
1N/A {
1N/A b = 1;
1N/A if (*s == '^')
1N/A {
1N/A sfputc(sp, c);
1N/A c = *s++;
1N/A }
1N/A if (*s == ']')
1N/A {
1N/A sfputc(sp, c);
1N/A c = *s++;
1N/A }
1N/A }
1N/A else if (c == '(')
1N/A {
1N/A /*
1N/A * someone explain in one sentence why
1N/A * a cast is needed to make this work
1N/A */
1N/A
1N/A if (p < (int)(elementsof(paren) - 1))
1N/A p++;
1N/A paren[p] = ++n;
1N/A }
1N/A else if (c == ')' && p >= 0)
1N/A {
1N/A for (i = p; i > 0; i--)
1N/A if (paren[i])
1N/A break;
1N/A if (*s == '$' && (j = *(s + 1)) >= '0' && j <= '9')
1N/A {
1N/A s += 2;
1N/A j -= '0';
1N/A if (nsub <= j)
1N/A {
1N/A if (!nsub)
1N/A memset(sub, 0, sizeof(sub));
1N/A nsub = j + 1;
1N/A }
1N/A sub[j] = paren[i] + 1;
1N/A }
1N/A paren[i] = 0;
1N/A }
1N/A sfputc(sp, c);
1N/A }
1N/A } while (s = va_arg(ap, char*));
1N/A va_end(ap);
1N/A if (!(s = sfstruse(sp)))
1N/A {
1N/A sfstrclose(sp);
1N/A return 0;
1N/A }
1N/A re = 0;
1N/A n = 0;
1N/A do
1N/A {
1N/A if ((n += INC) > TOT || !(re = newof(re, Regex_t, 0, n)))
1N/A {
1N/A if (re)
1N/A free(re);
1N/A sfstrclose(sp);
1N/A return 0;
1N/A }
1N/A re->cur = re->buf;
1N/A re->size = n + ALIGN_BOUND2 - sizeof(Regex_t);
1N/A regalloc(re, block, REG_NOFREE);
1N/A c = regcomp(&re->re, s, REG_EXTENDED|REG_LENIENT|REG_NULL);
1N/A regalloc(NiL, NiL, 0);
1N/A } while (c == REG_ESPACE);
1N/A sfstrclose(sp);
1N/A if (c)
1N/A {
1N/A free(re);
1N/A return 0;
1N/A }
1N/A if (re->nsub = nsub)
1N/A memcpy(re->sub, sub, (nsub + 1) * sizeof(sub[0]));
1N/A return (char*)re;
1N/A}
1N/A
1N/Achar*
1N/Aregex(const char* handle, const char* subject, ...)
1N/A{
1N/A register Regex_t* re;
1N/A register int n;
1N/A register int i;
1N/A register int k;
1N/A char* sub[SUB + 1];
1N/A regmatch_t match[SUB + 1];
1N/A va_list ap;
1N/A
1N/A va_start(ap, subject);
1N/A if (!(re = (Regex_t*)handle) || !subject)
1N/A return 0;
1N/A for (n = 0; n < re->nsub; n++)
1N/A sub[n] = va_arg(ap, char*);
1N/A va_end(ap);
1N/A if (regexec(&re->re, subject, SUB + 1, match, 0))
1N/A return 0;
1N/A for (n = 0; n < re->nsub; n++)
1N/A if (i = re->sub[n])
1N/A {
1N/A i--;
1N/A k = match[i].rm_eo - match[i].rm_so;
1N/A strlcpy(sub[n], subject + match[i].rm_so, k);
1N/A *(sub[n] + k) = 0;
1N/A }
1N/A __loc1 = (char*)subject + match[0].rm_so;
1N/A return (char*)subject + match[0].rm_eo;
1N/A}