regexp.c revision 7c2fbfb345896881c631598ee3852ce9ce33fb07
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin/***********************************************************************
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* This software is part of the ast package *
7c2fbfb345896881c631598ee3852ce9ce33fb07April Chin* Copyright (c) 1985-2008 AT&T Intellectual Property *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* and is licensed under the *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Common Public License, Version 1.0 *
7c2fbfb345896881c631598ee3852ce9ce33fb07April Chin* by AT&T Intellectual Property *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* A copy of the License is available at *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* http://www.opensource.org/licenses/cpl1.0.txt *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Information and Software Systems Research *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* AT&T Research *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Florham Park NJ *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Glenn Fowler <gsf@research.att.com> *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* David Korn <dgk@research.att.com> *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Phong Vo <kpv@research.att.com> *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin***********************************************************************/
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#pragma prototyped
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin/*
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * <regexp.h> library support
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#define _REGEXP_DECLARE
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#include <ast.h>
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#include <regexp.h>
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#include <regex.h>
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#include <align.h>
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * Per-pattern state laid out at the front of the caller supplied
 * buffer handed to _re_comp(); the bytes that follow this header
 * form the scratch area that block() carves allocations from.
 */
typedef struct
{
	regex_t		re;	/* compiled expression passed to regcomp()/regexec() */
	char*		buf;	/* first byte of the scratch area after this header */
	char*		cur;	/* next unallocated byte inside the scratch area */
	unsigned int	size;	/* scratch area capacity in bytes */
} Env_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinstatic void*
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinblock(void* handle, void* data, size_t size)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin{
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register Env_t* env = (Env_t*)handle;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if (data || (size = roundof(size, ALIGN_BOUND2)) > (env->buf + env->size - env->cur))
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin return 0;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin data = (void*)env->cur;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin env->cur += size;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin return data;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin}
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * Compile pattern into the caller supplied buffer.
 *
 * handle/size describe the buffer: an Env_t header is placed at its
 * start and the remainder becomes the scratch area that block() hands
 * out to the regex library. re->re_nbra receives the subexpression
 * count recorded in the compiled expression.
 *
 * Returns 0 on success, otherwise a numeric error code; 50 is used
 * both when the buffer cannot hold more than the header and for any
 * regcomp() failure without a dedicated mapping below.
 *
 * NOTE(review): the numeric codes presumably follow the traditional
 * <regexp.h> ERROR() numbering -- confirm against regexp.h.
 */
int
_re_comp(regexp_t* re, const char* pattern, char* handle, unsigned int size)
{
	Env_t*	env = (Env_t*)handle;
	int	code;
	int	status;

	/* the buffer must hold the header plus at least one scratch byte */
	if (size <= sizeof(Env_t))
		return 50;
	env->buf = (char*)env + sizeof(Env_t);
	env->cur = env->buf;
	env->size = size - sizeof(Env_t);

	/* have the regex library allocate through block() */
	regalloc(env, block, REG_NOFREE);
	code = regcomp(&env->re, pattern, REG_LENIENT|REG_NULL);

	/* translate the regcomp() result into the value returned to the caller */
	if (code == 0)
		status = 0;
	else if (code == REG_ERANGE)
		status = 11;
	else if (code == REG_BADBR)
		status = 16;
	else if (code == REG_ESUBREG)
		status = 25;
	else if (code == REG_EPAREN)
		status = 42;
	else if (code == REG_EBRACK)
		status = 49;
	else
		status = 50;

	re->re_nbra = env->re.re_nsub;
	return status;
}
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * Match subject against the expression compiled by _re_comp().
 *
 * handle is the buffer previously passed to _re_comp(). When anchor
 * is non-zero the match must start at the first byte of subject.
 *
 * Returns 1 on a match and 0 otherwise. On a match re->re_loc1 and
 * re->re_loc2 delimit the overall match, and re->re_braslist[i] /
 * re->re_braelist[i] delimit subexpression i+1.
 *
 * Only as many subexpressions as re->re_braslist has slots are
 * reported: match[] is sized for exactly that many (plus the overall
 * match), so a pattern with more groups must not index past either
 * array.
 *
 * NOTE(review): offsets for a subexpression that did not take part
 * in the match are applied to subject unchanged, as before -- confirm
 * that callers only inspect the groups they know participated.
 */
int
_re_exec(regexp_t* re, const char* subject, const char* handle, int anchor)
{
	register Env_t*	env = (Env_t*)handle;
	size_t		n;
	size_t		nsub;
	regmatch_t	match[elementsof(re->re_braslist)+1];

	if (regexec(&env->re, subject, elementsof(match), match, 0))
		return 0;
	if (anchor && match[0].rm_so)
		return 0;
	re->re_loc1 = (char*)subject + match[0].rm_so;
	re->re_loc2 = (char*)subject + match[0].rm_eo;

	/* clamp to the capacity shared by match[] and the bra lists */
	nsub = env->re.re_nsub;
	if (nsub > elementsof(re->re_braslist))
		nsub = elementsof(re->re_braslist);
	for (n = 1; n <= nsub; n++)
	{
		re->re_braslist[n-1] = (char*)subject + match[n].rm_so;
		re->re_braelist[n-1] = (char*)subject + match[n].rm_eo;
	}
	return 1;
}
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinchar*
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin_re_putc(int c)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin{
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin static Sfio_t* sp;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if (!sp && !(sp = sfstropen()))
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin return 0;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if (!c)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin return sfstruse(sp);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin sfputc(sp, c);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin return 0;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin}