/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 1985-2010 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                  Common Public License, Version 1.0                  *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*            http://www.opensource.org/licenses/cpl1.0.txt             *
*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                 Glenn Fowler <gsf@research.att.com>                  *
*                  David Korn <dgk@research.att.com>                   *
*                   Phong Vo <kpv@research.att.com>                    *
*                                                                      *
***********************************************************************/
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#pragma prototyped
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * posix regex implementation
 *
 * based on Doug McIlroy's C++ implementation
 * Knuth-Morris-Pratt adapted from Cormen-Leiserson-Rivest
 * Boyer-Moore from conversations with David Korn, Phong Vo, Andrew Hume
 */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#ifndef _REGLIB_H
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#define _REGLIB_H
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * interface version stamps
 */

#define REG_VERSION_EXEC	20020509L
#define REG_VERSION_MAP		20030916L	/* regdisc_t.re_map		*/

/*
 * re_info is spelled env inside the library
 */

#define re_info		env
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * map the short internal names onto _reg_ prefixed identifiers
 */

#define alloc		_reg_alloc
#define classfun	_reg_classfun
#define drop		_reg_drop
#define fatal		_reg_fatal
#define state		_reg_state
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * one substitution rhs operation
 */

typedef struct regsubop_s
{
	int		op;		/* REG_SUB_LOWER,REG_SUB_UPPER	*/
	int		off;		/* re_rhs or match[] offset	*/
	int		len;		/* re_rhs len or len==0 match[]	*/
} regsubop_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * member declarations spliced in wherever _REG_SUB_PRIVATE_ is expanded;
 * re_rhs is declared with one element and must stay the last member
 */

#define _REG_SUB_PRIVATE_ \
	char*		re_cur;		/* re_buf cursor		*/ \
	char*		re_end;		/* re_buf end			*/ \
	regsubop_t*	re_ops;		/* rhs ops			*/ \
	char		re_rhs[1];	/* substitution rhs		*/
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#include <ast.h>
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#include <cdt.h>
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#include <stk.h>
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#include "regex.h"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#include <ctype.h>
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#include <errno.h>
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * a _BLD_DEBUG build turns on _AST_REGEX_DEBUG unless it is already defined
 */

#if _BLD_DEBUG && !defined(_AST_REGEX_DEBUG)
#define _AST_REGEX_DEBUG	1
#endif
34f9b3eef6fdadbda0a846aa4d68691ac40eace5Roland Mainz
/*
 * mbsize(p) when that is positive, otherwise 1
 * ast.tmp_int is overwritten as scratch; p is evaluated once
 */

#define MBSIZE(p)	((ast.tmp_int=mbsize(p))>0?ast.tmp_int:1)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * repetition and back reference limits
 */

#undef	RE_DUP_MAX				/* posix puts this in limits.h!	*/
#define RE_DUP_MAX	(INT_MAX/2-1)		/* 2*RE_DUP_MAX won't overflow	*/
#define RE_DUP_INF	(RE_DUP_MAX+1)		/* infinity, for *		*/
#define BACK_REF_MAX	9
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * REG_COMP and REG_EXEC are unions of the REG_* flag bits listed below
 */

#define REG_COMP	(REG_DELIMITED|REG_ESCAPE|REG_EXTENDED|REG_FIRST|REG_ICASE|REG_NOSUB|REG_NEWLINE|REG_SHELL|REG_AUGMENTED|REG_LEFT|REG_LITERAL|REG_MINIMAL|REG_MULTIREF|REG_NULL|REG_RIGHT|REG_LENIENT|REG_MUSTDELIM)
#define REG_EXEC	(REG_ADVANCE|REG_INVERT|REG_NOTBOL|REG_NOTEOL|REG_STARTEND)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * REX_* node type codes, numbered consecutively from 0
 */

#define REX_NULL		0	/* null string (internal)	*/
#define REX_ALT			1	/* a|b				*/
#define REX_ALT_CATCH		2	/* REX_ALT catcher		*/
#define REX_BACK		3	/* \1, \2, etc			*/
#define REX_BEG			4	/* initial ^			*/
#define REX_BEG_STR		5	/* initial ^ w/ no newline	*/
#define REX_BM			6	/* Boyer-Moore			*/
#define REX_CAT			7	/* catenation catcher		*/
#define REX_CLASS		8	/* [...]			*/
#define REX_COLL_CLASS		9	/* collation order [...]	*/
#define REX_CONJ		10	/* a&b				*/
#define REX_CONJ_LEFT		11	/* REX_CONJ left catcher	*/
#define REX_CONJ_RIGHT		12	/* REX_CONJ right catcher	*/
#define REX_DONE		13	/* completed match (internal)	*/
#define REX_DOT			14	/* .				*/
#define REX_END			15	/* final $			*/
#define REX_END_STR		16	/* final $ before tail newline	*/
#define REX_EXEC		17	/* call re.re_exec()		*/
#define REX_FIN_STR		18	/* final $ w/ no newline	*/
#define REX_GROUP		19	/* \(...\)			*/
#define REX_GROUP_CATCH		20	/* REX_GROUP catcher		*/
#define REX_GROUP_AHEAD		21	/* 0-width lookahead		*/
#define REX_GROUP_AHEAD_CATCH	22	/* REX_GROUP_AHEAD catcher	*/
#define REX_GROUP_AHEAD_NOT	23	/* inverted 0-width lookahead	*/
#define REX_GROUP_BEHIND	24	/* 0-width lookbehind		*/
#define REX_GROUP_BEHIND_CATCH	25	/* REX_GROUP_BEHIND catcher	*/
#define REX_GROUP_BEHIND_NOT	26	/* inverted 0-width lookbehind	*/
#define REX_GROUP_BEHIND_NOT_CATCH 27	/* REX_GROUP_BEHIND_NOT catcher	*/
#define REX_GROUP_COND		28	/* conditional group		*/
#define REX_GROUP_COND_CATCH	29	/* conditional group catcher	*/
#define REX_GROUP_CUT		30	/* don't backtrack over this	*/
#define REX_GROUP_CUT_CATCH	31	/* REX_GROUP_CUT catcher	*/
#define REX_KMP			32	/* Knuth-Morris-Pratt		*/
#define REX_NEG			33	/* negation			*/
#define REX_NEG_CATCH		34	/* REX_NEG catcher		*/
#define REX_NEST		35	/* nested match			*/
#define REX_ONECHAR		36	/* a single-character literal	*/
#define REX_REP			37	/* Kleene closure		*/
#define REX_REP_CATCH		38	/* REX_REP catcher		*/
#define REX_STRING		39	/* some chars			*/
#define REX_TRIE		40	/* alternation of strings	*/
#define REX_WBEG		41	/* \<				*/
#define REX_WEND		42	/* \>				*/
#define REX_WORD		43	/* word boundary		*/
#define REX_WORD_NOT		44	/* not word boundary		*/
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * T_* token codes; T_META is UCHAR_MAX+1 so every code lies
 * above the unsigned char range
 */

#define T_META		((int)UCHAR_MAX+1)
#define T_STAR		(T_META+0)
#define T_PLUS		(T_META+1)
#define T_QUES		(T_META+2)
#define T_BANG		(T_META+3)
#define T_AT		(T_META+4)
#define T_TILDE		(T_META+5)
#define T_PERCENT	(T_META+6)
#define T_LEFT		(T_META+7)
#define T_OPEN		(T_META+8)
#define T_CLOSE		(T_OPEN+1)
#define T_RIGHT		(T_OPEN+2)
#define T_CFLX		(T_OPEN+3)
#define T_DOT		(T_OPEN+4)
#define T_DOTSTAR	(T_OPEN+5)
#define T_END		(T_OPEN+6)
#define T_BAD		(T_OPEN+7)
#define T_DOLL		(T_OPEN+8)
#define T_BRA		(T_OPEN+9)
#define T_BAR		(T_OPEN+10)
#define T_AND		(T_OPEN+11)
#define T_LT		(T_OPEN+12)
#define T_GT		(T_OPEN+13)
#define T_SLASHPLUS	(T_OPEN+14)
#define T_GROUP		(T_OPEN+15)
#define T_WORD		(T_OPEN+16)
#define T_WORD_NOT	(T_WORD+1)
#define T_BEG_STR	(T_WORD+2)
#define T_END_STR	(T_WORD+3)
#define T_FIN_STR	(T_WORD+4)
#define T_ESCAPE	(T_WORD+5)
#define T_ALNUM		(T_WORD+6)
#define T_ALNUM_NOT	(T_ALNUM+1)
#define T_DIGIT		(T_ALNUM+2)
#define T_DIGIT_NOT	(T_ALNUM+3)
#define T_SPACE		(T_ALNUM+4)
#define T_SPACE_NOT	(T_ALNUM+5)
#define T_BACK		(T_ALNUM+6)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * BRE/ERE/ARE/SRE/KRE codes, spaced 3 apart
 */

#define BRE		0
#define ERE		3
#define ARE		6
#define SRE		9
#define KRE		12
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/* HIT is the largest ssize_t value */
#define HIT		SSIZE_MAX
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * bit vector operations on a byte array p indexed by c
 * the byte index is (c>>3)&037 (0..31) and the bit is c&07
 * c is evaluated twice
 */

#define bitclr(p,c)	((p)[((c)>>3)&037]&=(~(1<<((c)&07))))
#define bitset(p,c)	((p)[((c)>>3)&037]|=(1<<((c)&07)))
#define bittst(p,c)	((p)[((c)>>3)&037]&(1<<((c)&07)))

/*
 * the same operations applied to the bits member of *p
 */

#define setadd(p,c)	bitset((p)->bits,c)
#define setclr(p,c)	bitclr((p)->bits,c)
#define settst(p,c)	bittst((p)->bits,c)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * wide character classification support
 *
 * with _hdr_wchar, _lib_wctype and _lib_iswctype all set the native
 * headers are included and only missing pieces are supplied;
 * otherwise _lib_wctype is undefined and each isw*()/tow*() name not
 * already a macro is mapped onto its <ctype.h> counterpart
 */

#if _hdr_wchar && _lib_wctype && _lib_iswctype

#include <stdio.h> /* because <wchar.h> includes it and we generate it */
#include <wchar.h>
#if _hdr_wctype
#include <wctype.h>
#endif

#if !defined(iswblank) && !_lib_iswblank
#define _need_iswblank	1
#define iswblank(x)	_reg_iswblank(x)
extern int		_reg_iswblank(wint_t);
#endif

#if !defined(towupper) && !_lib_towupper
#define towupper(x)	toupper(x)
#endif

#if !defined(towlower) && !_lib_towlower
#define towlower(x)	tolower(x)
#endif

#else

#undef	_lib_wctype

#ifndef iswalnum
#define iswalnum(x)	isalnum(x)
#endif
#ifndef iswalpha
#define iswalpha(x)	isalpha(x)
#endif
#ifndef iswcntrl
#define iswcntrl(x)	iscntrl(x)
#endif
#ifndef iswdigit
#define iswdigit(x)	isdigit(x)
#endif
#ifndef iswgraph
#define iswgraph(x)	isgraph(x)
#endif
#ifndef iswlower
#define iswlower(x)	islower(x)
#endif
#ifndef iswprint
#define iswprint(x)	isprint(x)
#endif
#ifndef iswpunct
#define iswpunct(x)	ispunct(x)
#endif
#ifndef iswspace
#define iswspace(x)	isspace(x)
#endif
#ifndef iswupper
#define iswupper(x)	isupper(x)
#endif
#ifndef iswxdigit
#define iswxdigit(x)	isxdigit(x)
#endif

#ifndef towlower
#define towlower(x)	tolower(x)
#endif
#ifndef towupper
#define towupper(x)	toupper(x)
#endif

#endif
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * last resort definitions, used only when no macro
 * of the same name is already defined
 */

#ifndef iswblank
#define iswblank(x)	((x)==' '||(x)=='\t')
#endif

#ifndef iswgraph
#define iswgraph(x)	(iswprint(x)&&!iswblank(x))
#endif

/*
 * word characters are the isalnum() characters plus '_'
 */

#define isword(x)	(isalnum(x)||(x)=='_')
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * collation element support
 */

#define COLL_KEY_MAX	32

/* never smaller than MB_LEN_MAX */
#if COLL_KEY_MAX < MB_LEN_MAX
#undef	COLL_KEY_MAX
#define COLL_KEY_MAX	MB_LEN_MAX
#endif

/* COLL_KEY_MAX bytes plus one more */
typedef unsigned char Ckey_t[COLL_KEY_MAX+1];
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * COLL_* codes, numbered consecutively from 0
 * NOTE(review): presumably the values of Celt_t.typ -- confirm against users
 */

#define COLL_end	0
#define COLL_call	1
#define COLL_char	2
#define COLL_range	3
#define COLL_range_lc	4
#define COLL_range_uc	5
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chintypedef struct Celt_s
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin{
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin short typ;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin short min;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin short max;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin regclass_t fun;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Ckey_t beg;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Ckey_t end;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin} Celt_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * private stuff hanging off regex_t
 */

/* an offset paired with a base pointer */
typedef struct Stk_pos_s
{
	off_t		offset;
	char*		base;
} Stk_pos_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chintypedef struct Vector_s
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin{
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Stk_t* stk; /* stack pointer */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin char* vec; /* the data */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int inc; /* growth increment */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int siz; /* element size */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int max; /* max index */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int cur; /* current index -- user domain */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin} Vector_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * Rex_t subtypes
 */

/* conditional group data */
typedef struct Cond_s
{
	unsigned char*	beg;		/* beginning of next match	*/
	struct Rex_s*	next[2];	/* 0:no 1:yes next pattern	*/
	struct Rex_s*	cont;		/* right catcher		*/
	int		yes;		/* yes condition hit		*/
} Cond_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/* conjunction left catcher data */
typedef struct Conj_left_s
{
	unsigned char*	beg;		/* beginning of left match	*/
	struct Rex_s*	right;		/* right pattern		*/
	struct Rex_s*	cont;		/* right catcher		*/
} Conj_left_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/* conjunction right catcher data */
typedef struct Conj_right_s
{
	unsigned char*	end;		/* end of left match		*/
	struct Rex_s*	cont;		/* ambient continuation		*/
} Conj_right_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/* element type of the Bm_t mask tables */
typedef unsigned int Bm_mask_t;

/*
 * Boyer-Moore data
 * NOTE(review): member roles are not shown in this header -- confirm
 * against the Boyer-Moore compile and exec code before relying on them
 */

typedef struct Bm_s
{
	Bm_mask_t**	mask;
	size_t*		skip;
	size_t*		fail;
	size_t		size;
	ssize_t		back;
	ssize_t		left;
	ssize_t		right;
	size_t		complete;
} Bm_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * string data
 * NOTE(review): fail is presumably the Knuth-Morris-Pratt failure
 * table and base/size the literal bytes -- confirm against users
 */

typedef struct String_s
{
	int*		fail;
	unsigned char*	base;
	size_t		size;
} String_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * character set bit vector, one bit per unsigned char value
 */

typedef struct Set_s
{
	unsigned char	bits[(UCHAR_MAX+1)/CHAR_BIT];
} Set_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chintypedef struct Collate_s
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin{
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int invert;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Celt_t* elements;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin} Collate_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * two operand node data: left and right subexpressions plus a serial number
 */

typedef struct Binary_s
{
	struct Rex_s*	left;
	struct Rex_s*	right;
	int		serial;
} Binary_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chintypedef struct Group_s
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin{
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int number; /* group number */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int last; /* last contained group number */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int size; /* lookbehind size */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int back; /* backreferenced */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin regflags_t flags; /* group flags */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin union
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Binary_t binary;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin struct Rex_s* rex;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin } expr;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin} Group_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * REX_EXEC node data: an opaque data pointer plus a text pointer and size
 */

typedef struct Exec_s
{
	void*		data;
	const char*	text;
	size_t		size;
} Exec_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * REX_NEST_* flag bits; together they fill the low 8 bits
 */

#define REX_NEST_open		0x01
#define REX_NEST_close		0x02
#define REX_NEST_escape		0x04
#define REX_NEST_quote		0x08
#define REX_NEST_literal	0x10
#define REX_NEST_delimiter	0x20
#define REX_NEST_terminator	0x40
#define REX_NEST_separator	0x80

/* number of bits taken by the flags above */
#define REX_NEST_SHIFT		8
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * nested match data
 * none must stay immediately before type so that type[-1] lands on it
 * type is declared with one element
 * NOTE(review): presumably over-allocated and indexed by character -- confirm
 */

typedef struct Nest_s
{
	int		primary;
	unsigned short	none;		/* for Nest_t.type[-1]		*/
	unsigned short	type[1];
} Nest_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * REX_ALT catcher, solely to get control at the end of an
 * alternative to keep records for comparing matches.
 */

typedef struct Alt_catch_s
{
	struct Rex_s*	cont;
} Alt_catch_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chintypedef struct Group_catch_s
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin{
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin struct Rex_s* cont;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin regoff_t* eo;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin} Group_catch_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * lookbehind catcher: a continuation plus beg and end text pointers
 */

typedef struct Behind_catch_s
{
	struct Rex_s*	cont;
	unsigned char*	beg;
	unsigned char*	end;
} Behind_catch_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * REX_NEG catcher determines what string lengths can be matched,
 * then Neg investigates continuations of other lengths.
 * This is inefficient.  For !POSITIONS expressions, we can do better:
 * since matches to rex will be enumerated in decreasing order,
 * we can investigate continuations whenever a length is skipped.
 */

typedef struct Neg_catch_s
{
	unsigned char*	beg;
	unsigned char*	index;
} Neg_catch_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * REX_REP catcher.  One is created on the stack for
 * each iteration of a complex repetition.
 */

typedef struct Rep_catch_s
{
	struct Rex_s*	cont;
	struct Rex_s*	ref;
	unsigned char*	beg;
	int		n;
} Rep_catch_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * data structure for an alternation of pure strings
 * son points to a subtree of all strings with a common
 * prefix ending in character c.  sib links alternate
 * letters in the same position of a word.  end=1 if
 * some word ends with c.  the order of strings is
 * irrelevant, except long words must be investigated
 * before short ones.
 */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * One node of the string-alternation trie: a single character
 * position shared by every word that has the same prefix.
 */
typedef struct Trie_node_s
{
	unsigned char		c;	/* character at this position */
	unsigned char		end;	/* 1 if some word ends with c */
	struct Trie_node_s*	son;	/* subtree of words sharing the prefix ending in c */
	struct Trie_node_s*	sib;	/* alternate letter in the same position */
} Trie_node_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chintypedef struct Trie_s
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin{
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Trie_node_t** root;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int min;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int max;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin} Trie_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * Rex_t is a node in a regular expression
 */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chintypedef struct Rex_s
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin{
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin unsigned char type; /* node type */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin unsigned char marked; /* already marked */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin short serial; /* subpattern number */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin regflags_t flags; /* scoped flags */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int explicit; /* scoped explicit match*/
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin struct Rex_s* next; /* remaining parts */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int lo; /* lo dup count */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int hi; /* hi dup count */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin unsigned char* map; /* fold and/or ccode map*/
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin union
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Alt_catch_t alt_catch; /* alt catcher */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Bm_t bm; /* bm */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Behind_catch_t behind_catch; /* behind catcher */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Set_t* charclass; /* char class */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Collate_t collate; /* collation class */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Cond_t cond_catch; /* cond catcher */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Conj_left_t conj_left; /* conj left catcher */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Conj_right_t conj_right; /* conj right catcher */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin void* data; /* data after Rex_t */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Exec_t exec; /* re.re_exec() args */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Group_t group; /* a|b or rep */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Group_catch_t group_catch; /* group catcher */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Neg_catch_t neg_catch; /* neg catcher */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Nest_t nest; /* nested match */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin unsigned char onechar; /* single char */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Rep_catch_t rep_catch; /* rep catcher */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin String_t string; /* string/kmp */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Trie_t trie; /* trie */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin } re;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin} Rex_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chintypedef struct reglib_s /* library private regex_t info */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin{
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin struct Rex_s* rex; /* compiled expression */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin regdisc_t* disc; /* REG_DISCIPLINE discipline */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin const regex_t* regex; /* from regexec */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin unsigned char* beg; /* beginning of string */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin unsigned char* end; /* end of string */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Vector_t* pos; /* posns of certain subpatterns */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Vector_t* bestpos; /* ditto for best match */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin regmatch_t* match; /* subexrs in current match */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin regmatch_t* best; /* ditto in best match yet */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Stk_pos_t stk; /* exec stack pos */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin size_t min; /* minimum match length */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin size_t nsub; /* internal re_nsub */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin regflags_t flags; /* flags from regcomp() */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int error; /* last error */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int explicit; /* explicit match on this char */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int leading; /* leading match on this char */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int refs; /* regcomp()+regdup() references*/
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Rex_t done; /* the last continuation */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin regstat_t stats; /* for regstat() */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin unsigned char fold[UCHAR_MAX+1]; /* REG_ICASE map */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin unsigned char hard; /* hard comp */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin unsigned char once; /* if 1st parse fails, quit */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin unsigned char separate; /* cannot combine */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin unsigned char stack; /* hard comp or exec */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin unsigned char sub; /* re_sub is valid */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin unsigned char test; /* debug/test bitmask */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin} Env_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chintypedef struct State_s /* shared state */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin{
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin regmatch_t nomatch;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin struct
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin unsigned char key;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin short val[15];
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin } escape[52];
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin short* magic[UCHAR_MAX+1];
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin regdisc_t disc;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int fatal;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int initialized;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Dt_t* attrs;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Dt_t* names;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Dtdisc_t dtdisc;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin} State_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinextern State_t state;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinextern void* alloc(regdisc_t*, void*, size_t);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinextern regclass_t classfun(int);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinextern void drop(regdisc_t*, Rex_t*);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinextern int fatal(regdisc_t*, int, const char*);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#endif