da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin/***********************************************************************
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* This software is part of the ast package *
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner* Copyright (c) 1985-2010 AT&T Intellectual Property *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* and is licensed under the *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Common Public License, Version 1.0 *
7c2fbfb345896881c631598ee3852ce9ce33fb07April Chin* by AT&T Intellectual Property *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* A copy of the License is available at *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Information and Software Systems Research *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* AT&T Research *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Florham Park NJ *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Glenn Fowler <gsf@research.att.com> *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* David Korn <dgk@research.att.com> *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Phong Vo <kpv@research.att.com> *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin***********************************************************************/
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * posix regex implementation
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * based on Doug McIlroy's C++ implementation
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Knuth-Morris-Pratt adapted from Corman-Leiserson-Rivest
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Boyer-Moore from conversations with David Korn, Phong Vo, Andrew Hume
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#define REG_VERSION_MAP 20030916L /* regdisc_t.re_map */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#define MBSIZE(p) ((ast.tmp_int=mbsize(p))>0?ast.tmp_int:1)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#define RE_DUP_MAX (INT_MAX/2-1) /* 2*RE_DUP_MAX won't overflow */
7c2fbfb345896881c631598ee3852ce9ce33fb07April Chin#define REG_COMP (REG_DELIMITED|REG_ESCAPE|REG_EXTENDED|REG_FIRST|REG_ICASE|REG_NOSUB|REG_NEWLINE|REG_SHELL|REG_AUGMENTED|REG_LEFT|REG_LITERAL|REG_MINIMAL|REG_MULTIREF|REG_NULL|REG_RIGHT|REG_LENIENT|REG_MUSTDELIM)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#define REG_EXEC (REG_ADVANCE|REG_INVERT|REG_NOTBOL|REG_NOTEOL|REG_STARTEND)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#define REX_END_STR 16 /* final $ before tail newline */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#define REX_GROUP_AHEAD_CATCH 22 /* REX_GROUP_AHEAD catcher */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#define REX_GROUP_AHEAD_NOT 23 /* inverted 0-width lookahead */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#define REX_GROUP_BEHIND_CATCH 25 /* REX_GROUP_BEHIND catcher */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#define REX_GROUP_BEHIND_NOT 26 /* inverted 0-width lookbehind */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#define REX_GROUP_BEHIND_NOT_CATCH 27 /* REX_GROUP_BEHIND_NOT catcher */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#define REX_GROUP_COND_CATCH 29 /* conditional group catcher */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#define REX_GROUP_CUT 30 /* don't backtrack over this */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#define REX_GROUP_CUT_CATCH 31 /* REX_GROUP_CUT catcher */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#define bitclr(p,c) ((p)[((c)>>3)&037]&=(~(1<<((c)&07))))
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#include <stdio.h> /* because <wchar.h> includes it and we generate it */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * collation element support
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * private stuff hanging off regex_t
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Rex_t subtypes
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * REX_ALT catcher, solely to get control at the end of an
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * alternative to keep records for comparing matches.
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * REX_NEG catcher determines what string lengths can be matched,
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * then Neg investigates continuations of other lengths.
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * This is inefficient. For !POSITIONS expressions, we can do better:
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * since matches to rex will be enumerated in decreasing order,
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * we can investigate continuations whenever a length is skipped.
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * REX_REP catcher. One is created on the stack for
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * each iteration of a complex repetition.
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * data structure for an alternation of pure strings
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * son points to a subtree of all strings with a common
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * prefix ending in character c. sib links alternate
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * letters in the same position of a word. end=1 if
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * some word ends with c. the order of strings is
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * irrelevant, except long words must be investigated
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * before short ones.
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin unsigned char c;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Rex_t is a node in a regular expression
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chintypedef struct reglib_s /* library private regex_t info */