reglib.h revision 7c2fbfb345896881c631598ee3852ce9ce33fb07
/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1985-2008 AT&T Intellectual Property *
* and is licensed under the *
* Common Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* David Korn <dgk@research.att.com> *
* Phong Vo <kpv@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* posix regex implementation
*
* based on Doug McIlroy's C++ implementation
* Knuth-Morris-Pratt adapted from Cormen-Leiserson-Rivest
* Boyer-Moore from conversations with David Korn, Phong Vo, Andrew Hume
*/
#ifndef _REGLIB_H
#define _REGLIB_H
/*
 * REG_VERSION_EXEC expands to the long constant 20020509L.
 * NOTE(review): presumably a yyyymmdd stamp for the exec interface -- confirm.
 */
#define REG_VERSION_EXEC 20020509L
/*
 * Object-like macros that respell four short identifiers with a _reg_
 * prefix. After this point the preprocessor rewrites every use of
 * alloc, classfun, fatal and state as an identifier -- declarations,
 * calls and struct member names alike -- in any code that includes
 * this header.
 */
#define alloc _reg_alloc
#define classfun _reg_classfun
#define fatal _reg_fatal
#define state _reg_state
/*
 * regsubop_t: one substitution op record, three ints.
 */
typedef struct regsubop_s {
	int op;		/* operation code: REG_SUB_LOWER or REG_SUB_UPPER */
	int off;	/* offset into re_rhs, or into match[] */
	int len;	/* length within re_rhs; len==0 means match[] */
} regsubop_t;
/*
 * _REG_SUB_PRIVATE_ expands to two member declarations, re_cur and
 * re_end (cursor and end of re_buf), and is meant to be expanded inside
 * a struct body.
 *
 * The last member line must NOT end in a backslash: a trailing
 * continuation splices the following source line (an #include) into the
 * macro body, which both drops that include and corrupts the expansion.
 */
#define _REG_SUB_PRIVATE_ \
	char*		re_cur;		/* re_buf cursor */ \
	char*		re_end;		/* re_buf end */
#include <ast.h>
#include <cdt.h>
#include <stk.h>
#include "regex.h"
#include <ctype.h>
#include <errno.h>
/*
 * BACK_REF_MAX is 9.
 * NOTE(review): presumably the highest backreference number (\1..\9) -- confirm.
 */
#define BACK_REF_MAX 9
/*
 * REG_COMP is the bitwise OR of the REG_* flags listed in its expansion.
 * NOTE(review): presumably the set of flags that are meaningful at
 * compile time -- confirm against the users of REG_COMP.
 */
#define REG_COMP (REG_DELIMITED|REG_ESCAPE|REG_EXTENDED|REG_FIRST|REG_ICASE|REG_NOSUB|REG_NEWLINE|REG_SHELL|REG_AUGMENTED|REG_LEFT|REG_LITERAL|REG_MINIMAL|REG_MULTIREF|REG_NULL|REG_RIGHT|REG_LENIENT|REG_MUSTDELIM)
#define REX_NULL 0 /* null string (internal) */
/*
 * Five constants spaced 3 apart (0, 3, 6, 9, 12).
 * NOTE(review): presumably one base index per pattern syntax, with
 * room for three entries each -- confirm where they are used.
 */
#define BRE 0
#define ERE 3
#define ARE 6
#define SRE 9
#define KRE 12
#include <wchar.h>
/* Include <wctype.h> only when the _hdr_wctype macro evaluates nonzero. */
#if _hdr_wctype
#include <wctype.h>
#endif
/*
 * When iswblank is not already a macro and _lib_iswblank is zero or
 * undefined, map iswblank(x) onto _reg_iswblank(x), declare that
 * function, and set _need_iswblank to 1 to record the substitution.
 */
#if !defined(iswblank) && !_lib_iswblank
#define _need_iswblank 1
#define iswblank(x) _reg_iswblank(x)
extern int _reg_iswblank(wint_t);
#endif
/*
 * NOTE(review): the two conditionals below have empty bodies in this
 * text, so they currently have no effect; presumably towupper/towlower
 * fallbacks belong inside -- confirm against the complete header.
 */
#if !defined(towupper) && !_lib_towupper
#endif
#if !defined(towlower) && !_lib_towlower
#endif
/*
 * NOTE(review): every #if above is already closed, so this #else and
 * the #endif that pairs with it (after the towupper guard below) have no
 * visible opening #if. An enclosing conditional appears to be missing;
 * confirm against the complete header before building.
 */
#else
/*
 * NOTE(review): each #ifndef guard in this run is empty in the visible
 * text; presumably a fallback definition of the named isw and tow
 * macro belongs inside each one -- confirm.
 */
#ifndef iswalnum
#endif
#ifndef iswalpha
#endif
#ifndef iswcntrl
#endif
#ifndef iswdigit
#endif
#ifndef iswgraph
#endif
#ifndef iswlower
#endif
#ifndef iswprint
#endif
#ifndef iswpunct
#endif
#ifndef iswspace
#endif
#ifndef iswupper
#endif
#ifndef iswxdigit
#endif
#ifndef towlower
#endif
#ifndef towupper
#endif
#endif
/* Two more guards with empty bodies; they have no effect as written. */
#ifndef iswblank
#endif
#ifndef iswgraph
#endif
/*
* collation element support
*/
/*
 * COLL_KEY_MAX is 32, raised to MB_LEN_MAX when MB_LEN_MAX is larger.
 *
 * The #undef is required: redefining an object-like macro with a
 * different replacement list without first undefining it is a
 * constraint violation, so the raise would not compile on a platform
 * where MB_LEN_MAX exceeds 32.
 *
 * NOTE(review): MB_LEN_MAX comes from <limits.h>, which this header does
 * not include directly; if it is undefined here the #if treats it as 0
 * and COLL_KEY_MAX stays 32 -- confirm an earlier include provides it.
 */
#define COLL_KEY_MAX 32
#if COLL_KEY_MAX < MB_LEN_MAX
#undef COLL_KEY_MAX
#define COLL_KEY_MAX MB_LEN_MAX
#endif
/*
 * Six consecutive codes, 0 through 5.
 * NOTE(review): presumably the values stored in Celt_t.typ -- confirm.
 */
#define COLL_end 0
#define COLL_call 1
#define COLL_char 2
#define COLL_range 3
#define COLL_range_lc 4
#define COLL_range_uc 5
/*
 * Celt_t: three short fields.
 * NOTE(review): typ presumably carries one of the COLL_* codes and
 * min/max the bounds it applies to -- confirm against the users.
 */
typedef struct Celt_s {
	short typ;
	short min;
	short max;
} Celt_t;
/*
* private stuff hanging off regex_t
*/
/*
 * Stk_pos_t: wraps a single char pointer, base.
 * NOTE(review): presumably a saved stack position -- confirm.
 */
typedef struct Stk_pos_s {
	char *base;
} Stk_pos_t;
/*
 * Vector_t: bookkeeping for a growable array of fixed-size elements.
 */
typedef struct Vector_s {
	char *vec;	/* the element storage */
	int inc;	/* growth increment */
	int siz;	/* size of one element */
	int max;	/* max index */
	int cur;	/* current index (user domain) */
} Vector_t;
/*
* Rex_t subtypes
*/
/*
 * Cond_t: Rex_t subtype data for a conditional.
 */
typedef struct Cond_s {
	unsigned char *beg;	/* where the next match begins */
	int yes;		/* the yes condition was hit */
} Cond_t;
/*
 * Conj_left_t: records where the left match begins.
 */
typedef struct Conj_left_s {
	unsigned char *beg;	/* beginning of left match */
} Conj_left_t;
/*
 * Conj_right_t: records where the left match ended.
 */
typedef struct Conj_right_s {
	unsigned char *end;	/* end of left match */
} Conj_right_t;
/* Bm_mask_t is an alias for unsigned int. */
typedef unsigned int Bm_mask_t;
/*
 * NOTE(review): Bm_t has no members in the visible text. A struct with
 * no members is not valid ISO C (compilers accept it only as an
 * extension); presumably the member list is missing -- confirm against
 * the complete header.
 */
typedef struct Bm_s
{
} Bm_t;
/*
 * String_t: an int pointer (fail) and an unsigned char pointer (base).
 * NOTE(review): fail is presumably the Knuth-Morris-Pratt failure table
 * mentioned in the file header, and base the string bytes -- confirm.
 */
typedef struct String_s {
	int *fail;
	unsigned char *base;
} String_t;
/*
 * NOTE(review): Set_t has no members in the visible text. A struct with
 * no members is not valid ISO C (compilers accept it only as an
 * extension); presumably the member list is missing -- confirm against
 * the complete header.
 */
typedef struct Set_s
{
} Set_t;
/*
 * Collate_t: holds one int, invert.
 * NOTE(review): presumably nonzero for a negated collation set -- confirm.
 */
typedef struct Collate_s {
	int invert;
} Collate_t;
/*
 * Binary_t: holds one int, serial.
 */
typedef struct Binary_s {
	int serial;
} Binary_t;
/*
 * Group_t: per-group bookkeeping -- four ints followed by a union
 * named expr.
 */
typedef struct Group_s
{
int number; /* group number */
int last; /* last contained group number */
int size; /* lookbehind size */
int back; /* backreferenced */
/*
 * NOTE(review): this union has no members in the visible text, which is
 * not valid ISO C; presumably its alternatives are missing -- confirm
 * against the complete header.
 */
union
{
} expr;
} Group_t;
/*
 * Exec_t: an untyped data pointer plus a read-only text pointer.
 */
typedef struct Exec_s {
	void *data;
	const char *text;
} Exec_t;
/*
 * Eight single-bit flags, 0x01 through 0x80, followed by a shift count.
 * NOTE(review): presumably these are OR-ed into the Nest_t.type[]
 * entries, with REX_NEST_SHIFT (8) locating a value stored above the
 * flag byte -- confirm against the code that fills type[].
 */
#define REX_NEST_open 0x01
#define REX_NEST_close 0x02
#define REX_NEST_escape 0x04
#define REX_NEST_quote 0x08
#define REX_NEST_literal 0x10
#define REX_NEST_delimiter 0x20
#define REX_NEST_terminator 0x40
#define REX_NEST_separator 0x80
#define REX_NEST_SHIFT 8
/*
 * Nest_t: an int (primary) followed by a one-element unsigned short
 * array (type).
 * NOTE(review): type[1] as the last member is presumably over-allocated
 * and indexed past element 0; the declared length is kept at 1 so that
 * sizeof(Nest_t) does not change -- confirm against the allocation site.
 */
typedef struct Nest_s {
	int primary;
	unsigned short type[1];
} Nest_t;
/*
* REX_ALT catcher, solely to get control at the end of an
* alternative to keep records for comparing matches.
*/
/*
 * NOTE(review): Alt_catch_t has no members in the visible text. A
 * struct with no members is not valid ISO C (compilers accept it only
 * as an extension); presumably the member list is missing -- confirm
 * against the complete header.
 */
typedef struct Alt_catch_s
{
} Alt_catch_t;
/*
 * Group_catch_t.
 * NOTE(review): no members are visible for this struct; it is closed
 * here so that the declarations after it are no longer swallowed into
 * its body. An empty struct is not valid ISO C (accepted only as a
 * compiler extension) -- restore the member list from the complete
 * header.
 */
typedef struct Group_catch_s
{
} Group_catch_t;
/*
 * Behind_catch_t: a pair of unsigned char pointers, beg and end.
 * NOTE(review): presumably the bounds saved for a lookbehind -- confirm.
 * The typedef name follows the _s/_t pairing of the sibling catch types
 * (Alt_catch_t, Neg_catch_t, Rep_catch_t).
 */
typedef struct Behind_catch_s
{
	unsigned char*	beg;
	unsigned char*	end;
} Behind_catch_t;
/*
* REX_NEG catcher determines what string lengths can be matched,
* then Neg investigates continuations of other lengths.
* This is inefficient. For !POSITIONS expressions, we can do better:
* since matches to rex will be enumerated in decreasing order,
* we can investigate continuations whenever a length is skipped.
*/
/*
 * Neg_catch_t: two unsigned char pointers, beg and index.
 */
typedef struct Neg_catch_s {
	unsigned char *beg;
	unsigned char *index;
} Neg_catch_t;
/*
* REX_REP catcher. One is created on the stack for
* each iteration of a complex repetition.
*/
/*
 * Rep_catch_t: an unsigned char pointer (beg) and an int (n).
 * NOTE(review): n is presumably the iteration count -- confirm.
 */
typedef struct Rep_catch_s {
	unsigned char *beg;
	int n;
} Rep_catch_t;
/*
* data structure for an alternation of pure strings
* son points to a subtree of all strings with a common
* prefix ending in character c. sib links alternate
* letters in the same position of a word. end=1 if
* some word ends with c. the order of strings is
* irrelevant, except long words must be investigated
* before short ones.
*/
/*
 * Trie_node_t: one node of the pure-string alternation trie.
 */
typedef struct Trie_node_s {
	unsigned char c;		/* character this node stands for */
	unsigned char end;		/* 1 if some word ends with c */
	struct Trie_node_s *son;	/* subtree of strings sharing the prefix ending in c */
	struct Trie_node_s *sib;	/* alternate letter at the same word position */
} Trie_node_t;
/*
 * Trie_t: a pointer to an array of Trie_node_t pointers plus two ints.
 * NOTE(review): root is presumably a table of per-first-character
 * subtrees, and min/max the shortest and longest word lengths --
 * confirm against the code that builds the trie.
 */
typedef struct Trie_s {
	Trie_node_t **root;
	int min;
	int max;
} Trie_t;
/*
* Rex_t is a node in a regular expression
*/
/*
 * Rex_t: a single node of a compiled regular expression.
 */
typedef struct Rex_s {
	unsigned char type;	/* node type */
	unsigned char marked;	/* node has already been marked */
	short serial;		/* subpattern number */
	int explicit;		/* scoped explicit match */
	int lo;			/* low dup count */
	int hi;			/* high dup count */
	union {
		void *data;		/* data after Rex_t */
		unsigned char onechar;	/* single char */
	} re;
} Rex_t;
/*
 * Env_t (struct reglib_s): the library's private regex_t information.
 */
typedef struct reglib_s {
	unsigned char *beg;	/* start of the string */
	unsigned char *end;	/* end of the string */
	int error;		/* last error */
	int explicit;		/* explicit match on this char */
	int leading;		/* leading match on this char */
	int refs;		/* regcomp()+regdup() reference count */
	unsigned char hard;	/* hard comp */
	unsigned char once;	/* quit if the first parse fails */
	unsigned char separate;	/* cannot combine */
	unsigned char stack;	/* hard comp or exec */
	unsigned char sub;	/* re_sub is valid */
} Env_t;
/*
 * State_t: shared state.
 *
 * escape is a 52-entry table; each entry pairs a one-byte key with 15
 * short values. Inside this header the member spelled `fatal` is
 * subject to the `#define fatal _reg_fatal` mapping near the top of the
 * file, so after preprocessing it is named _reg_fatal.
 */
typedef struct State_s {
	struct {
		unsigned char key;
		short val[15];
	} escape[52];
	int fatal;
	int initialized;
} State_t;
/*
 * Declares a function taking one int and returning a regclass_t.
 * Because of the `#define classfun _reg_classfun` mapping near the top
 * of this header, the symbol actually declared is _reg_classfun.
 * NOTE(review): regclass_t is not declared in the visible part of this
 * header; presumably it comes from regex.h -- confirm.
 */
extern regclass_t classfun(int);
#endif