/* regclass.c revision da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968 */
/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1985-2007 AT&T Knowledge Ventures *
* and is licensed under the *
* Common Public License, Version 1.0 *
* by AT&T Knowledge Ventures *
* *
* A copy of the License is available at *
* http://www.opensource.org/licenses/cpl1.0.txt *
* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* David Korn <dgk@research.att.com> *
* Phong Vo <kpv@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* RE character class support
*/
#include "reglib.h"
/*
 * one character class: associates a class name with its predicate function
 */
struct Ctype_s; typedef struct Ctype_s Ctype_t;
struct Ctype_s
{
const char* name; /* class name; 0 in a table slot that is not yet assigned */
size_t size; /* length of name, not counting the terminating 0 */
regclass_t ctype; /* predicate that regclass() returns for this class */
Ctype_t* next; /* next entry on the ctypes list */
#if _lib_wctype
wctype_t wtype; /* wctype() handle read by the Is_wc_N predicates */
#endif
};
/* head of the list of classes added by regaddclass(), newest first */
static Ctype_t* ctypes;
/*
 * CTYPES is the number of named entries at the front of ctype[]
 * (alnum through xdigit, 13 in all); the Is_wc_N predicates index the
 * wctype() slots as ctype[CTYPES+N-1], so this value must match the
 * table exactly or each predicate reads a neighboring slot
 */
#define CTYPES 13
/*
 * WTYPES is the number of wctype() slots that follow the named entries
 */
#if _lib_wctype
#define WTYPES 8
#else
#define WTYPES 0
#endif
/*
* this stuff gets around posix failure to define isblank,
* and the fact that ctype functions are macros
* and any local extensions that may not even have functions or macros
*/
#if _need_iswblank
/*
 * iswblank() substitute: looks up the "blank" wctype once, on the first
 * call, and tests wc against it
 */
int
_reg_iswblank(wint_t wc)
{
	static wctype_t	blank_class;
	static int	looked_up;

	if (looked_up == 0)
	{
		blank_class = wctype("blank");
		looked_up = 1;
	}
	return iswctype(wc, blank_class);
}
#endif
/*
 * real functions wrapping the isw*() classifiers so that their addresses
 * can be stored in tables and returned as regclass_t values
 *
 * the Not*() variants are the complements; "word" is alnum plus '_'
 */

static int
Isalnum(int c)
{
	return iswalnum(c);
}

static int
Isalpha(int c)
{
	return iswalpha(c);
}

static int
Isblank(int c)
{
	return iswblank(c);
}

static int
Iscntrl(int c)
{
	return iswcntrl(c);
}

static int
Isdigit(int c)
{
	return iswdigit(c);
}

static int
Notdigit(int c)
{
	return iswdigit(c) ? 0 : 1;
}

static int
Isgraph(int c)
{
	return iswgraph(c);
}

static int
Islower(int c)
{
	return iswlower(c);
}

static int
Isprint(int c)
{
	return iswprint(c);
}

static int
Ispunct(int c)
{
	return iswpunct(c);
}

static int
Isspace(int c)
{
	return iswspace(c);
}

static int
Notspace(int c)
{
	return iswspace(c) ? 0 : 1;
}

static int
Isupper(int c)
{
	return iswupper(c);
}

static int
Isword(int c)
{
	if (c == '_')
		return 1;
	return iswalnum(c) != 0;
}

static int
Notword(int c)
{
	/* exact complement of Isword() */
	return !Isword(c);
}

static int
Isxdigit(int c)
{
	return iswxdigit(c);
}
#if _lib_wctype
/*
 * declared ahead of the ctype[] table, which names these functions in
 * its initializer; the definitions come after the table because they
 * in turn index into it
 */
static int Is_wc_1(int c);
static int Is_wc_2(int c);
static int Is_wc_3(int c);
static int Is_wc_4(int c);
static int Is_wc_5(int c);
static int Is_wc_6(int c);
static int Is_wc_7(int c);
static int Is_wc_8(int c);
#endif
/* SZ expands a string literal into the name, size initializer pair */
#define SZ(s) s,(sizeof(s)-1)
/*
 * builtin class table, searched in order by regclass()
 *
 * the 13 named entries come first; with _lib_wctype they are followed
 * by 8 unassigned slots (name and size 0) that regclass() fills in with
 * names accepted by wctype()
 * each Is_wc_N reads ctype[CTYPES+N-1].wtype, so CTYPES must equal the
 * number of named entries
 */
static Ctype_t ctype[] =
{
{ SZ("alnum"), Isalnum },
{ SZ("alpha"), Isalpha },
{ SZ("blank"), Isblank },
{ SZ("cntrl"), Iscntrl },
{ SZ("digit"), Isdigit },
{ SZ("graph"), Isgraph },
{ SZ("lower"), Islower },
{ SZ("print"), Isprint },
{ SZ("punct"), Ispunct },
{ SZ("space"), Isspace },
{ SZ("upper"), Isupper },
{ SZ("word"), Isword },
{ SZ("xdigit"),Isxdigit},
#if _lib_wctype
/* unassigned wctype() slots, one per Is_wc_N predicate */
{ 0, 0, Is_wc_1 },
{ 0, 0, Is_wc_2 },
{ 0, 0, Is_wc_3 },
{ 0, 0, Is_wc_4 },
{ 0, 0, Is_wc_5 },
{ 0, 0, Is_wc_6 },
{ 0, 0, Is_wc_7 },
{ 0, 0, Is_wc_8 },
#endif
};
#if _lib_wctype

/*
 * one predicate per wctype() slot: Is_wc_N tests c against the wctype_t
 * stored in ctype[CTYPES+N-1]
 * this only selects the slot that names Is_wc_N as its function when
 * CTYPES equals the number of named entries in ctype[]
 */

static int
Is_wc_1(int c)
{
	wctype_t	wt = ctype[CTYPES + 0].wtype;

	return iswctype(c, wt);
}

static int
Is_wc_2(int c)
{
	wctype_t	wt = ctype[CTYPES + 1].wtype;

	return iswctype(c, wt);
}

static int
Is_wc_3(int c)
{
	wctype_t	wt = ctype[CTYPES + 2].wtype;

	return iswctype(c, wt);
}

static int
Is_wc_4(int c)
{
	wctype_t	wt = ctype[CTYPES + 3].wtype;

	return iswctype(c, wt);
}

static int
Is_wc_5(int c)
{
	wctype_t	wt = ctype[CTYPES + 4].wtype;

	return iswctype(c, wt);
}

static int
Is_wc_6(int c)
{
	wctype_t	wt = ctype[CTYPES + 5].wtype;

	return iswctype(c, wt);
}

static int
Is_wc_7(int c)
{
	wctype_t	wt = ctype[CTYPES + 6].wtype;

	return iswctype(c, wt);
}

static int
Is_wc_8(int c)
{
	wctype_t	wt = ctype[CTYPES + 7].wtype;

	return iswctype(c, wt);
}

#endif
/*
 * return the ctype function for the bracket class that starts at s
 *
 * s points to the first char after the initial [ ; that char is the
 * delimiter (the : in :class:]) and the class name runs up to the next
 * occurrence of the delimiter that is immediately followed by ]
 *
 * lookup order: classes added by regaddclass(), then the builtin table;
 * with _lib_wctype a name that matched nothing is passed to wctype()
 * and, if accepted, cached in the first unassigned table slot
 *
 * if e!=0 then *e is set to the first char after the closing ]
 * 0 returned on error: empty s, missing terminator, unknown class name,
 * or no unassigned slot left to cache a wctype() class
 */
regclass_t
regclass(const char* s, char** e)
{
	register Ctype_t*	cp;
	register int		c;
	register size_t		n;
	register const char*	t;

	/* the first char is the delimiter; an empty string has no class */
	if (!(c = *s++))
		return 0;
	/* advance t to the closing delimiter: the first c followed by ] */
	for (t = s; *t && (*t != c || *(t + 1) != ']'); t++);
	if (*t != c)
		return 0;
	n = t - s;
	/* classes added by regaddclass() are checked before the builtins */
	for (cp = ctypes; cp; cp = cp->next)
		if (n == cp->size && strneq(s, cp->name, n))
			goto found;
	for (cp = ctype; cp < &ctype[elementsof(ctype)]; cp++)
	{
#if _lib_wctype
		/*
		 * an unassigned slot (size 0): copy n + 1 bytes so there is
		 * room to overwrite the delimiter with the terminating 0,
		 * then ask wctype() whether it knows the name
		 */
		if (!cp->size && (cp->name = (const char*)memdup(s, n + 1)))
		{
			*((char*)cp->name + n) = 0;
			/* mvs.390 needs the (char*) cast -- barf */
			if (!(cp->wtype = wctype((char*)cp->name)))
			{
				free((char*)cp->name);
				/* do not leave a freed pointer in the static table */
				cp->name = 0;
				return 0;
			}
			cp->size = n;
			goto found;
		}
#endif
		/* name is 0 in an unassigned slot whose memdup() failed above */
		if (cp->name && n == cp->size && strneq(s, cp->name, n))
			goto found;
	}
	return 0;
 found:
	/* t is at the closing delimiter, so t + 2 is just past the ] */
	if (e)
		*e = (char*)t + 2;
	return cp->ctype;
}
/*
 * bind the class name to the ctype function fun
 *
 * a name already on the ctypes list just has its function replaced;
 * otherwise a new entry is pushed on the front of the list
 * returns 0 on success, REG_ESPACE if the new entry cannot be allocated
 */
int
regaddclass(const char* name, regclass_t fun)
{
	Ctype_t*	entry;
	size_t		len = strlen(name);

	for (entry = ctypes; entry; entry = entry->next)
	{
		if (entry->size == len && strneq(name, entry->name, len))
		{
			entry->ctype = fun;
			return 0;
		}
	}
	entry = newof(0, Ctype_t, 1, len + 1);
	if (!entry)
		return REG_ESPACE;
	/* the name is copied into the space directly after the struct */
	entry->name = strcpy((char*)(entry + 1), name);
	entry->size = len;
	entry->ctype = fun;
	entry->next = ctypes;
	ctypes = entry;
	return 0;
}
/*
 * return the ctype function for the token type
 *
 * T_ALNUM and T_ALNUM_NOT select the "word" predicates (alnum plus '_')
 * 0 returned for any other token
 */
regclass_t
classfun(int type)
{
	regclass_t	fun = 0;

	switch (type)
	{
	case T_ALNUM:
		fun = Isword;
		break;
	case T_ALNUM_NOT:
		fun = Notword;
		break;
	case T_DIGIT:
		fun = Isdigit;
		break;
	case T_DIGIT_NOT:
		fun = Notdigit;
		break;
	case T_SPACE:
		fun = Isspace;
		break;
	case T_SPACE_NOT:
		fun = Notspace;
		break;
	default:
		break;
	}
	return fun;
}