regexp.h revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/* Copyright (c) 1988 AT&T */
/* All Rights Reserved */
/*
* Copyright 1997-2002 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _REGEXP_H
#define _REGEXP_H
#pragma ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.9 */
#include <string.h>
#ifdef __cplusplus
extern "C" {
#endif
#define CBRA 2
#define CCHR 4
#define CDOT 8
#define CCL 12
#define CXCL 16
#define CDOL 20
#define CCEOF 22
#define CKET 24
#define CBACK 36
#define NCCL 40
#define STAR 01
#define RNGE 03
#define NBRA 9
#define PLACE(c) ep[c >> 3] |= bittab[c & 07]
#define ISTHERE(c) (ep[c >> 3] & bittab[c & 07])
#define ecmp(s1, s2, n) (strncmp(s1, s2, n) == 0)
static char *braslist[NBRA];
static char *braelist[NBRA];
int sed, nbra;
char *loc1, *loc2, *locs;
static int nodelim;
int circf;
static int low;
static int size;
static unsigned char bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
#ifdef __STDC__
int advance(const char *lp, const char *ep);
static void getrnge(const char *str);
#else
int advance();
static void getrnge();
#endif
char *
#ifdef __STDC__
compile(char *instring, char *ep, const char *endbuf, int seof)
#else
compile(instring, ep, endbuf, seof)
register char *ep;
char *instring, *endbuf;
int seof;
#endif
{
INIT /* Dependent declarations and initializations */
register int c;
register int eof = seof;
char *lastep;
int cclcnt;
char bracket[NBRA], *bracketp;
int closed;
int neg;
int lc;
int i, cflg;
int iflag; /* used for non-ascii characters in brackets */
lastep = NULL;
if ((c = GETC()) == eof || c == '\n') {
if (c == '\n') {
UNGETC(c);
nodelim = 1;
}
if (*ep == 0 && !sed)
ERROR(41);
RETURN(ep);
}
bracketp = bracket;
circf = closed = nbra = 0;
if (c == '^')
circf++;
else
UNGETC(c);
while (1) {
if (ep >= endbuf)
ERROR(50);
c = GETC();
if (c != '*' && ((c != '\\') || (PEEKC() != '{')))
lastep = ep;
if (c == eof) {
*ep++ = CCEOF;
if (bracketp != bracket)
ERROR(42);
RETURN(ep);
}
switch (c) {
case '.':
*ep++ = CDOT;
continue;
case '\n':
if (!sed) {
UNGETC(c);
*ep++ = CCEOF;
nodelim = 1;
if (bracketp != bracket)
ERROR(42);
RETURN(ep);
} else ERROR(36);
case '*':
if (lastep == NULL || *lastep == CBRA ||
*lastep == CKET)
goto defchar;
*lastep |= STAR;
continue;
case '$':
if (PEEKC() != eof && PEEKC() != '\n')
goto defchar;
*ep++ = CDOL;
continue;
case '[':
if (&ep[17] >= endbuf)
ERROR(50);
*ep++ = CCL;
lc = 0;
for (i = 0; i < 16; i++)
ep[i] = 0;
neg = 0;
if ((c = GETC()) == '^') {
neg = 1;
c = GETC();
}
iflag = 1;
do {
c &= 0377;
if (c == '\0' || c == '\n')
ERROR(49);
if ((c & 0200) && iflag) {
iflag = 0;
if (&ep[32] >= endbuf)
ERROR(50);
ep[-1] = CXCL;
for (i = 16; i < 32; i++)
ep[i] = 0;
}
if (c == '-' && lc != 0) {
if ((c = GETC()) == ']') {
PLACE('-');
break;
}
if ((c & 0200) && iflag) {
iflag = 0;
if (&ep[32] >= endbuf)
ERROR(50);
ep[-1] = CXCL;
for (i = 16; i < 32; i++)
ep[i] = 0;
}
while (lc < c) {
PLACE(lc);
lc++;
}
}
lc = c;
PLACE(c);
} while ((c = GETC()) != ']');
if (iflag)
iflag = 16;
else
iflag = 32;
if (neg) {
if (iflag == 32) {
for (cclcnt = 0; cclcnt < iflag;
cclcnt++)
ep[cclcnt] ^= 0377;
ep[0] &= 0376;
} else {
ep[-1] = NCCL;
/* make nulls match so test fails */
ep[0] |= 01;
}
}
ep += iflag;
continue;
case '\\':
switch (c = GETC()) {
case '(':
if (nbra >= NBRA)
ERROR(43);
*bracketp++ = (char)nbra;
*ep++ = CBRA;
*ep++ = (char)nbra++;
continue;
case ')':
if (bracketp <= bracket)
ERROR(42);
*ep++ = CKET;
*ep++ = *--bracketp;
closed++;
continue;
case '{':
if (lastep == NULL)
goto defchar;
*lastep |= RNGE;
cflg = 0;
nlim:
c = GETC();
i = 0;
do {
if ('0' <= c && c <= '9')
i = 10 * i + c - '0';
else
ERROR(16);
} while (((c = GETC()) != '\\') && (c != ','));
if (i >= 255)
ERROR(11);
*ep++ = (char)i;
if (c == ',') {
if (cflg++)
ERROR(44);
if ((c = GETC()) == '\\')
*ep++ = (char)255;
else {
UNGETC(c);
goto nlim;
/* get 2'nd number */
}
}
if (GETC() != '}')
ERROR(45);
if (!cflg) /* one number */
*ep++ = (char)i;
else if ((ep[-1] & 0377) < (ep[-2] & 0377))
ERROR(46);
continue;
case '\n':
ERROR(36);
case 'n':
c = '\n';
goto defchar;
default:
if (c >= '1' && c <= '9') {
if ((c -= '1') >= closed)
ERROR(25);
*ep++ = CBACK;
*ep++ = (char)c;
continue;
}
}
/* Drop through to default to use \ to turn off special chars */
defchar:
default:
lastep = ep;
*ep++ = CCHR;
*ep++ = (char)c;
}
}
}
#ifdef __STDC__
int
step(const char *p1, const char *p2)
#else
int
step(p1, p2)
register char *p1, *p2;
#endif
{
char c;
if (circf) {
loc1 = (char *)p1;
return (advance(p1, p2));
}
/* fast check for first character */
if (*p2 == CCHR) {
c = p2[1];
do {
if (*p1 != c)
continue;
if (advance(p1, p2)) {
loc1 = (char *)p1;
return (1);
}
} while (*p1++);
return (0);
}
/* regular algorithm */
do {
if (advance(p1, p2)) {
loc1 = (char *)p1;
return (1);
}
} while (*p1++);
return (0);
}
int
#ifdef __STDC__
advance(const char *lp, const char *ep)
#else
advance(lp, ep)
register char *lp, *ep;
#endif
{
#ifdef __STDC__
const char *curlp;
#else
register char *curlp;
#endif
int c;
char *bbeg;
register char neg;
size_t ct;
while (1) {
neg = 0;
switch (*ep++) {
case CCHR:
if (*ep++ == *lp++)
continue;
return (0);
/*FALLTHRU*/
case CDOT:
if (*lp++)
continue;
return (0);
/*FALLTHRU*/
case CDOL:
if (*lp == 0)
continue;
return (0);
/*FALLTHRU*/
case CCEOF:
loc2 = (char *)lp;
return (1);
/*FALLTHRU*/
case CXCL:
c = (unsigned char)*lp++;
if (ISTHERE(c)) {
ep += 32;
continue;
}
return (0);
/*FALLTHRU*/
case NCCL:
neg = 1;
/*FALLTHRU*/
case CCL:
c = *lp++;
if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) {
ep += 16;
continue;
}
return (0);
/*FALLTHRU*/
case CBRA:
braslist[*ep++] = (char *)lp;
continue;
/*FALLTHRU*/
case CKET:
braelist[*ep++] = (char *)lp;
continue;
/*FALLTHRU*/
case CCHR | RNGE:
c = *ep++;
getrnge(ep);
while (low--)
if (*lp++ != c)
return (0);
curlp = lp;
while (size--)
if (*lp++ != c)
break;
if (size < 0)
lp++;
ep += 2;
goto star;
/*FALLTHRU*/
case CDOT | RNGE:
getrnge(ep);
while (low--)
if (*lp++ == '\0')
return (0);
curlp = lp;
while (size--)
if (*lp++ == '\0')
break;
if (size < 0)
lp++;
ep += 2;
goto star;
/*FALLTHRU*/
case CXCL | RNGE:
getrnge(ep + 32);
while (low--) {
c = (unsigned char)*lp++;
if (!ISTHERE(c))
return (0);
}
curlp = lp;
while (size--) {
c = (unsigned char)*lp++;
if (!ISTHERE(c))
break;
}
if (size < 0)
lp++;
ep += 34; /* 32 + 2 */
goto star;
/*FALLTHRU*/
case NCCL | RNGE:
neg = 1;
/*FALLTHRU*/
case CCL | RNGE:
getrnge(ep + 16);
while (low--) {
c = *lp++;
if (((c & 0200) || !ISTHERE(c)) ^ neg)
return (0);
}
curlp = lp;
while (size--) {
c = *lp++;
if (((c & 0200) || !ISTHERE(c)) ^ neg)
break;
}
if (size < 0)
lp++;
ep += 18; /* 16 + 2 */
goto star;
/*FALLTHRU*/
case CBACK:
bbeg = braslist[*ep];
ct = braelist[*ep++] - bbeg;
if (ecmp(bbeg, lp, ct)) {
lp += ct;
continue;
}
return (0);
/*FALLTHRU*/
case CBACK | STAR:
bbeg = braslist[*ep];
ct = braelist[*ep++] - bbeg;
curlp = lp;
while (ecmp(bbeg, lp, ct))
lp += ct;
while (lp >= curlp) {
if (advance(lp, ep))
return (1);
lp -= ct;
}
return (0);
/*FALLTHRU*/
case CDOT | STAR:
curlp = lp;
while (*lp++);
goto star;
/*FALLTHRU*/
case CCHR | STAR:
curlp = lp;
while (*lp++ == *ep);
ep++;
goto star;
/*FALLTHRU*/
case CXCL | STAR:
curlp = lp;
do {
c = (unsigned char)*lp++;
} while (ISTHERE(c));
ep += 32;
goto star;
/*FALLTHRU*/
case NCCL | STAR:
neg = 1;
/*FALLTHRU*/
case CCL | STAR:
curlp = lp;
do {
c = *lp++;
} while (((c & 0200) == 0 && ISTHERE(c)) ^ neg);
ep += 16;
goto star;
/*FALLTHRU*/
star:
do {
if (--lp == locs)
break;
if (advance(lp, ep))
return (1);
} while (lp > curlp);
return (0);
}
}
}
static void
#ifdef __STDC__
getrnge(const char *str)
#else
getrnge(str)
register char *str;
#endif
{
low = *str++ & 0377;
size = ((*str & 0377) == 255)? 20000: (*str &0377) - low;
}
#ifdef __cplusplus
}
#endif
#endif /* _REGEXP_H */