/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* routines to do regular expression matching
*
* Entry points:
*
* re_comp(s)
* char *s;
* ... returns 0 if the string s was compiled successfully,
* a pointer to an error message otherwise.
* If passed 0 or a null string returns without changing
* the currently compiled re (see note 11 below).
*
* re_exec(s)
* char *s;
* ... returns 1 if the string s matches the last compiled regular
* expression,
* 0 if the string s failed to match the last compiled
* regular expression, and
* -1 if the compiled regular expression was invalid
* (indicating an internal error).
*
* The strings passed to both re_comp and re_exec may have trailing or
* embedded newline characters; they are terminated by nulls.
*
* The identity of the author of these routines is lost in antiquity;
* this is essentially the same as the re code in the original V6 ed.
*
* The regular expressions recognized are described below. This description
* is essentially the same as that for ed.
*
* A regular expression specifies a set of strings of characters.
* A member of this set of strings is said to be matched by
* the regular expression. In the following specification for
* regular expressions the word `character' means any character but NUL.
*
* 1. Any character except a special character matches itself.
* Special characters are the regular expression delimiter plus
* \ [ . and sometimes ^ * $.
* 2. A . matches any character.
* 3. A \ followed by any character except a digit or ( )
* matches that character.
* 4. A nonempty string s bracketed [s] (or [^s]) matches any
* character in (or not in) s. In s, \ has no special meaning,
* and ] may only appear as the first letter. A substring
* a-b, with a and b in ascending ASCII order, stands for
* the inclusive range of ASCII characters.
* 5. A regular expression of form 1-4 followed by * matches a
* sequence of 0 or more matches of the regular expression.
* 6. A regular expression, x, of form 1-8, bracketed \(x\)
* matches what x matches.
* 7. A \ followed by a digit n matches a copy of the string that the
* bracketed regular expression beginning with the nth \( matched.
* 8. A regular expression of form 1-8, x, followed by a regular
* expression of form 1-7, y matches a match for x followed by
* a match for y, with the x match being as long as possible
* while still permitting a y match.
* 9. A regular expression of form 1-8 preceded by ^ (or followed
* by $), is constrained to matches that begin at the left
* (or end at the right) end of a line.
* 10. A regular expression of form 1-9 picks out the longest among
* the leftmost matches in a line.
* 11. An empty regular expression stands for a copy of the last
* regular expression encountered.
*/
#include "lint.h"
#include <stdlib.h>
#include <re_comp.h>
#include <stddef.h>
/*
* constants for re's
*/
static struct re_globals {
char _circf;
} *re_globals;
static int advance(const char *, char *);
static int backref(int, const char *);
static int cclass(char *, char, int);
/*
* compile the regular expression argument into a dfa
*/
char *
{
char c;
char *ep;
return ("Out of memory");
re_globals = _re;
}
if (*ep == 0)
return ("No previous regular expression");
return (NULL);
}
if (*sp == '^') {
circf = 1;
sp++;
}
else
circf = 0;
for (;;) {
if ((c = *sp++) == '\0') {
comerr("unmatched \\(");
*ep++ = 0;
return (NULL);
}
if (c != '*')
switch (c) {
case '.':
continue;
case '*':
goto defchar;
continue;
case '$':
if (*sp != '\0')
goto defchar;
continue;
case '[':
*ep++ = 0;
cclcnt = 1;
if ((c = *sp++) == '^') {
c = *sp++;
}
do {
if (c == '\0')
comerr("missing ]");
if ((c = *sp++) == ']') {
*ep++ = '-';
cclcnt++;
break;
}
while (ep[-1] < c) {
ep++;
cclcnt++;
}
}
*ep++ = c;
cclcnt++;
} while ((c = *sp++) != ']');
continue;
case '\\':
if ((c = *sp++) == '(') {
comerr("too many \\(\\) pairs");
continue;
}
if (c == ')') {
comerr("unmatched \\)");
continue;
}
*ep++ = c - '1';
continue;
}
*ep++ = c;
continue;
default:
*ep++ = c;
}
}
}
/*
* match the argument string against the compiled re
*/
int
{
char *p2;
int c;
int rv;
return (0);
for (c = 0; c < NBRA; c++) {
braslist[c] = 0;
braelist[c] = 0;
}
if (circf)
/*
* fast check for first character
*/
c = p2[1];
do {
if (*p1 != c)
continue;
return (rv);
} while (*p1++);
return (0);
}
/*
* regular algorithm
*/
do {
return (rv);
} while (*p1++);
return (0);
}
/*
* try to match the next thing in the dfa
*/
static int
{
const char *curlp;
int i;
int rv;
for (;;)
switch (*ep++) {
case CCHR:
continue;
return (0);
case CDOT:
if (*lp++)
continue;
return (0);
case CDOL:
if (*lp == '\0')
continue;
return (0);
case CEOF:
return (1);
case CCL:
continue;
}
return (0);
case NCCL:
continue;
}
return (0);
case CBRA:
continue;
case CKET:
continue;
case CBACK:
return (-1);
continue;
}
return (0);
return (-1);
return (rv);
}
continue;
while (*lp++)
;
goto star;
;
ep++;
goto star;
;
goto star;
star:
do {
lp--;
return (rv);
return (0);
default:
return (-1);
}
}
static int
{
char *bp;
return (1);
return (0);
}
static int
{
int n;
if (c == 0)
return (0);
n = *set++;
while (--n)
if (*set++ == c)
return (af);
return (! af);
}