/*
* Copyright (c) 1980 Regents of the University of California.
* All rights reserved. The Berkeley software License Agreement
* specifies the terms and conditions for redistribution.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <ctype.h>
/* Boolean type used in this file; it is a plain int. */
typedef int boolean;
#define FALSE 0
/* NIL is the value the routines in this file return when there is no result. */
#define NIL 0
extern char *l_idchars; /* set of characters legal in identifiers
in addition to letters and digits */
/* Old-style (unprototyped) declaration of strchr. */
extern char *strchr();
/* Forward declaration; expconv is defined further down in this file. */
static void expconv(void);
/*
 * NOTE(review): the backslash at the end of the #define below splices the
 * next line onto the directive, and that line holds only a comment.  As
 * written, isidchr(c) therefore has an empty replacement list.  Presumably
 * the real replacement text is missing here -- confirm against the
 * upstream file.
 */
#define isidchr(c) \
/* STRNCMP - like strncmp except that we convert the
* first string to lower case before comparing
* if l_onecase is set.
*/
/*
 * Counted comparison loop.  l_onecase selects one of two loops; each one
 * advances s1 and s2 by one character per iteration and repeats while
 * --len is non-zero.  When the count is used up the function returns 0.
 *
 * The count is pre-decremented and tested after the loop body, so the
 * body runs once before len is examined.
 *
 * NOTE(review): in this view the declarator (function name and parameter
 * list) between `int` and `{` is absent, and each `do` is followed
 * directly by `else`.  The statements that compare the characters and
 * produce a non-zero result are therefore not visible.  Confirm against
 * the upstream file before relying on this block.
 */
int
{
if (l_onecase) {
/* presumably the case-folding comparison -- TODO confirm, the test is not visible */
do
else {
s2++;
s1++;
}
while (--len);
} else {
/* presumably the plain comparison -- TODO confirm, the test is not visible */
do
else {
s2++;
s1++;
}
while (--len);
}
/* the count ran out without an early exit */
return(0);
}
/* The following routine converts an irregular expression to
* internal format.
*
* Either meta symbols (\a \d or \p) or character strings or
* operations ( alternation or parenthesizing ) can be
* specified. Each starts with a descriptor byte. The descriptor
* byte has STR set for strings, META set for meta symbols
* and OPER set for operations.
* The descriptor byte can also have the OPT bit set if the object
* defined is optional. Also ALT can be set to indicate an alternation.
*
* For metasymbols the byte following the descriptor byte identifies
* the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '('). For
* strings the byte after the descriptor is a character count for
* the string:
*
* meta symbols := descriptor
* symbol
*
* strings := descriptor
* character count
* the string
*
* operations := descriptor
* symbol
* character count
*/
/*
* handy macros for accessing parts of match blocks
*/
/*
* bit flags in the descriptor
*/
/* Old-style (unprototyped) declaration of malloc. */
char *malloc();
/*
 * Entry point of the conversion.  In the visible code it returns NIL when
 * re is the empty string; otherwise it calls expconv(), stores a 0 through
 * ccre and returns cre.
 *
 * NOTE(review): the function name, its parameter list and the statements
 * that allocate and initialise the output are absent in this view.  As
 * shown, the first `return (NIL);` is unconditional; presumably it was
 * guarded by an allocation-failure test.  Confirm against the upstream
 * file.
 */
char *
/* re - unconverted irregular expression */
{
/* allocate room for the converted expression */
return (NIL);
/* an empty expression yields NIL */
if (*re == '\0')
return (NIL);
/* start the conversion with a \a */
/* start the conversion (it's recursive) */
expconv ();
/* store a 0 at the current ccre position, after whatever expconv wrote */
*ccre = 0;
return (cre);
}
/*
 * Recursive worker for the conversion.  It reads a character from ure
 * (and, after a backslash, a second one) and dispatches on it.  Ordinary
 * characters are stored through ccre, the '(' case calls expconv() again
 * and the ')' case returns.
 *
 * NOTE(review): many statements are absent in this view.  The
 * `do { } while (temp != 0);` loops have empty bodies, temp is never
 * assigned in the visible lines, several `else` branches have no matching
 * `if`, and the braces do not balance.  The comments below describe
 * intent only; confirm the real logic against the upstream file.
 */
static void
expconv(void)
{
char c; /* character being processed */
int temp; /* loop condition for the do/while loops; its assignment is not visible here */
/* let the conversion begin */
switch (c = *ure++) {
/* a backslash introduces an escape: dispatch on the following character */
case '\\':
switch (c = *ure++) {
/* escaped characters are just characters */
default:
ccre += 2;
} else
*ccre++ = c;
break;
/* normal(?) metacharacters */
case 'a':
case 'd':
case 'e':
case 'p':
do {
} while (temp != 0);
}
break;
}
break;
/* just put the symbol in */
case '^':
case '$':
do {
} while (temp != 0);
}
break;
/* mark the last match sequence as optional */
case '?':
if (cs)
break;
/* recurse and define a subexpression */
case '(':
do {
} while (temp != 0);
}
expconv ();
break;
/* return from a recursion */
case ')':
do {
} while (temp != 0);
}
return;
/* mark the last match sequence as having an alternate */
/* the third byte will contain an offset to jump over the */
/* alternate match in case the first did not fail */
case '|':
else
break;
/* if it's not a metasymbol just build a character string */
default:
} else
*ccre++ = c;
break;
}
}
do {
} while (temp != 0);
}
}
/* end of convertre */
/*
* The following routine recognises an irregular expression
* with the following special characters:
*
* \? - means last match was optional
* \a - matches any number of characters
* \d - matches any number of spaces and tabs
* \p - matches any number of alphanumeric
* characters. The
* characters matched will be copied into
* the area pointed to by 'name'.
* \| - alternation
* \( \) - grouping used mostly for alternation and
* optionality
*
* The irregular expression must be translated to internal form
* prior to calling this routine
*
* The value returned is the pointer to the first non \a
* character matched.
*/
/*
 * Matcher for a converted expression.  The visible code loops while *cs
 * is non-zero and has cases for STR, OPER and META descriptors.  It
 * returns NIL where a required element fails to match, and returns s at
 * the ')' case and after the loop ends.
 *
 * NOTE(review): the function name, its parameter list, the local
 * declarations and the `switch` headers are absent in this view, as are
 * most of the statements that perform the comparisons.  The nesting of
 * the case labels therefore cannot be determined from this text alone.
 * The comments below describe intent only; confirm against the upstream
 * file.
 */
char *
/* s - string to check for a match in */
/* re - a converted irregular expression */
/* mstring - where to put whatever matches a \p */
{
/* initial conditions */
return (NIL);
/* loop till expression string is exhausted (or at least pretty tired) */
while (*cs) {
/* try to match a string */
case STR:
if (matched) {
/* hoorah it matches */
/* alternation, skip to next expression */
/* the match is optional */
} else {
/* no match, error return */
return (NIL);
}
break;
/* an operator, do something fancy */
case OPER:
/* this is an alternation */
case '|':
if (matched)
/* last thing in the alternation was a match, skip ahead */
else
/* no match, keep trying */
break;
/* this is a grouping, recurse */
case '(':
/* the subexpression matched */
matched = 1;
s = ptr;
/* alternation, skip to next expression */
matched = 0;
/* the match is optional */
} else {
/* no match, error return */
return (NIL);
}
break;
}
break;
/* try to match a metasymbol */
case META:
/* try to match anything and remember what was matched */
case 'p':
/*
* This is really the same as trying to match the
* remaining parts of the expression to any subset
* of the string.
*/
s1 = s;
do {
/* we have a match, remember the match */
return (ptr);
/* it was optional so no match is ok */
return (ptr);
/* not optional and we still matched */
return (NIL);
}
return (NIL);
if (*s1 == '\\')
else
} while (*s1++);
return (NIL);
/* try to match anything */
case 'a':
/*
* This is really the same as trying to match the
* remaining parts of the expression to any subset
* of the string.
*/
s1 = s;
do {
/* we have a match */
return (ptr);
/* it was optional so no match is ok */
return (ptr);
/* not optional and we still matched */
return (NIL);
}
if (*s1 == '\\')
else
} while (*s1++);
return (NIL);
/* fail if we are currently _escaped */
case 'e':
if (_escaped)
return(NIL);
break;
/* match any number of tabs and spaces */
case 'd':
/* remember where the run of blanks started, then skip over it */
ptr = s;
while (*s == ' ' || *s == '\t')
s++;
/* match, be happy */
matched = 1;
} else if (*s == '\n' || *s == '\0') {
/* match, be happy */
matched = 1;
/* try the next part */
matched = 0;
/* doesn't matter */
matched = 1;
} else
/* no match, error return */
return (NIL);
break;
/* check for end of line */
case '$':
if (*s == '\0' || *s == '\n') {
/* match, be happy */
s++;
matched = 1;
/* try the next part */
matched = 0;
/* doesn't matter */
matched = 1;
} else
/* no match, error return */
return (NIL);
break;
/* check for start of line */
case '^':
if (s == Start) {
/* match, be happy */
matched = 1;
/* try the next part */
matched = 0;
/* doesn't matter */
matched = 1;
} else
/* no match, error return */
return (NIL);
break;
/* end of a subexpression, return success */
case ')':
return (s);
}
break;
}
}
/* the expression is exhausted: return the current position in the string */
return (s);
}