rexpr.c revision 4fd606d1f5abe38e1f42c38de1d2e895166bd0f4
/*
* This file contains code for
*
* int rexpr(char *expr, char *s);
*
* which answers
*
* 1 if 's' is in the language described by the regular expression 'expr'
* 0 if it is not
* -1 if the regular expression is invalid
*
* Language membership is determined by constructing a non-deterministic
* finite automata (NFA) from the regular expression. A depth-
* first-search is performed on the NFA (graph) to check for a match of 's'.
* Each non-epsilon arc consumes one character from 's'. Backtracking is
* performed to check all possible paths through the NFA.
*
* Regular expressions follow the meta-language:
*
* <regExpr> ::= <andExpr> ( '|' <andExpr> )*
*
* <andExpr> ::= <expr> ( <expr> )*
*
* <expr> ::= {'~'} '[' <atomList> ']' <repeatSymbol>
* | '(' <regExpr> ')' <repeatSymbol>
* | '{' <regExpr> '}' <repeatSymbol>
* | <atom> <repeatSymbol>
*
* <repeatSymbol> ::= { '*' | '+' }
*
* <atomList> ::= <atom> ( <atom> )*
* | { <atomList> } <atom> '-' <atom> { <atomList> }
*
* <atom> ::= Token[Atom]
*
* Notes:
* ~ means complement the set in [..]. i.e. all characters not listed
* * means match 0 or more times (can be on expression or atom)
* + means match 1 or more times (can be on expression or atom)
* {} optional
* () grouping
* [] set of atoms
* x-y all characters from x to y (found only in [..])
* \xx the character with value xx
*
* Examples:
* [a-z]+
* match 1 or more lower-case letters (e.g. variable)
*
* 0x[0-9A-Fa-f]+
* match a hex number with 0x on front (e.g. 0xA1FF)
*
* [0-9]+.[0-9]+{e[0-9]+}
* match a floating point number (e.g. 3.14e21)
*
* Code example:
* if ( rexpr("[a-zA-Z][a-zA-Z0-9]+", str) ) then str is keyword
*
* Terence Parr
* Purdue University
* April 1991
*/
#include <stdio.h>
#include <ctype.h>
#ifdef __STDC__
#include <stdlib.h>
#else
#include <malloc.h>
#endif
#include "rexpr.h"
#ifdef __USE_PROTOS
static int repeatSymbol( GraphPtr g );
static int atomList( char *p, int complement );
static void next( void );
static ArcPtr newGraphArc( void );
static Graph BuildNFA_set( char *s );
#else
static int regExpr();
static int andExpr();
static int expr();
static int repeatSymbol();
static int atomList();
static void next();
static ArcPtr newGraphArc();
static int ArcBetweenGraphNode();
static Graph BuildNFA_atom();
static Graph BuildNFA_AB();
static Graph BuildNFA_AorB();
static Graph BuildNFA_set();
static Graph BuildNFA_Astar();
static Graph BuildNFA_Aplus();
static Graph BuildNFA_Aoptional();
#endif
static char *_c;
/*
* return 1 if s in language described by expr
* 0 if s is not
* -1 if expr is an invalid regular expression
*/
#ifdef __USE_PROTOS
#else
char *expr, *s;
#endif
{
NodePtr p,q;
int result;
next();
/* free all your memory */
p = q = freelist;
return result;
}
/*
* do a depth-first-search on the NFA looking for a path from start to
* accept state labelled with the characters of 's'.
*/
#ifdef __USE_PROTOS
#else
char *s;
#endif
{
ArcPtr p;
{
{
}
else if ( p->label == *s )
}
return 0;
}
/*
* <regExpr> ::= <andExpr> ( '|' {<andExpr>} )*
*
* Return -1 if syntax error
* Return 0 if none found
* Return 1 if a regExrp was found
*/
#ifdef __USE_PROTOS
#else
static int regExpr(g)
GraphPtr g;
#endif
{
{
return -1;
}
while ( token == '|' )
{
int a;
next();
if ( a == -1 ) return -1; /* syntax error below */
else if ( !a ) return 1; /* empty alternative */
}
*g = g1;
return 1;
}
/*
* <andExpr> ::= <expr> ( <expr> )*
*/
#ifdef __USE_PROTOS
#else
static int andExpr(g)
GraphPtr g;
#endif
{
{
return -1;
}
{
}
*g = g1;
return 1;
}
/*
* <expr> ::= {'~'} '[' <atomList> ']' <repeatSymbol>
* | '(' <regExpr> ')' <repeatSymbol>
* | '{' <regExpr> '}' <repeatSymbol>
* | <atom> <repeatSymbol>
*/
#ifdef __USE_PROTOS
#else
static int expr(g)
GraphPtr g;
#endif
{
int complement = 0;
char s[257]; /* alloc space for string of char in [] */
{
next();
*g = BuildNFA_set( s );
next();
repeatSymbol( g );
return 1;
}
if ( token == '(' )
{
next();
next();
repeatSymbol( g );
return 1;
}
if ( token == '{' )
{
next();
next();
/* S p e c i a l C a s e O p t i o n a l { } */
{
*g = BuildNFA_Aoptional( *g );
}
repeatSymbol( g );
return 1;
}
{
*g = BuildNFA_atom( tokchar );
next();
repeatSymbol( g );
return 1;
}
return -1;
}
/*
* <repeatSymbol> ::= { '*' | '+' }
*/
#ifdef __USE_PROTOS
static int repeatSymbol(GraphPtr g)
#else
static int repeatSymbol(g)
GraphPtr g;
#endif
{
switch ( token )
{
}
return 1;
}
/*
* <atomList> ::= <atom> { <atom> }*
* { <atomList> } <atom> '-' <atom> { <atomList> }
*
* a-b is same as ab
* q-a is same as q
*/
#ifdef __USE_PROTOS
static int atomList(char *p, int complement)
#else
static int atomList(p, complement)
char *p;
int complement;
#endif
{
char *s = p;
for (i=0; i<256; i++) set[i] = 0;
{
next();
{
next();
else
{
}
{
}
next();
}
}
*s = '\0';
if ( complement )
{
*s = '\0';
}
return 1;
}
/* a somewhat stupid lexical analyzer */
#ifdef __USE_PROTOS
static void next(void)
#else
static void next()
#endif
{
if ( *_c=='\\' )
{
_c++;
{
int n=0;
{
}
if ( n>255 ) n=255;
tokchar = n;
}
else
{
switch (*_c)
{
}
_c++;
}
}
{
}
else
{
}
}
/* N F A B u i l d i n g R o u t i n e s */
#ifdef __USE_PROTOS
static ArcPtr newGraphArc(void)
#else
static ArcPtr newGraphArc()
#endif
{
ArcPtr p;
return p;
}
#ifdef __USE_PROTOS
#else
#endif
{
NodePtr p;
freelist = p;
return p;
}
#ifdef __USE_PROTOS
#else
static void ArcBetweenGraphNodes(i, j, label)
NodePtr i, j;
int label;
#endif
{
ArcPtr a;
a = newGraphArc();
a->target = j;
}
#ifdef __USE_PROTOS
#else
int label;
#endif
{
Graph g;
return( g );
}
#ifdef __USE_PROTOS
#else
static Graph BuildNFA_AB(A, B)
Graph A, B;
#endif
{
Graph g;
return( g );
}
#ifdef __USE_PROTOS
#else
static Graph BuildNFA_AorB(A, B)
Graph A, B;
#endif
{
Graph g;
return( g );
}
#ifdef __USE_PROTOS
static Graph BuildNFA_set(char *s)
#else
static Graph BuildNFA_set( s )
char *s;
#endif
{
Graph g;
if ( s == NULL ) return g;
while ( *s != '\0' )
{
}
return g;
}
#ifdef __USE_PROTOS
#else
static Graph BuildNFA_Astar( A )
Graph A;
#endif
{
Graph g;
return( g );
}
#ifdef __USE_PROTOS
#else
static Graph BuildNFA_Aplus( A )
Graph A;
#endif
{
return( A );
}
#ifdef __USE_PROTOS
#else
static Graph BuildNFA_Aoptional( A )
Graph A;
#endif
{
Graph g;
return( g );
}