regex.c revision f6ea2b1d09668564c764e5d32ada7652068f0562
/*
* Copyright (C) 2013, 2014 Internet Systems Consortium, Inc. ("ISC")
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
* INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
* LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*/
#include <config.h>
#else
#endif
/*
* Validate the regular expression 'c' as interpreted in the 'C' locale.
*
* The pattern is walked character by character by a state machine
* (switch on 'state') whose visible states are none, parse_bound,
* parse_bracket, parse_ce, parse_ec and parse_cc.
*
* Returns 'sub', which is incremented once for every '(' handled in the
* 'none' state; a trailing 'return (-1)' follows the final #endif.
*
* NOTE(review): FAIL(), the identifiers state, have_atom, was_multiple,
* seen_comma, seen_char, seen_ce, seen_ec, ccname and found, and the goto
* labels 'literal' and 'inside' are not defined in the lines visible
* here -- confirm their definitions against the complete file.
*/
int
isc_regex_validate(const char *c) {
/* NOTE(review): the enumerator list and closing brace of this enum are
* not visible in this view. */
enum {
/* Well known character classes. */
const char *cc[] = {
":alnum:", ":digit:", ":punct:", ":alpha:", ":graph:",
":space:", ":blank:", ":lower:", ":upper:", ":cntrl:",
":print:", ":xdigit:"
};
/* Incremented on '(' and decremented on ')'; must be 0 at the end. */
int group = 0;
/* Range-tracking value used while inside a bracket expression. */
int range = 0;
/* Count of '(' seen so far; upper limit for back references and the
* value returned on success. */
int sub = 0;
/* Compared against 255 while parsing a {low,high} bound. */
unsigned int low = 0;
unsigned int high = 0;
/* Value recorded for the most recent bracket-expression element. */
int range_start = 0;
const char *reason = "";
/* NOTE(review): no matching #if is visible for this #endif. */
#endif
/* An absent or empty pattern is rejected. */
if (c == NULL || *c == 0)
FAIL("empty string");
/* Main scan: runs until the terminating NUL is reached. */
while (c != NULL && *c != 0) {
switch (state) {
/* Top-level state: outside any bound or bracket expression. */
case none:
switch (*c) {
case '\\': /* make literal */
++c;
switch (*c) {
/* Back reference \1..\9: the digit may not exceed the number of
* groups opened so far. */
case '1': case '2': case '3':
case '4': case '5': case '6':
case '7': case '8': case '9':
if ((*c - '0') > sub)
FAIL("bad back reference");
break;
/* A backslash as the final character of the pattern. */
case 0:
FAIL("escaped end-of-string");
default:
goto literal;
}
++c;
break;
case '[': /* bracket start */
++c;
break;
case '{': /* bound start */
/* Only '{' followed by a digit begins a bound; any other '{' is
* handled as a literal. */
switch (c[1]) {
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
case '8': case '9':
/* A bound needs a preceding atom and may not follow another
* repetition. */
if (!have_atom)
FAIL("no atom");
if (was_multiple)
FAIL("was multiple");
state = parse_bound;
break;
default:
goto literal;
}
++c;
break;
/* A '}' met in the top-level state is handled as a literal. */
case '}':
goto literal;
case '(': /* group start */
++group;
++sub;
++c;
break;
case ')': /* group end */
/* NOTE(review): this FAIL is not guarded by any condition in the
* visible lines -- confirm against the complete file. */
FAIL("empty alternative");
if (group != 0)
--group;
++c;
break;
case '|': /* alternative separator */
if (!have_atom)
FAIL("no atom");
++c;
break;
/* Anchors are consumed without further checks here. */
case '^':
case '$':
++c;
break;
/* Repetition operators: rejected directly after another repetition
* or when there is no preceding atom. */
case '+':
case '*':
case '?':
if (was_multiple)
FAIL("was multiple");
if (!have_atom)
FAIL("no atom");
++c;
break;
/* '.' shares the default handling: the character is consumed. */
case '.':
default:
++c;
break;
}
break;
/* Inside a bound following '{'. */
case parse_bound:
switch (*c) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
/* Before the comma the lower bound is checked, after it the upper
* bound; neither may exceed 255. */
if (!seen_comma) {
if (low > 255)
FAIL("lower bound too big");
} else {
if (high > 255)
FAIL("upper bound too big");
}
++c;
break;
case ',':
/* At most one comma is allowed in a bound. */
if (seen_comma)
FAIL("multiple commas");
++c;
break;
/* NOTE(review): default, '{' and '}' share this body, and the FAIL
* is not guarded by any visible condition -- confirm. */
default:
case '{':
case '}':
FAIL("bad parse bound");
++c;
break;
}
break;
/* Inside a bracket expression. */
case parse_bracket:
switch (*c) {
case '^':
++c;
break;
case '-':
if (range == 1)
FAIL("bad range");
range = 2;
++c;
break;
/* A nested '[' is classified by the character that follows it. */
case '[':
++c;
switch (*c) {
case '.': /* collating element */
++c;
break;
case '=': /* equivalence class */
if (range == 2)
FAIL("equivalence class in range");
++c;
break;
case ':': /* character class */
if (range == 2)
FAIL("character class in range");
/* Remember where the class name starts; parse_cc later measures
* the name as (c - ccname). */
ccname = c;
++c;
break;
}
break;
case ']':
/* A ']' that ends the pattern before any element was seen. */
if (!c[1] && !seen_char)
FAIL("unfinished brace");
if (!seen_char)
goto inside;
++c;
range = 0;
break;
default:
/* NOTE(review): this FAIL is not guarded by any condition in the
* visible lines -- confirm against the complete file. */
FAIL("out of order range");
if (range != 0)
--range;
/* The mask keeps the stored value within 0..255 even when plain
* char is signed. */
range_start = *c & 0xff;
++c;
break;
};
break;
/* Inside a collating element "[. ... .]". */
case parse_ce:
switch (*c) {
case '.':
++c;
switch (*c) {
case ']':
if (!seen_ce)
FAIL("empty ce");
++c;
break;
default:
/* 256 lies outside the 0..255 range produced by the "& 0xff" mask
* used in parse_bracket. */
if (seen_ce)
range_start = 256;
else
range_start = '.';
break;
}
break;
default:
if (seen_ce)
range_start = 256;
else
range_start = *c;
++c;
break;
}
break;
/* Inside an equivalence class "[= ... =]". */
case parse_ec:
switch (*c) {
case '=':
++c;
switch (*c) {
case ']':
if (!seen_ec)
FAIL("no ec");
++c;
break;
default:
break;
}
break;
default:
++c;
break;
}
break;
/* Inside a character class "[: ... :]". */
case parse_cc:
switch (*c) {
case ':':
++c;
switch (*c) {
case ']': {
unsigned int i;
/* NOTE(review): the loop condition and most of the loop body are
* not visible here; the surviving lines compare 'len' with the
* length of the class name (c - ccname). */
for (i = 0;
i++)
{
unsigned int len;
if (len !=
(unsigned int)(c - ccname))
continue;
continue;
}
if (!found)
FAIL("unknown cc");
++c;
break;
}
default:
break;
}
break;
default:
++c;
break;
}
break;
}
}
/* Every '(' must have been closed by a matching ')'. */
if (group != 0)
FAIL("group open");
/* NOTE(review): this FAIL is not guarded by any condition in the
* visible lines -- confirm against the complete file. */
FAIL("incomplete");
if (!have_atom)
FAIL("no atom");
return (sub);
/* NOTE(review): no matching #if is visible for this #endif. */
#endif
return (-1);
}