regcmp.c revision 7257d1b4d25bfac0c802847390e98a464fd787ac
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* IMPORTANT NOTE:
*
* regcmp() WORKS **ONLY** WITH THE ASCII AND THE Solaris EUC CHARACTER SETS.
* IT IS **NOT** CHARACTER SET INDEPENDENT.
*
*/
#pragma weak _regcmp = regcmp
#include "lint.h"
#include "mtlib.h"
#include <limits.h>
#include <stdarg.h>
#include <stdlib.h>
#include <thread.h>
#include <wctype.h>
#include <widec.h>
#include <string.h>
#include "tsd.h"
/* CONSTANTS SHARED WITH regex() */
#include "regex.h"
/* PRIVATE CONSTANTS */
#define BACKSLASH '\\'
#define CIRCUMFLEX '^'
#define COMMA ','
#define DASH '-'
#define DOLLAR_SIGN '$'
#define DOT '.'
#define LEFT_CURLY_BRACE '{'
#define LEFT_PAREN '('
#define LEFT_SQUARE_BRACKET '['
#define PLUS '+'
#define RIGHT_CURLY_BRACE '}'
#define RIGHT_PAREN ')'
#define RIGHT_SQUARE_BRACKET ']'
#define SINGLE_BYTE_MASK 0xff
#define STRINGP_STACK_SIZE 50
#define STAR '*'
/* PRIVATE GLOBAL VARIABLES */
static char *compilep_stack[STRINGP_STACK_SIZE];
static char **compilep_stackp;
static mutex_t regcmp_lock = DEFAULTMUTEX;
/* DECLARATIONS OF PRIVATE FUNCTIONS */
static int add_char(char *compilep, wchar_t wchar);
static int add_single_char_expr(char *compilep, wchar_t wchar);
#define ERROR_EXIT(mutex_lockp, arg_listp, compile_startp) \
\
va_end(arg_listp); \
lmutex_unlock(mutex_lockp); \
if ((compile_startp) != (char *)0) \
free((void *)compile_startp); \
return ((char *)0)
static int get_count(int *countp, const char *regexp);
static int get_digit(const char *regexp);
static int get_wchar(wchar_t *wchar, const char *regexp);
static char *pop_compilep(void);
static char *push_compilep(char *compilep);
static boolean_t valid_range(wchar_t lower_char, wchar_t upper_char);
/* DEFINITIONS OF PUBLIC VARIABLES */
int __i_size;
/*
* define thread-specific storage for __i_size
*
*/
int *
___i_size(void)
{
if (thr_main())
return (&__i_size);
return ((int *)tsdalloc(_T_REGCMP_ISIZE, sizeof (int), NULL));
}
#define __i_size (*(___i_size()))
/* DEFINITION OF regcmp() */
extern char *
regcmp(const char *regexp, ...)
{
va_list arg_listp;
size_t arg_strlen;
boolean_t can_repeat;
int char_size;
unsigned int class_length;
char *compilep;
char *compile_startp = (char *)0;
int count_length;
wchar_t current_char;
int expr_length;
int groupn;
unsigned int group_length;
unsigned int high_bits;
boolean_t dash_indicates_range;
unsigned int low_bits;
int max_count;
int min_count;
const char *next_argp;
wchar_t first_char_in_range;
char *regex_typep;
int return_arg_number;
int substringn;
if (___i_size() == (int *)0)
return ((char *)0);
/*
* When compiling a regular expression, regcmp() generates at most
* two extra single-byte characters for each character in the
* expression, so allocating three times the number of bytes in all
* the strings that comprise the regular expression will ensure that
* regcmp() won't overwrite the end of the allocated block when
* compiling the expression.
*/
va_start(arg_listp, regexp);
next_argp = regexp;
arg_strlen = 0;
while (next_argp != (char *)0) {
arg_strlen += strlen(next_argp);
next_argp = va_arg(arg_listp, /* const */ char *);
}
va_end(arg_listp);
if (arg_strlen == 0)
return ((char *)0);
compile_startp = (char *)malloc(3 * arg_strlen);
if (compile_startp == (char *)0)
return ((char *)0);
lmutex_lock(&regcmp_lock);
__i_size = 0;
compilep = compile_startp;
compilep_stackp = &compilep_stack[STRINGP_STACK_SIZE];
/* GET THE FIRST CHARACTER IN THE REGULAR EXPRESSION */
va_start(arg_listp, regexp);
next_argp = va_arg(arg_listp, /* const */ char *);
char_size = get_wchar(&current_char, regexp);
if (char_size < 0) {
ERROR_EXIT(&regcmp_lock, arg_listp, compile_startp);
} else if (char_size > 0) {
regexp += char_size;
} else /* (char_size == 0 ) */ {
regexp = next_argp;
next_argp = va_arg(arg_listp, /* const */ char *);
char_size = get_wchar(&current_char, regexp);
if (char_size <= 0) {
ERROR_EXIT(&regcmp_lock, arg_listp, compile_startp);
} else {
regexp += char_size;
}
}
/* FIND OUT IF THE EXPRESSION MUST START AT THE START OF A STRING */
if (current_char == CIRCUMFLEX) {
char_size = get_wchar(&current_char, regexp);
if (char_size < 0) {
ERROR_EXIT(&regcmp_lock, arg_listp, compile_startp);
} else if (char_size > 0) {
regexp += char_size;
*compilep = (unsigned char)START_OF_STRING_MARK;
compilep++;
} else if /* (char_size == 0) && */ (next_argp != (char *)0) {
regexp = next_argp;
next_argp = va_arg(arg_listp, /* const */ char *);
char_size = get_wchar(&current_char, regexp);
if (char_size <= 0) {
ERROR_EXIT(&regcmp_lock, arg_listp,
compile_startp);
} else {
regexp += char_size;
}
*compilep = (unsigned char)START_OF_STRING_MARK;
compilep++;
} else {
/* ((char_size==0) && (next_argp==(char *)0)) */
/*
* the regular expression is "^"
*/
*compilep = (unsigned char)START_OF_STRING_MARK;
compilep++;
*compilep = (unsigned char)END_REGEX;
compilep++;
*compilep = '\0';
compilep++;
__i_size = (int)(compilep - compile_startp);
va_end(arg_listp);
lmutex_unlock(&regcmp_lock);
return (compile_startp);
}
}
/* COMPILE THE REGULAR EXPRESSION */
groupn = 0;
substringn = 0;
can_repeat = B_FALSE;
for (;;) {
/*
* At the end of each iteration get the next character
* from the regular expression and increment regexp to
* point to the following character. Exit when all
* the characters in all the strings in the argument
* list have been read.
*/
switch (current_char) {
/*
* No fall-through. Each case ends with either
* a break or an error exit. Each case starts
* with compilep addressing the next location to
* be written in the compiled regular expression,
* and with regexp addressing the next character
* to be read from the regular expression being
* compiled. Each case that doesn't return
* increments regexp to address the next character
* to be read from the regular expression and
* increments compilep to address the next
* location to be written in the compiled
* regular expression.
*
* NOTE: The comments for each case give the meaning
* of the regular expression compiled by the case
* and the character string written to the compiled
* regular expression by the case. Each single
* character
* written to the compiled regular expression is
* shown enclosed in angle brackets (<>). Each
* compiled regular expression begins with a marker
* character which is shown as a named constant
* (e.g. <ASCII_CHAR>). Character constants are
* shown enclosed in single quotes (e.g. <'$'>).
* All other single characters written to the
* compiled regular expression are shown as lower
* case variable names (e.g. <ascii_char> or
* <multibyte_char>). Multicharacter
* strings written to the compiled regular expression
* are shown as variable names followed by elipses
* (e.g. <regex...>).
*/
case DOLLAR_SIGN:
/* end of string marker or simple dollar sign */
/* compiles to <END_OF_STRING_MARK> or */
/* <ASCII_CHAR><'$'> */
char_size = get_wchar(&current_char, regexp);
if ((char_size == 0) && (next_argp == (char *)0)) {
can_repeat = B_FALSE;
*compilep = (unsigned char)END_OF_STRING_MARK;
compilep++;
} else {
can_repeat = B_TRUE;
*compilep = (unsigned char)ASCII_CHAR;
regex_typep = compilep;
compilep++;
*compilep = DOLLAR_SIGN;
compilep++;
}
break; /* end case DOLLAR_SIGN */
case DOT: /* any character */
/* compiles to <ANY_CHAR> */
can_repeat = B_TRUE;
*compilep = (unsigned char)ANY_CHAR;
regex_typep = compilep;
compilep++;
break; /* end case DOT */
case BACKSLASH: /* escaped character */
/*
* compiles to <ASCII_CHAR><ascii_char> or
* <MULTIBYTE_CHAR><multibyte_char>
*/
char_size = get_wchar(&current_char, regexp);
if (char_size <= 0) {
ERROR_EXIT(&regcmp_lock, arg_listp,
compile_startp);
} else {
regexp += char_size;
can_repeat = B_TRUE;
expr_length = add_single_char_expr(
compilep, current_char);
regex_typep = compilep;
compilep += expr_length;
}
break; /* end case '\\' */
case LEFT_SQUARE_BRACKET:
/* start of a character class expression */
/*
* [^...c...] compiles to
* <NOT_IN_CLASS><class_length><...c...>
* [^...a-z...] compiles to
* <NOT_IN_CLASS><class_length><...a<THRU>z...>
* [...c...] compiles to
* <IN_CLASS><class_length><...c...>
* [...a-z...] compiles to
* <IN_CLASS><class_length><...a<THRU>z...>
*
* NOTE: <class_length> includes the
* <class_length> byte
*/
can_repeat = B_TRUE;
regex_typep = compilep;
/* DETERMINE THE CLASS TYPE */
/*
* NOTE: This algorithm checks the value of the
* "multibyte"
* macro in <euc.h> (included in <widec.h> )
* to find out if regcmp()
* is compiling the regular expression in a
* multibyte locale.
*/
char_size = get_wchar(&current_char, regexp);
if (char_size <= 0) {
ERROR_EXIT(&regcmp_lock, arg_listp,
compile_startp);
} else if (current_char == CIRCUMFLEX) {
regexp++;
char_size = get_wchar(&current_char, regexp);
if (char_size <= 0) {
ERROR_EXIT(&regcmp_lock,
arg_listp, compile_startp);
} else {
regexp += char_size;
if (!multibyte) {
*compilep = (unsigned char)
NOT_IN_ASCII_CHAR_CLASS;
} else {
*compilep = (unsigned char)
NOT_IN_MULTIBYTE_CHAR_CLASS;
}
/* leave space for <class_length> */
compilep += 2;
}
} else {
regexp += char_size;
if (!multibyte) {
*compilep = (unsigned char)
IN_ASCII_CHAR_CLASS;
} else {
*compilep = (unsigned char)
IN_MULTIBYTE_CHAR_CLASS;
}
/* leave space for <class_length> */
compilep += 2;
}
/* COMPILE THE CLASS */
/*
* check for a leading right square bracket,
* which is allowed
*/
if (current_char == RIGHT_SQUARE_BRACKET) {
/*
* the leading RIGHT_SQUARE_BRACKET may
* be part of a character range
* expression like "[]-\]"
*/
dash_indicates_range = B_TRUE;
first_char_in_range = current_char;
char_size = get_wchar(&current_char, regexp);
if (char_size <= 0) {
ERROR_EXIT(&regcmp_lock,
arg_listp, compile_startp);
} else {
regexp += char_size;
*compilep = RIGHT_SQUARE_BRACKET;
compilep++;
}
} else {
/*
* decode the character in the following
* while loop and decide then if it can
* be the first character
* in a character range expression
*/
dash_indicates_range = B_FALSE;
}
while (current_char != RIGHT_SQUARE_BRACKET) {
if (current_char != DASH) {
/*
* if a DASH follows current_char,
* current_char, the DASH and the
* character that follows the DASH
* may form a character range
* expression
*/
dash_indicates_range = B_TRUE;
first_char_in_range = current_char;
expr_length = add_char(
compilep, current_char);
compilep += expr_length;
} else if /* (current_char == DASH) && */
(dash_indicates_range == B_FALSE) {
/*
* current_char is a DASH, but
* either begins the entire
* character class or follows a
* character that's already
* part of a character range
* expression, so it simply
* represents the DASH character
* itself
*/
*compilep = DASH;
compilep ++;
/*
* if another DASH follows this
* one, this DASH is part
* of a character range expression
* like "[--\]"
*/
dash_indicates_range = B_TRUE;
first_char_in_range = current_char;
} else /* ((current_char == DASH && */
/* (dash_indicates_range == B_TRUE)) */ {
/*
* the DASH appears after a single
* character that isn't
* already part of a character
* range expression, so it
* and the characters preceding
* and following it can form a
* character range expression
* like "[a-z]"
*/
char_size = get_wchar(
&current_char, regexp);
if (char_size <= 0) {
ERROR_EXIT(&regcmp_lock,
arg_listp, compile_startp);
} else if (current_char ==
RIGHT_SQUARE_BRACKET) {
/*
* the preceding DASH is
* the last character in the
* class and represents the
* DASH character itself
*/
*compilep = DASH;
compilep++;
} else if (valid_range(
first_char_in_range,
current_char) == B_FALSE) {
ERROR_EXIT(&regcmp_lock,
arg_listp, compile_startp);
} else {
/*
* the DASH is part of a
* character range
* expression; encode the
* rest of the expression
*/
regexp += char_size;
*compilep = (unsigned char)
THRU;
compilep++;
expr_length = add_char(
compilep, current_char);
compilep += expr_length;
/*
* if a DASH follows this
* character range
* expression,
* it represents the DASH
* character itself
*/
dash_indicates_range =
B_FALSE;
}
}
/* GET THE NEXT CHARACTER */
char_size = get_wchar(&current_char, regexp);
if (char_size <= 0) {
ERROR_EXIT(&regcmp_lock,
arg_listp, compile_startp);
} else {
regexp += char_size;
}
}
/* end while (current_char != RIGHT_SQUARE_BRACKET) */
/* INSERT THE LENGTH OF THE CLASS INTO THE */
/* COMPILED EXPRESSION */
class_length = (unsigned int)
(compilep - regex_typep - 1);
if ((class_length < 2) ||
(class_length > MAX_SINGLE_BYTE_INT)) {
ERROR_EXIT(&regcmp_lock, arg_listp,
compile_startp);
} else {
*(regex_typep + 1) = (unsigned char)
class_length;
}
break; /* end case LEFT_SQUARE_BRACKET */
case LEFT_PAREN:
/*
* start of a parenthesized group of regular
* expressions compiles to <'\0'><'\0'>, leaving
* space in the compiled regular expression for
* <group_type|ADDED_LENGTH_BITS><group_length>
*/
if (push_compilep(compilep) == (char *)0) {
/*
* groups can contain groups, so group
* start pointers
* must be saved and restored in sequence
*/
ERROR_EXIT(&regcmp_lock, arg_listp,
compile_startp);
} else {
can_repeat = B_FALSE;
*compilep = '\0'; /* for debugging */
compilep++;
*compilep = '\0'; /* for debugging */
compilep++;
}
break; /* end case LEFT_PAREN */
case RIGHT_PAREN:
/* end of a marked group of regular expressions */
/*
* (<regex>)$0-9 compiles to
* <SAVED_GROUP><substringn><compiled_regex...>\
* <END_SAVED_GROUP><substringn><return_arg_number>
* (<regex>)* compiles to
* <ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>
* <group_length> <compiled_regex...>
* <END_GROUP|ZERO_OR_MORE><groupn>
* (<regex>)+ compiles to
* <ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>
* <group_length>\
* <compiled_regex...><END_GROUP|ONE_OR_MORE>
* <groupn>
* (<regex>){...} compiles to
* <COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\
* <compiled_regex...><END_GROUP|COUNT><groupn>\
* <minimum_repeat_count><maximum_repeat_count>
* otherwise (<regex>) compiles to
* <SIMPLE_GROUP><blank><compiled_regex...>
* <END_GROUP><groupn>
*
* NOTE:
*
* group_length + (256 * ADDED_LENGTH_BITS) ==
* length_of(<compiled_regex...><END_GROUP|...>
* <groupn>)
* which also ==
* length_of(<group_type|ADDED_LENGTH_BITS>
* <group_length>\ <compiled_regex...>)
* groupn no longer seems to be used, but the code
* still computes it to preserve backward
* compatibility
* with earlier versions of regex().
*/
/* RETRIEVE THE ADDRESS OF THE START OF THE GROUP */
regex_typep = pop_compilep();
if (regex_typep == (char *)0) {
ERROR_EXIT(&regcmp_lock, arg_listp,
compile_startp);
}
char_size = get_wchar(&current_char, regexp);
if (char_size < 0) {
ERROR_EXIT(&regcmp_lock, arg_listp,
compile_startp);
} else if (char_size == 0) {
*regex_typep = SIMPLE_GROUP;
can_repeat = B_TRUE;
*compilep = (unsigned char)END_GROUP;
regex_typep = compilep;
compilep++;
*compilep = (unsigned char)groupn;
groupn++;
compilep++;
} else if (current_char == DOLLAR_SIGN) {
*regex_typep = SAVED_GROUP;
regex_typep++;
*regex_typep = (char)substringn;
can_repeat = B_FALSE;
regexp ++;
return_arg_number = get_digit(regexp);
if ((return_arg_number < 0) ||
(substringn >= NSUBSTRINGS)) {
ERROR_EXIT(&regcmp_lock, arg_listp,
compile_startp);
}
regexp++;
*compilep = (unsigned char)END_SAVED_GROUP;
compilep++;
*compilep = (unsigned char)substringn;
substringn++;
compilep++;
*compilep = (unsigned char)return_arg_number;
compilep++;
} else {
switch (current_char) {
case STAR:
*regex_typep = ZERO_OR_MORE_GROUP;
break;
case PLUS:
*regex_typep = ONE_OR_MORE_GROUP;
break;
case LEFT_CURLY_BRACE:
*regex_typep = COUNTED_GROUP;
break;
default:
*regex_typep = SIMPLE_GROUP;
}
if (*regex_typep != SIMPLE_GROUP) {
group_length = (unsigned int)
(compilep - regex_typep);
if (group_length >= 1024) {
ERROR_EXIT(&regcmp_lock,
arg_listp, compile_startp);
}
high_bits = group_length >>
TIMES_256_SHIFT;
low_bits = group_length &
SINGLE_BYTE_MASK;
*regex_typep =
(unsigned char)
((unsigned int)
*regex_typep | high_bits);
regex_typep++;
*regex_typep =
(unsigned char)low_bits;
}
can_repeat = B_TRUE;
*compilep = (unsigned char)END_GROUP;
regex_typep = compilep;
compilep++;
*compilep = (unsigned char)groupn;
groupn++;
compilep++;
}
break; /* end case RIGHT_PAREN */
case STAR: /* zero or more repetitions of the */
/* preceding expression */
/*
* <regex...>* compiles to <regex_type|ZERO_OR_MORE>\
* <compiled_regex...>
* (<regex...>)* compiles to
* <ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
* <group_length><compiled_regex...>\
* <END_GROUP|ZERO_OR_MORE><groupn>
*/
if (can_repeat == B_FALSE) {
ERROR_EXIT(&regcmp_lock, arg_listp,
compile_startp);
} else {
can_repeat = B_FALSE;
*regex_typep = (unsigned char)
((unsigned int)*regex_typep | ZERO_OR_MORE);
}
break; /* end case '*' */
case PLUS:
/* one or more repetitions of the preceding */
/* expression */
/*
* <regex...>+ compiles to <regex_type|ONE_OR_MORE>\
* <compiled_regex...> (<regex...>)+ compiles to
* <ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
* <group_length><compiled_regex...>\
* <END_GROUP|ONE_OR_MORE><groupn>
*/
if (can_repeat == B_FALSE) {
ERROR_EXIT(&regcmp_lock, arg_listp,
compile_startp);
} else {
can_repeat = B_FALSE;
*regex_typep =
(unsigned char)((unsigned int)*
regex_typep | ONE_OR_MORE);
}
break; /* end case '+' */
case LEFT_CURLY_BRACE:
/*
* repeat the preceding regular expression
* at least min_count times
* and at most max_count times
*
* <regex...>{min_count} compiles to
* <regex type|COUNT><compiled_regex...>
* <min_count><min_count>
*
* <regex...>{min_count,} compiles to
* <regex type|COUNT><compiled_regex...>
* <min_count><UNLIMITED>
*
* <regex...>{min_count,max_count} compiles to
* <regex type>|COUNT><compiled_regex...>
* <min_count><max_count>
*
* (<regex...>){min_count,max_count} compiles to
* <COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\
* <compiled_regex...><END_GROUP|COUNT><groupn>\
* <minimum_match_count><maximum_match_count>
*/
if (can_repeat == B_FALSE) {
ERROR_EXIT(&regcmp_lock, arg_listp,
compile_startp);
}
can_repeat = B_FALSE;
*regex_typep = (unsigned char)((unsigned int)*
regex_typep | COUNT);
count_length = get_count(&min_count, regexp);
if (count_length <= 0) {
ERROR_EXIT(&regcmp_lock, arg_listp,
compile_startp);
}
regexp += count_length;
if (*regexp == RIGHT_CURLY_BRACE) { /* {min_count} */
regexp++;
max_count = min_count;
} else if (*regexp == COMMA) { /* {min_count,..} */
regexp++;
/* {min_count,} */
if (*regexp == RIGHT_CURLY_BRACE) {
regexp++;
max_count = UNLIMITED;
} else { /* {min_count,max_count} */
count_length = get_count(
&max_count, regexp);
if (count_length <= 0) {
ERROR_EXIT(&regcmp_lock,
arg_listp, compile_startp);
}
regexp += count_length;
if (*regexp != RIGHT_CURLY_BRACE) {
ERROR_EXIT(&regcmp_lock,
arg_listp, compile_startp);
}
regexp++;
}
} else { /* invalid expression */
ERROR_EXIT(&regcmp_lock, arg_listp,
compile_startp);
}
if ((min_count > MAX_SINGLE_BYTE_INT) ||
((max_count != UNLIMITED) &&
(min_count > max_count))) {
ERROR_EXIT(&regcmp_lock, arg_listp,
compile_startp);
} else {
*compilep = (unsigned char)min_count;
compilep++;
*compilep = (unsigned char)max_count;
compilep++;
}
break; /* end case LEFT_CURLY_BRACE */
default: /* a single non-special character */
/*
* compiles to <ASCII_CHAR><ascii_char> or
* <MULTIBYTE_CHAR><multibyte_char>
*/
can_repeat = B_TRUE;
regex_typep = compilep;
expr_length = add_single_char_expr(compilep,
current_char);
compilep += expr_length;
} /* end switch (current_char) */
/* GET THE NEXT CHARACTER FOR THE WHILE LOOP */
char_size = get_wchar(&current_char, regexp);
if (char_size < 0) {
ERROR_EXIT(&regcmp_lock, arg_listp, compile_startp);
} else if (char_size > 0) {
regexp += char_size;
} else if /* (char_size == 0) && */ (next_argp != (char *)0) {
regexp = next_argp;
next_argp = va_arg(arg_listp, /* const */ char *);
char_size = get_wchar(&current_char, regexp);
if (char_size <= 0) {
ERROR_EXIT(&regcmp_lock, arg_listp,
compile_startp);
} else {
regexp += char_size;
}
} else /* ((char_size == 0) && (next_argp == (char *)0)) */ {
if (pop_compilep() != (char *)0) {
/* unmatched parentheses */
ERROR_EXIT(&regcmp_lock, arg_listp,
compile_startp);
}
*compilep = (unsigned char)END_REGEX;
compilep++;
*compilep = '\0';
compilep++;
__i_size = (int)(compilep - compile_startp);
va_end(arg_listp);
lmutex_unlock(&regcmp_lock);
return (compile_startp);
}
} /* end for (;;) */
} /* regcmp() */
/* DEFINITIONS OF PRIVATE FUNCTIONS */
static int
add_char(char *compilep, wchar_t wchar)
{
int expr_length;
if ((unsigned int)wchar <= (unsigned int)0x7f) {
*compilep = (unsigned char)wchar;
expr_length = 1;
} else {
expr_length = wctomb(compilep, wchar);
}
return (expr_length);
}
static int
add_single_char_expr(char *compilep, wchar_t wchar)
{
int expr_length = 0;
if ((unsigned int)wchar <= (unsigned int)0x7f) {
*compilep = (unsigned char)ASCII_CHAR;
compilep++;
*compilep = (unsigned char)wchar;
expr_length += 2;
} else {
*compilep = (unsigned char)MULTIBYTE_CHAR;
compilep++;
expr_length++;
expr_length += wctomb(compilep, wchar);
}
return (expr_length);
}
static int
get_count(int *countp, const char *regexp)
{
char count_char = '0';
int count = 0;
int count_length = 0;
if (regexp == (char *)0) {
return ((int)0);
} else {
count_char = *regexp;
while (('0' <= count_char) && (count_char <= '9')) {
count = (10 * count) + (int)(count_char - '0');
count_length++;
regexp++;
count_char = *regexp;
}
}
*countp = count;
return (count_length);
}
static int
get_digit(const char *regexp)
{
char digit;
if (regexp == (char *)0) {
return ((int)-1);
} else {
digit = *regexp;
if (('0' <= digit) && (digit <= '9')) {
return ((int)(digit - '0'));
} else {
return ((int)-1);
}
}
}
static int
get_wchar(wchar_t *wcharp, const char *regexp)
{
int char_size;
if (regexp == (char *)0) {
char_size = 0;
*wcharp = (wchar_t)((unsigned int)'\0');
} else if (*regexp == '\0') {
char_size = 0;
*wcharp = (wchar_t)((unsigned int)*regexp);
} else if ((unsigned char)*regexp <= (unsigned char)0x7f) {
char_size = 1;
*wcharp = (wchar_t)((unsigned int)*regexp);
} else {
char_size = mbtowc(wcharp, regexp, MB_LEN_MAX);
}
return (char_size);
}
static char *
pop_compilep(void)
{
char *compilep;
if (compilep_stackp >= &compilep_stack[STRINGP_STACK_SIZE]) {
return ((char *)0);
} else {
compilep = *compilep_stackp;
compilep_stackp++;
return (compilep);
}
}
static char *
push_compilep(char *compilep)
{
if (compilep_stackp <= &compilep_stack[0]) {
return ((char *)0);
} else {
compilep_stackp--;
*compilep_stackp = compilep;
return (compilep);
}
}
static boolean_t
valid_range(wchar_t lower_char, wchar_t upper_char)
{
return (((lower_char <= 0x7f) && (upper_char <= 0x7f) &&
!iswcntrl(lower_char) && !iswcntrl(upper_char) &&
(lower_char < upper_char)) ||
(((lower_char & WCHAR_CSMASK) ==
(upper_char & WCHAR_CSMASK)) &&
(lower_char < upper_char)));
}