2N/A/*
2N/A * CDDL HEADER START
2N/A *
2N/A * The contents of this file are subject to the terms of the
2N/A * Common Development and Distribution License (the "License").
2N/A * You may not use this file except in compliance with the License.
2N/A *
2N/A * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
2N/A * or http://www.opensolaris.org/os/licensing.
2N/A * See the License for the specific language governing permissions
2N/A * and limitations under the License.
2N/A *
2N/A * When distributing Covered Code, include this CDDL HEADER in each
2N/A * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
2N/A * If applicable, add the following below this CDDL HEADER, with the
2N/A * fields enclosed by brackets "[]" replaced with your own identifying
2N/A * information: Portions Copyright [yyyy] [name of copyright owner]
2N/A *
2N/A * CDDL HEADER END
2N/A */
2N/A
2N/A/*
2N/A * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
2N/A * Use is subject to license terms.
2N/A */
2N/A
2N/A/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
2N/A/* All Rights Reserved */
2N/A
2N/A#pragma ident "%Z%%M% %I% %E% SMI"
2N/A
2N/A/*
2N/A * IMPORTANT NOTE:
2N/A *
2N/A * regcmp() WORKS **ONLY** WITH THE ASCII AND THE Solaris EUC CHARACTER SETS.
2N/A * IT IS **NOT** CHARACTER SET INDEPENDENT.
2N/A *
2N/A */
2N/A
2N/A#pragma weak _regcmp = regcmp
2N/A
2N/A#include "lint.h"
2N/A#include "mtlib.h"
2N/A#include <limits.h>
2N/A#include <stdarg.h>
2N/A#include <stdlib.h>
2N/A#include <thread.h>
2N/A#include <wctype.h>
2N/A#include <widec.h>
2N/A#include <string.h>
2N/A#include "tsd.h"
2N/A
2N/A
2N/A/* CONSTANTS SHARED WITH regex() */
2N/A
2N/A#include "regex.h"
2N/A
2N/A/* PRIVATE CONSTANTS */
2N/A
/* Regular-expression metacharacters recognized by regcmp() */
#define	CIRCUMFLEX		'^'
#define	DOLLAR_SIGN		'$'
#define	DOT			'.'
#define	BACKSLASH		'\\'
#define	STAR			'*'
#define	PLUS			'+'
#define	DASH			'-'
#define	COMMA			','

/* Bracketing characters: groups, character classes and repeat counts */
#define	LEFT_PAREN		'('
#define	RIGHT_PAREN		')'
#define	LEFT_SQUARE_BRACKET	'['
#define	RIGHT_SQUARE_BRACKET	']'
#define	LEFT_CURLY_BRACE	'{'
#define	RIGHT_CURLY_BRACE	'}'

/* Mask that keeps the low-order eight bits of a group length */
#define	SINGLE_BYTE_MASK	0xff

/* Number of slots in the stack of saved group-start pointers */
#define	STRINGP_STACK_SIZE	50
2N/A
2N/A/* PRIVATE GLOBAL VARIABLES */
2N/A
2N/Astatic char *compilep_stack[STRINGP_STACK_SIZE];
2N/Astatic char **compilep_stackp;
2N/Astatic mutex_t regcmp_lock = DEFAULTMUTEX;
2N/A
2N/A/* DECLARATIONS OF PRIVATE FUNCTIONS */
2N/A
2N/Astatic int add_char(char *compilep, wchar_t wchar);
2N/Astatic int add_single_char_expr(char *compilep, wchar_t wchar);
2N/A
/*
 * ERROR_EXIT() - common failure path for regcmp()
 *
 * Ends the variable argument list traversal, releases the mutex, frees
 * the partially compiled expression (if one was allocated) and makes the
 * enclosing function return a null pointer.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement; "if (x) ERROR_EXIT(...); else ..." therefore
 * behaves as written even when the caller omits braces.  Every macro
 * argument is parenthesized where it is used in an expression.
 */
#define	ERROR_EXIT(mutex_lockp, arg_listp, compile_startp) \
	do { \
		va_end(arg_listp); \
		lmutex_unlock(mutex_lockp); \
		if ((compile_startp) != (char *)0) \
			free((void *)(compile_startp)); \
		return ((char *)0); \
	} while (0)
2N/A
2N/Astatic int get_count(int *countp, const char *regexp);
2N/Astatic int get_digit(const char *regexp);
2N/Astatic int get_wchar(wchar_t *wchar, const char *regexp);
2N/Astatic char *pop_compilep(void);
2N/Astatic char *push_compilep(char *compilep);
2N/Astatic boolean_t valid_range(wchar_t lower_char, wchar_t upper_char);
2N/A
2N/A
2N/A/* DEFINITIONS OF PUBLIC VARIABLES */
2N/A
2N/Aint __i_size;
2N/A
2N/A/*
2N/A * define thread-specific storage for __i_size
2N/A *
2N/A */
2N/Aint *
2N/A___i_size(void)
2N/A{
2N/A if (thr_main())
2N/A return (&__i_size);
2N/A return ((int *)tsdalloc(_T_REGCMP_ISIZE, sizeof (int), NULL));
2N/A}
2N/A
2N/A#define __i_size (*(___i_size()))
2N/A
2N/A/* DEFINITION OF regcmp() */
2N/A
2N/Aextern char *
2N/Aregcmp(const char *regexp, ...)
2N/A{
2N/A va_list arg_listp;
2N/A size_t arg_strlen;
2N/A boolean_t can_repeat;
2N/A int char_size;
2N/A unsigned int class_length;
2N/A char *compilep;
2N/A char *compile_startp = (char *)0;
2N/A int count_length;
2N/A wchar_t current_char;
2N/A int expr_length;
2N/A int groupn;
2N/A unsigned int group_length;
2N/A unsigned int high_bits;
2N/A boolean_t dash_indicates_range;
2N/A unsigned int low_bits;
2N/A int max_count;
2N/A int min_count;
2N/A const char *next_argp;
2N/A wchar_t first_char_in_range;
2N/A char *regex_typep;
2N/A int return_arg_number;
2N/A int substringn;
2N/A
2N/A if (___i_size() == (int *)0)
2N/A return ((char *)0);
2N/A
2N/A /*
2N/A * When compiling a regular expression, regcmp() generates at most
2N/A * two extra single-byte characters for each character in the
2N/A * expression, so allocating three times the number of bytes in all
2N/A * the strings that comprise the regular expression will ensure that
2N/A * regcmp() won't overwrite the end of the allocated block when
2N/A * compiling the expression.
2N/A */
2N/A
2N/A va_start(arg_listp, regexp);
2N/A next_argp = regexp;
2N/A arg_strlen = 0;
2N/A while (next_argp != (char *)0) {
2N/A arg_strlen += strlen(next_argp);
2N/A next_argp = va_arg(arg_listp, /* const */ char *);
2N/A }
2N/A va_end(arg_listp);
2N/A
2N/A if (arg_strlen == 0)
2N/A return ((char *)0);
2N/A compile_startp = (char *)malloc(3 * arg_strlen);
2N/A if (compile_startp == (char *)0)
2N/A return ((char *)0);
2N/A
2N/A lmutex_lock(&regcmp_lock);
2N/A __i_size = 0;
2N/A compilep = compile_startp;
2N/A compilep_stackp = &compilep_stack[STRINGP_STACK_SIZE];
2N/A
2N/A /* GET THE FIRST CHARACTER IN THE REGULAR EXPRESSION */
2N/A va_start(arg_listp, regexp);
2N/A next_argp = va_arg(arg_listp, /* const */ char *);
2N/A char_size = get_wchar(&current_char, regexp);
2N/A if (char_size < 0) {
2N/A ERROR_EXIT(&regcmp_lock, arg_listp, compile_startp);
2N/A } else if (char_size > 0) {
2N/A regexp += char_size;
2N/A } else /* (char_size == 0 ) */ {
2N/A regexp = next_argp;
2N/A next_argp = va_arg(arg_listp, /* const */ char *);
2N/A char_size = get_wchar(&current_char, regexp);
2N/A if (char_size <= 0) {
2N/A ERROR_EXIT(&regcmp_lock, arg_listp, compile_startp);
2N/A } else {
2N/A regexp += char_size;
2N/A }
2N/A }
2N/A
2N/A /* FIND OUT IF THE EXPRESSION MUST START AT THE START OF A STRING */
2N/A
2N/A if (current_char == CIRCUMFLEX) {
2N/A char_size = get_wchar(&current_char, regexp);
2N/A if (char_size < 0) {
2N/A ERROR_EXIT(&regcmp_lock, arg_listp, compile_startp);
2N/A } else if (char_size > 0) {
2N/A regexp += char_size;
2N/A *compilep = (unsigned char)START_OF_STRING_MARK;
2N/A compilep++;
2N/A } else if /* (char_size == 0) && */ (next_argp != (char *)0) {
2N/A regexp = next_argp;
2N/A next_argp = va_arg(arg_listp, /* const */ char *);
2N/A char_size = get_wchar(&current_char, regexp);
2N/A if (char_size <= 0) {
2N/A ERROR_EXIT(&regcmp_lock, arg_listp,
2N/A compile_startp);
2N/A } else {
2N/A regexp += char_size;
2N/A }
2N/A *compilep = (unsigned char)START_OF_STRING_MARK;
2N/A compilep++;
2N/A } else {
2N/A /* ((char_size==0) && (next_argp==(char *)0)) */
2N/A /*
2N/A * the regular expression is "^"
2N/A */
2N/A *compilep = (unsigned char)START_OF_STRING_MARK;
2N/A compilep++;
2N/A *compilep = (unsigned char)END_REGEX;
2N/A compilep++;
2N/A *compilep = '\0';
2N/A compilep++;
2N/A __i_size = (int)(compilep - compile_startp);
2N/A va_end(arg_listp);
2N/A lmutex_unlock(&regcmp_lock);
2N/A return (compile_startp);
2N/A }
2N/A }
2N/A
2N/A /* COMPILE THE REGULAR EXPRESSION */
2N/A
2N/A groupn = 0;
2N/A substringn = 0;
2N/A can_repeat = B_FALSE;
2N/A for (;;) {
2N/A
2N/A /*
2N/A * At the end of each iteration get the next character
2N/A * from the regular expression and increment regexp to
2N/A * point to the following character. Exit when all
2N/A * the characters in all the strings in the argument
2N/A * list have been read.
2N/A */
2N/A
2N/A switch (current_char) {
2N/A
2N/A /*
2N/A * No fall-through. Each case ends with either
2N/A * a break or an error exit. Each case starts
2N/A * with compilep addressing the next location to
2N/A * be written in the compiled regular expression,
2N/A * and with regexp addressing the next character
2N/A * to be read from the regular expression being
2N/A * compiled. Each case that doesn't return
2N/A * increments regexp to address the next character
2N/A * to be read from the regular expression and
2N/A * increments compilep to address the next
2N/A * location to be written in the compiled
2N/A * regular expression.
2N/A *
2N/A * NOTE: The comments for each case give the meaning
2N/A * of the regular expression compiled by the case
2N/A * and the character string written to the compiled
2N/A * regular expression by the case. Each single
2N/A * character
2N/A * written to the compiled regular expression is
2N/A * shown enclosed in angle brackets (<>). Each
2N/A * compiled regular expression begins with a marker
2N/A * character which is shown as a named constant
2N/A * (e.g. <ASCII_CHAR>). Character constants are
2N/A * shown enclosed in single quotes (e.g. <'$'>).
2N/A * All other single characters written to the
2N/A * compiled regular expression are shown as lower
2N/A * case variable names (e.g. <ascii_char> or
2N/A * <multibyte_char>). Multicharacter
2N/A * strings written to the compiled regular expression
2N/A * are shown as variable names followed by elipses
2N/A * (e.g. <regex...>).
2N/A */
2N/A
2N/A case DOLLAR_SIGN:
2N/A /* end of string marker or simple dollar sign */
2N/A /* compiles to <END_OF_STRING_MARK> or */
2N/A /* <ASCII_CHAR><'$'> */
2N/A
2N/A char_size = get_wchar(&current_char, regexp);
2N/A if ((char_size == 0) && (next_argp == (char *)0)) {
2N/A can_repeat = B_FALSE;
2N/A *compilep = (unsigned char)END_OF_STRING_MARK;
2N/A compilep++;
2N/A } else {
2N/A can_repeat = B_TRUE;
2N/A *compilep = (unsigned char)ASCII_CHAR;
2N/A regex_typep = compilep;
2N/A compilep++;
2N/A *compilep = DOLLAR_SIGN;
2N/A compilep++;
2N/A }
2N/A break; /* end case DOLLAR_SIGN */
2N/A
2N/A case DOT: /* any character */
2N/A
2N/A /* compiles to <ANY_CHAR> */
2N/A
2N/A can_repeat = B_TRUE;
2N/A *compilep = (unsigned char)ANY_CHAR;
2N/A regex_typep = compilep;
2N/A compilep++;
2N/A
2N/A break; /* end case DOT */
2N/A
2N/A case BACKSLASH: /* escaped character */
2N/A
2N/A /*
2N/A * compiles to <ASCII_CHAR><ascii_char> or
2N/A * <MULTIBYTE_CHAR><multibyte_char>
2N/A */
2N/A
2N/A char_size = get_wchar(&current_char, regexp);
2N/A if (char_size <= 0) {
2N/A ERROR_EXIT(&regcmp_lock, arg_listp,
2N/A compile_startp);
2N/A } else {
2N/A regexp += char_size;
2N/A can_repeat = B_TRUE;
2N/A expr_length = add_single_char_expr(
2N/A compilep, current_char);
2N/A regex_typep = compilep;
2N/A compilep += expr_length;
2N/A }
2N/A break; /* end case '\\' */
2N/A
2N/A case LEFT_SQUARE_BRACKET:
2N/A /* start of a character class expression */
2N/A
2N/A /*
2N/A * [^...c...] compiles to
2N/A * <NOT_IN_CLASS><class_length><...c...>
2N/A * [^...a-z...] compiles to
2N/A * <NOT_IN_CLASS><class_length><...a<THRU>z...>
2N/A * [...c...] compiles to
2N/A * <IN_CLASS><class_length><...c...>
2N/A * [...a-z...] compiles to
2N/A * <IN_CLASS><class_length><...a<THRU>z...>
2N/A *
2N/A * NOTE: <class_length> includes the
2N/A * <class_length> byte
2N/A */
2N/A
2N/A can_repeat = B_TRUE;
2N/A regex_typep = compilep;
2N/A
2N/A /* DETERMINE THE CLASS TYPE */
2N/A
2N/A /*
2N/A * NOTE: This algorithm checks the value of the
2N/A * "multibyte"
2N/A * macro in <euc.h> (included in <widec.h> )
2N/A * to find out if regcmp()
2N/A * is compiling the regular expression in a
2N/A * multibyte locale.
2N/A */
2N/A char_size = get_wchar(&current_char, regexp);
2N/A if (char_size <= 0) {
2N/A ERROR_EXIT(&regcmp_lock, arg_listp,
2N/A compile_startp);
2N/A } else if (current_char == CIRCUMFLEX) {
2N/A regexp++;
2N/A char_size = get_wchar(&current_char, regexp);
2N/A if (char_size <= 0) {
2N/A ERROR_EXIT(&regcmp_lock,
2N/A arg_listp, compile_startp);
2N/A } else {
2N/A regexp += char_size;
2N/A if (!multibyte) {
2N/A *compilep = (unsigned char)
2N/A NOT_IN_ASCII_CHAR_CLASS;
2N/A } else {
2N/A *compilep = (unsigned char)
2N/A NOT_IN_MULTIBYTE_CHAR_CLASS;
2N/A }
2N/A /* leave space for <class_length> */
2N/A compilep += 2;
2N/A }
2N/A } else {
2N/A regexp += char_size;
2N/A if (!multibyte) {
2N/A *compilep = (unsigned char)
2N/A IN_ASCII_CHAR_CLASS;
2N/A } else {
2N/A *compilep = (unsigned char)
2N/A IN_MULTIBYTE_CHAR_CLASS;
2N/A }
2N/A /* leave space for <class_length> */
2N/A compilep += 2;
2N/A }
2N/A
2N/A /* COMPILE THE CLASS */
2N/A /*
2N/A * check for a leading right square bracket,
2N/A * which is allowed
2N/A */
2N/A
2N/A if (current_char == RIGHT_SQUARE_BRACKET) {
2N/A /*
2N/A * the leading RIGHT_SQUARE_BRACKET may
2N/A * be part of a character range
2N/A * expression like "[]-\]"
2N/A */
2N/A dash_indicates_range = B_TRUE;
2N/A first_char_in_range = current_char;
2N/A char_size = get_wchar(&current_char, regexp);
2N/A if (char_size <= 0) {
2N/A ERROR_EXIT(&regcmp_lock,
2N/A arg_listp, compile_startp);
2N/A } else {
2N/A regexp += char_size;
2N/A *compilep = RIGHT_SQUARE_BRACKET;
2N/A compilep++;
2N/A }
2N/A } else {
2N/A /*
2N/A * decode the character in the following
2N/A * while loop and decide then if it can
2N/A * be the first character
2N/A * in a character range expression
2N/A */
2N/A dash_indicates_range = B_FALSE;
2N/A }
2N/A
2N/A while (current_char != RIGHT_SQUARE_BRACKET) {
2N/A if (current_char != DASH) {
2N/A /*
2N/A * if a DASH follows current_char,
2N/A * current_char, the DASH and the
2N/A * character that follows the DASH
2N/A * may form a character range
2N/A * expression
2N/A */
2N/A dash_indicates_range = B_TRUE;
2N/A first_char_in_range = current_char;
2N/A expr_length = add_char(
2N/A compilep, current_char);
2N/A compilep += expr_length;
2N/A
2N/A } else if /* (current_char == DASH) && */
2N/A (dash_indicates_range == B_FALSE) {
2N/A /*
2N/A * current_char is a DASH, but
2N/A * either begins the entire
2N/A * character class or follows a
2N/A * character that's already
2N/A * part of a character range
2N/A * expression, so it simply
2N/A * represents the DASH character
2N/A * itself
2N/A */
2N/A *compilep = DASH;
2N/A compilep ++;
2N/A /*
2N/A * if another DASH follows this
2N/A * one, this DASH is part
2N/A * of a character range expression
2N/A * like "[--\]"
2N/A */
2N/A dash_indicates_range = B_TRUE;
2N/A first_char_in_range = current_char;
2N/A
2N/A } else /* ((current_char == DASH && */
2N/A /* (dash_indicates_range == B_TRUE)) */ {
2N/A /*
2N/A * the DASH appears after a single
2N/A * character that isn't
2N/A * already part of a character
2N/A * range expression, so it
2N/A * and the characters preceding
2N/A * and following it can form a
2N/A * character range expression
2N/A * like "[a-z]"
2N/A */
2N/A char_size = get_wchar(
2N/A &current_char, regexp);
2N/A if (char_size <= 0) {
2N/A ERROR_EXIT(&regcmp_lock,
2N/A arg_listp, compile_startp);
2N/A
2N/A } else if (current_char ==
2N/A RIGHT_SQUARE_BRACKET) {
2N/A /*
2N/A * the preceding DASH is
2N/A * the last character in the
2N/A * class and represents the
2N/A * DASH character itself
2N/A */
2N/A *compilep = DASH;
2N/A compilep++;
2N/A
2N/A } else if (valid_range(
2N/A first_char_in_range,
2N/A current_char) == B_FALSE) {
2N/A
2N/A ERROR_EXIT(&regcmp_lock,
2N/A arg_listp, compile_startp);
2N/A
2N/A } else {
2N/A /*
2N/A * the DASH is part of a
2N/A * character range
2N/A * expression; encode the
2N/A * rest of the expression
2N/A */
2N/A regexp += char_size;
2N/A *compilep = (unsigned char)
2N/A THRU;
2N/A compilep++;
2N/A expr_length = add_char(
2N/A compilep, current_char);
2N/A compilep += expr_length;
2N/A /*
2N/A * if a DASH follows this
2N/A * character range
2N/A * expression,
2N/A * it represents the DASH
2N/A * character itself
2N/A */
2N/A dash_indicates_range =
2N/A B_FALSE;
2N/A }
2N/A }
2N/A
2N/A /* GET THE NEXT CHARACTER */
2N/A
2N/A char_size = get_wchar(&current_char, regexp);
2N/A if (char_size <= 0) {
2N/A ERROR_EXIT(&regcmp_lock,
2N/A arg_listp, compile_startp);
2N/A } else {
2N/A regexp += char_size;
2N/A }
2N/A
2N/A }
2N/A /* end while (current_char != RIGHT_SQUARE_BRACKET) */
2N/A
2N/A /* INSERT THE LENGTH OF THE CLASS INTO THE */
2N/A /* COMPILED EXPRESSION */
2N/A
2N/A class_length = (unsigned int)
2N/A (compilep - regex_typep - 1);
2N/A if ((class_length < 2) ||
2N/A (class_length > MAX_SINGLE_BYTE_INT)) {
2N/A ERROR_EXIT(&regcmp_lock, arg_listp,
2N/A compile_startp);
2N/A } else {
2N/A *(regex_typep + 1) = (unsigned char)
2N/A class_length;
2N/A }
2N/A break; /* end case LEFT_SQUARE_BRACKET */
2N/A
2N/A case LEFT_PAREN:
2N/A
2N/A /*
2N/A * start of a parenthesized group of regular
2N/A * expressions compiles to <'\0'><'\0'>, leaving
2N/A * space in the compiled regular expression for
2N/A * <group_type|ADDED_LENGTH_BITS><group_length>
2N/A */
2N/A
2N/A if (push_compilep(compilep) == (char *)0) {
2N/A /*
2N/A * groups can contain groups, so group
2N/A * start pointers
2N/A * must be saved and restored in sequence
2N/A */
2N/A ERROR_EXIT(&regcmp_lock, arg_listp,
2N/A compile_startp);
2N/A } else {
2N/A can_repeat = B_FALSE;
2N/A *compilep = '\0'; /* for debugging */
2N/A compilep++;
2N/A *compilep = '\0'; /* for debugging */
2N/A compilep++;
2N/A }
2N/A break; /* end case LEFT_PAREN */
2N/A
2N/A case RIGHT_PAREN:
2N/A /* end of a marked group of regular expressions */
2N/A
2N/A /*
2N/A * (<regex>)$0-9 compiles to
2N/A * <SAVED_GROUP><substringn><compiled_regex...>\
2N/A * <END_SAVED_GROUP><substringn><return_arg_number>
2N/A * (<regex>)* compiles to
2N/A * <ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>
2N/A * <group_length> <compiled_regex...>
2N/A * <END_GROUP|ZERO_OR_MORE><groupn>
2N/A * (<regex>)+ compiles to
2N/A * <ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>
2N/A * <group_length>\
2N/A * <compiled_regex...><END_GROUP|ONE_OR_MORE>
2N/A * <groupn>
2N/A * (<regex>){...} compiles to
2N/A * <COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\
2N/A * <compiled_regex...><END_GROUP|COUNT><groupn>\
2N/A * <minimum_repeat_count><maximum_repeat_count>
2N/A * otherwise (<regex>) compiles to
2N/A * <SIMPLE_GROUP><blank><compiled_regex...>
2N/A * <END_GROUP><groupn>
2N/A *
2N/A * NOTE:
2N/A *
2N/A * group_length + (256 * ADDED_LENGTH_BITS) ==
2N/A * length_of(<compiled_regex...><END_GROUP|...>
2N/A * <groupn>)
2N/A * which also ==
2N/A * length_of(<group_type|ADDED_LENGTH_BITS>
2N/A * <group_length>\ <compiled_regex...>)
2N/A * groupn no longer seems to be used, but the code
2N/A * still computes it to preserve backward
2N/A * compatibility
2N/A * with earlier versions of regex().
2N/A */
2N/A
2N/A /* RETRIEVE THE ADDRESS OF THE START OF THE GROUP */
2N/A
2N/A regex_typep = pop_compilep();
2N/A if (regex_typep == (char *)0) {
2N/A ERROR_EXIT(&regcmp_lock, arg_listp,
2N/A compile_startp);
2N/A }
2N/A char_size = get_wchar(&current_char, regexp);
2N/A if (char_size < 0) {
2N/A ERROR_EXIT(&regcmp_lock, arg_listp,
2N/A compile_startp);
2N/A } else if (char_size == 0) {
2N/A *regex_typep = SIMPLE_GROUP;
2N/A can_repeat = B_TRUE;
2N/A *compilep = (unsigned char)END_GROUP;
2N/A regex_typep = compilep;
2N/A compilep++;
2N/A *compilep = (unsigned char)groupn;
2N/A groupn++;
2N/A compilep++;
2N/A } else if (current_char == DOLLAR_SIGN) {
2N/A *regex_typep = SAVED_GROUP;
2N/A regex_typep++;
2N/A *regex_typep = (char)substringn;
2N/A can_repeat = B_FALSE;
2N/A regexp ++;
2N/A return_arg_number = get_digit(regexp);
2N/A if ((return_arg_number < 0) ||
2N/A (substringn >= NSUBSTRINGS)) {
2N/A ERROR_EXIT(&regcmp_lock, arg_listp,
2N/A compile_startp);
2N/A }
2N/A regexp++;
2N/A *compilep = (unsigned char)END_SAVED_GROUP;
2N/A compilep++;
2N/A *compilep = (unsigned char)substringn;
2N/A substringn++;
2N/A compilep++;
2N/A *compilep = (unsigned char)return_arg_number;
2N/A compilep++;
2N/A } else {
2N/A switch (current_char) {
2N/A case STAR:
2N/A *regex_typep = ZERO_OR_MORE_GROUP;
2N/A break;
2N/A case PLUS:
2N/A *regex_typep = ONE_OR_MORE_GROUP;
2N/A break;
2N/A case LEFT_CURLY_BRACE:
2N/A *regex_typep = COUNTED_GROUP;
2N/A break;
2N/A default:
2N/A *regex_typep = SIMPLE_GROUP;
2N/A }
2N/A if (*regex_typep != SIMPLE_GROUP) {
2N/A group_length = (unsigned int)
2N/A (compilep - regex_typep);
2N/A if (group_length >= 1024) {
2N/A ERROR_EXIT(&regcmp_lock,
2N/A arg_listp, compile_startp);
2N/A }
2N/A high_bits = group_length >>
2N/A TIMES_256_SHIFT;
2N/A low_bits = group_length &
2N/A SINGLE_BYTE_MASK;
2N/A *regex_typep =
2N/A (unsigned char)
2N/A ((unsigned int)
2N/A *regex_typep | high_bits);
2N/A regex_typep++;
2N/A *regex_typep =
2N/A (unsigned char)low_bits;
2N/A }
2N/A can_repeat = B_TRUE;
2N/A *compilep = (unsigned char)END_GROUP;
2N/A regex_typep = compilep;
2N/A compilep++;
2N/A *compilep = (unsigned char)groupn;
2N/A groupn++;
2N/A compilep++;
2N/A }
2N/A
2N/A break; /* end case RIGHT_PAREN */
2N/A
2N/A case STAR: /* zero or more repetitions of the */
2N/A /* preceding expression */
2N/A
2N/A /*
2N/A * <regex...>* compiles to <regex_type|ZERO_OR_MORE>\
2N/A * <compiled_regex...>
2N/A * (<regex...>)* compiles to
2N/A * <ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
2N/A * <group_length><compiled_regex...>\
2N/A * <END_GROUP|ZERO_OR_MORE><groupn>
2N/A */
2N/A
2N/A if (can_repeat == B_FALSE) {
2N/A ERROR_EXIT(&regcmp_lock, arg_listp,
2N/A compile_startp);
2N/A } else {
2N/A can_repeat = B_FALSE;
2N/A *regex_typep = (unsigned char)
2N/A ((unsigned int)*regex_typep | ZERO_OR_MORE);
2N/A }
2N/A break; /* end case '*' */
2N/A
2N/A case PLUS:
2N/A /* one or more repetitions of the preceding */
2N/A /* expression */
2N/A
2N/A /*
2N/A * <regex...>+ compiles to <regex_type|ONE_OR_MORE>\
2N/A * <compiled_regex...> (<regex...>)+ compiles to
2N/A * <ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
2N/A * <group_length><compiled_regex...>\
2N/A * <END_GROUP|ONE_OR_MORE><groupn>
2N/A */
2N/A
2N/A if (can_repeat == B_FALSE) {
2N/A ERROR_EXIT(&regcmp_lock, arg_listp,
2N/A compile_startp);
2N/A } else {
2N/A can_repeat = B_FALSE;
2N/A *regex_typep =
2N/A (unsigned char)((unsigned int)*
2N/A regex_typep | ONE_OR_MORE);
2N/A }
2N/A break; /* end case '+' */
2N/A
2N/A case LEFT_CURLY_BRACE:
2N/A
2N/A /*
2N/A * repeat the preceding regular expression
2N/A * at least min_count times
2N/A * and at most max_count times
2N/A *
2N/A * <regex...>{min_count} compiles to
2N/A * <regex type|COUNT><compiled_regex...>
2N/A * <min_count><min_count>
2N/A *
2N/A * <regex...>{min_count,} compiles to
2N/A * <regex type|COUNT><compiled_regex...>
2N/A * <min_count><UNLIMITED>
2N/A *
2N/A * <regex...>{min_count,max_count} compiles to
2N/A * <regex type>|COUNT><compiled_regex...>
2N/A * <min_count><max_count>
2N/A *
2N/A * (<regex...>){min_count,max_count} compiles to
2N/A * <COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\
2N/A * <compiled_regex...><END_GROUP|COUNT><groupn>\
2N/A * <minimum_match_count><maximum_match_count>
2N/A */
2N/A
2N/A if (can_repeat == B_FALSE) {
2N/A ERROR_EXIT(&regcmp_lock, arg_listp,
2N/A compile_startp);
2N/A }
2N/A can_repeat = B_FALSE;
2N/A *regex_typep = (unsigned char)((unsigned int)*
2N/A regex_typep | COUNT);
2N/A count_length = get_count(&min_count, regexp);
2N/A if (count_length <= 0) {
2N/A ERROR_EXIT(&regcmp_lock, arg_listp,
2N/A compile_startp);
2N/A }
2N/A regexp += count_length;
2N/A
2N/A if (*regexp == RIGHT_CURLY_BRACE) { /* {min_count} */
2N/A regexp++;
2N/A max_count = min_count;
2N/A } else if (*regexp == COMMA) { /* {min_count,..} */
2N/A regexp++;
2N/A /* {min_count,} */
2N/A if (*regexp == RIGHT_CURLY_BRACE) {
2N/A regexp++;
2N/A max_count = UNLIMITED;
2N/A } else { /* {min_count,max_count} */
2N/A count_length = get_count(
2N/A &max_count, regexp);
2N/A if (count_length <= 0) {
2N/A ERROR_EXIT(&regcmp_lock,
2N/A arg_listp, compile_startp);
2N/A }
2N/A regexp += count_length;
2N/A if (*regexp != RIGHT_CURLY_BRACE) {
2N/A ERROR_EXIT(&regcmp_lock,
2N/A arg_listp, compile_startp);
2N/A }
2N/A regexp++;
2N/A }
2N/A } else { /* invalid expression */
2N/A ERROR_EXIT(&regcmp_lock, arg_listp,
2N/A compile_startp);
2N/A }
2N/A
2N/A if ((min_count > MAX_SINGLE_BYTE_INT) ||
2N/A ((max_count != UNLIMITED) &&
2N/A (min_count > max_count))) {
2N/A ERROR_EXIT(&regcmp_lock, arg_listp,
2N/A compile_startp);
2N/A } else {
2N/A *compilep = (unsigned char)min_count;
2N/A compilep++;
2N/A *compilep = (unsigned char)max_count;
2N/A compilep++;
2N/A }
2N/A break; /* end case LEFT_CURLY_BRACE */
2N/A
2N/A default: /* a single non-special character */
2N/A
2N/A /*
2N/A * compiles to <ASCII_CHAR><ascii_char> or
2N/A * <MULTIBYTE_CHAR><multibyte_char>
2N/A */
2N/A
2N/A can_repeat = B_TRUE;
2N/A regex_typep = compilep;
2N/A expr_length = add_single_char_expr(compilep,
2N/A current_char);
2N/A compilep += expr_length;
2N/A
2N/A } /* end switch (current_char) */
2N/A
2N/A /* GET THE NEXT CHARACTER FOR THE WHILE LOOP */
2N/A
2N/A char_size = get_wchar(&current_char, regexp);
2N/A if (char_size < 0) {
2N/A ERROR_EXIT(&regcmp_lock, arg_listp, compile_startp);
2N/A } else if (char_size > 0) {
2N/A regexp += char_size;
2N/A } else if /* (char_size == 0) && */ (next_argp != (char *)0) {
2N/A regexp = next_argp;
2N/A next_argp = va_arg(arg_listp, /* const */ char *);
2N/A char_size = get_wchar(&current_char, regexp);
2N/A if (char_size <= 0) {
2N/A ERROR_EXIT(&regcmp_lock, arg_listp,
2N/A compile_startp);
2N/A } else {
2N/A regexp += char_size;
2N/A }
2N/A } else /* ((char_size == 0) && (next_argp == (char *)0)) */ {
2N/A if (pop_compilep() != (char *)0) {
2N/A /* unmatched parentheses */
2N/A ERROR_EXIT(&regcmp_lock, arg_listp,
2N/A compile_startp);
2N/A }
2N/A *compilep = (unsigned char)END_REGEX;
2N/A compilep++;
2N/A *compilep = '\0';
2N/A compilep++;
2N/A __i_size = (int)(compilep - compile_startp);
2N/A va_end(arg_listp);
2N/A lmutex_unlock(&regcmp_lock);
2N/A return (compile_startp);
2N/A }
2N/A } /* end for (;;) */
2N/A
2N/A} /* regcmp() */
2N/A
2N/A
2N/A/* DEFINITIONS OF PRIVATE FUNCTIONS */
2N/A
/*
 * add_char() - append one character to a compiled character class
 *
 * Stores wchar at compilep: as a single byte when its value is no greater
 * than 0x7f, otherwise as whatever wctomb() writes for it.
 *
 * Returns the number of bytes written for a 7-bit character, or the
 * value returned by wctomb() otherwise.
 */
static int
add_char(char *compilep, wchar_t wchar)
{
	if ((unsigned int)wchar > (unsigned int)0x7f)
		return (wctomb(compilep, wchar));

	*compilep = (unsigned char)wchar;
	return (1);
}
2N/A
2N/Astatic int
2N/Aadd_single_char_expr(char *compilep, wchar_t wchar)
2N/A{
2N/A int expr_length = 0;
2N/A
2N/A if ((unsigned int)wchar <= (unsigned int)0x7f) {
2N/A *compilep = (unsigned char)ASCII_CHAR;
2N/A compilep++;
2N/A *compilep = (unsigned char)wchar;
2N/A expr_length += 2;
2N/A } else {
2N/A *compilep = (unsigned char)MULTIBYTE_CHAR;
2N/A compilep++;
2N/A expr_length++;
2N/A expr_length += wctomb(compilep, wchar);
2N/A }
2N/A return (expr_length);
2N/A}
2N/A
/*
 * get_count() - read an unsigned decimal count from a regular expression
 *
 * countp	receives the value of the digit string
 * regexp	addresses the first character to examine; may be null
 *
 * Returns the number of digit characters read (0 if regexp doesn't start
 * with a digit).  If regexp is a null pointer the function returns 0 and
 * leaves *countp untouched.
 *
 * A digit string too large for an int saturates at INT_MAX instead of
 * overflowing (signed overflow is undefined behavior); every digit is
 * still consumed and counted, so the caller's range checks reject the
 * value instead of seeing a wrapped-around number.
 */
static int
get_count(int *countp, const char *regexp)
{
	int	count = 0;
	int	count_length = 0;
	int	digit;

	if (regexp == (char *)0)
		return (0);

	while (('0' <= *regexp) && (*regexp <= '9')) {
		digit = (int)(*regexp - '0');
		if (count > ((INT_MAX - digit) / 10)) {
			/* (10 * count) + digit would exceed INT_MAX */
			count = INT_MAX;
		} else {
			count = (10 * count) + digit;
		}
		count_length++;
		regexp++;
	}
	*countp = count;
	return (count_length);
}
2N/A
/*
 * get_digit() - read a single decimal digit from a regular expression
 *
 * Returns the value (0 through 9) of the character regexp addresses, or
 * -1 if regexp is a null pointer or that character isn't a decimal digit.
 */
static int
get_digit(const char *regexp)
{
	if (regexp != (char *)0) {
		char	candidate = *regexp;

		if ((candidate >= '0') && (candidate <= '9'))
			return ((int)(candidate - '0'));
	}
	return ((int)-1);
}
2N/A
/*
 * get_wchar() - decode the next character of a regular expression
 *
 * wcharp	receives the decoded character
 * regexp	addresses the first byte of the character; may be null
 *
 * Returns 0 and stores a null wide character when regexp is a null
 * pointer or addresses the end of the string; returns 1 for a byte no
 * greater than 0x7f; otherwise returns whatever mbtowc() returns when
 * asked to decode up to MB_LEN_MAX bytes.
 */
static int
get_wchar(wchar_t *wcharp, const char *regexp)
{
	if ((regexp == (char *)0) || (*regexp == '\0')) {
		*wcharp = (wchar_t)((unsigned int)'\0');
		return (0);
	}

	if ((unsigned char)*regexp > (unsigned char)0x7f)
		return (mbtowc(wcharp, regexp, MB_LEN_MAX));

	*wcharp = (wchar_t)((unsigned int)*regexp);
	return (1);
}
2N/A
2N/Astatic char *
2N/Apop_compilep(void)
2N/A{
2N/A char *compilep;
2N/A
2N/A if (compilep_stackp >= &compilep_stack[STRINGP_STACK_SIZE]) {
2N/A return ((char *)0);
2N/A } else {
2N/A compilep = *compilep_stackp;
2N/A compilep_stackp++;
2N/A return (compilep);
2N/A }
2N/A}
2N/A
2N/Astatic char *
2N/Apush_compilep(char *compilep)
2N/A{
2N/A if (compilep_stackp <= &compilep_stack[0]) {
2N/A return ((char *)0);
2N/A } else {
2N/A compilep_stackp--;
2N/A *compilep_stackp = compilep;
2N/A return (compilep);
2N/A }
2N/A}
2N/A
2N/Astatic boolean_t
2N/Avalid_range(wchar_t lower_char, wchar_t upper_char)
2N/A{
2N/A return (((lower_char <= 0x7f) && (upper_char <= 0x7f) &&
2N/A !iswcntrl(lower_char) && !iswcntrl(upper_char) &&
2N/A (lower_char < upper_char)) ||
2N/A (((lower_char & WCHAR_CSMASK) ==
2N/A (upper_char & WCHAR_CSMASK)) &&
2N/A (lower_char < upper_char)));
2N/A}