2N/A/*
2N/A * CDDL HEADER START
2N/A *
2N/A * The contents of this file are subject to the terms of the
2N/A * Common Development and Distribution License (the "License").
2N/A * You may not use this file except in compliance with the License.
2N/A *
2N/A * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
2N/A * or http://www.opensolaris.org/os/licensing.
2N/A * See the License for the specific language governing permissions
2N/A * and limitations under the License.
2N/A *
2N/A * When distributing Covered Code, include this CDDL HEADER in each
2N/A * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
2N/A * If applicable, add the following below this CDDL HEADER, with the
2N/A * fields enclosed by brackets "[]" replaced with your own identifying
2N/A * information: Portions Copyright [yyyy] [name of copyright owner]
2N/A *
2N/A * CDDL HEADER END
2N/A */
2N/A
2N/A/*
2N/A * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
2N/A * Use is subject to license terms.
2N/A */
2N/A
2N/A/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
2N/A/* All Rights Reserved */
2N/A
2N/A#pragma ident "%Z%%M% %I% %E% SMI"
2N/A
2N/A/*
2N/A * IMPORTANT NOTE:
2N/A *
2N/A * regex() WORKS **ONLY** WITH THE ASCII AND THE Solaris EUC CHARACTER SETS.
2N/A * IT IS **NOT** CHARACTER SET INDEPENDENT.
2N/A *
2N/A */
2N/A
2N/A#pragma weak _regex = regex
2N/A
2N/A#include "lint.h"
2N/A/* CONSTANTS SHARED WITH regcmp() */
2N/A#include "regex.h"
2N/A#include "mtlib.h"
2N/A#include <limits.h>
2N/A#include <stdarg.h>
2N/A#include <stdlib.h>
2N/A#include <thread.h>
2N/A#include <widec.h>
2N/A#include "tsd.h"
2N/A
2N/A
2N/A/* PRIVATE CONSTANTS */
2N/A
/*
 * Constants private to regex().
 *
 * NOTE(review): the ADD_*_TO_GROUP_LENGTH values and ADDED_LENGTH_BITS are
 * not referenced in the part of the file reviewed here; judging by their
 * names they are flag bits that extend a one-byte group length by
 * 256/512/768 -- confirm against the group cases of test_string() and
 * against the encoder in regcmp().
 */
2N/A#define ADD_256_TO_GROUP_LENGTH 0x1
2N/A#define ADD_512_TO_GROUP_LENGTH 0x2
2N/A#define ADD_768_TO_GROUP_LENGTH 0x3
2N/A#define ADDED_LENGTH_BITS 0x3
/* get_match_counts() ANDs each count byte with this to keep the low 8 bits */
2N/A#define SINGLE_BYTE_MASK 0xff
/* number of entries in stringp_stack[] */
2N/A#define STRINGP_STACK_SIZE 50
2N/A
2N/A
2N/A/* PRIVATE TYPE DEFINITIONS */
2N/A
/*
 * Selects which outcome of a character class test counts as success:
 * IN_CLASS makes the class testers return CONDITION_TRUE when the character
 * is a member of the class, NOT_IN_CLASS when it is not a member.
 */
2N/Atypedef enum {
2N/A NOT_IN_CLASS = 0,
2N/A IN_CLASS
2N/A} char_test_condition_t;
2N/A
/*
 * Result of a character class test.
 *
 * CONDITION_TRUE / CONDITION_FALSE report whether the requested condition
 * held. CHAR_TEST_ERROR is returned by test_char_against_multibyte_class()
 * when a character of the compiled class cannot be decoded.
 *
 * NOTE(review): TESTING_CHAR is not returned by any of the class testers
 * visible in this file -- presumably a leftover "still scanning" state.
 */
2N/Atypedef enum {
2N/A TESTING_CHAR = 0,
2N/A CONDITION_TRUE,
2N/A CONDITION_FALSE,
2N/A CHAR_TEST_ERROR
2N/A} char_test_result_t;
2N/A
2N/A
2N/A/* PRIVATE GLOBAL VARIABLES */
2N/A
/*
 * State shared by regex() and its helpers. regex() acquires regex_lock
 * before touching any of it and releases the lock just before returning.
 */
2N/Astatic mutex_t regex_lock = DEFAULTMUTEX;
/*
 * Per saved substring: index into regex()'s return_argp[] of the caller
 * buffer that receives it; regex() resets every entry to -1 ("none").
 */
2N/Astatic int return_arg_number[NSUBSTRINGS];
/* end (exclusive) and start of each saved substring within the subject */
2N/Astatic const char *substring_endp[NSUBSTRINGS];
2N/Astatic const char *substring_startp[NSUBSTRINGS];
/*
 * Stack of string positions used by push_stringp()/pop_stringp().
 * The stack grows downward: regex() sets stringp_stackp one past the last
 * element (empty), push pre-decrements, pop post-increments.
 */
2N/Astatic const char *stringp_stack[STRINGP_STACK_SIZE];
2N/Astatic const char **stringp_stackp;
2N/A
2N/A
2N/A/* DECLARATIONS OF PRIVATE FUNCTIONS */
2N/A
/*
 * decodes the character at stringp into *wcharp and returns its length
 * in bytes (0 at the end of the string or for a null stringp)
 */
2N/Astatic int
2N/Aget_wchar(wchar_t *wcharp,
2N/A const char *stringp);
2N/A
/*
 * reads the <minimum_match_count><maximum_match_count> byte pair at
 * count_stringp
 */
2N/Astatic void
2N/Aget_match_counts(int *nmust_matchp,
2N/A int *nextra_matches_allowedp,
2N/A const char *count_stringp);
2N/A
/* tests whether test_char lies in the range lower_char..upper_char */
2N/Astatic boolean_t
2N/Ain_wchar_range(wchar_t test_char,
2N/A wchar_t lower_char,
2N/A wchar_t upper_char);
2N/A
/* removes and returns the top of stringp_stack[], or null when empty */
2N/Astatic const char *
2N/Apop_stringp(void);
2N/A
/* returns a pointer to the multibyte character preceding current_charp */
2N/Astatic const char *
2N/Aprevious_charp(const char *current_charp);
2N/A
/* saves stringp on stringp_stack[]; returns null when the stack is full */
2N/Astatic const char *
2N/Apush_stringp(const char *stringp);
2N/A
/* class test for classes compiled as <class_length><class...> (aTHRUz) */
2N/Astatic char_test_result_t
2N/Atest_char_against_ascii_class(char test_char,
2N/A const char *classp,
2N/A char_test_condition_t test_condition);
2N/A
/* same test for classes whose members may be multibyte characters */
2N/Astatic char_test_result_t
2N/Atest_char_against_multibyte_class(wchar_t test_char,
2N/A const char *classp,
2N/A char_test_condition_t test_condition);
2N/A
2N/A
2N/A/* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */
2N/A
/* class test for the older encoding, in which a range is THRUaz */
2N/Astatic char_test_result_t
2N/Atest_char_against_old_ascii_class(char test_char,
2N/A const char *classp,
2N/A char_test_condition_t test_condition);
2N/A
/*
 * The three test_repeated_*() functions call test_string(stringp, regexp)
 * and, while it fails and stringp is still beyond repeat_startp, move
 * stringp back (by one byte, by one multibyte character, or to the
 * position popped from stringp_stack[]) and try again.
 */
2N/Astatic const char *
2N/Atest_repeated_ascii_char(const char *repeat_startp,
2N/A const char *stringp,
2N/A const char *regexp);
2N/A
2N/Astatic const char *
2N/Atest_repeated_multibyte_char(const char *repeat_startp,
2N/A const char *stringp,
2N/A const char *regexp);
2N/A
2N/Astatic const char *
2N/Atest_repeated_group(const char *repeat_startp,
2N/A const char *stringp,
2N/A const char *regexp);
2N/A
/* matches the compiled expression at regexp against the string at stringp */
2N/Astatic const char *
2N/Atest_string(const char *stringp,
2N/A const char *regexp);
2N/A
2N/A
2N/A/* DEFINITIONS OF PUBLIC VARIABLES */
2N/A
2N/Achar *__loc1;
2N/A
2N/A/*
2N/A * reserve thread-specific storage for __loc1
2N/A */
2N/Achar **
2N/A____loc1(void)
2N/A{
2N/A if (thr_main())
2N/A return (&__loc1);
2N/A return ((char **)tsdalloc(_T_REGEX_LOC1, sizeof (char *), NULL));
2N/A}
2N/A
2N/A#define __loc1 (*(____loc1()))
2N/A
2N/A/* DEFINITION OF regex() */
2N/A
2N/Aextern char *
2N/Aregex(const char *regexp, const char *stringp, ...)
2N/A{
2N/A va_list arg_listp;
2N/A int char_size;
2N/A const char *end_of_matchp;
2N/A wchar_t regex_wchar;
2N/A char *return_argp[NSUBSTRINGS];
2N/A char *returned_substringp;
2N/A int substringn;
2N/A const char *substringp;
2N/A wchar_t string_wchar;
2N/A
2N/A if (____loc1() == (char **)0) {
2N/A return ((char *)0);
2N/A } else {
2N/A lmutex_lock(&regex_lock);
2N/A __loc1 = (char *)0;
2N/A }
2N/A
2N/A if ((stringp == (char *)0) || (regexp == (char *)0)) {
2N/A lmutex_unlock(&regex_lock);
2N/A return ((char *)0);
2N/A }
2N/A
2N/A
2N/A /* INITIALIZE SUBSTRINGS THAT MIGHT BE RETURNED IN VARARGS */
2N/A
2N/A substringn = 0;
2N/A va_start(arg_listp, stringp);
2N/A while (substringn < NSUBSTRINGS) {
2N/A return_argp[substringn] = va_arg(arg_listp, char *);
2N/A substring_startp[substringn] = (char *)0;
2N/A return_arg_number[substringn] = -1;
2N/A substringn++;
2N/A }
2N/A va_end(arg_listp);
2N/A
2N/A
2N/A /* TEST THE STRING AGAINST THE REGULAR EXPRESSION */
2N/A
2N/A end_of_matchp = (char *)0;
2N/A stringp_stackp = &stringp_stack[STRINGP_STACK_SIZE];
2N/A
2N/A if ((int)*regexp == (int)START_OF_STRING_MARK) {
2N/A
2N/A /*
2N/A * the match must start at the beginning of the string
2N/A */
2N/A
2N/A __loc1 = (char *)stringp;
2N/A regexp++;
2N/A end_of_matchp = test_string(stringp, regexp);
2N/A
2N/A } else if ((int)*regexp == (int)ASCII_CHAR) {
2N/A
2N/A /*
2N/A * test a string against a regular expression
2N/A * that starts with a single ASCII character:
2N/A *
2N/A * move to each character in the string that matches
2N/A * the first character in the regular expression
2N/A * and test the remaining string
2N/A */
2N/A
2N/A while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) {
2N/A stringp++;
2N/A }
2N/A while ((end_of_matchp == (char *)0) && (*stringp != '\0')) {
2N/A end_of_matchp = test_string(stringp, regexp);
2N/A if (end_of_matchp != (char *)0) {
2N/A __loc1 = (char *)stringp;
2N/A } else {
2N/A stringp++;
2N/A while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) {
2N/A stringp++;
2N/A }
2N/A }
2N/A }
2N/A
2N/A } else if (!multibyte) {
2N/A
2N/A /*
2N/A * if the value of the "multibyte" macro defined in <euc.h>
2N/A * is false, regex() is running in an ASCII locale;
2N/A * test an ASCII string against an ASCII regular expression
2N/A * that doesn't start with a single ASCII character:
2N/A *
2N/A * move forward in the string one byte at a time, testing
2N/A * the remaining string against the regular expression
2N/A */
2N/A
2N/A end_of_matchp = test_string(stringp, regexp);
2N/A while ((end_of_matchp == (char *)0) && (*stringp != '\0')) {
2N/A stringp++;
2N/A end_of_matchp = test_string(stringp, regexp);
2N/A }
2N/A if (end_of_matchp != (char *)0) {
2N/A __loc1 = (char *)stringp;
2N/A }
2N/A
2N/A } else if ((int)*regexp == (int)MULTIBYTE_CHAR) {
2N/A
2N/A /*
2N/A * test a multibyte string against a multibyte regular expression
2N/A * that starts with a single multibyte character:
2N/A *
2N/A * move to each character in the string that matches
2N/A * the first character in the regular expression
2N/A * and test the remaining string
2N/A */
2N/A
2N/A (void) get_wchar(&regex_wchar, regexp + 1);
2N/A char_size = get_wchar(&string_wchar, stringp);
2N/A while ((string_wchar != regex_wchar) && (char_size > 0)) {
2N/A stringp += char_size;
2N/A char_size = get_wchar(&string_wchar, stringp);
2N/A }
2N/A while ((end_of_matchp == (char *)0) && (char_size > 0)) {
2N/A end_of_matchp = test_string(stringp, regexp);
2N/A if (end_of_matchp != (char *)0) {
2N/A __loc1 = (char *)stringp;
2N/A } else {
2N/A stringp += char_size;
2N/A char_size = get_wchar(&string_wchar, stringp);
2N/A while ((string_wchar != regex_wchar) && (char_size > 0)) {
2N/A stringp += char_size;
2N/A char_size = get_wchar(&string_wchar, stringp);
2N/A }
2N/A }
2N/A }
2N/A
2N/A } else {
2N/A
2N/A /*
2N/A * test a multibyte string against a multibyte regular expression
2N/A * that doesn't start with a single multibyte character
2N/A *
2N/A * move forward in the string one multibyte character at a time,
2N/A * testing the remaining string against the regular expression
2N/A */
2N/A
2N/A end_of_matchp = test_string(stringp, regexp);
2N/A char_size = get_wchar(&string_wchar, stringp);
2N/A while ((end_of_matchp == (char *)0) && (char_size > 0)) {
2N/A stringp += char_size;
2N/A end_of_matchp = test_string(stringp, regexp);
2N/A char_size = get_wchar(&string_wchar, stringp);
2N/A }
2N/A if (end_of_matchp != (char *)0) {
2N/A __loc1 = (char *)stringp;
2N/A }
2N/A }
2N/A
2N/A /*
2N/A * Return substrings that matched subexpressions for which
2N/A * matching substrings are to be returned.
2N/A *
2N/A * NOTE:
2N/A *
2N/A * According to manual page regcmp(3G), regex() returns substrings
2N/A * that match subexpressions even when no substring matches the
2N/A * entire regular expression.
2N/A */
2N/A
2N/A substringn = 0;
2N/A while (substringn < NSUBSTRINGS) {
2N/A substringp = substring_startp[substringn];
2N/A if ((substringp != (char *)0) &&
2N/A (return_arg_number[substringn] >= 0)) {
2N/A returned_substringp =
2N/A return_argp[return_arg_number[substringn]];
2N/A if (returned_substringp != (char *)0) {
2N/A while (substringp < substring_endp[substringn]) {
2N/A *returned_substringp = (char)*substringp;
2N/A returned_substringp++;
2N/A substringp++;
2N/A }
2N/A *returned_substringp = '\0';
2N/A }
2N/A }
2N/A substringn++;
2N/A }
2N/A lmutex_unlock(&regex_lock);
2N/A return ((char *)end_of_matchp);
2N/A} /* regex() */
2N/A
2N/A
2N/A/* DEFINITIONS OF PRIVATE FUNCTIONS */
2N/A
/*
 * get_wchar() - decode the character that starts at stringp
 *
 * wcharp	receives the decoded character; it is always written
 * stringp	string to decode from; may be null
 *
 * Returns the number of bytes the character occupies:
 *	0	stringp is null or addresses the terminating NUL
 *		(*wcharp is set to 0)
 *	1	the first byte is 0x7f or below; it is stored directly
 *	other	whatever mbtowc() returns for a byte above 0x7f; when that
 *		value is not positive the bytes could not be decoded and
 *		*wcharp is set to 0 so that callers which compare the
 *		character before checking the size never read a value
 *		that was not written
 */
static int
get_wchar(wchar_t *wcharp,
	const char *stringp)
{
	int char_size;

	if ((stringp == (char *)0) || (*stringp == '\0')) {
		*wcharp = (wchar_t)((unsigned int)'\0');
		return (0);
	}
	if ((unsigned char)*stringp <= (unsigned char)0x7f) {
		*wcharp = (wchar_t)((unsigned int)*stringp);
		return (1);
	}

	char_size = mbtowc(wcharp, stringp, MB_LEN_MAX);
	if (char_size <= 0) {
		/* mbtowc() stores nothing on failure; give a defined value */
		*wcharp = (wchar_t)((unsigned int)'\0');
	}
	return (char_size);
}
2N/A
2N/Astatic void
2N/Aget_match_counts(int *nmust_matchp,
2N/A int *nextra_matches_allowedp,
2N/A const char *count_stringp)
2N/A{
2N/A int minimum_match_count;
2N/A int maximum_match_count;
2N/A
2N/A minimum_match_count =
2N/A (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK);
2N/A *nmust_matchp = minimum_match_count;
2N/A
2N/A count_stringp++;
2N/A maximum_match_count =
2N/A (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK);
2N/A if (maximum_match_count == (int)UNLIMITED) {
2N/A *nextra_matches_allowedp = (int)UNLIMITED;
2N/A } else {
2N/A *nextra_matches_allowedp =
2N/A maximum_match_count - minimum_match_count;
2N/A }
2N/A return;
2N/A
2N/A} /* get_match_counts() */
2N/A
2N/Astatic boolean_t
2N/Ain_wchar_range(wchar_t test_char,
2N/A wchar_t lower_char,
2N/A wchar_t upper_char)
2N/A{
2N/A return (((lower_char <= 0x7f) && (upper_char <= 0x7f) &&
2N/A (lower_char <= test_char) && (test_char <= upper_char)) ||
2N/A (((test_char & WCHAR_CSMASK) == (lower_char & WCHAR_CSMASK)) &&
2N/A ((test_char & WCHAR_CSMASK) == (upper_char & WCHAR_CSMASK)) &&
2N/A (lower_char <= test_char) && (test_char <= upper_char)));
2N/A
2N/A} /* in_wchar_range() */
2N/A
2N/Astatic const char *
2N/Apop_stringp(void)
2N/A{
2N/A const char *stringp;
2N/A
2N/A if (stringp_stackp >= &stringp_stack[STRINGP_STACK_SIZE]) {
2N/A return ((char *)0);
2N/A } else {
2N/A stringp = *stringp_stackp;
2N/A stringp_stackp++;
2N/A return (stringp);
2N/A }
2N/A}
2N/A
2N/A
2N/Astatic const char *
2N/Aprevious_charp(const char *current_charp)
2N/A{
2N/A /*
2N/A * returns the pointer to the previous character in
2N/A * a string of multibyte characters
2N/A */
2N/A
2N/A const char *prev_cs0 = current_charp - 1;
2N/A const char *prev_cs1 = current_charp - eucw1;
2N/A const char *prev_cs2 = current_charp - eucw2 - 1;
2N/A const char *prev_cs3 = current_charp - eucw3 - 1;
2N/A const char *prev_charp;
2N/A
2N/A if ((unsigned char)*prev_cs0 <= 0x7f) {
2N/A prev_charp = prev_cs0;
2N/A } else if ((unsigned char)*prev_cs2 == SS2) {
2N/A prev_charp = prev_cs2;
2N/A } else if ((unsigned char)*prev_cs3 == SS3) {
2N/A prev_charp = prev_cs3;
2N/A } else {
2N/A prev_charp = prev_cs1;
2N/A }
2N/A return (prev_charp);
2N/A
2N/A} /* previous_charp() */
2N/A
2N/Astatic const char *
2N/Apush_stringp(const char *stringp)
2N/A{
2N/A if (stringp_stackp <= &stringp_stack[0]) {
2N/A return ((char *)0);
2N/A } else {
2N/A stringp_stackp--;
2N/A *stringp_stackp = stringp;
2N/A return (stringp);
2N/A }
2N/A}
2N/A
2N/A
2N/Astatic char_test_result_t
2N/Atest_char_against_ascii_class(char test_char,
2N/A const char *classp,
2N/A char_test_condition_t test_condition)
2N/A{
2N/A /*
2N/A * tests a character for membership in an ASCII character class compiled
2N/A * by the internationalized version of regcmp();
2N/A *
2N/A * NOTE: The internationalized version of regcmp() compiles
2N/A * the range a-z in an ASCII character class to aTHRUz.
2N/A */
2N/A
2N/A int nbytes_to_check;
2N/A
2N/A nbytes_to_check = (int)*classp;
2N/A classp++;
2N/A nbytes_to_check--;
2N/A
2N/A while (nbytes_to_check > 0) {
2N/A if (test_char == *classp) {
2N/A if (test_condition == IN_CLASS)
2N/A return (CONDITION_TRUE);
2N/A else
2N/A return (CONDITION_FALSE);
2N/A } else if (*classp == THRU) {
2N/A if ((*(classp - 1) <= test_char) &&
2N/A (test_char <= *(classp + 1))) {
2N/A if (test_condition == IN_CLASS)
2N/A return (CONDITION_TRUE);
2N/A else
2N/A return (CONDITION_FALSE);
2N/A } else {
2N/A classp += 2;
2N/A nbytes_to_check -= 2;
2N/A }
2N/A } else {
2N/A classp++;
2N/A nbytes_to_check--;
2N/A }
2N/A }
2N/A if (test_condition == NOT_IN_CLASS) {
2N/A return (CONDITION_TRUE);
2N/A } else {
2N/A return (CONDITION_FALSE);
2N/A }
2N/A} /* test_char_against_ascii_class() */
2N/A
2N/Astatic char_test_result_t
2N/Atest_char_against_multibyte_class(wchar_t test_char,
2N/A const char *classp,
2N/A char_test_condition_t test_condition)
2N/A{
2N/A /*
2N/A * tests a character for membership in a multibyte character class;
2N/A *
2N/A * NOTE: The range a-z in a multibyte character class compiles to
2N/A * aTHRUz.
2N/A */
2N/A
2N/A int char_size;
2N/A wchar_t current_char;
2N/A int nbytes_to_check;
2N/A wchar_t previous_char;
2N/A
2N/A nbytes_to_check = (int)*classp;
2N/A classp++;
2N/A nbytes_to_check--;
2N/A
2N/A char_size = get_wchar(&current_char, classp);
2N/A if (char_size <= 0) {
2N/A return (CHAR_TEST_ERROR);
2N/A } else if (test_char == current_char) {
2N/A if (test_condition == IN_CLASS) {
2N/A return (CONDITION_TRUE);
2N/A } else {
2N/A return (CONDITION_FALSE);
2N/A }
2N/A } else {
2N/A classp += char_size;
2N/A nbytes_to_check -= char_size;
2N/A }
2N/A
2N/A while (nbytes_to_check > 0) {
2N/A previous_char = current_char;
2N/A char_size = get_wchar(&current_char, classp);
2N/A if (char_size <= 0) {
2N/A return (CHAR_TEST_ERROR);
2N/A } else if (test_char == current_char) {
2N/A if (test_condition == IN_CLASS) {
2N/A return (CONDITION_TRUE);
2N/A } else {
2N/A return (CONDITION_FALSE);
2N/A }
2N/A } else if (current_char == THRU) {
2N/A classp += char_size;
2N/A nbytes_to_check -= char_size;
2N/A char_size = get_wchar(&current_char, classp);
2N/A if (char_size <= 0) {
2N/A return (CHAR_TEST_ERROR);
2N/A } else if (in_wchar_range(test_char, previous_char,
2N/A current_char)) {
2N/A if (test_condition == IN_CLASS) {
2N/A return (CONDITION_TRUE);
2N/A } else {
2N/A return (CONDITION_FALSE);
2N/A }
2N/A } else {
2N/A classp += char_size;
2N/A nbytes_to_check -= char_size;
2N/A }
2N/A } else {
2N/A classp += char_size;
2N/A nbytes_to_check -= char_size;
2N/A }
2N/A }
2N/A if (test_condition == NOT_IN_CLASS) {
2N/A return (CONDITION_TRUE);
2N/A } else {
2N/A return (CONDITION_FALSE);
2N/A }
2N/A} /* test_char_against_multibyte_class() */
2N/A
2N/A
2N/A/* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */
2N/A
2N/Astatic char_test_result_t
2N/Atest_char_against_old_ascii_class(char test_char,
2N/A const char *classp,
2N/A char_test_condition_t test_condition)
2N/A{
2N/A /*
2N/A * tests a character for membership in an ASCII character class compiled
2N/A * by the ASCII version of regcmp();
2N/A *
2N/A * NOTE: ASCII versions of regcmp() compile the range a-z in an
2N/A * ASCII character class to THRUaz. The internationalized
2N/A * version compiles the same range to aTHRUz.
2N/A */
2N/A
2N/A int nbytes_to_check;
2N/A
2N/A nbytes_to_check = (int)*classp;
2N/A classp++;
2N/A nbytes_to_check--;
2N/A
2N/A while (nbytes_to_check > 0) {
2N/A if (test_char == *classp) {
2N/A if (test_condition == IN_CLASS) {
2N/A return (CONDITION_TRUE);
2N/A } else {
2N/A return (CONDITION_FALSE);
2N/A }
2N/A } else if (*classp == THRU) {
2N/A if ((*(classp + 1) <= test_char) &&
2N/A (test_char <= *(classp + 2))) {
2N/A if (test_condition == IN_CLASS) {
2N/A return (CONDITION_TRUE);
2N/A } else {
2N/A return (CONDITION_FALSE);
2N/A }
2N/A } else {
2N/A classp += 3;
2N/A nbytes_to_check -= 3;
2N/A }
2N/A } else {
2N/A classp++;
2N/A nbytes_to_check--;
2N/A }
2N/A }
2N/A if (test_condition == NOT_IN_CLASS) {
2N/A return (CONDITION_TRUE);
2N/A } else {
2N/A return (CONDITION_FALSE);
2N/A }
2N/A} /* test_char_against_old_ascii_class() */
2N/A
/*
 * test_repeated_ascii_char() - match the rest of the expression after a
 * repeated single-byte character
 *
 * repeat_startp	earliest position the rest of the match may start at
 * stringp		position just past the longest run that was consumed
 * regexp		compiled expression that follows the repeated item
 *
 * test_string() is tried at stringp first and then one byte earlier each
 * time, down to and including repeat_startp. Returns the first non-null
 * result of test_string(), or a null pointer when every position fails.
 */
static const char *
test_repeated_ascii_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	const char *matchp;

	for (;;) {
		matchp = test_string(stringp, regexp);
		if ((matchp != (char *)0) || (stringp <= repeat_startp)) {
			break;
		}
		stringp--;
	}
	return (matchp);
}
2N/A
/*
 * test_repeated_multibyte_char() - match the rest of the expression after
 * a repeated multibyte character
 *
 * repeat_startp	earliest position the rest of the match may start at
 * stringp		position just past the longest run that was consumed
 * regexp		compiled expression that follows the repeated item
 *
 * test_string() is tried at stringp first; while it fails and stringp is
 * still beyond repeat_startp, stringp is moved back with previous_charp()
 * and the test is repeated. Returns the first non-null result of
 * test_string(), or a null pointer when every position fails.
 */
static const char *
test_repeated_multibyte_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	const char *matchp;

	for (;;) {
		matchp = test_string(stringp, regexp);
		if ((matchp != (char *)0) || (stringp <= repeat_startp)) {
			break;
		}
		stringp = previous_charp(stringp);
	}
	return (matchp);
}
2N/A
/*
 * test_repeated_group() - match the rest of the expression after a
 * repeated group
 *
 * repeat_startp	earliest position the rest of the match may start at
 * stringp		position just past the last repetition consumed
 * regexp		compiled expression that follows the repeated group
 *
 * test_string() is tried at stringp first; while it fails and stringp is
 * still beyond repeat_startp, the next position to try is taken from
 * pop_stringp(). Returns the first non-null result of test_string(), or a
 * null pointer when every position fails or the stack runs empty.
 */
static const char *
test_repeated_group(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	const char *matchp;

	for (;;) {
		matchp = test_string(stringp, regexp);
		if ((matchp != (char *)0) || (stringp <= repeat_startp)) {
			return (matchp);
		}
		stringp = pop_stringp();
		if (stringp == (char *)0) {
			return ((char *)0);
		}
	}
}
2N/A
2N/Astatic const char *
2N/Atest_string(const char *stringp,
2N/A const char *regexp)
2N/A{
2N/A /*
2N/A * returns a pointer to the first character following the first
2N/A * substring of the string addressed by stringp that matches
2N/A * the compiled regular expression addressed by regexp
2N/A */
2N/A
2N/A unsigned int group_length;
2N/A int nextra_matches_allowed;
2N/A int nmust_match;
2N/A wchar_t regex_wchar;
2N/A int regex_char_size;
2N/A const char *repeat_startp;
2N/A unsigned int return_argn;
2N/A wchar_t string_wchar;
2N/A int string_char_size;
2N/A unsigned int substringn;
2N/A char_test_condition_t test_condition;
2N/A const char *test_stringp;
2N/A
2N/A for (;;) {
2N/A
2N/A /*
2N/A * Exit the loop via a return whenever there's a match
2N/A * or it's clear that there can be no match.
2N/A */
2N/A
2N/A switch ((int)*regexp) {
2N/A
2N/A /*
2N/A * No fall-through.
2N/A * Each case ends with either a return or with stringp
2N/A * addressing the next character to be tested and regexp
2N/A * addressing the next compiled regular expression
2N/A *
2N/A * NOTE: The comments for each case give the meaning
2N/A * of the compiled regular expression decoded by the case
2N/A * and the character string that the compiled regular
2N/A * expression uses to encode the case. Each single
2N/A * character encoded in the compiled regular expression
2N/A * is shown enclosed in angle brackets (<>). Each
2N/A * compiled regular expression begins with a marker
2N/A * character which is shown as a named constant
2N/A * (e.g. <ASCII_CHAR>). Character constants are shown
2N/A * enclosed in single quotes (e.g. <'$'>). All other
2N/A * single characters encoded in the compiled regular
2N/A * expression are shown as lower case variable names
2N/A * (e.g. <ascii_char> or <multibyte_char>). Multicharacter
2N/A * strings encoded in the compiled regular expression
2N/A * are shown as variable names followed by ellipses
2N/A * (e.g. <compiled_regex...>).
2N/A */
2N/A
2N/A case ASCII_CHAR: /* single ASCII char */
2N/A
2N/A /* encoded as <ASCII_CHAR><ascii_char> */
2N/A
2N/A regexp++;
2N/A if (*regexp == *stringp) {
2N/A regexp++;
2N/A stringp++;
2N/A } else {
2N/A return ((char *)0);
2N/A }
2N/A break; /* end case ASCII_CHAR */
2N/A
2N/A case MULTIBYTE_CHAR: /* single multibyte char */
2N/A
2N/A /* encoded as <MULTIBYTE_CHAR><multibyte_char> */
2N/A
2N/A regexp++;
2N/A regex_char_size = get_wchar(&regex_wchar, regexp);
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A if ((string_char_size <= 0) || (string_wchar != regex_wchar)) {
2N/A return ((char *)0);
2N/A } else {
2N/A regexp += regex_char_size;
2N/A stringp += string_char_size;
2N/A }
2N/A break; /* end case MULTIBYTE_CHAR */
2N/A
2N/A case ANY_CHAR: /* any single ASCII or multibyte char */
2N/A
2N/A /* encoded as <ANY_CHAR> */
2N/A
2N/A if (!multibyte) {
2N/A if (*stringp == '\0') {
2N/A return ((char *)0);
2N/A } else {
2N/A regexp++;
2N/A stringp++;
2N/A }
2N/A } else {
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A if (string_char_size <= 0) {
2N/A return ((char *)0);
2N/A } else {
2N/A regexp++;
2N/A stringp += string_char_size;
2N/A }
2N/A }
2N/A break; /* end case ANY_CHAR */
2N/A
2N/A case IN_ASCII_CHAR_CLASS: /* [.....] */
2N/A case NOT_IN_ASCII_CHAR_CLASS:
2N/A
2N/A /*
2N/A * encoded as <IN_ASCII_CHAR_CLASS><class_length><class...>
2N/A * or <NOT_IN_ASCII_CHAR_CLASS><class_length><class...>
2N/A *
2N/A * NOTE: <class_length> includes the <class_length> byte
2N/A */
2N/A
2N/A if ((int)*regexp == (int)IN_ASCII_CHAR_CLASS) {
2N/A test_condition = IN_CLASS;
2N/A } else {
2N/A test_condition = NOT_IN_CLASS;
2N/A }
2N/A regexp++; /* point to the <class_length> byte */
2N/A
2N/A if ((*stringp != '\0') &&
2N/A (test_char_against_ascii_class(*stringp, regexp,
2N/A test_condition) == CONDITION_TRUE)) {
2N/A regexp += (int)*regexp; /* add the class length to regexp */
2N/A stringp++;
2N/A } else {
2N/A return ((char *)0);
2N/A }
2N/A break; /* end case IN_ASCII_CHAR_CLASS */
2N/A
2N/A case IN_MULTIBYTE_CHAR_CLASS: /* [....] */
2N/A case NOT_IN_MULTIBYTE_CHAR_CLASS:
2N/A
2N/A /*
2N/A * encoded as <IN_MULTIBYTE_CHAR_CLASS><class_length><class...>
2N/A * or <NOT_IN_MULTIBYTE_CHAR_CLASS><class_length><class...>
2N/A *
2N/A * NOTE: <class_length> includes the <class_length> byte
2N/A */
2N/A
2N/A if ((int)*regexp == (int)IN_MULTIBYTE_CHAR_CLASS) {
2N/A test_condition = IN_CLASS;
2N/A } else {
2N/A test_condition = NOT_IN_CLASS;
2N/A }
2N/A regexp++; /* point to the <class_length> byte */
2N/A
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A if ((string_char_size > 0) &&
2N/A (test_char_against_multibyte_class(string_wchar, regexp,
2N/A test_condition) == CONDITION_TRUE)) {
2N/A regexp += (int)*regexp; /* add the class length to regexp */
2N/A stringp += string_char_size;
2N/A } else {
2N/A return ((char *)0);
2N/A }
2N/A break; /* end case IN_MULTIBYTE_CHAR_CLASS */
2N/A
2N/A case IN_OLD_ASCII_CHAR_CLASS: /* [...] */
2N/A case NOT_IN_OLD_ASCII_CHAR_CLASS:
2N/A
2N/A /*
2N/A * encoded as <IN_OLD_ASCII_CHAR_CLASS><class_length><class...>
2N/A * or <NOT_IN_OLD_ASCII_CHAR_CLASS><class_length><class...>
2N/A *
2N/A * NOTE: <class_length> includes the <class_length> byte
2N/A */
2N/A
2N/A if ((int)*regexp == (int)IN_OLD_ASCII_CHAR_CLASS) {
2N/A test_condition = IN_CLASS;
2N/A } else {
2N/A test_condition = NOT_IN_CLASS;
2N/A }
2N/A regexp++; /* point to the <class_length> byte */
2N/A
2N/A if ((*stringp != '\0') &&
2N/A (test_char_against_old_ascii_class(*stringp, regexp,
2N/A test_condition) == CONDITION_TRUE)) {
2N/A regexp += (int)*regexp; /* add the class length to regexp */
2N/A stringp++;
2N/A } else {
2N/A return ((char *)0);
2N/A }
2N/A break; /* end case [NOT_]IN_OLD_ASCII_CHAR_CLASS */
2N/A
2N/A case SIMPLE_GROUP: /* (.....) */
2N/A
2N/A /* encoded as <SIMPLE_GROUP><group_length> */
2N/A
2N/A regexp += 2;
2N/A break; /* end case SIMPLE_GROUP */
2N/A
2N/A case END_GROUP: /* (.....) */
2N/A
2N/A /* encoded as <END_GROUP><groupn> */
2N/A
2N/A regexp += 2;
2N/A break; /* end case END_GROUP */
2N/A
2N/A case SAVED_GROUP: /* (.....)$0-9 */
2N/A
2N/A /* encoded as <SAVED_GROUP><substringn> */
2N/A
2N/A regexp++;
2N/A substringn = (unsigned int)*regexp;
2N/A if (substringn >= NSUBSTRINGS)
2N/A return ((char *)0);
2N/A substring_startp[substringn] = stringp;
2N/A regexp++;
2N/A break; /* end case SAVED_GROUP */
2N/A
2N/A case END_SAVED_GROUP: /* (.....)$0-9 */
2N/A
2N/A /*
2N/A * encoded as <END_SAVED_GROUP><substringn>\
2N/A * <return_arg_number[substringn]>
2N/A */
2N/A
2N/A regexp++;
2N/A substringn = (unsigned int)*regexp;
2N/A if (substringn >= NSUBSTRINGS)
2N/A return ((char *)0);
2N/A substring_endp[substringn] = stringp;
2N/A regexp++;
2N/A return_argn = (unsigned int)*regexp;
2N/A if (return_argn >= NSUBSTRINGS)
2N/A return ((char *)0);
2N/A return_arg_number[substringn] = return_argn;
2N/A regexp++;
2N/A break; /* end case END_SAVED_GROUP */
2N/A
2N/A case ASCII_CHAR|ZERO_OR_MORE: /* char* */
2N/A
2N/A /* encoded as <ASCII_CHAR|ZERO_OR_MORE><ascii_char> */
2N/A
2N/A regexp++;
2N/A repeat_startp = stringp;
2N/A while (*stringp == *regexp) {
2N/A stringp++;
2N/A }
2N/A regexp++;
2N/A return (test_repeated_ascii_char(repeat_startp,
2N/A stringp, regexp));
2N/A
2N/A /* end case ASCII_CHAR|ZERO_OR_MORE */
2N/A
2N/A case ASCII_CHAR|ONE_OR_MORE: /* char+ */
2N/A
2N/A /* encoded as <ASCII_CHAR|ONE_OR_MORE><ascii_char> */
2N/A
2N/A regexp++;
2N/A if (*stringp != *regexp) {
2N/A return ((char *)0);
2N/A } else {
2N/A stringp++;
2N/A repeat_startp = stringp;
2N/A while (*stringp == *regexp) {
2N/A stringp++;
2N/A }
2N/A regexp++;
2N/A return (test_repeated_ascii_char(repeat_startp, stringp,
2N/A regexp));
2N/A }
2N/A /* end case ASCII_CHAR|ONE_OR_MORE */
2N/A
2N/A case ASCII_CHAR|COUNT: /* char{min_count,max_count} */
2N/A
2N/A /*
2N/A * encoded as <ASCII_CHAR|COUNT><ascii_char>\
2N/A * <minimum_match_count><maximum_match_count>
2N/A */
2N/A
2N/A regexp++;
2N/A get_match_counts(&nmust_match, &nextra_matches_allowed,
2N/A regexp + 1);
2N/A while ((*stringp == *regexp) && (nmust_match > 0)) {
2N/A nmust_match--;
2N/A stringp++;
2N/A }
2N/A if (nmust_match > 0) {
2N/A return ((char *)0);
2N/A } else if (nextra_matches_allowed == UNLIMITED) {
2N/A repeat_startp = stringp;
2N/A while (*stringp == *regexp) {
2N/A stringp++;
2N/A }
2N/A regexp += 3;
2N/A return (test_repeated_ascii_char(repeat_startp, stringp,
2N/A regexp));
2N/A } else {
2N/A repeat_startp = stringp;
2N/A while ((*stringp == *regexp) &&
2N/A (nextra_matches_allowed > 0)) {
2N/A nextra_matches_allowed--;
2N/A stringp++;
2N/A }
2N/A regexp += 3;
2N/A return (test_repeated_ascii_char(repeat_startp, stringp,
2N/A regexp));
2N/A }
2N/A /* end case ASCII_CHAR|COUNT */
2N/A
2N/A case MULTIBYTE_CHAR|ZERO_OR_MORE: /* char* */
2N/A
2N/A /* encoded as <MULTIBYTE_CHAR|ZERO_OR_MORE><multibyte_char> */
2N/A
2N/A regexp++;
2N/A regex_char_size = get_wchar(&regex_wchar, regexp);
2N/A repeat_startp = stringp;
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A while ((string_char_size > 0) &&
2N/A (string_wchar == regex_wchar)) {
2N/A stringp += string_char_size;
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A }
2N/A regexp += regex_char_size;
2N/A return (test_repeated_multibyte_char(repeat_startp, stringp,
2N/A regexp));
2N/A
2N/A /* end case MULTIBYTE_CHAR|ZERO_OR_MORE */
2N/A
2N/A case MULTIBYTE_CHAR|ONE_OR_MORE: /* char+ */
2N/A
2N/A /* encoded as <MULTIBYTE_CHAR|ONE_OR_MORE><multibyte_char> */
2N/A
2N/A regexp++;
2N/A regex_char_size = get_wchar(&regex_wchar, regexp);
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A if ((string_char_size <= 0) || (string_wchar != regex_wchar)) {
2N/A return ((char *)0);
2N/A } else {
2N/A stringp += string_char_size;
2N/A repeat_startp = stringp;
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A while ((string_char_size > 0) &&
2N/A (string_wchar == regex_wchar)) {
2N/A stringp += string_char_size;
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A }
2N/A regexp += regex_char_size;
2N/A return (test_repeated_multibyte_char(repeat_startp, stringp,
2N/A regexp));
2N/A }
2N/A /* end case MULTIBYTE_CHAR|ONE_OR_MORE */
2N/A
2N/A case MULTIBYTE_CHAR|COUNT: /* char{min_count,max_count} */
2N/A
2N/A /*
2N/A * encoded as <MULTIBYTE_CHAR|COUNT><multibyte_char>\
2N/A * <minimum_match_count><maximum_match_count>
2N/A */
2N/A
2N/A regexp++;
2N/A regex_char_size = get_wchar(&regex_wchar, regexp);
2N/A get_match_counts(&nmust_match, &nextra_matches_allowed,
2N/A regexp + regex_char_size);
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A while ((string_char_size > 0) &&
2N/A (string_wchar == regex_wchar) &&
2N/A (nmust_match > 0)) {
2N/A
2N/A nmust_match--;
2N/A stringp += string_char_size;
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A }
2N/A if (nmust_match > 0) {
2N/A return ((char *)0);
2N/A } else if (nextra_matches_allowed == UNLIMITED) {
2N/A repeat_startp = stringp;
2N/A while ((string_char_size > 0) &&
2N/A (string_wchar == regex_wchar)) {
2N/A stringp += string_char_size;
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A }
2N/A regexp += regex_char_size + 2;
2N/A return (test_repeated_multibyte_char(repeat_startp, stringp,
2N/A regexp));
2N/A } else {
2N/A repeat_startp = stringp;
2N/A while ((string_char_size > 0) &&
2N/A (string_wchar == regex_wchar) &&
2N/A (nextra_matches_allowed > 0)) {
2N/A nextra_matches_allowed--;
2N/A stringp += string_char_size;
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A }
2N/A regexp += regex_char_size + 2;
2N/A return (test_repeated_multibyte_char(repeat_startp, stringp,
2N/A regexp));
2N/A }
2N/A /* end case MULTIBYTE_CHAR|COUNT */
2N/A
2N/A case ANY_CHAR|ZERO_OR_MORE: /* .* */
2N/A
2N/A /* encoded as <ANY_CHAR|ZERO_OR_MORE> */
2N/A
2N/A repeat_startp = stringp;
2N/A if (!multibyte) {
2N/A while (*stringp != '\0') {
2N/A stringp++;
2N/A }
2N/A regexp++;
2N/A return (test_repeated_ascii_char(repeat_startp, stringp,
2N/A regexp));
2N/A } else {
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A while (string_char_size > 0) {
2N/A stringp += string_char_size;
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A }
2N/A regexp++;
2N/A return (test_repeated_multibyte_char(repeat_startp, stringp,
2N/A regexp));
2N/A }
2N/A /* end case <ANY_CHAR|ZERO_OR_MORE> */
2N/A
2N/A case ANY_CHAR|ONE_OR_MORE: /* .+ */
2N/A
2N/A /* encoded as <ANY_CHAR|ONE_OR_MORE> */
2N/A
2N/A if (!multibyte) {
2N/A if (*stringp == '\0') {
2N/A return ((char *)0);
2N/A } else {
2N/A stringp++;
2N/A repeat_startp = stringp;
2N/A while (*stringp != '\0') {
2N/A stringp++;
2N/A }
2N/A regexp++;
2N/A return (test_repeated_ascii_char(repeat_startp, stringp,
2N/A regexp));
2N/A }
2N/A } else {
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A if (string_char_size <= 0) {
2N/A return ((char *)0);
2N/A } else {
2N/A stringp += string_char_size;
2N/A repeat_startp = stringp;
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A while (string_char_size > 0) {
2N/A stringp += string_char_size;
2N/A string_char_size =
2N/A get_wchar(&string_wchar, stringp);
2N/A }
2N/A regexp++;
2N/A return (test_repeated_multibyte_char(repeat_startp,
2N/A stringp, regexp));
2N/A }
2N/A }
2N/A /* end case <ANY_CHAR|ONE_OR_MORE> */
2N/A
2N/A case ANY_CHAR|COUNT: /* .{min_count,max_count} */
2N/A
2N/A /*
2N/A * encoded as <ANY_CHAR|COUNT>\
2N/A * <minimum_match_count><maximum_match_count>
2N/A */
2N/A
2N/A get_match_counts(&nmust_match, &nextra_matches_allowed,
2N/A regexp + 1);
2N/A if (!multibyte) {
2N/A while ((*stringp != '\0') && (nmust_match > 0)) {
2N/A nmust_match--;
2N/A stringp++;
2N/A }
2N/A if (nmust_match > 0) {
2N/A return ((char *)0);
2N/A } else if (nextra_matches_allowed == UNLIMITED) {
2N/A repeat_startp = stringp;
2N/A while (*stringp != '\0') {
2N/A stringp++;
2N/A }
2N/A regexp += 3;
2N/A return (test_repeated_ascii_char(repeat_startp, stringp,
2N/A regexp));
2N/A } else {
2N/A repeat_startp = stringp;
2N/A while ((*stringp != '\0') &&
2N/A (nextra_matches_allowed > 0)) {
2N/A nextra_matches_allowed--;
2N/A stringp++;
2N/A }
2N/A regexp += 3;
2N/A return (test_repeated_ascii_char(repeat_startp, stringp,
2N/A regexp));
2N/A }
2N/A } else { /* multibyte character */
2N/A
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A while ((string_char_size > 0) && (nmust_match > 0)) {
2N/A nmust_match--;
2N/A stringp += string_char_size;
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A }
2N/A if (nmust_match > 0) {
2N/A return ((char *)0);
2N/A } else if (nextra_matches_allowed == UNLIMITED) {
2N/A repeat_startp = stringp;
2N/A while (string_char_size > 0) {
2N/A stringp += string_char_size;
2N/A string_char_size =
2N/A get_wchar(&string_wchar, stringp);
2N/A }
2N/A regexp += 3;
2N/A return (test_repeated_multibyte_char(repeat_startp,
2N/A stringp, regexp));
2N/A } else {
2N/A repeat_startp = stringp;
2N/A while ((string_char_size > 0) &&
2N/A (nextra_matches_allowed > 0)) {
2N/A nextra_matches_allowed--;
2N/A stringp += string_char_size;
2N/A string_char_size =
2N/A get_wchar(&string_wchar, stringp);
2N/A }
2N/A regexp += 3;
2N/A return (test_repeated_multibyte_char(repeat_startp,
2N/A stringp, regexp));
2N/A }
2N/A } /* end case ANY_CHAR|COUNT */
2N/A
2N/A case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */
2N/A case NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE:
2N/A
2N/A /*
2N/A * encoded as <IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
2N/A * <class_length><class ...>
2N/A * or <NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
2N/A * <class_length><class ...>
2N/A *
2N/A * NOTE: <class_length> includes the <class_length> byte
2N/A */
2N/A
2N/A if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ZERO_OR_MORE)) {
2N/A test_condition = IN_CLASS;
2N/A } else {
2N/A test_condition = NOT_IN_CLASS;
2N/A }
2N/A regexp++; /* point to the <class_length> byte */
2N/A
2N/A repeat_startp = stringp;
2N/A while ((*stringp != '\0') &&
2N/A (test_char_against_ascii_class(*stringp, regexp,
2N/A test_condition) == CONDITION_TRUE)) {
2N/A stringp++;
2N/A }
2N/A regexp += (int)*regexp; /* add the class length to regexp */
2N/A return (test_repeated_ascii_char(repeat_startp, stringp,
2N/A regexp));
2N/A
2N/A /* end case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE */
2N/A
2N/A case IN_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */
2N/A case NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE:
2N/A
2N/A /*
2N/A * encoded as <IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\
2N/A * <class_length><class ...>
2N/A * or <NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\
2N/A * <class_length><class ...>
2N/A *
2N/A * NOTE: <class_length> includes the <class_length> byte
2N/A */
2N/A
2N/A if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ONE_OR_MORE)) {
2N/A test_condition = IN_CLASS;
2N/A } else {
2N/A test_condition = NOT_IN_CLASS;
2N/A }
2N/A regexp++; /* point to the <class_length> byte */
2N/A
2N/A if ((*stringp == '\0') ||
2N/A (test_char_against_ascii_class(*stringp, regexp,
2N/A test_condition) != CONDITION_TRUE)) {
2N/A return ((char *)0);
2N/A } else {
2N/A stringp++;
2N/A repeat_startp = stringp;
2N/A while ((*stringp != '\0') &&
2N/A (test_char_against_ascii_class(*stringp, regexp,
2N/A test_condition) == CONDITION_TRUE)) {
2N/A stringp++;
2N/A }
2N/A regexp += (int)*regexp; /* add the class length to regexp */
2N/A return (test_repeated_ascii_char(repeat_startp, stringp,
2N/A regexp));
2N/A }
2N/A /* end case IN_ASCII_CHAR_CLASS|ONE_OR_MORE */
2N/A
2N/A case IN_ASCII_CHAR_CLASS | COUNT: /* [.....]{max_count,min_count} */
2N/A case NOT_IN_ASCII_CHAR_CLASS | COUNT:
2N/A
2N/A /*
2N/A * encoded as <IN_ASCII_CHAR_CLASS|COUNT><class_length>\
2N/A * <class ...><minimum_match_count>\
2N/A * <maximum_match_count>
2N/A * or <NOT_IN_ASCII_CHAR_CLASS|COUNT><class_length>\
2N/A * <class ...><minimum_match_count>\
2N/A * <maximum_match_count>
2N/A *
2N/A * NOTE: <class_length> includes the <class_length> byte,
2N/A * but not the <minimum_match_count> or
2N/A * <maximum_match_count> bytes
2N/A */
2N/A
2N/A if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|COUNT)) {
2N/A test_condition = IN_CLASS;
2N/A } else {
2N/A test_condition = NOT_IN_CLASS;
2N/A }
2N/A regexp++; /* point to the <class_length> byte */
2N/A
2N/A get_match_counts(&nmust_match, &nextra_matches_allowed,
2N/A regexp + (int)*regexp);
2N/A while ((*stringp != '\0') &&
2N/A (test_char_against_ascii_class(*stringp, regexp,
2N/A test_condition) == CONDITION_TRUE) &&
2N/A (nmust_match > 0)) {
2N/A nmust_match--;
2N/A stringp++;
2N/A }
2N/A if (nmust_match > 0) {
2N/A return ((char *)0);
2N/A } else if (nextra_matches_allowed == UNLIMITED) {
2N/A repeat_startp = stringp;
2N/A while ((*stringp != '\0') &&
2N/A (test_char_against_ascii_class(*stringp, regexp,
2N/A test_condition) == CONDITION_TRUE)) {
2N/A stringp++;
2N/A }
2N/A regexp += (int)*regexp + 2;
2N/A return (test_repeated_ascii_char(repeat_startp, stringp,
2N/A regexp));
2N/A } else {
2N/A repeat_startp = stringp;
2N/A while ((*stringp != '\0') &&
2N/A (test_char_against_ascii_class(*stringp, regexp,
2N/A test_condition) == CONDITION_TRUE) &&
2N/A (nextra_matches_allowed > 0)) {
2N/A nextra_matches_allowed--;
2N/A stringp++;
2N/A }
2N/A regexp += (int)*regexp + 2;
2N/A return (test_repeated_ascii_char(repeat_startp, stringp,
2N/A regexp));
2N/A }
2N/A /* end case IN_ASCII_CHAR_CLASS|COUNT */
2N/A
2N/A case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */
2N/A case NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE:
2N/A
2N/A /*
2N/A * encoded as <IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\
2N/A * <class_length><class ...>
2N/A * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\
2N/A * <class_length><class ...>
2N/A *
2N/A * NOTE: <class_length> includes the <class_length> byte
2N/A */
2N/A
2N/A if ((int)*regexp ==
2N/A (int)(IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE)) {
2N/A test_condition = IN_CLASS;
2N/A } else {
2N/A test_condition = NOT_IN_CLASS;
2N/A }
2N/A regexp++; /* point to the <class_length> byte */
2N/A
2N/A repeat_startp = stringp;
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A while ((string_char_size > 0) &&
2N/A (test_char_against_multibyte_class(string_wchar, regexp,
2N/A test_condition) == CONDITION_TRUE)) {
2N/A stringp += string_char_size;
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A }
2N/A regexp += (int)*regexp; /* add the class length to regexp */
2N/A return (test_repeated_multibyte_char(repeat_startp, stringp,
2N/A regexp));
2N/A
2N/A /* end case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE */
2N/A
2N/A case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */
2N/A case NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE:
2N/A
2N/A /*
2N/A * encoded as <IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\
2N/A * <class_length><class ...>
2N/A * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\
2N/A * <class_length><class ...>
2N/A *
2N/A * NOTE: <class_length> includes the <class_length> byte
2N/A */
2N/A
2N/A if ((int)*regexp ==
2N/A (int)(IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE)) {
2N/A test_condition = IN_CLASS;
2N/A } else {
2N/A test_condition = NOT_IN_CLASS;
2N/A }
2N/A regexp++; /* point to the <class_length> byte */
2N/A
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A if ((string_char_size <= 0) ||
2N/A (test_char_against_multibyte_class(string_wchar, regexp,
2N/A test_condition) != CONDITION_TRUE)) {
2N/A return ((char *)0);
2N/A } else {
2N/A stringp += string_char_size;
2N/A repeat_startp = stringp;
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A while ((string_char_size > 0) &&
2N/A (test_char_against_multibyte_class(string_wchar,
2N/A regexp, test_condition) == CONDITION_TRUE)) {
2N/A stringp += string_char_size;
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A }
2N/A regexp += (int)*regexp; /* add the class length to regexp */
2N/A return (test_repeated_multibyte_char(repeat_startp, stringp,
2N/A regexp));
2N/A }
2N/A /* end case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE */
2N/A
2N/A case IN_MULTIBYTE_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */
2N/A case NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT:
2N/A
2N/A /*
2N/A * encoded as <IN_MULTIBYTE_CHAR_CLASS|COUNT>\
2N/A * <class_length><class ...><min_count><max_count>
2N/A * or <NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT>\
2N/A * <class_length><class ...><min_count><max_count>
2N/A *
2N/A * NOTE: <class_length> includes the <class_length> byte
2N/A * but not the <minimum_match_count> or
2N/A * <maximum_match_count> bytes
2N/A */
2N/A
2N/A if ((int)*regexp == (int)(IN_MULTIBYTE_CHAR_CLASS|COUNT)) {
2N/A test_condition = IN_CLASS;
2N/A } else {
2N/A test_condition = NOT_IN_CLASS;
2N/A }
2N/A regexp++; /* point to the <class_length> byte */
2N/A
2N/A get_match_counts(&nmust_match, &nextra_matches_allowed,
2N/A regexp + (int)*regexp);
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A while ((string_char_size > 0) &&
2N/A (test_char_against_multibyte_class(string_wchar, regexp,
2N/A test_condition) == CONDITION_TRUE) &&
2N/A (nmust_match > 0)) {
2N/A nmust_match--;
2N/A stringp += string_char_size;
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A }
2N/A if (nmust_match > 0) {
2N/A return ((char *)0);
2N/A } else if (nextra_matches_allowed == UNLIMITED) {
2N/A repeat_startp = stringp;
2N/A while ((string_char_size > 0) &&
2N/A (test_char_against_multibyte_class(string_wchar,
2N/A regexp, test_condition) == CONDITION_TRUE)) {
2N/A stringp += string_char_size;
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A }
2N/A regexp += (int)*regexp + 2;
2N/A return (test_repeated_multibyte_char(repeat_startp, stringp,
2N/A regexp));
2N/A } else {
2N/A repeat_startp = stringp;
2N/A while ((string_char_size > 0) &&
2N/A (test_char_against_multibyte_class(string_wchar,
2N/A regexp, test_condition) == CONDITION_TRUE) &&
2N/A (nextra_matches_allowed > 0)) {
2N/A nextra_matches_allowed--;
2N/A stringp += string_char_size;
2N/A string_char_size = get_wchar(&string_wchar, stringp);
2N/A }
2N/A regexp += (int)*regexp + 2;
2N/A return (test_repeated_multibyte_char(repeat_startp, stringp,
2N/A regexp));
2N/A }
2N/A /* end case IN_MULTIBYTE_CHAR_CLASS|COUNT */
2N/A
2N/A case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */
2N/A case NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE:
2N/A
2N/A /*
2N/A * encoded as <IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
2N/A * <class_length><class ...>
2N/A * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
2N/A * <class_length><class ...>
2N/A *
2N/A * NOTE: <class_length> includes the <class_length> byte
2N/A */
2N/A
2N/A if ((int)*regexp ==
2N/A (int)(IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE)) {
2N/A test_condition = IN_CLASS;
2N/A } else {
2N/A test_condition = NOT_IN_CLASS;
2N/A }
2N/A regexp++; /* point to the <class_length> byte */
2N/A
2N/A repeat_startp = stringp;
2N/A while ((*stringp != '\0') &&
2N/A (test_char_against_old_ascii_class(*stringp, regexp,
2N/A test_condition) == CONDITION_TRUE)) {
2N/A stringp++;
2N/A }
2N/A regexp += (int)*regexp; /* add the class length to regexp */
2N/A return (test_repeated_ascii_char(repeat_startp, stringp,
2N/A regexp));
2N/A
2N/A /* end case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE */
2N/A
2N/A case IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */
2N/A case NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE:
2N/A
2N/A /*
2N/A * encoded as <IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\
2N/A * <class_length><class ...>
2N/A * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\
2N/A * <class_length><class ...>
2N/A *
2N/A * NOTE: <class_length> includes the <class_length> byte
2N/A */
2N/A
2N/A if ((int)*regexp ==
2N/A (int)(IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE)) {
2N/A test_condition = IN_CLASS;
2N/A } else {
2N/A test_condition = NOT_IN_CLASS;
2N/A }
2N/A regexp++; /* point to the <class_length> byte */
2N/A
2N/A if ((*stringp == '\0') ||
2N/A (test_char_against_old_ascii_class(*stringp, regexp,
2N/A test_condition) != CONDITION_TRUE)) {
2N/A return ((char *)0);
2N/A } else {
2N/A stringp++;
2N/A repeat_startp = stringp;
2N/A while ((*stringp != '\0') &&
2N/A (test_char_against_old_ascii_class(*stringp, regexp,
2N/A test_condition) == CONDITION_TRUE)) {
2N/A stringp++;
2N/A }
2N/A regexp += (int)*regexp; /* add the class length to regexp */
2N/A return (test_repeated_ascii_char(repeat_startp, stringp,
2N/A regexp));
2N/A }
2N/A /* end case IN_OLD_ASCII_CHAR_CLASS | ONE_OR_MORE */
2N/A
2N/A case IN_OLD_ASCII_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */
2N/A case NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT:
2N/A
2N/A /*
2N/A * encoded as <IN_OLD_ASCII_CHAR_CLASS|COUNT><class_length>\
2N/A * <class ...><minimum_match_count>\
2N/A * <maximum_match_count>
2N/A * or <NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT>\
2N/A * <class_length><class ...><minimum_match_count>\
2N/A * <maximum_match_count>
2N/A *
2N/A * NOTE: <class_length> includes the <class_length> byte
2N/A * but not the <minimum_match_count> or
2N/A * <maximum_match_count> bytes
2N/A */
2N/A
2N/A if ((int)*regexp == (int)(IN_OLD_ASCII_CHAR_CLASS|COUNT)) {
2N/A test_condition = IN_CLASS;
2N/A } else {
2N/A test_condition = NOT_IN_CLASS;
2N/A }
2N/A regexp++; /* point to the <class_length> byte */
2N/A
2N/A get_match_counts(&nmust_match, &nextra_matches_allowed,
2N/A regexp + (int)*regexp);
2N/A while ((*stringp != '\0') &&
2N/A (test_char_against_old_ascii_class(*stringp, regexp,
2N/A test_condition) == CONDITION_TRUE) &&
2N/A (nmust_match > 0)) {
2N/A nmust_match--;
2N/A stringp++;
2N/A }
2N/A if (nmust_match > 0) {
2N/A return ((char *)0);
2N/A } else if (nextra_matches_allowed == UNLIMITED) {
2N/A repeat_startp = stringp;
2N/A while ((*stringp != '\0') &&
2N/A (test_char_against_old_ascii_class(*stringp, regexp,
2N/A test_condition) == CONDITION_TRUE)) {
2N/A stringp++;
2N/A }
2N/A regexp += (int)*regexp + 2;
2N/A return (test_repeated_ascii_char(repeat_startp, stringp,
2N/A regexp));
2N/A } else {
2N/A repeat_startp = stringp;
2N/A while ((*stringp != '\0') &&
2N/A (test_char_against_old_ascii_class(*stringp, regexp,
2N/A test_condition) == CONDITION_TRUE) &&
2N/A (nextra_matches_allowed > 0)) {
2N/A nextra_matches_allowed--;
2N/A stringp++;
2N/A }
2N/A regexp += (int)*regexp + 2;
2N/A return (test_repeated_ascii_char(repeat_startp, stringp,
2N/A regexp));
2N/A }
2N/A /* end case IN_OLD_ASCII_CHAR_CLASS|COUNT */
2N/A
2N/A case ZERO_OR_MORE_GROUP: /* (.....)* */
2N/A case ZERO_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH:
2N/A case ZERO_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH:
2N/A case ZERO_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH:
2N/A
2N/A /*
2N/A * encoded as <ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
2N/A * <group_length><compiled_regex...>\
2N/A * <END_GROUP|ZERO_OR_MORE><groupn>
2N/A *
2N/A * NOTE:
2N/A *
2N/A * group_length + (256 * ADDED_LENGTH_BITS) ==
2N/A * length_of(<compiled_regex...><END_GROUP|ZERO_OR_MORE>\
2N/A * <groupn>)
2N/A *
2N/A */
2N/A
2N/A group_length =
2N/A (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
2N/A TIMES_256_SHIFT);
2N/A regexp++;
2N/A group_length += (unsigned int)*regexp;
2N/A regexp++;
2N/A repeat_startp = stringp;
2N/A test_stringp = test_string(stringp, regexp);
2N/A while (test_stringp != (char *)0) {
2N/A if (push_stringp(stringp) == (char *)0)
2N/A return ((char *)0);
2N/A stringp = test_stringp;
2N/A test_stringp = test_string(stringp, regexp);
2N/A }
2N/A regexp += group_length;
2N/A return (test_repeated_group(repeat_startp, stringp, regexp));
2N/A
2N/A /* end case ZERO_OR_MORE_GROUP */
2N/A
2N/A case END_GROUP|ZERO_OR_MORE: /* (.....)* */
2N/A
2N/A /* encoded as <END_GROUP|ZERO_OR_MORE> */
2N/A
2N/A /* return from recursive call to test_string() */
2N/A
2N/A return ((char *)stringp);
2N/A
2N/A /* end case END_GROUP|ZERO_OR_MORE */
2N/A
2N/A case ONE_OR_MORE_GROUP: /* (.....)+ */
2N/A case ONE_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH:
2N/A case ONE_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH:
2N/A case ONE_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH:
2N/A
2N/A /*
2N/A * encoded as <ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
2N/A * <group_length><compiled_regex...>\
2N/A * <END_GROUP|ONE_OR_MORE><groupn>
2N/A *
2N/A * NOTE:
2N/A *
2N/A * group_length + (256 * ADDED_LENGTH_BITS) ==
2N/A * length_of(<compiled_regex...><END_GROUP|ONE_OR_MORE>\
2N/A * <groupn>)
2N/A */
2N/A
2N/A group_length =
2N/A (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
2N/A TIMES_256_SHIFT);
2N/A regexp++;
2N/A group_length += (unsigned int)*regexp;
2N/A regexp++;
2N/A stringp = test_string(stringp, regexp);
2N/A if (stringp == (char *)0)
2N/A return ((char *)0);
2N/A repeat_startp = stringp;
2N/A test_stringp = test_string(stringp, regexp);
2N/A while (test_stringp != (char *)0) {
2N/A if (push_stringp(stringp) == (char *)0)
2N/A return ((char *)0);
2N/A stringp = test_stringp;
2N/A test_stringp = test_string(stringp, regexp);
2N/A }
2N/A regexp += group_length;
2N/A return (test_repeated_group(repeat_startp, stringp, regexp));
2N/A
2N/A /* end case ONE_OR_MORE_GROUP */
2N/A
2N/A case END_GROUP|ONE_OR_MORE: /* (.....)+ */
2N/A
2N/A /* encoded as <END_GROUP|ONE_OR_MORE><groupn> */
2N/A
2N/A /* return from recursive call to test_string() */
2N/A
2N/A return ((char *)stringp);
2N/A
2N/A /* end case END_GROUP|ONE_OR_MORE */
2N/A
2N/A case COUNTED_GROUP: /* (.....){max_count,min_count} */
2N/A case COUNTED_GROUP|ADD_256_TO_GROUP_LENGTH:
2N/A case COUNTED_GROUP|ADD_512_TO_GROUP_LENGTH:
2N/A case COUNTED_GROUP|ADD_768_TO_GROUP_LENGTH:
2N/A
2N/A /*
2N/A * encoded as <COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\
2N/A * <compiled_regex...>\<END_GROUP|COUNT><groupn>\
2N/A * <minimum_match_count><maximum_match_count>
2N/A *
2N/A * NOTE:
2N/A *
2N/A * group_length + (256 * ADDED_LENGTH_BITS) ==
2N/A * length_of(<compiled_regex...><END_GROUP|COUNT><groupn>)
2N/A *
2N/A * but does not include the <minimum_match_count> or
2N/A * <maximum_match_count> bytes
2N/A */
2N/A
2N/A group_length =
2N/A (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
2N/A TIMES_256_SHIFT);
2N/A regexp++;
2N/A group_length += (unsigned int)*regexp;
2N/A regexp++;
2N/A get_match_counts(&nmust_match, &nextra_matches_allowed,
2N/A regexp + group_length);
2N/A test_stringp = test_string(stringp, regexp);
2N/A while ((test_stringp != (char *)0) && (nmust_match > 0)) {
2N/A stringp = test_stringp;
2N/A nmust_match--;
2N/A test_stringp = test_string(stringp, regexp);
2N/A }
2N/A if (nmust_match > 0) {
2N/A return ((char *)0);
2N/A } else if (nextra_matches_allowed == UNLIMITED) {
2N/A repeat_startp = stringp;
2N/A while (test_stringp != (char *)0) {
2N/A if (push_stringp(stringp) == (char *)0)
2N/A return ((char *)0);
2N/A stringp = test_stringp;
2N/A test_stringp = test_string(stringp, regexp);
2N/A }
2N/A regexp += group_length + 2;
2N/A return (test_repeated_group(repeat_startp, stringp,
2N/A regexp));
2N/A } else {
2N/A repeat_startp = stringp;
2N/A while ((test_stringp != (char *)0) &&
2N/A (nextra_matches_allowed > 0)) {
2N/A nextra_matches_allowed--;
2N/A if (push_stringp(stringp) == (char *)0)
2N/A return ((char *)0);
2N/A stringp = test_stringp;
2N/A test_stringp = test_string(stringp, regexp);
2N/A }
2N/A regexp += group_length + 2;
2N/A return (test_repeated_group(repeat_startp, stringp,
2N/A regexp));
2N/A }
2N/A /* end case COUNTED_GROUP */
2N/A
2N/A case END_GROUP|COUNT: /* (.....){max_count,min_count} */
2N/A
2N/A /* encoded as <END_GROUP|COUNT> */
2N/A
2N/A /* return from recursive call to test_string() */
2N/A
2N/A return (stringp);
2N/A
2N/A /* end case END_GROUP|COUNT */
2N/A
2N/A case END_OF_STRING_MARK:
2N/A
2N/A /* encoded as <END_OF_STRING_MARK><END_REGEX> */
2N/A
2N/A if (*stringp == '\0') {
2N/A regexp++;
2N/A } else {
2N/A return ((char *)0);
2N/A }
2N/A break; /* end case END_OF_STRING_MARK */
2N/A
2N/A case END_REGEX: /* end of the compiled regular expression */
2N/A
2N/A /* encoded as <END_REGEX> */
2N/A
2N/A return (stringp);
2N/A
2N/A /* end case END_REGEX */
2N/A
2N/A default:
2N/A
2N/A return ((char *)0);
2N/A
2N/A } /* end switch (*regexp) */
2N/A
2N/A } /* end for (;;) */
2N/A
2N/A} /* test_string() */