2N/A * The contents of this file are subject to the terms of the 2N/A * Common Development and Distribution License (the "License"). 2N/A * You may not use this file except in compliance with the License. 2N/A * See the License for the specific language governing permissions 2N/A * and limitations under the License. 2N/A * When distributing Covered Code, include this CDDL HEADER in each 2N/A * If applicable, add the following below this CDDL HEADER, with the 2N/A * fields enclosed by brackets "[]" replaced with your own identifying 2N/A * information: Portions Copyright [yyyy] [name of copyright owner] 2N/A * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 2N/A * Use is subject to license terms. 2N/A/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 2N/A/* All Rights Reserved */ 2N/A#
pragma ident "%Z%%M% %I% %E% SMI" 2N/A * regex() WORKS **ONLY** WITH THE ASCII AND THE Solaris EUC CHARACTER SETS. 2N/A * IT IS **NOT** CHARACTER SET INDEPENDENT. 2N/A/* CONSTANTS SHARED WITH regcmp() */ 2N/A/* PRIVATE CONSTANTS */ 2N/A/* PRIVATE TYPE DEFINITIONS */ 2N/A/* PRIVATE GLOBAL VARIABLES */ 2N/A/* DECLARATIONS OF PRIVATE FUNCTIONS */ 2N/A/* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */ 2N/A/* DEFINITIONS OF PUBLIC VARIABLES */ 2N/A * reserve thread-specific storage for __loc1 2N/A/* DEFINITION OF regex() */ 2N/A /* INITIALIZE SUBSTRINGS THAT MIGHT BE RETURNED IN VARARGS */ 2N/A /* TEST THE STRING AGAINST THE REGULAR EXPRESSION */ 2N/A * the match must start at the beginning of the string 2N/A * test a string against a regular expression 2N/A * that starts with a single ASCII character: 2N/A * move to each character in the string that matches 2N/A * the first character in the regular expression 2N/A * and test the remaining string 2N/A * if the value of the "multibyte" macro defined in <euc.h> 2N/A * is false, regex() is running in an ASCII locale; 2N/A * test an ASCII string against an ASCII regular expression 2N/A * that doesn't start with a single ASCII character: 2N/A * move forward in the string one byte at a time, testing 2N/A * the remaining string against the regular expression 2N/A * test a multibyte string against a multibyte regular expression 2N/A * that starts with a single multibyte character: 2N/A * move to each character in the string that matches 2N/A * the first character in the regular expression 2N/A * and test the remaining string 2N/A * test a multibyte string against a multibyte regular expression 2N/A * that doesn't start with a single multibyte character 2N/A * move forward in the string one multibyte character at a time, 2N/A * testing the remaining string against the regular expression 2N/A * Return substrings that matched subexpressions for which 2N/A * matching substrings are to be returned. 
2N/A * According to manual page regcmp(3G), regex() returns substrings 2N/A * that match subexpressions even when no substring matches the 2N/A * entire regular expression. 2N/A/* DEFINITIONS OF PRIVATE FUNCTIONS */ 2N/A }
else if ((
unsigned char)*
stringp <= (
unsigned char)
0x7f) {
2N/A}
/* get_match_counts() */ 2N/A}
/* in_wchar_range() */ 2N/A * returns the pointer to the previous character in 2N/A * a string of multibyte characters 2N/A}
/* previous_charp() */ 2N/A * tests a character for membership in an ASCII character class compiled 2N/A * by the internationalized version of regcmp(); 2N/A * NOTE: The internationalized version of regcmp() compiles 2N/A * the range a-z in an ASCII character class to aTHRUz. 2N/A}
/* test_char_against_ascii_class() */ 2N/A * tests a character for membership in a multibyte character class; 2N/A * NOTE: The range a-z in a multibyte character class compiles to 2N/A}
/* test_char_against_multibyte_class() */ 2N/A/* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */ 2N/A * tests a character for membership in an ASCII character class compiled 2N/A * by the ASCII version of regcmp(); 2N/A * NOTE: ASCII versions of regcmp() compile the range a-z in an 2N/A * ASCII character class to THRUaz. The internationalized 2N/A * version compiles the same range to aTHRUz. 2N/A}
/* test_char_against_old_ascii_class() */ 2N/A * returns a pointer to the first character following the first 2N/A * substring of the string addressed by stringp that matches 2N/A * the compiled regular expression addressed by regexp 2N/A * Exit the loop via a return whenever there's a match 2N/A * or it's clear that there can be no match. 2N/A * Each case ends with either a return or with stringp 2N/A * addressing the next character to be tested and regexp 2N/A * addressing the next compiled regular expression 2N/A * NOTE: The comments for each case give the meaning 2N/A * of the compiled regular expression decoded by the case 2N/A * and the character string that the compiled regular 2N/A * expression uses to encode the case. Each single 2N/A * character encoded in the compiled regular expression 2N/A * is shown enclosed in angle brackets (<>). Each 2N/A * compiled regular expression begins with a marker 2N/A * character which is shown as a named constant 2N/A * (e.g. <ASCII_CHAR>). Character constants are shown 2N/A * enclosed in single quotes (e.g. <'$'>). All other 2N/A * single characters encoded in the compiled regular 2N/A * expression are shown as lower case variable names 2N/A * (e.g. <ascii_char> or <multibyte_char>). Multicharacter 2N/A * strings encoded in the compiled regular expression 2N/A * are shown as variable names followed by ellipses 2N/A * (e.g. <compiled_regex...>). 2N/A /* encoded as <ASCII_CHAR><ascii_char> */ 2N/A break;
/* end case ASCII_CHAR */ 2N/A /* encoded as <MULTIBYTE_CHAR><multibyte_char> */ 2N/A break;
/* end case MULTIBYTE_CHAR */ 2N/A case ANY_CHAR:
/* any single ASCII or multibyte char */ 2N/A /* encoded as <ANY_CHAR> */ 2N/A break;
/* end case ANY_CHAR */ 2N/A * encoded as <IN_ASCII_CHAR_CLASS><class_length><class...> 2N/A * or <NOT_IN_ASCII_CHAR_CLASS><class_length><class...> 2N/A * NOTE: <class_length> includes the <class_length> byte 2N/A regexp++;
/* point to the <class_length> byte */ 2N/A break;
/* end case IN_ASCII_CHAR_CLASS */ 2N/A * encoded as <IN_MULTIBYTE_CHAR_CLASS><class_length><class...> 2N/A * or <NOT_IN_MULTIBYTE_CHAR_CLASS><class_length><class...> 2N/A * NOTE: <class_length> includes the <class_length> byte 2N/A regexp++;
/* point to the <class_length> byte */ 2N/A break;
/* end case IN_MULTIBYTE_CHAR_CLASS */ 2N/A * encoded as <IN_OLD_ASCII_CHAR_CLASS><class_length><class...> 2N/A * or <NOT_IN_OLD_ASCII_CHAR_CLASS><class_length><class...> 2N/A * NOTE: <class_length> includes the <class_length> byte 2N/A regexp++;
/* point to the <class_length> byte */ 2N/A break;
/* end case [NOT_]IN_OLD_ASCII_CHAR_CLASS */ 2N/A /* encoded as <SIMPLE_GROUP><group_length> */ 2N/A break;
/* end case SIMPLE_GROUP */ 2N/A /* encoded as <END_GROUP><groupn> */ 2N/A break;
/* end case END_GROUP */ 2N/A /* encoded as <SAVED_GROUP><substringn> */ 2N/A break;
/* end case SAVED_GROUP */ 2N/A * encoded as <END_SAVED_GROUP><substringn>\ 2N/A * <return_arg_number[substringn]> 2N/A break;
/* end case END_SAVED_GROUP */ 2N/A /* encoded as <ASCII_CHAR|ZERO_OR_MORE><ascii_char> */ 2N/A /* end case ASCII_CHAR|ZERO_OR_MORE */ 2N/A /* encoded as <ASCII_CHAR|ONE_OR_MORE><ascii_char> */ 2N/A /* end case ASCII_CHAR|ONE_OR_MORE */ 2N/A * encoded as <ASCII_CHAR|COUNT><ascii_char>\ 2N/A * <minimum_match_count><maximum_match_count> 2N/A /* end case ASCII_CHAR|COUNT */ 2N/A /* encoded as <MULTIBYTE_CHAR|ZERO_OR_MORE><multibyte_char> */ 2N/A /* end case MULTIBYTE_CHAR|ZERO_OR_MORE */ 2N/A /* encoded as <MULTIBYTE_CHAR|ONE_OR_MORE><multibyte_char> */ 2N/A /* end case MULTIBYTE_CHAR|ONE_OR_MORE */ 2N/A * encoded as <MULTIBYTE_CHAR|COUNT><multibyte_char>\ 2N/A * <minimum_match_count><maximum_match_count> 2N/A /* end case MULTIBYTE_CHAR|COUNT */ 2N/A /* encoded as <ANY_CHAR|ZERO_OR_MORE> */ 2N/A /* end case <ANY_CHAR|ZERO_OR_MORE> */ 2N/A /* encoded as <ANY_CHAR|ONE_OR_MORE> */ 2N/A /* end case <ANY_CHAR|ONE_OR_MORE> */ 2N/A * encoded as <ANY_CHAR|COUNT>\ 2N/A * <minimum_match_count><maximum_match_count> 2N/A }
else {
/* multibyte character */ 2N/A }
/* end case ANY_CHAR|COUNT */ 2N/A * encoded as <IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 2N/A * <class_length><class ...> 2N/A * or <NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 2N/A * <class_length><class ...> 2N/A * NOTE: <class_length> includes the <class_length> byte 2N/A regexp++;
/* point to the <class_length> byte */ 2N/A /* end case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE */ 2N/A * encoded as <IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 2N/A * <class_length><class ...> 2N/A * or <NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 2N/A * <class_length><class ...> 2N/A * NOTE: <class_length> includes the <class_length> byte 2N/A regexp++;
/* point to the <class_length> byte */ 2N/A /* end case IN_ASCII_CHAR_CLASS|ONE_OR_MORE */ 2N/A * encoded as <IN_ASCII_CHAR_CLASS|COUNT><class_length>\ 2N/A * <class ...><minimum_match_count>\ 2N/A * <maximum_match_count> 2N/A * or <NOT_IN_ASCII_CHAR_CLASS|COUNT><class_length>\ 2N/A * <class ...><minimum_match_count>\ 2N/A * <maximum_match_count> 2N/A * NOTE: <class_length> includes the <class_length> byte, 2N/A * but not the <minimum_match_count> or 2N/A * <maximum_match_count> bytes 2N/A regexp++;
/* point to the <class_length> byte */ 2N/A /* end case IN_ASCII_CHAR_CLASS|COUNT */ 2N/A * encoded as <IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\ 2N/A * <class_length><class ...> 2N/A * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\ 2N/A * <class_length><class ...> 2N/A * NOTE: <class_length> includes the <class_length> byte 2N/A regexp++;
/* point to the <class_length> byte */ 2N/A /* end case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE */ 2N/A * encoded as <IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\ 2N/A * <class_length><class ...> 2N/A * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\ 2N/A * <class_length><class ...> 2N/A * NOTE: <class_length> includes the <class_length> byte 2N/A regexp++;
/* point to the <class_length> byte */ 2N/A /* end case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE */ 2N/A * encoded as <IN_MULTIBYTE_CHAR_CLASS|COUNT>\ 2N/A * <class_length><class ...><min_count><max_count> 2N/A * or <NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT>\ 2N/A * <class_length><class ...><min_count><max_count> 2N/A * NOTE: <class_length> includes the <class_length> byte 2N/A * but not the <minimum_match_count> or 2N/A * <maximum_match_count> bytes 2N/A regexp++;
/* point to the <class_length> byte */ 2N/A /* end case IN_MULTIBYTE_CHAR_CLASS|COUNT */ 2N/A * encoded as <IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 2N/A * <class_length><class ...> 2N/A * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 2N/A * <class_length><class ...> 2N/A * NOTE: <class_length> includes the <class_length> byte 2N/A regexp++;
/* point to the <class_length> byte */ 2N/A /* end case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE */ 2N/A * encoded as <IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 2N/A * <class_length><class ...> 2N/A * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 2N/A * <class_length><class ...> 2N/A * NOTE: <class_length> includes the <class_length> byte 2N/A regexp++;
/* point to the <class_length> byte */ 2N/A /* end case IN_OLD_ASCII_CHAR_CLASS | ONE_OR_MORE */ 2N/A * encoded as <IN_OLD_ASCII_CHAR_CLASS|COUNT><class_length>\ 2N/A * <class ...><minimum_match_count>\ 2N/A * <maximum_match_count> 2N/A * or <NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT>\ 2N/A * <class_length><class ...><minimum_match_count>\ 2N/A * <maximum_match_count> 2N/A * NOTE: <class_length> includes the <class_length> byte 2N/A * but not the <minimum_match_count> or 2N/A * <maximum_match_count> bytes 2N/A regexp++;
/* point to the <class_length> byte */ 2N/A /* end case IN_OLD_ASCII_CHAR_CLASS|COUNT */ 2N/A * encoded as <ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>\ 2N/A * <group_length><compiled_regex...>\ 2N/A * <END_GROUP|ZERO_OR_MORE><groupn> 2N/A * group_length + (256 * ADDED_LENGTH_BITS) == 2N/A * length_of(<compiled_regex...><END_GROUP|ZERO_OR_MORE>\ 2N/A /* end case ZERO_OR_MORE_GROUP */ 2N/A /* encoded as <END_GROUP|ZERO_OR_MORE> */ 2N/A /* return from recursive call to test_string() */ 2N/A /* end case END_GROUP|ZERO_OR_MORE */ 2N/A * encoded as <ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>\ 2N/A * <group_length><compiled_regex...>\ 2N/A * <END_GROUP|ONE_OR_MORE><groupn> 2N/A * group_length + (256 * ADDED_LENGTH_BITS) == 2N/A * length_of(<compiled_regex...><END_GROUP|ONE_OR_MORE>\ 2N/A /* end case ONE_OR_MORE_GROUP */ 2N/A /* encoded as <END_GROUP|ONE_OR_MORE><groupn> */ 2N/A /* return from recursive call to test_string() */ 2N/A /* end case END_GROUP|ONE_OR_MORE */ 2N/A * encoded as <COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\ 2N/A * <compiled_regex...>\<END_GROUP|COUNT><groupn>\ 2N/A * <minimum_match_count><maximum_match_count> 2N/A * group_length + (256 * ADDED_LENGTH_BITS) == 2N/A * length_of(<compiled_regex...><END_GROUP|COUNT><groupn>) 2N/A * but does not include the <minimum_match_count> or 2N/A * <maximum_match_count> bytes 2N/A /* end case COUNTED_GROUP */ 2N/A /* encoded as <END_GROUP|COUNT> */ 2N/A /* return from recursive call to test_string() */ 2N/A /* end case END_GROUP|COUNT */ 2N/A /* encoded as <END_OF_STRING_MARK><END_REGEX> */ 2N/A break;
/* end case END_OF_STRING_MARK */ 2N/A case END_REGEX:
/* end of the compiled regular expression */ 2N/A /* encoded as <END_REGEX> */ 2N/A /* end case END_REGEX */ 2N/A }
/* end switch (*regexp) */ 2N/A }
/* end for (;;) */ 2N/A}
/* test_string() */