regcomp.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/* regcomp.c
*/
/*
* "A fair jaw-cracker dwarf-language must be." --Samwise Gamgee
*/
/* NOTE: this is derived from Henry Spencer's regexp code, and should not
* confused with the original package (see point 3 below). Thanks, Henry!
*/
/* Additional note: this code is very heavily munged from Henry's version
* in places. In some spots I've traded clarity for efficiency, so don't
* blame Henry for some of the lack of readability.
*/
/* The names of the functions have been changed from regcomp and
* regexec to pregcomp and pregexec in order to avoid conflicts
* with the POSIX routines of the same names.
*/
#ifdef PERL_EXT_RE_BUILD
/* need to replace pregcomp et al, so enable that */
# ifndef PERL_IN_XSUB_RE
# define PERL_IN_XSUB_RE
# endif
/* need access to debugger hooks */
# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
# define DEBUGGING
# endif
#endif
#ifdef PERL_IN_XSUB_RE
/* We *really* need to overwrite these symbols: */
# define Perl_pregcomp my_regcomp
# define Perl_regdump my_regdump
# define Perl_regprop my_regprop
# define Perl_pregfree my_regfree
# define Perl_re_intuit_string my_re_intuit_string
/* *These* symbols are masked to allow static link. */
# define Perl_regnext my_regnext
# define Perl_save_re_context my_save_re_context
# define Perl_reginitcolors my_reginitcolors
# define PERL_NO_GET_CONTEXT
#endif
/*SUPPRESS 112*/
/*
* pregcomp and pregexec -- regsub and regerror are not used in perl
*
* Copyright (c) 1986 by University of Toronto.
* Written by Henry Spencer. Not derived from licensed software.
*
* Permission is granted to anyone to use this software for any
* purpose on any computer system, and to redistribute it freely,
* subject to the following restrictions:
*
* 1. The author is not responsible for the consequences of use of
* this software, no matter how awful, even if they arise
* from defects in it.
*
* 2. The origin of this software must not be misrepresented, either
* by explicit claim or by omission.
*
* 3. Altered versions must be plainly marked as such, and must not
* be misrepresented as being the original software.
*
*
**** Alterations to Henry's code are...
****
**** Copyright (c) 1991-2001, Larry Wall
****
**** You may distribute under the terms of either the GNU General Public
**** License or the Artistic License, as specified in the README file.
*
* Beware that some of this code is subtly aware of the way operator
* precedence is structured in regular expressions. Serious changes in
* regular-expression syntax might require a total rethink.
*/
#include "EXTERN.h"
#define PERL_IN_REGCOMP_C
#include "perl.h"
#ifdef PERL_IN_XSUB_RE
# if defined(PERL_CAPI) || defined(PERL_OBJECT)
# include "XSUB.h"
# endif
#else
# include "INTERN.h"
#endif
#define REG_COMP_C
#include "regcomp.h"
#ifdef op
#endif /* op */
#ifdef MSDOS
# if defined(BUGGY_MSC6)
/* But MSC 6.00A is happy with 'w', for aliases only across function calls*/
# endif /* BUGGY_MSC6 */
#endif /* MSDOS */
#ifndef STATIC
#define STATIC static
#endif
((*s) == '{' && regcurly(s)))
#ifdef SPSTART
#endif
/*
* Flags to be passed up and down.
*/
#define WORST 0 /* Worst case. */
/* Length of a variant. */
typedef struct scan_data_t {
SV *last_found;
struct regnode_charclass_class *start_class;
} scan_data_t;
/*
* Forward declarations for pregcomp()'s friends.
*/
static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0};
#define SF_BEFORE_SEOL 0x1
#define SF_BEFORE_MEOL 0x2
#ifdef NO_UNARY_PLUS
# define SF_FIX_SHIFT_EOL (0+2)
# define SF_FL_SHIFT_EOL (0+4)
#else
# define SF_FIX_SHIFT_EOL (+2)
# define SF_FL_SHIFT_EOL (+4)
#endif
#define SF_IS_INF 0x40
#define SF_HAS_PAR 0x80
#define SF_IN_PAR 0x100
#define SF_HAS_EVAL 0x200
#define SCF_DO_SUBSTR 0x400
#define SCF_DO_STCLASS_AND 0x0800
#define SCF_DO_STCLASS_OR 0x1000
#define SCF_WHILEM_VISITED_POS 0x2000
#define RF_utf8 8
#define OOB_CHAR8 1234
#define OOB_UTF8 123456
#define OOB_NAMEDCLASS -1
/* length of regex to show in messages that don't mark a position within */
#define RegexLengthToShowInErrorMessages 127
/*
* If MARKER[12] are adjusted, be sure to adjust the constants at the top
*/
/*
* Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given
* arg. Show regex, up to a maximum length. If it's too long, chop and add
* "...".
*/
STMT_START { \
char *ellipses = ""; \
\
if (!SIZE_ONLY) \
\
if (len > RegexLengthToShowInErrorMessages) { \
/* chop 10 shorter than the max, to ensure meaning of "..." */ \
ellipses = "..."; \
} \
} STMT_END
/*
* Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given
* args. Show regex, up to a maximum length. If it's too long, chop and add
* "...".
*/
STMT_START { \
char *ellipses = ""; \
\
if (!SIZE_ONLY) \
\
if (len > RegexLengthToShowInErrorMessages) { \
/* chop 10 shorter than the max, to ensure meaning of "..." */ \
ellipses = "..."; \
} \
} STMT_END
/*
* Simple_vFAIL -- like FAIL, but marks the current location in the scan
*/
#define Simple_vFAIL(m) \
STMT_START { \
\
} STMT_END
/*
* Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL()
*/
#define vFAIL(m) \
STMT_START { \
if (!SIZE_ONLY) \
Simple_vFAIL(m); \
} STMT_END
/*
* Like Simple_vFAIL(), but accepts two arguments.
*/
#define Simple_vFAIL2(m,a1) \
STMT_START { \
\
} STMT_END
/*
* Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL2().
*/
STMT_START { \
if (!SIZE_ONLY) \
Simple_vFAIL2(m, a1); \
} STMT_END
/*
* Like Simple_vFAIL(), but accepts three arguments.
*/
STMT_START { \
\
} STMT_END
/*
* Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL3().
*/
STMT_START { \
if (!SIZE_ONLY) \
} STMT_END
/*
* Like Simple_vFAIL(), but accepts four arguments.
*/
STMT_START { \
\
} STMT_END
/*
* Like Simple_vFAIL(), but accepts five arguments.
*/
STMT_START { \
} STMT_END
STMT_START { \
} STMT_END \
STMT_START { \
a1, \
} STMT_END
STMT_START { \
} STMT_END
STMT_START { \
} STMT_END
/* Allow for side effects in s */
/* Mark that we cannot extend a found fixed substring at this point.
Updata the longest found anchored substring and the longest found
floating substrings if needed. */
STATIC void
{
else
}
else {
data->offset_float_max = (l
else
}
}
}
/* Can match anything (initialization) */
STATIC void
{
int value;
if (LOC)
}
/* Can match anything (initialization) */
STATIC int
{
int value;
return 1;
return 0;
return 1;
}
/* Can match anything (initialization) */
STATIC void
{
}
STATIC void
{
if (LOC)
}
/* 'And' a given class with another one. Can create false positives */
/* We assume that cl is not inverted */
STATIC void
struct regnode_charclass_class *and_with)
{
int i;
for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
else
for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
} /* XXXX: logic is complicated otherwise, leave it along for a moment. */
}
/* 'OR' a given class with another one. Can create false positives */
/* We assume that cl is not inverted */
STATIC void
{
/* We do not use
* (B1 | CL1) | (!B2 & !CL2) = (B1 | !B2 & !CL2) | (CL1 | (!B2 & !CL2))
* <= (B1 | !B2) | (CL1 | !CL2)
* which is wasteful if CL2 is small, but we ignore CL2:
* (B1 | CL1) | (!B2 & !CL2) <= (B1 | CL1) | !B2 = (B1 | !B2) | CL1
* XXXX Can we handle case-fold? Unclear:
* (OK1(i) | OK1(i')) | !(OK1(i) | OK1(i')) =
* (OK1(i) | OK1(i')) | (!OK1(i) & !OK1(i'))
*/
int i;
for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
} /* XXXX: logic is complicated otherwise */
else {
}
} else {
/* (B1 | CL1) | (B2 | CL2) = (B1 | B2) | (CL1 | CL2)) */
int i;
/* OR char bitmap and class bitmap separately */
for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
for (i = 0; i < ANYOF_CLASSBITMAP_SIZE; i++)
}
}
else { /* XXXX: logic is complicated, leave it along for a moment. */
}
}
}
/* REx optimizer. Converts nodes into quickier variants "in place".
Finds fixed substrings. */
/* Stops at toplevel WHILEM as well as at `last'. At end *scanp is set
to the position after last scanned or to NULL. */
/* scanp: Start here (read-write). */
/* deltap: Write maxlen-minlen here. */
/* last: Stop before this one. */
{
int is_inf_internal = 0; /* The studied chunk is infinite */
/* Peephole optimizer: */
/* Merge several consecutive EXACTish nodes into one. */
#ifdef DEBUGGING
#endif
/* Skip NOTHING, merge EXACT*. */
while (n &&
&& NEXT_OFF(n)
stringok = 0;
next = n + NODE_STEP_REGNODE;
#ifdef DEBUGGING
if (stringok)
stop = n;
#endif
n = regnext(n);
}
else if (stringok) {
break;
next = n + NODE_SZ_STR(n);
/* Now we can overwrite *n : */
#ifdef DEBUGGING
#endif
n = nnext;
}
}
#ifdef DEBUGGING
/* Allow dumping */
while (n <= stop) {
NEXT_OFF(n) = 0;
}
n++;
}
#endif
}
/* Follow the next-chain of the current node and optimize
away all the NOTHINGs from it. */
? I32_MAX
/* I32 may be smaller than U16 on CRAYs! */
int noff;
/* Skip NOTHING and LONGJMP. */
while ((n = regnext(n))
else
}
/* The principal pseudo-switch. Cannot be a switch, since we
look into several different things. */
struct regnode_charclass_class accum;
if (flags & SCF_DO_STCLASS)
struct regnode_charclass_class this_class;
num++;
if (data) {
}
else
if (flags & SCF_DO_STCLASS) {
f = SCF_DO_STCLASS_AND;
}
if (flags & SCF_WHILEM_VISITED_POS)
f |= SCF_WHILEM_VISITED_POS;
/* we suppose the run is continuous, last=next...*/
&data_fake, f);
pars++;
if (data)
if (flags & SCF_DO_STCLASS)
break;
}
min1 = 0;
if (flags & SCF_DO_SUBSTR) {
}
if (flags & SCF_DO_STCLASS_OR) {
if (min1) {
flags &= ~SCF_DO_STCLASS;
}
}
else if (flags & SCF_DO_STCLASS_AND) {
if (min1) {
flags &= ~SCF_DO_STCLASS;
}
else {
/* Switch to OR mode: cache the old value of
* data->start_class */
struct regnode_charclass_class);
flags &= ~SCF_DO_STCLASS_AND;
struct regnode_charclass_class);
}
}
}
else /* single branch is optimized. */
continue;
}
if (UTF) {
unsigned char *e = s + l;
while (s < e) {
newl++;
s += UTF8SKIP(s);
}
l = newl;
}
min += l;
/* The code below prefers earlier match for fixed
offset, later match for variable offset. */
}
}
if (flags & SCF_DO_STCLASS_AND) {
/* Check whether it is compatible with what we know already! */
int compat = 1;
compat = 0;
if (compat)
}
else if (flags & SCF_DO_STCLASS_OR) {
/* false positive possible if the class is case-folded */
}
flags &= ~SCF_DO_STCLASS;
}
/* Search for fixed substrings supports EXACT only. */
if (flags & SCF_DO_SUBSTR)
if (UTF) {
unsigned char *e = s + l;
while (s < e) {
newl++;
s += UTF8SKIP(s);
}
l = newl;
}
min += l;
if (flags & SCF_DO_STCLASS_AND) {
/* Check whether it is compatible with what we know already! */
int compat = 1;
compat = 0;
if (compat) {
}
}
else if (flags & SCF_DO_STCLASS_OR) {
/* false positive possible if the class is case-folded.
Assume that the locale settings are the same... */
}
}
flags &= ~SCF_DO_STCLASS;
}
struct regnode_charclass_class this_class;
case WHILEM: /* End of (?:...)* . */
goto finish;
case PLUS:
mincount = 1;
goto do_curly;
}
}
if (flags & SCF_DO_SUBSTR)
min++;
/* Fall through. */
case STAR:
if (flags & SCF_DO_STCLASS) {
mincount = 0;
goto do_curly;
}
if (flags & SCF_DO_SUBSTR) {
}
goto optimize_curly_tail;
case CURLY:
}
if (flags & SCF_DO_SUBSTR) {
}
if (data) {
if (is_inf)
}
if (flags & SCF_DO_STCLASS) {
f |= SCF_DO_STCLASS_AND;
f &= ~SCF_DO_STCLASS_OR;
}
/* These are the cases when once a subexpression
fails at a particular position, it cannot succeed
even after backtracking at the enclosing scope.
XXXX what if minimal match and we are at the
initial run of {n,m}? */
f &= ~SCF_WHILEM_VISITED_POS;
/* This will finish on WHILEM, setting scan, or on NULL: */
mincount == 0
? (f & ~SCF_DO_SUBSTR) : f);
if (flags & SCF_DO_STCLASS)
if (flags & SCF_DO_STCLASS_OR) {
}
else if (flags & SCF_DO_STCLASS_AND) {
/* Switch to OR mode: cache the old value of
* data->start_class */
struct regnode_charclass_class);
flags &= ~SCF_DO_STCLASS_AND;
struct regnode_charclass_class);
}
} else { /* Non-zero len */
if (flags & SCF_DO_STCLASS_OR) {
}
else if (flags & SCF_DO_STCLASS_AND)
flags &= ~SCF_DO_STCLASS;
}
if (!scan) /* It was not CURLYX, but CURLY. */
{
"Quantifier unexpected on zero-length expression");
}
/* Try powerful optimization CURLYX => CURLYN. */
/* Try to optimize to CURLYN. */
/* Skip open. */
goto nogo;
goto nogo;
/* Now we know that nxt2 is the only contents: */
#ifdef DEBUGGING
#endif
}
nogo:
/* Try optimization CURLYX => CURLYM. */
&& !deltanext ) {
/* XXXX How to optimize if data == 0? */
/* Optimize to a simpler form. */
/* Need to optimize away parenths. */
/* Set the parenth number. */
FAIL("Panic opt close");
#ifdef DEBUGGING
#endif
#if 0
else
}
}
#endif
/* Optimize again: */
}
else
}
&& (flags & SCF_WHILEM_VISITED_POS)
/* See the comment on a similar expression above.
However, this time it not a subexpression
we care about, but the expression itself. */
/* Find WHILEM (as in regexec.c) */
}
pars++;
if (flags & SCF_DO_SUBSTR) {
STRLEN l;
if (UTF)
l -= old;
/* Get the added string: */
if (deltanext == 0 && pos_before == b) {
/* What was added is a constant string */
if (mincount > 1) {
/* Add additional parts. */
}
} else {
/* start offset must point into the last copy */
}
}
/* It is counted once already... */
/* Cannot extend fixed substrings found inside
the group. */
? I32_MAX
}
}
}
}
continue;
default: /* REF and CLUMP only? */
if (flags & SCF_DO_SUBSTR) {
}
if (flags & SCF_DO_STCLASS_OR)
flags &= ~SCF_DO_STCLASS;
break;
}
}
int value;
if (flags & SCF_DO_SUBSTR) {
}
min++;
if (flags & SCF_DO_STCLASS) {
/* Some of the logic below assumes that switching
locale on will only add false positives. */
case ANYUTF8:
case SANY:
case SANYUTF8:
case ALNUMUTF8:
case ANYOFUTF8:
case ALNUMLUTF8:
case NALNUMUTF8:
case NALNUMLUTF8:
case SPACEUTF8:
case NSPACEUTF8:
case SPACELUTF8:
case NSPACELUTF8:
case DIGITUTF8:
case NDIGITUTF8:
default:
/* Perl_croak(aTHX_ "panic: unexpected simple REx opcode %d", OP(scan)); */
break;
case REG_ANY:
goto do_default;
}
break;
case ANYOF:
if (flags & SCF_DO_STCLASS_AND)
(struct regnode_charclass_class*)scan);
else
(struct regnode_charclass_class*)scan);
break;
case ALNUM:
if (flags & SCF_DO_STCLASS_AND) {
}
}
else {
else {
}
}
break;
case ALNUML:
if (flags & SCF_DO_STCLASS_AND) {
}
else {
}
break;
case NALNUM:
if (flags & SCF_DO_STCLASS_AND) {
}
}
else {
else {
}
}
break;
case NALNUML:
if (flags & SCF_DO_STCLASS_AND) {
}
else {
}
break;
case SPACE:
if (flags & SCF_DO_STCLASS_AND) {
}
}
else {
else {
}
}
break;
case SPACEL:
if (flags & SCF_DO_STCLASS_AND) {
}
else {
}
break;
case NSPACE:
if (flags & SCF_DO_STCLASS_AND) {
}
}
else {
else {
}
}
break;
case NSPACEL:
if (flags & SCF_DO_STCLASS_AND) {
}
}
else {
}
break;
case DIGIT:
if (flags & SCF_DO_STCLASS_AND) {
}
else {
else {
}
}
break;
case NDIGIT:
if (flags & SCF_DO_STCLASS_AND) {
}
else {
else {
}
}
break;
}
if (flags & SCF_DO_STCLASS_OR)
flags &= ~SCF_DO_STCLASS;
}
}
: SF_BEFORE_SEOL);
}
/* Lookahead/lookbehind */
struct regnode_charclass_class intrnl;
int f = 0;
if (data) {
}
else
f |= SCF_DO_STCLASS_AND;
}
if (flags & SCF_WHILEM_VISITED_POS)
f |= SCF_WHILEM_VISITED_POS;
if (deltanext) {
vFAIL("Variable length lookbehind not implemented");
}
}
}
pars++;
if (data)
if (f & SCF_DO_STCLASS_AND) {
if (was)
}
}
pars++;
}
is_par = 0; /* Disable optimization */
}
if (data)
}
if (data)
}
if (flags & SCF_DO_SUBSTR) {
}
flags &= ~SCF_DO_STCLASS;
}
/* Else: zero-length, ignore. */
}
is_par = 0;
}
}
if (flags & SCF_DO_STCLASS_OR)
return min;
}
{
if (PL_regcomp_rx->data) {
char, struct reg_data);
}
else {
char, struct reg_data);
}
}
void
{
int i = 0;
char *s = PerlEnv_getenv("PERL_RE_COLORS");
if (s) {
while (++i < 6) {
s = strchr(s, '\t');
if (s) {
*s = '\0';
PL_colors[i] = ++s;
}
else
PL_colors[i] = s = "";
}
} else {
while (i < 6)
PL_colors[i++] = "";
}
PL_colorset = 1;
}
/*
- pregcomp - compile a regular expression into internal code
*
* We can't allocate space until we know how big the compiled form will be,
* but we can't compile it (and thus know how big it is) until we've got a
* place to put the code. So we cheat: we compile it twice, once with code
* generation turned off and size counting turned on, and once "for real".
* This also means that we don't allocate space until we are sure that the
* thing really will compile successfully, and we never have to move the
* code and thus invalidate pointers into it. (Note that it has to be in
* one piece because free() must be able to free it all.) [NB: not true in perl]
*
* Beware that the optimization-preparation code in here knows about some
* of the structure of the compiled regexp. [I'll say.]
*/
regexp *
{
register regexp *r;
FAIL("NULL regexp argument");
PL_reg_flags |= RF_utf8;
}
else
PL_reg_flags = 0;
PL_regprecomp = exp;
PL_regsawback = 0;
PL_regseen = 0;
PL_seen_evals = 0;
PL_extralen = 0;
/* First pass: determine size, legality. */
PL_regxend = xend;
PL_regnaughty = 0;
PL_regnpar = 1;
PL_regsize = 0L;
PL_reg_whilem_seen = 0;
#if 0 /* REGC() is (currently) a NOP at the first pass.
* Clever compilers notice this and complain. --jhi */
#endif
return(NULL);
}
/* Small enough for pointer-storage convention?
If extralen==0, this means that we will not need long jumps. */
else
PL_extralen = 0;
if (PL_reg_whilem_seen > 15)
PL_reg_whilem_seen = 15;
/* Allocate space and initialize. */
char, regexp);
if (r == NULL)
FAIL("Regexp out of space");
#ifdef DEBUGGING
/* avoid reading uninitialized memory in DEBUGGING code in study_chunk() */
#endif
r->refcnt = 1;
r->substrs = 0; /* Useful during FAIL. */
r->startp = 0; /* Useful during FAIL. */
r->endp = 0; /* Useful during FAIL. */
PL_regcomp_rx = r;
/* Second pass: emit code. */
PL_regxend = xend;
PL_regnaughty = 0;
PL_regnpar = 1;
PL_regcode = r->program;
/* Store the count of eval-groups for security checks: */
r->data = 0;
return(NULL);
/* Dig out information for optimizations. */
if (UTF)
r->regstclass = NULL;
r->reganch |= ROPT_NAUGHTY;
/* XXXX To minimize changes to RE engine we always allocate
3-units-long substrs field. */
/* XXXX Should not we check for something else? Usually it is OPEN1... */
struct regnode_charclass_class ch_class;
int stclass_flag;
I32 last_close = 0;
/* Skip introductions and multiplicators >= 1. */
/* An OR of *one* alternative - should not happen now. */
/* An {n,m} with n>0 */
sawplus = 1;
else
}
/* Starting-point info. */
&& !UTF)
r->regstclass = first;
}
r->regstclass = first;
r->regstclass = first;
: ROPT_ANCH_BOL));
goto again;
}
r->reganch |= ROPT_ANCH_GPOS;
goto again;
}
{
/* turn .* into ^.* with an implied $*=1 */
else
goto again;
}
/* x+ must match at the 1st pos of run of x's */
/* Scan is after the zeroth branch, first is atomic matcher. */
/*
* If there's something expensive in the r.e., find the
* longest literal string that must appear and make it the
* regmust. Resolve ties in favor of later strings, since
* the regstart check works with the beginning of the r.e.
* and avoiding duplication strengthens checking. Not a
* strong reason, but sufficient in the absence of others.
* [Now we resolve ties in favor of the earlier string if
* it happens that c_offset_min has been invalidated, since the
* earlier string may buy us something the later one won't.]
*/
minlen = 0;
if (!r->regstclass) {
} else /* XXXX Check for BOUND? */
stclass_flag = 0;
&& !PL_seen_zerolen
r->reganch |= ROPT_CHECK_ALL;
scan_commit(&data);
|| (PL_regflags & PMf_MULTILINE)))) {
int t;
goto remove_float; /* As in (a)+. */
|| (PL_regflags & PMf_MULTILINE)));
}
else {
r->float_substr = Nullsv;
longest_float_length = 0;
}
|| (PL_regflags & PMf_MULTILINE)))) {
int t;
|| (PL_regflags & PMf_MULTILINE)));
}
else {
r->anchored_substr = Nullsv;
longest_fixed_length = 0;
}
if (r->regstclass
r->regstclass = NULL;
struct regnode_charclass_class);
struct regnode_charclass_class);
}
/* A temporary algorithm prefers floated substr to fixed one to dig more info. */
if (longest_fixed_length > longest_float_length) {
r->check_substr = r->anchored_substr;
if (r->reganch & ROPT_ANCH_SINGLE)
r->reganch |= ROPT_NOSCAN;
}
else {
r->check_substr = r->float_substr;
}
/* XXXX Currently intuiting is not compatible with ANCH_GPOS.
This should be changed ASAP! */
r->reganch |= RE_USE_INTUIT;
if (SvTAIL(r->check_substr))
r->reganch |= RE_INTUIT_TAIL;
}
}
else {
/* Several toplevels. Best we can is to set minlen. */
struct regnode_charclass_class ch_class;
I32 last_close = 0;
minlen = study_chunk(&scan, &fake, scan + PL_regsize, &data, SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS);
struct regnode_charclass_class);
struct regnode_charclass_class);
}
}
if (PL_regseen & REG_SEEN_GPOS)
r->reganch |= ROPT_GPOS_SEEN;
if (PL_regseen & REG_SEEN_LOOKBEHIND)
r->reganch |= ROPT_LOOKBEHIND_SEEN;
if (PL_regseen & REG_SEEN_EVAL)
r->reganch |= ROPT_EVAL_SEEN;
return(r);
}
/*
- reg - regular expression, i.e. main body or parenthesized thing
*
* Caller must absorb opening parenthesis.
*
* Combining parenthesis handling with the base level of regular expression
* is a trifle forced, but the need to tie the tails of the branches to what
* follows makes it hard to avoid.
*/
/* paren: Parenthesized? 0=top, 1=(, inside: changed to letter. */
{
char *oregcomp_parse = PL_regcomp_parse;
char c;
*flagp = 0; /* Tentatively. */
/* Make an OPEN node, if parenthesized. */
if (paren) {
if (*PL_regcomp_parse == '?') {
int logical = 0;
char *seqstart = PL_regcomp_parse;
paren = *PL_regcomp_parse++;
switch (paren) {
case '<':
if (*PL_regcomp_parse == '!')
paren = ',';
goto unknown;
case '=':
case '!':
case ':':
case '>':
break;
case '$':
case '@':
break;
case '#':
if (*PL_regcomp_parse != ')')
FAIL("Sequence (?#... not terminated");
nextchar();
return NULL;
case 'p':
if (SIZE_ONLY)
/* FALL THROUGH*/
case '?':
logical = 1;
paren = *PL_regcomp_parse++;
/* FALL THROUGH */
case '{':
{
char c;
char *s = PL_regcomp_parse;
while (count && (c = *PL_regcomp_parse)) {
else if (c == '{')
count++;
else if (c == '}')
count--;
}
if (*PL_regcomp_parse != ')')
{
PL_regcomp_parse = s;
vFAIL("Sequence (?{...}) not terminated or not {}-balanced");
}
if (!SIZE_ONLY) {
if (PL_regcomp_parse - 1 - s)
else
}
else { /* First pass */
if (PL_reginterp_cnt < ++PL_seen_evals
&& PL_curcop != &PL_compiling)
/* No compiled RE interpolated, has runtime
components ===> unsafe. */
FAIL("Eval-group not allowed at runtime, use re 'eval'");
if (PL_tainting && PL_tainted)
FAIL("Eval-group in insecure regular expression");
}
nextchar();
if (logical) {
if (!SIZE_ONLY)
return ret;
}
}
case '(':
{
if (PL_regcomp_parse[0] == '?') {
if (!SIZE_ONLY)
goto insert_if;
}
}
while (isDIGIT(*PL_regcomp_parse))
if ((c = *nextchar()) != ')')
vFAIL("Switch condition not recognized");
else
c = *nextchar();
if (c == '|') {
c = *nextchar();
}
else
if (c != ')')
vFAIL("Switch (?(condition)... contains too many branches");
if (lastbr) {
}
else
return ret;
}
else {
}
}
case 0:
PL_regcomp_parse--; /* for vFAIL to print correctly */
vFAIL("Sequence (? incomplete");
break;
default:
if (*PL_regcomp_parse != 'o')
}
if (*PL_regcomp_parse == '-') {
goto parse_flags;
}
PL_regflags |= posflags;
PL_regflags &= ~negflags;
if (*PL_regcomp_parse == ':') {
paren = ':';
break;
}
if (*PL_regcomp_parse != ')') {
}
nextchar();
return NULL;
}
}
else {
parno = PL_regnpar;
PL_regnpar++;
open = 1;
}
}
else
/* Pick up the branches, linking them together. */
return(NULL);
if (*PL_regcomp_parse == '|') {
if (!SIZE_ONLY && PL_extralen) {
}
else
have_branch = 1;
if (SIZE_ONLY)
}
else if (paren == ':') {
}
if (open) { /* Starts with OPEN. */
}
while (*PL_regcomp_parse == '|') {
if (!SIZE_ONLY && PL_extralen) {
}
if (SIZE_ONLY)
nextchar();
return(NULL);
}
/* Make a closing node, and hook it on the end. */
switch (paren) {
case ':':
break;
case 1:
break;
case '<':
case ',':
case '=':
case '!':
/* FALL THROUGH */
case '>':
break;
case 0:
break;
}
if (have_branch) {
/* Hook the tails of the branches to the closing node. */
}
}
}
{
char *p;
static char parens[] = "=!<,>";
if (paren == '>')
}
}
/* Check for proper termination. */
if (paren) {
vFAIL("Unmatched (");
}
}
if (*PL_regcomp_parse == ')') {
vFAIL("Unmatched )");
}
else
/* NOTREACHED */
}
return(ret);
}
/*
- regbranch - one alternative of an | operator
*
* Implements the concatenation operator.
*/
{
if (first)
else {
if (!SIZE_ONLY && PL_extralen)
else
}
nextchar();
continue;
return(NULL);
}
else {
}
c++;
}
}
if (c == 1) {
}
return(ret);
}
/*
- regpiece - something followed by possible [*+?]
*
* Note that the branching code sequences used for ? and the general cases
* of * and + are somewhat optimized: they use the same NOTHING node as
* both the endmarker for their branch list and the body of the last branch.
* It might seem that this node could be dispensed with entirely, but the
* endmarker role is not redundant.
*/
{
register char op;
register char *next;
char *origparse = PL_regcomp_parse;
char *maxpos;
return(NULL);
}
op = *PL_regcomp_parse;
if (*next == ',') {
if (maxpos)
break;
else
}
next++;
}
if (!maxpos)
if (*maxpos == ',')
maxpos++;
else
nextchar();
}
else {
w->flags = 0;
if (!SIZE_ONLY && PL_extralen) {
}
if (!SIZE_ONLY && PL_extralen)
if (SIZE_ONLY)
}
if (min > 0)
if (max > 0)
vFAIL("Can't do {n,m} with n > m");
if (!SIZE_ONLY) {
}
goto nest_check;
}
}
return(ret);
}
#if 0 /* Now runtime fix should be reliable. */
/* if this is reinstated, don't forget to put this back into perldiag:
=item Regexp *+ operand could be empty at {#} in regex m/%s/
(F) The part of the regexp subject to either the * or + quantifier
could match an empty string. The {#} shows in the regular
expression about where the problem was discovered.
*/
vFAIL("Regexp *+ operand could be empty");
#endif
nextchar();
PL_regnaughty += 4;
}
else if (op == '*') {
min = 0;
goto do_curly;
}
PL_regnaughty += 3;
}
else if (op == '+') {
min = 1;
goto do_curly;
}
else if (op == '?') {
goto do_curly;
}
"%.*s matches null string many times",
}
if (*PL_regcomp_parse == '?') {
nextchar();
}
if (ISMULT2(PL_regcomp_parse)) {
vFAIL("Nested quantifiers");
}
return(ret);
}
/*
- regatom - the lowest level
*
* Optimization: gobbles an entire sequence of ordinary characters so that
* it can turn them into a single node, which is smaller to store and
* faster to run. Backslashed characters are exceptions, each becoming a
* separate node; the code is simpler that way and it's not worth fixing.
*
* [Yes, it is worth fixing, some scripts can run twice the speed.] */
{
switch (*PL_regcomp_parse) {
case '^':
nextchar();
if (PL_regflags & PMf_MULTILINE)
else if (PL_regflags & PMf_SINGLELINE)
else
break;
case '$':
nextchar();
if (*PL_regcomp_parse)
if (PL_regflags & PMf_MULTILINE)
else if (PL_regflags & PMf_SINGLELINE)
else
break;
case '.':
nextchar();
if (UTF) {
if (PL_regflags & PMf_SINGLELINE)
else
}
else {
if (PL_regflags & PMf_SINGLELINE)
else
}
break;
case '[':
{
char *oregcomp_parse = ++PL_regcomp_parse;
if (*PL_regcomp_parse != ']') {
vFAIL("Unmatched [");
}
nextchar();
break;
}
case '(':
nextchar();
if (PL_regcomp_parse == PL_regxend) {
/* Make parent create an empty node if needed. */
return(NULL);
}
goto tryagain;
}
return(NULL);
}
break;
case '|':
case ')':
return NULL;
}
vFAIL("Internal urp");
/* Supposed to be caught earlier. */
break;
case '{':
if (!regcurly(PL_regcomp_parse)) {
goto defchar;
}
/* FALL THROUGH */
case '?':
case '+':
case '*':
vFAIL("Quantifier follows nothing");
break;
case '\\':
switch (*++PL_regcomp_parse) {
case 'A':
nextchar();
break;
case 'G':
nextchar();
break;
case 'Z':
nextchar();
break;
case 'z':
PL_seen_zerolen++; /* Do not optimize RE away */
nextchar();
break;
case 'C':
nextchar();
break;
case 'X':
nextchar();
if (UTF && !PL_utf8_mark)
break;
case 'w':
nextchar();
if (UTF && !PL_utf8_alnum)
break;
case 'W':
nextchar();
if (UTF && !PL_utf8_alnum)
break;
case 'b':
nextchar();
if (UTF && !PL_utf8_alnum)
break;
case 'B':
nextchar();
if (UTF && !PL_utf8_alnum)
break;
case 's':
nextchar();
if (UTF && !PL_utf8_space)
break;
case 'S':
nextchar();
if (UTF && !PL_utf8_space)
break;
case 'd':
nextchar();
if (UTF && !PL_utf8_digit)
break;
case 'D':
nextchar();
if (UTF && !PL_utf8_digit)
break;
case 'p':
case 'P':
{ /* a lovely hack--pretend we saw [\pX] instead */
char* oldregxend = PL_regxend;
if (!PL_regxend) {
PL_regcomp_parse += 2;
vFAIL("Missing right brace on \\p{}");
}
PL_regxend++;
}
else
ret = regclassutf8();
nextchar();
}
break;
case 'n':
case 'r':
case 't':
case 'f':
case 'e':
case 'a':
case 'x':
case 'c':
case '0':
goto defchar;
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
{
goto defchar;
else {
while (isDIGIT(*PL_regcomp_parse))
vFAIL("Reference to nonexistent group");
PL_regsawback = 1;
nextchar();
}
}
break;
case '\0':
if (PL_regcomp_parse >= PL_regxend)
FAIL("Trailing \\");
/* FALL THROUGH */
default:
/* Do not generate `unrecognized' warnings here, we fall
back into the quick-grab loop below */
goto defchar;
}
break;
case '#':
if (PL_regflags & PMf_EXTENDED) {
if (PL_regcomp_parse < PL_regxend)
goto tryagain;
}
/* FALL THROUGH */
default: {
register char *p;
char *oldp, *s;
: EXACT);
len++)
{
oldp = p;
if (PL_regflags & PMf_EXTENDED)
p = regwhite(p, PL_regxend);
switch (*p) {
case '^':
case '$':
case '.':
case '[':
case '(':
case ')':
case '|':
goto loopdone;
case '\\':
switch (*++p) {
case 'A':
case 'G':
case 'Z':
case 'z':
case 'w':
case 'W':
case 'b':
case 'B':
case 's':
case 'S':
case 'd':
case 'D':
case 'p':
case 'P':
--p;
goto loopdone;
case 'n':
ender = '\n';
p++;
break;
case 'r':
ender = '\r';
p++;
break;
case 't':
ender = '\t';
p++;
break;
case 'f':
ender = '\f';
p++;
break;
case 'e':
#ifdef ASCIIish
ender = '\033';
#else
ender = '\047';
#endif
p++;
break;
case 'a':
#ifdef ASCIIish
ender = '\007';
#else
ender = '\057';
#endif
p++;
break;
case 'x':
if (*++p == '{') {
char* e = strchr(p, '}');
if (!e) {
PL_regcomp_parse = p + 1;
vFAIL("Missing right brace on \\x{}");
}
else {
/* numlen is generous */
p--;
goto loopdone;
}
p = e + 1;
}
}
else {
numlen = 0; /* disallow underscores */
p += numlen;
}
break;
case 'c':
p++;
break;
case '0': case '1': case '2': case '3':case '4':
case '5': case '6': case '7': case '8':case '9':
if (*p == '0' ||
numlen = 0; /* disallow underscores */
p += numlen;
}
else {
--p;
goto loopdone;
}
break;
case '\0':
if (p >= PL_regxend)
FAIL("Trailing \\");
/* FALL THROUGH */
default:
goto normal_default;
}
break;
default:
if (UTF8_IS_START(*p) && UTF) {
&numlen, 0);
p += numlen;
}
else
ender = *p++;
break;
}
if (PL_regflags & PMf_EXTENDED)
p = regwhite(p, PL_regxend);
if (LOC)
else
}
if (ISMULT2(p)) { /* Back off on ?+*. */
if (len)
p = oldp;
/* ender is a Unicode value so it can be > 0xff --
* in other words, do not use UTF8_IS_CONTINUED(). */
s += numlen;
}
else {
len++;
}
break;
}
/* ender is a Unicode value so it can be > 0xff --
* in other words, do not use UTF8_IS_CONTINUED(). */
s += numlen;
}
else
}
PL_regcomp_parse = p - 1;
nextchar();
{
/* len is STRLEN which is unsigned, need to copy to signed */
if (iv < 0)
vFAIL("Internal disaster");
}
if (len > 0)
if (len == 1)
if (!SIZE_ONLY)
if (SIZE_ONLY)
else
}
break;
}
return(ret);
}
STATIC char *
S_regwhite(pTHX_ char *p, char *e)
{
while (p < e) {
if (isSPACE(*p))
++p;
else if (*p == '#') {
do {
p++;
} while (p < e && *p != '\n');
}
else
break;
}
return p;
}
/* Parse POSIX character classes: [[:foo:]], [[=foo=]], [[.foo.]].
Character classes ([:foo:]) can also be negated ([:^foo:]).
Returns a named class id (ANYOF_XXX) if successful, -1 otherwise.
Equivalence classes ([=foo=]) and composites ([.foo.]) are parsed,
but trigger warnings because they are currently unimplemented. */
{
char *posixcc = 0;
/* I smell either [: or [= or [. -- POSIX has been here, right? */
(*PL_regcomp_parse == ':' ||
*PL_regcomp_parse == '=' ||
*PL_regcomp_parse == '.')) {
char c = *PL_regcomp_parse;
char* s = PL_regcomp_parse++;
if (PL_regcomp_parse == PL_regxend)
/* Grandfather lone [:, [=, [. */
PL_regcomp_parse = s;
else {
char* t = PL_regcomp_parse++; /* skip over the c */
if (*PL_regcomp_parse == ']') {
PL_regcomp_parse++; /* skip over the ending ] */
posixcc = s + 1;
if (*s == ':') {
switch (*posixcc) {
case 'a':
break;
case 'b':
break;
case 'c':
break;
case 'd':
break;
case 'g':
break;
case 'l':
break;
case 'p':
break;
case 's':
break;
case 'u':
break;
case 'w': /* this is not POSIX, this is the Perl \w */
skip = 4;
}
break;
case 'x':
skip = 6;
}
break;
}
if (namedclass == OOB_NAMEDCLASS ||
{
Simple_vFAIL3("POSIX class [:%.*s:] unknown",
t - s - 1, s + 1);
}
} else if (!SIZE_ONLY) {
/* [[=foo=]] and [[.foo.]] are still future. */
/* adjust PL_regcomp_parse so the warning shows after
the class closes */
Simple_vFAIL3("POSIX syntax [%c %c] is reserved for future extensions", c, c);
}
} else {
/* Maternal grandfather:
* "[:" ending in ":" but not in ":]" */
PL_regcomp_parse = s;
}
}
}
return namedclass;
}
STATIC void
{
(*PL_regcomp_parse == ':' ||
*PL_regcomp_parse == '=' ||
*PL_regcomp_parse == '.')) {
char *s = PL_regcomp_parse;
char c = *s++;
while(*s && isALNUM(*s))
s++;
if (*s && c == *s && s[1] == ']') {
/* [[=foo=]] and [[.foo.]] are still future. */
if (c == '=' || c == '.')
{
/* adjust PL_regcomp_parse so the error shows after
the class closes */
;
Simple_vFAIL3("POSIX syntax [%c %c] is reserved for future extensions", c, c);
}
}
}
}
{
char *rangebegin;
bool need_class = 0;
if (SIZE_ONLY)
PL_regsize += ANYOF_SKIP;
else {
PL_regcode += ANYOF_SKIP;
if (FOLD)
if (LOC)
}
if (!SIZE_ONLY)
}
checkposixcc();
goto skipcond; /* allow 1st char to be ] or - */
if (!range)
if (value == '[')
else if (value == '\\') {
/* Some compilers cannot handle switching on 64-bit integer
* values, therefore the 'value' cannot be an UV. --jhi */
switch (value) {
#ifdef ASCIIish
#else
#endif
case 'x':
numlen = 0; /* disallow underscores */
break;
case 'c':
break;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
numlen = 0; /* disallow underscores */
break;
default:
break;
}
}
if (namedclass > OOB_NAMEDCLASS) {
if (!need_class && !SIZE_ONLY)
need_class = 1;
if (range) { /* a-\d, a-[:digit:] */
if (!SIZE_ONLY) {
if (ckWARN(WARN_REGEXP))
"False [] range \"%*.*s\"",
}
range = 0; /* this is not a true range */
}
if (!SIZE_ONLY) {
switch (namedclass) {
case ANYOF_ALNUM:
if (LOC)
else {
}
break;
case ANYOF_NALNUM:
if (LOC)
else {
}
break;
case ANYOF_SPACE:
if (LOC)
else {
}
break;
case ANYOF_NSPACE:
if (LOC)
else {
}
break;
case ANYOF_DIGIT:
if (LOC)
else {
}
break;
case ANYOF_NDIGIT:
if (LOC)
else {
}
break;
case ANYOF_NALNUMC:
if (LOC)
else {
}
break;
case ANYOF_ALNUMC:
if (LOC)
else {
}
break;
case ANYOF_ALPHA:
if (LOC)
else {
}
break;
case ANYOF_NALPHA:
if (LOC)
else {
}
break;
case ANYOF_ASCII:
if (LOC)
else {
#ifdef ASCIIish
#else /* EBCDIC */
#endif /* EBCDIC */
}
break;
case ANYOF_NASCII:
if (LOC)
else {
#ifdef ASCIIish
#else /* EBCDIC */
#endif /* EBCDIC */
}
break;
case ANYOF_BLANK:
if (LOC)
else {
}
break;
case ANYOF_NBLANK:
if (LOC)
else {
}
break;
case ANYOF_CNTRL:
if (LOC)
else {
}
break;
case ANYOF_NCNTRL:
if (LOC)
else {
}
break;
case ANYOF_GRAPH:
if (LOC)
else {
}
break;
case ANYOF_NGRAPH:
if (LOC)
else {
}
break;
case ANYOF_LOWER:
if (LOC)
else {
}
break;
case ANYOF_NLOWER:
if (LOC)
else {
}
break;
case ANYOF_PRINT:
if (LOC)
else {
}
break;
case ANYOF_NPRINT:
if (LOC)
else {
}
break;
case ANYOF_PSXSPC:
if (LOC)
else {
}
break;
case ANYOF_NPSXSPC:
if (LOC)
else {
}
break;
case ANYOF_PUNCT:
if (LOC)
else {
}
break;
case ANYOF_NPUNCT:
if (LOC)
else {
}
break;
case ANYOF_UPPER:
if (LOC)
else {
}
break;
case ANYOF_NUPPER:
if (LOC)
else {
}
break;
case ANYOF_XDIGIT:
if (LOC)
else {
}
break;
case ANYOF_NXDIGIT:
if (LOC)
else {
}
break;
default:
vFAIL("Invalid [::] class");
break;
}
if (LOC)
continue;
}
}
if (range) {
Simple_vFAIL4("Invalid [] range \"%*.*s\"",
}
range = 0;
}
else {
if (ckWARN(WARN_REGEXP))
"False [] range \"%*.*s\"",
if (!SIZE_ONLY)
} else
range = 1;
continue; /* do it next time */
}
}
/* now is the next time */
if (!SIZE_ONLY) {
#ifndef ASCIIish /* EBCDIC, for example. */
{
I32 i;
if (isLOWER(i))
ANYOF_BITMAP_SET(ret, i);
} else {
if (isUPPER(i))
ANYOF_BITMAP_SET(ret, i);
}
}
else
#endif
}
range = 0;
}
if (need_class) {
if (SIZE_ONLY)
else
}
/* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */
if (!SIZE_ONLY &&
}
}
}
/* optimize inverted simple patterns (e.g. [^a-z]) */
ANYOF_FLAGS(ret) = 0;
}
return ret;
}
{
register char *e;
I32 n;
char *rangebegin;
if (!SIZE_ONLY)
flags |= ANYOF_INVERT;
}
if (!SIZE_ONLY) {
if (FOLD)
flags |= ANYOF_FOLD;
if (LOC)
flags |= ANYOF_LOCALE;
}
checkposixcc();
goto skipcond; /* allow 1st char to be ] or - */
if (!range)
&numlen, 0);
if (value == '[')
else if (value == '\\') {
&numlen, 0);
/* Some compilers cannot handle switching on 64-bit integer
* values, therefore value cannot be an UV. Yes, this will
* be a problem later if we want switch on Unicode. --jhi */
switch (value) {
case 'p':
case 'P':
if (*PL_regcomp_parse == '{') {
if (!e)
vFAIL("Missing right brace on \\p{}");
n = e - PL_regcomp_parse;
}
else {
e = PL_regcomp_parse;
n = 1;
}
if (!SIZE_ONLY) {
if (value == 'p')
"+utf8::%.*s\n", (int)n, PL_regcomp_parse);
else
"!utf8::%.*s\n", (int)n, PL_regcomp_parse);
}
PL_regcomp_parse = e + 1;
continue;
#ifdef ASCIIish
#else
#endif
case 'x':
if (*PL_regcomp_parse == '{') {
if (!e)
vFAIL("Missing right brace on \\x{}");
e - PL_regcomp_parse,
&numlen);
PL_regcomp_parse = e + 1;
}
else {
numlen = 0; /* disallow underscores */
}
break;
case 'c':
break;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
numlen = 0; /* disallow underscores */
break;
default:
"Unrecognized escape \\%c in character class passed through",
(int)value);
break;
}
}
if (namedclass > OOB_NAMEDCLASS) {
if (range) { /* a-\d, a-[:digit:] */
if (!SIZE_ONLY) {
if (ckWARN(WARN_REGEXP))
"False [] range \"%*.*s\"",
/* 0x002D is Unicode for '-' */
}
range = 0;
}
if (!SIZE_ONLY) {
switch (namedclass) {
case ANYOF_ALNUM:
case ANYOF_NALNUM:
case ANYOF_ALNUMC:
case ANYOF_NALNUMC:
case ANYOF_ALPHA:
case ANYOF_NALPHA:
case ANYOF_ASCII:
case ANYOF_NASCII:
case ANYOF_CNTRL:
case ANYOF_NCNTRL:
case ANYOF_GRAPH:
case ANYOF_NGRAPH:
case ANYOF_DIGIT:
case ANYOF_NDIGIT:
case ANYOF_LOWER:
case ANYOF_NLOWER:
case ANYOF_PRINT:
case ANYOF_NPRINT:
case ANYOF_PUNCT:
case ANYOF_NPUNCT:
case ANYOF_SPACE:
case ANYOF_NSPACE:
case ANYOF_BLANK:
case ANYOF_NBLANK:
case ANYOF_PSXSPC:
case ANYOF_NPSXSPC:
case ANYOF_UPPER:
case ANYOF_NUPPER:
case ANYOF_XDIGIT:
case ANYOF_NXDIGIT:
}
continue;
}
}
if (range) {
Simple_vFAIL4("Invalid [] range \"%*.*s\"",
}
range = 0;
}
else {
if (ckWARN(WARN_REGEXP))
"False [] range \"%*.*s\"",
if (!SIZE_ONLY)
/* 0x002D is Unicode for '-' */
"002D\n");
} else
range = 1;
continue; /* do it next time */
}
}
/* now is the next time */
if (!SIZE_ONLY)
range = 0;
}
if (!SIZE_ONLY) {
#ifdef DEBUGGING
#else
#endif
}
return ret;
}
STATIC char*
{
char* retval = PL_regcomp_parse++;
for (;;) {
continue;
}
if (PL_regflags & PMf_EXTENDED) {
if (isSPACE(*PL_regcomp_parse)) {
continue;
}
else if (*PL_regcomp_parse == '#') {
continue;
}
}
return retval;
}
}
/*
- reg_node - emit a node
*/
{
ret = PL_regcode;
if (SIZE_ONLY) {
PL_regsize += 1;
return(ret);
}
PL_regcode = ptr;
return(ret);
}
/*
- reganode - emit a node with an argument
*/
{
ret = PL_regcode;
if (SIZE_ONLY) {
PL_regsize += 2;
return(ret);
}
PL_regcode = ptr;
return(ret);
}
/*
- reguni - emit (if appropriate) a Unicode character
*/
STATIC void
{
}
/*
- reginsert - insert an operator in front of already-emitted operand
*
* Means relocating the operand.
*/
STATIC void
{
/* (PL_regkind[(U8)op] == CURLY ? EXTRA_STEP_2ARGS : 0); */
if (SIZE_ONLY) {
return;
}
src = PL_regcode;
dst = PL_regcode;
}
/*
- regtail - set the next-pointer at the end of a node chain of p to val.
*/
STATIC void
{
if (SIZE_ONLY)
return;
/* Find last node. */
scan = p;
for (;;) {
break;
}
}
else {
}
}
/*
- regoptail - regtail on operand of first argument; nop if operandless
*/
STATIC void
{
/* "Operandless" and "op != BRANCH" are synonymous in practice. */
return;
}
}
else
return;
}
/*
- regcurly - a little FSA that accepts {\d+,?\d*}
*/
S_regcurly(pTHX_ register char *s)
{
if (*s++ != '{')
return FALSE;
if (!isDIGIT(*s))
return FALSE;
while (isDIGIT(*s))
s++;
if (*s == ',')
s++;
while (isDIGIT(*s))
s++;
if (*s != '}')
return FALSE;
return TRUE;
}
{
#ifdef DEBUGGING
/* While that wasn't END last time... */
l--;
/* Where, what. */
goto after_print;
else
: next);
}
}
}
}
}
node += ANYOF_SKIP;
}
/* Literal string, where present. */
}
else {
}
l++;
l--;
}
#endif /* DEBUGGING */
return node;
}
/*
- regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
*/
void
{
#ifdef DEBUGGING
/* Header fields of interest. */
if (r->anchored_substr)
PL_colors[0],
SvPVX(r->anchored_substr),
PL_colors[1],
(IV)r->anchored_offset);
if (r->float_substr)
PL_colors[0],
SvPVX(r->float_substr),
PL_colors[1],
if (r->check_substr)
r->check_substr == r->float_substr
? "(checking floating" : "(checking anchored");
if (r->reganch & ROPT_NOSCAN)
if (r->reganch & ROPT_CHECK_ALL)
if (r->check_substr)
if (r->regstclass) {
}
if (r->reganch & ROPT_ANCH_BOL)
if (r->reganch & ROPT_ANCH_MBOL)
if (r->reganch & ROPT_ANCH_SBOL)
if (r->reganch & ROPT_ANCH_GPOS)
}
if (r->reganch & ROPT_GPOS_SEEN)
if (r->reganch & ROPT_IMPLICIT)
if (r->reganch & ROPT_EVAL_SEEN)
#endif /* DEBUGGING */
}
STATIC void
{
else if (c == '-' || c == ']' || c == '\\' || c == '^')
else
}
/*
- regprop - printable representation of opcode
*/
void
{
#ifdef DEBUGGING
register int k;
FAIL("Corrupted regexp opcode");
if (k == EXACT)
else if (k == CURLY) {
}
else if (k == LOGICAL)
else if (k == ANYOF) {
int i, rangestart = -1;
const char * const anyofs[] = { /* Should be syncronized with
* ANYOF_ #xdefines in regcomp.h */
"\\w",
"\\W",
"\\s",
"\\S",
"\\d",
"\\D",
"[:alnum:]",
"[:^alnum:]",
"[:alpha:]",
"[:^alpha:]",
"[:ascii:]",
"[:^ascii:]",
"[:ctrl:]",
"[:^ctrl:]",
"[:graph:]",
"[:^graph:]",
"[:lower:]",
"[:^lower:]",
"[:print:]",
"[:^print:]",
"[:punct:]",
"[:^punct:]",
"[:upper:]",
"[:^upper:]",
"[:xdigit:]",
"[:^xdigit:]",
"[:space:]",
"[:^space:]",
"[:blank:]",
"[:^blank:]"
};
if (flags & ANYOF_LOCALE)
if (flags & ANYOF_FOLD)
if (flags & ANYOF_INVERT)
for (i = 0; i <= 256; i++) {
if (i < 256 && ANYOF_BITMAP_TEST(o,i)) {
if (rangestart == -1)
rangestart = i;
} else if (rangestart != -1) {
if (i <= rangestart + 3)
for (; rangestart < i; rangestart++)
else {
}
rangestart = -1;
}
}
if (o->flags & ANYOF_CLASS)
for (i = 0; i < sizeof(anyofs)/sizeof(char*); i++)
if (ANYOF_CLASS_TEST(o,i))
}
else {
UV i;
for (i = 0; i <= 256; i++) { /* just the first 256 */
U8 *e = uv_to_utf8(s, i);
if (rangestart == -1)
rangestart = i;
} else if (rangestart != -1) {
U8 *p;
if (i <= rangestart + 3)
for (; rangestart < i; rangestart++) {
for(e = uv_to_utf8(s, rangestart), p = s; p < e; p++)
}
else {
for (e = uv_to_utf8(s, rangestart), p = s; p < e; p++)
for (e = uv_to_utf8(s, i - 1), p = s; p < e; p++)
}
rangestart = -1;
}
}
{
while(*s && *s != '\n') s++;
if (*s == '\n') {
char *t = ++s;
while (*s) {
if (*s == '\n')
*s = ' ';
s++;
}
if (s[-1] == ' ')
s[-1] = 0;
}
}
}
}
#endif /* DEBUGGING */
}
SV *
{ /* Assume that RE_INTUIT is set */
if (!PL_colorset) reginitcolors();
"%sUsing REx substr:%s `%s%.60s%s%s'\n",
s,
PL_colors[1],
} );
return prog->check_substr;
}
void
{
if (!r || (--r->refcnt > 0))
return;
"%sFreeing REx:%s `%s%.60s%s%s'\n",
r->precomp,
PL_colors[1],
if (r->precomp)
if (RX_MATCH_COPIED(r))
if (r->substrs) {
if (r->anchored_substr)
if (r->float_substr)
SvREFCNT_dec(r->float_substr);
}
if (r->data) {
SV** old_curpad;
while (--n >= 0) {
case 's':
break;
case 'f':
break;
case 'p':
break;
case 'o':
if (new_comppad == NULL)
/* Watch out for global destruction's random ordering. */
}
else
new_comppad = NULL;
break;
case 'n':
break;
default:
}
}
}
Safefree(r);
}
/*
- regnext - dig the "next" pointer out of a node
*
* [Note, when REGALIGN is defined there are two places in regmatch()
* that bypass this code for speed.]
*/
regnode *
{
if (p == &PL_regdummy)
return(NULL);
if (offset == 0)
return(NULL);
return(p+offset);
}
STATIC void
{
char buf[512];
char *message;
if (l1 > 510)
l1 = 510;
#ifdef I_STDARG
/* ANSI variant takes additional second argument */
#else
#endif
if (l1 > 512)
l1 = 512;
}
/* XXX Here's a total kludge. But we need to re-enter for swash routines. */
void
{
PL_reg_start_tmp = 0;
PL_reg_start_tmpl = 0;
#ifdef DEBUGGING
#endif
}
#ifdef PERL_OBJECT
#include "XSUB.h"
#endif
static void
{
ReREFCNT_dec((regexp *)r);
}