cmd/re/regex++.dat

: translated from the Regex++ tests.txt by cvtregex++.c 2001-05-16
# manual edits were done to insert the standard error codes
#
# inserted {...} features tests may have changed line numbers and/or ordering
#
# comments beyond this point are verbatim from the original input
#
# this file contains a script of tests to run through regress.exe
#
# comments start with a semicolon and proceed to the end of the line
#
# changes to regular expression compile flags start with a "-" as the first
# non-whitespace character and consist of a list of the printable names
# of the flags, for example "match_default"
#
# Other lines contain a test to perform using the current flag status
# the first token contains the expression to compile, the second the string
# to match it against. If the second string is "!" then the expression should
# not compile, that is the first string is an invalid regular expression.
# This is then followed by a list of integers that specify what should match,
# each pair represents the starting and ending positions of a subexpression
# starting with the zeroth subexpression (the whole match).
# A value of -1 indicates that the subexpression should not take part in the
# match at all, if the first value is -1 then no part of the expression should
# match the string.
#

# - match_default normal REG_EXTENDED

#
# try some really simple literals:
E   a   a   (0,1)
E   Z   Z   (0,1)
E   Z   aaa NOMATCH
E   Z   xxxxZZxxx   (4,5)

# and some simple brackets:
E   (a) zzzaazz (3,4)(3,4)
Exz ()  zzz (0,0)(0,0)
Exz ()  NULL    (0,0)(0,0)
E   (   !   EPAREN
E   )   !   NOMATCH
E   (aa !   EPAREN
E   aa) !   NOMATCH
E   a   b   NOMATCH
E   \(\)    ()  (0,2)
E   \(a\)   (a) (0,3)
E   \() !   NOMATCH
E   (\) !   EPAREN
E   p(a)rameter ABCparameterXYZ (3,12)(4,5)
E   [pq](a)rameter  ABCparameterXYZ (3,12)(4,5)

# now try escaped brackets:
# - match_default bk_parens REG_BASIC
B   \(a\)   zzzaazz (3,4)(3,4)
B   \(\)    zzz (0,0)(0,0)
B   \(\)    NULL    (0,0)(0,0)
B   \(  !   EPAREN
B   \)  !   EPAREN
B   \(aa    !   EPAREN
B   aa\)    !   EPAREN
B   ()  ()  (0,2)
B   (a) (a) (0,3)
B   (\) !   EPAREN
B   \() !   EPAREN

# now move on to "." wildcards
# - match_default normal REG_EXTENDED REG_STARTEND
E   .   a   (0,1)
E$  .   \n  (0,1)
E$  .   \r  (0,1)
E   .   NULL    NOMATCH
# - match_default normal match_not_dot_newline REG_EXTENDED REG_STARTEND REG_NEWLINE
En  .   a   (0,1)
En$ .   \n  NOMATCH
En$ .   \r  (0,1)
En  .   NULL    NOMATCH
# - match_default normal match_not_dot_null match_not_dot_newline REG_EXTENDED REG_STARTEND REG_NEWLINE
En$ .   \n  NOMATCH
En$ .   \r  (0,1)
# this *WILL* produce an error from the POSIX API functions:
# - match_default normal match_not_dot_null match_not_dot_newline REG_EXTENDED REG_STARTEND REG_NEWLINE REG_NO_POSIX_TEST
Enz .   NULL    NOMATCH


#
# now move on to the repetition ops,
# starting with operator *
# - match_default normal REG_EXTENDED
E   a*  b   (0,0)
E   ab* a   (0,1)
E   ab* ab  (0,2)
E   ab* sssabbbbbbsss   (3,10)
E   ab*c*   a   (0,1)
E   ab*c*   abbb    (0,4)
E   ab*c*   accc    (0,4)
E   ab*c*   abbcc   (0,5)
E   *a  !   BADRPT
E$  \n* \n\n    (0,2)
E   \** **  (0,2)
E   \*  *   (0,1)

# now try operator +
E   ab+ a   NOMATCH
E   ab+ ab  (0,2)
E   ab+ sssabbbbbbsss   (3,10)
E   ab+c+   a   NOMATCH
E   ab+c+   abbb    NOMATCH
E   ab+c+   accc    NOMATCH
E   ab+c+   abbcc   (0,5)
E   +a  !   BADRPT
E$  \n+ \n\n    (0,2)
E   \+  +   (0,1)
E   \+  ++  (0,1)
E   \++ ++  (0,2)
# - match_default normal bk_plus_qm REG_EXTENDED REG_NO_POSIX_TEST
Exz +   +   BADRPT
Exz \+  !   NOMATCH
Exz a\+ aa  NOMATCH

# now try operator ?
# - match_default normal REG_EXTENDED
E   a?  b   (0,0)
E   ab? a   (0,1)
E   ab? ab  (0,2)
E   ab? sssabbbbbbsss   (3,5)
E   ab?c?   a   (0,1)
E   ab?c?   abbb    (0,2)
E   ab?c?   accc    (0,2)
E   ab?c?   abcc    (0,3)
E   ?a  !   BADRPT
E$  \n? \n\n    (0,1)
E   \?  ?   (0,1)
E   \?  ??  (0,1)
E   \?? ??  (0,1)
# - match_default normal bk_plus_qm REG_EXTENDED REG_NO_POSIX_TEST
Exz ?   ?   BADRPT
Exz \?  !   NOMATCH
Exz a\? aa  NOMATCH
Exz a\? b   NOMATCH

# - match_default normal limited_ops
B   a?  a?  (0,2)
B   a+  a+  (0,2)
B   a\? a?  (0,2)
B   a\+ a+  (0,2)

# now try operator {}
# - match_default normal REG_EXTENDED
E   a{2}    a   NOMATCH
E   a{2}    aa  (0,2)
E   a{2}    aaa (0,2)
E   a{2,}   a   NOMATCH
E   a{2,}   aa  (0,2)
E   a{2,}   aaaaa   (0,5)
E   a{2,4}  a   NOMATCH
E   a{2,4}  aa  (0,2)
E   a{2,4}  aaa (0,3)
E   a{2,4}  aaaa    (0,4)
E   a{2,4}  aaaaa   (0,4)
# spaces are now allowed inside {} by Regex++, but the tests here expect BADBR
E   a{ 2 , 4 }  aaaaa   BADBR
E   a{} !   BADBR
E   a{ }    !   BADBR
E   a{2 !   EBRACE
E   a}  !   NOMATCH
E   \{\}    {}  (0,2)

# - match_default normal bk_braces
B   a\{2\}  a   NOMATCH
B   a\{2\}  aa  (0,2)
B   a\{2\}  aaa (0,2)
B   a\{2,\} a   NOMATCH
B   a\{2,\} aa  (0,2)
B   a\{2,\} aaaaa   (0,5)
B   a\{2,4\}    a   NOMATCH
B   a\{2,4\}    aa  (0,2)
B   a\{2,4\}    aaa (0,3)
B   a\{2,4\}    aaaa    (0,4)
B   a\{2,4\}    aaaaa   (0,4)
B   a\{ 2 , 4 \}    aaaaa   BADBR
B   {}  {}  (0,2)

# now test the alternation operator |
# - match_default normal REG_EXTENDED
E   a|b a   (0,1)
E   a|b b   (0,1)
E   a(b|c)  ab  (0,2)(1,2)
E   a(b|c)  ac  (0,2)(1,2)
E   a(b|c)  ad  NOMATCH
E   |c  !   ENULL
E   c|  !   ENULL
E   (|) !   ENULL
E   (a|)    !   ENULL
E   (|a)    !   ENULL
E   a\| a|  (0,2)
# - match_default normal limited_ops
B   a|  a|  (0,2)
B   a\| a|  (0,2)
B   |   |   (0,1)
# - match_default normal bk_vbar REG_NO_POSIX_TEST
Bxz a|  a|  (0,2)
Bxz a\|b    a   (0,1)
Bxz a\|b    b   (0,1)

# now test the set operator []
# - match_default normal REG_EXTENDED
# try some literals first
E   [abc]   a   (0,1)
E   [abc]   b   (0,1)
E   [abc]   c   (0,1)
E   [abc]   d   NOMATCH
E   [^bcd]  a   (0,1)
E   [^bcd]  b   NOMATCH
E   [^bcd]  d   NOMATCH
E   [^bcd]  e   (0,1)
E   a[b]c   abc (0,3)
E   a[ab]c  abc (0,3)
E   a[^ab]c adc (0,3)
E   a[]b]c  a]c (0,3)
E   a[[b]c  a[c (0,3)
E   a[-b]c  a-c (0,3)
E   a[^]b]c adc (0,3)
E   a[^-b]c adc (0,3)
E   a[b-]c  a-c (0,3)
E   a[b !   EBRACK
E   a[] !   EBRACK

# then some ranges
E   [b-e]   a   NOMATCH
E   [b-e]   b   (0,1)
E   [b-e]   e   (0,1)
E   [b-e]   f   NOMATCH
E   [^b-e]  a   (0,1)
E   [^b-e]  b   NOMATCH
E   [^b-e]  e   NOMATCH
E   [^b-e]  f   (0,1)
E   a[1-3]c a2c (0,3)
E   a[3-1]c !   ERANGE
E   a[1-3-5]c   !   ERANGE
E   a[1-    !   EBRACK

# and some classes
E   a[[:alpha:]]c   abc (0,3)
E   a[[:unknown:]]c !   ECTYPE
E   a[[:    !   ECTYPE
E   a[[:alpha   !   ECTYPE
E   a[[:alpha:] !   EBRACK
E   a[[:alpha,:]    !   ECTYPE
E   a[[:]:]]b   !   ECTYPE
E   a[[:-:]]b   !   ECTYPE
E   a[[:alph:]] !   ECTYPE
E   a[[:alphabet:]] !   ECTYPE
E   [[:alnum:]]+    -%@a0X- (3,6)
E   [[:alpha:]]+    -%@aX0- (3,5)
E$  [[:blank:]]+    a  \tb  (1,4)
E$  [[:cntrl:]]+    a\n\tb  (1,3)
E   [[:digit:]]+    a019b   (1,4)
E   [[:graph:]]+    a%b     (0,3)
E   [[:lower:]]+    AabC    (1,3)
# This test fails with STLPort, disable for now as this is a corner case anyway...
#[[:print:]]+ "\na b\n" 1 4
E$  [[:punct:]]+    %-&\t   (0,3)
E$  [[:space:]]+    a \n\t\rb   (1,5)
E   [[:upper:]]+    aBCd    (1,3)
E   [[:xdigit:]]+   p0f3Cx  (1,5)

# now test flag settings:
# - escape_in_lists REG_NO_POSIX_TEST
Exz$    [\n]    \n  (0,1)
# - REG_NO_POSIX_TEST
Bxz$    [\n]    \n  (0,1)
Bxz$    [\n]    \\  NOMATCH
Bxz [[:class:]  :   ECTYPE
Bxz [[:class:]  [   ECTYPE
Bxz [[:class:]  c   ECTYPE

# line anchors
# - match_default normal REG_EXTENDED
En  ^ab ab  (0,2)
En  ^ab xxabxx  NOMATCH
En$ ^ab xx\nabzz    (3,5)
En  ab$ ab  (0,2)
En  ab$ abxx    NOMATCH
En$ ab$ ab\nzz  (0,2)
# - match_default match_not_bol match_not_eol normal REG_EXTENDED REG_NOTBOL REG_NOTEOL
Eben    ^ab ab  NOMATCH
Eben    ^ab xxabxx  NOMATCH
Eben$   ^ab xx\nabzz    (3,5)
Eben    ab$ ab  NOMATCH
Eben    ab$ abxx    NOMATCH
Eben$   ab$ ab\nzz  (0,2)

# back references
# - match_default normal REG_EXTENDED
E   a(b)\2c !   ESUBREG
E   a(b\1)c !   ESUBREG
E   a(b*)c\1d   abbcbbd (0,7)(1,3)
E   a(b*)c\1d   abbcbd  NOMATCH
E   a(b*)c\1d   abbcbbbd    NOMATCH
E   ^(.)\1  abc NOMATCH
E   a([bc])\1d  abcdabbd    (4,8)(5,6)
# strictly speaking this is at best ambiguous, at worst wrong, this is what most
# re implementations will match though.
E   a(([bc])\2)*d   abbccd  (0,6)(3,5)(3,4)

E   a(([bc])\2)*d   abbcbd  NOMATCH
E   a((b)*\2)*d abbbd   (0,5)(1,4)(2,3)
E   (ab*)[ab]*\1    ababaaa (0,7)(0,1)
E   (a)\1bcd    aabcd   (0,5)(0,1)
E   (a)\1bc*d   aabcd   (0,5)(0,1)
E   (a)\1bc*d   aabd    (0,4)(0,1)
E   (a)\1bc*d   aabcccd (0,7)(0,1)
E   (a)\1bc*[ce]d   aabcccd (0,7)(0,1)
E   ^(a)\1b(c)*cd$  aabcccd (0,7)(0,1)(4,5)

#
# characters by code:
# - match_default normal REG_EXTENDED REG_STARTEND
{E  \101    A   (0,1)       not an ascii implementation
E   \172    z   (0,1)
E   \0172   z   NOMATCH
}
E   NULL    NULL    ENULL
E   NULL    NULL    ENULL

#
# word operators:
{E  \w  a   (0,1)       perl \w not supported
E   \w  z   (0,1)
E   \w  A   (0,1)
E   \w  Z   (0,1)
E   \w  _   (0,1)
E   \w  }   NOMATCH
E   \w  `   NOMATCH
E   \w  [   NOMATCH
E   \w  @   NOMATCH
}
# non-word:
{E  \W  W   NOMATCH     perl \W not supported
E   \W  z   NOMATCH
E   \W  A   NOMATCH
E   \W  Z   NOMATCH
E   \W  _   NOMATCH
E   \W  }   (0,1)
E   \W  `   (0,1)
E   \W  [   (0,1)
E   \W  @   (0,1)

E   \<\w+\W+     aa  aa  a  (1,5)
}
# word boundaries
{E  \<a\>   ,a, (1,2)       word boundaries not supported
E   \<* !   BADRPT
E   \>* !   BADRPT
E   \<+ !   BADRPT
E   \>+ !   BADRPT
E   \<? !   BADRPT
E   \>? !   BADRPT
# word start:
E   \<abcd    abcd  (2,6)
E   \<ab    cab NOMATCH
E$  \\<ab   \nab    (1,3)
E   \<tag   ::tag   (2,5)
# word end:
E   abc\>   abc (0,3)
E   abc\>   abcd    NOMATCH
E$  abc\\>  abc\n   (0,3)
E   abc\>   abc::   (0,3)

E   \<abc   abcabc abc\n\nabc   (0,3)
E   \<    ab a aaa          (2,2)
}
# word boundary:
{E  \babcd  abcd    (0,4)       perl \b not supported
E   \babcd  :abcd:  (1,5)       perl \b not supported
E   \bab    cab NOMATCH
E$  \\bab   \nab    (1,3)
E   \btag   ::tag   (2,5)
E   abc\b   abc (0,3)
E   abc\b   abcd    NOMATCH
E$  abc\\b  abc\n   (0,3)
E   abc\b   abc::   (0,3)

E   \b    abb a abbb    (0,0)
}
# within word:
{E  \B  ab  (1,1)       perl \B not supported
E   a\Bb    ab  (0,2)
E   a\B ab  (0,1)
E   a\B a   NOMATCH
E   a\B a   NOMATCH
}

#
# buffer operators:
{E  \`abc   abc (0,3)       regex++ \' not supported
E$  \\`abc  \nabc   NOMATCH
E   \`abc    abc    NOMATCH
E   abc\'   abc (0,3)
E$  abc\\'  abc\n   NOMATCH
E   abc\'   abc     NOMATCH
}

#
# extra escape sequences:
E$  \a  \a  (0,1)
E$  \f  \f  (0,1)
E$  \n  \n  (0,1)
E$  \r  \r  (0,1)
E$  \t  \t  (0,1)
E$  \v  \v  (0,1)

E$  \\a \a  (0,1)
E$  \\f \f  (0,1)
E$  \\n \n  (0,1)
E$  \\r \r  (0,1)
E$  \\t \t  (0,1)
E$  \\v \v  (0,1)

E   \\a \a  (0,2)
E   \\f \f  (0,2)
E   \\n \n  (0,2)
E   \\r \r  (0,2)
E   \\t \t  (0,2)
E   \\v \v  (0,2)

#
# now follows various complex expressions designed to try and bust the matcher:
E   a(((b)))c   abc (0,3)(1,2)(1,2)(1,2)
E   a(b|(c))d   abd (0,3)(1,2)
E   a(b|(c))d   acd (0,3)(1,2)(1,2)
E   a(b*|c)d    abbd    (0,4)(1,3)
# just gotta have one DFA-buster, of course
E   a[ab]{20}   aaaaabaaaabaaaabaaaab   (0,21)
# and an inline expansion in case somebody gets tricky
E   a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab]   aaaaabaaaabaaaabaaaab   (0,21)
# and in case somebody just slips in an NFA...
E   a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night)  aaaaabaaaabaaaabaaaabweeknights (0,31)(21,24)(24,31)
# one really big one
E   1234567890123456789012345678901234567890123456789012345678901234567890  a1234567890123456789012345678901234567890123456789012345678901234567890b    (1,71)
# fish for problems as brackets go past 8
E   [ab][cd][ef][gh][ij][kl][mn]    xacegikmoq  (1,8)
E   [ab][cd][ef][gh][ij][kl][mn][op]    xacegikmoq  (1,9)
E   [ab][cd][ef][gh][ij][kl][mn][op][qr]    xacegikmoqy (1,10)
E   [ab][cd][ef][gh][ij][kl][mn][op][q] xacegikmoqy (1,10)
# and as parentheses go past 9:
E   (a)(b)(c)(d)(e)(f)(g)(h)    zabcdefghi  (1,9)(1,2)(2,3)(3,4)(4,5)(5,6)(6,7)(7,8)(8,9)
E   (a)(b)(c)(d)(e)(f)(g)(h)(i) zabcdefghij (1,10)(1,2)(2,3)(3,4)(4,5)(5,6)(6,7)(7,8)(8,9)(9,10)
E   (a)(b)(c)(d)(e)(f)(g)(h)(i)(j)  zabcdefghijk    (1,11)(1,2)(2,3)(3,4)(4,5)(5,6)(6,7)(7,8)(8,9)(9,10)(10,11)
E   (a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)   zabcdefghijkl   (1,12)(1,2)(2,3)(3,4)(4,5)(5,6)(6,7)(7,8)(8,9)(9,10)(10,11)(11,12)
E   (a)d|(b)c   abc (1,3)(-1,-1)(1,2)
E   _+((www)|(ftp)|(mailto)):_* _wwwnocolon _mailto:    (12,20)(13,19)(-1,-1)(-1,-1)(13,19)

# subtleties of matching
E   a(b)?cd     acd (0,3)
E   a(b)?c\1d   acd NOMATCH
E   a(b?c)+d    accd    (0,4)(2,3)
E   (wee|week)(knights|night)   weeknights  (0,10)(0,3)(3,10)
E   .*  abc (0,3)
E   a(b|(c))d   abd (0,3)(1,2)
E   a(b|(c))d   acd (0,3)(1,2)(1,2)
E   a(b*|c|e)d  abbd    (0,4)(1,3)
E   a(b*|c|e)d  acd (0,3)(1,2)
E   a(b*|c|e)d  ad  (0,2)(1,1)
E   a(b?)c  abc (0,3)(1,2)
E   a(b?)c  ac  (0,2)(1,1)
E   a(b+)c  abc (0,3)(1,2)
E   a(b+)c  abbbc   (0,5)(1,4)
E   a(b*)c  ac  (0,2)(1,1)
E   (a|ab)(bc([de]+)f|cde)  abcdef  (0,6)(0,1)(1,6)(3,5)
E   a([bc]?)c   abc (0,3)(1,2)
E   a([bc]?)c   ac  (0,2)(1,1)
E   a([bc]+)c   abc (0,3)(1,2)
E   a([bc]+)c   abcc    (0,4)(1,3)
E   a([bc]+)bc  abcbc   (0,5)(1,3)
E   a(bb+|b)b   abb (0,3)(1,2)
E   a(bbb+|bb+|b)b  abb (0,3)(1,2)
E   a(bbb+|bb+|b)b  abbb    (0,4)(1,3)
E   a(bbb+|bb+|b)bb abbb    (0,4)(1,2)
E   (.*).*  abcdef  (0,6)(0,6)
E   (a*)*   bc  (0,0)(0,0)

# do we get the right subexpression when it is used more than once?
E   a(b|c)*d    ad  (0,2)
E   a(b|c)*d    abcd    (0,4)(2,3)
E   a(b|c)+d    abd (0,3)(1,2)
E   a(b|c)+d    abcd    (0,4)(2,3)
E   a(b|c?)+d   ad  (0,2)(1,1)
E   a(b|c?)+d   abcd    (0,4)(2,3)
E   a(b|c){0,0}d    ad  (0,2)
E   a(b|c){0,1}d    ad  (0,2)
E   a(b|c){0,1}d    abd (0,3)(1,2)
E   a(b|c){0,2}d    ad  (0,2)
E   a(b|c){0,2}d    abcd    (0,4)(2,3)
E   a(b|c){0,}d ad  (0,2)
E   a(b|c){0,}d abcd    (0,4)(2,3)
E   a(b|c){1,1}d    abd (0,3)(1,2)
E   a(b|c){1,2}d    abd (0,3)(1,2)
E   a(b|c){1,2}d    abcd    (0,4)(2,3)
E   a(b|c){1,}d abd (0,3)(1,2)
E   a(b|c){1,}d abcd    (0,4)(2,3)
E   a(b|c){2,2}d    acbd    (0,4)(2,3)
E   a(b|c){2,2}d    abcd    (0,4)(2,3)
E   a(b|c){2,4}d    abcd    (0,4)(2,3)
E   a(b|c){2,4}d    abcbd   (0,5)(3,4)
E   a(b|c){2,4}d    abcbcd  (0,6)(4,5)
E   a(b|c){2,}d abcd    (0,4)(2,3)
E   a(b|c){2,}d abcbd   (0,5)(3,4)
E   a(b+|((c)*))+d  abd (0,3)(1,2)
E   a(b+|((c)*))+d  abcd    (0,4)(2,3)(2,3)(2,3)

# - match_default normal REG_EXTENDED REG_STARTEND REG_NOSPEC literal
L   \**?/{} \**?/{} (0,7)

# - match_default normal REG_EXTENDED REG_NO_POSIX_TEST
# try to match C++ syntax elements:
# line comment:
Exz$    //[^\n]*    ++i //here is a line comment\n  (4,28)
# block comment:
Exz /\*([^*]|\*+[^*/])*\*+/ /* here is a block comment */   (0,29)(26,27)
Exz /\*([^*]|\*+[^*/])*\*+/ /**/    (0,4)
Exz /\*([^*]|\*+[^*/])*\*+/ /***/   (0,5)
Exz /\*([^*]|\*+[^*/])*\*+/ /****/  (0,6)
Exz /\*([^*]|\*+[^*/])*\*+/ /*****/ (0,7)
Exz /\*([^*]|\*+[^*/])*\*+/ /*****/*/   (0,7)
# preprocessor directives:
E$  ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* #define some_symbol (0,19)
E$  ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* #define some_symbol(x) #x   (0,25)
E$  ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* #define some_symbol(x) \\  \r\n  foo();\\\r\n   printf(#x); (0,27)
# literals:
E   ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?  0xFF    (0,4)(0,4)(0,4)
E   ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?  35  (0,2)(0,2)(-1,-1)(0,2)
E   ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?  0xFFu   (0,5)(0,4)(0,4)
E   ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?  0xFFL   (0,5)(0,4)(0,4)(-1,-1)(4,5)
E   ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?  0xFFFFFFFFFFFFFFFFuint64    (0,24)(0,18)(0,18)(-1,-1)(19,24)(19,24)(22,24)
# strings:
#E  '([^\\']|\\.)*' '\\x3A' (0,6)(4,5)
E   '([^\\']|\\.)*' '\\''   (0,4)(1,3)
E$  '([^']|\\.)*'   '\n'    (0,3)(1,2)

# now try and test some unicode specific characters:
# - match_default normal REG_EXTENDED REG_UNICODE_ONLY
E   [[:unicode:]]+  a\0300\0400z    ECTYPE

# finally try some case insensitive matches:
# - match_default normal REG_EXTENDED REG_ICASE
# upper and lower have no meaning here so they fail, however these
# may compile with other libraries...
Ei  [[:lower:]]+    Ab  (0,2)
Ei  [[:lower:]]+    aB  (0,2)
Ei  [[:upper:]]+    Ab  (0,2)
Ei  [[:upper:]]+    aB  (0,2)
Ei  0123456789@abcdefghijklmnopqrstuvwxyz_`ABCDEFGHIJKLMNOPQRSTUVWXYZ   0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz   (0,65)
Ei  0123456789@abcdefghijklmnopqrstuvwxyz\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ 0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz  (0,66)
Ei  0123456789@abcdefghijklmnopqrstuvwxyz\[\\\]\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ\{\|\} 0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}    (0,72)

# known and suspected bugs:
# - match_default normal REG_EXTENDED
E   \(  (   (0,1)
E   \)  )   (0,1)
E   \$  $   (0,1)
E   \^  ^   (0,1)
E   \.  .   (0,1)
E   \*  *   (0,1)
E   \+  +   (0,1)
E   \?  ?   (0,1)
E   \[  [   (0,1)
E   \]  ]   (0,1)
E   \|  |   (0,1)
E   \\  \\  (0,1)
E   #   #   (0,1)
E   \#  #   BADESC
Ex  \#  #   (0,1)
E   a-  a-  (0,2)
E   \-  -   BADESC
Ex  \-  -   (0,1)
E   \{  {   (0,1)
E   \}  }   (0,1)
E   0   0   (0,1)
E   1   1   (0,1)
E   9   9   (0,1)
E   b   b   (0,1)
E   B   B   (0,1)
E   <   <   (0,1)
E   >   >   (0,1)
E   w   w   (0,1)
E   W   W   (0,1)
E   `   `   (0,1)
E   '   '   (0,1)
E$  \n  \n  (0,1)
E   ,   ,   (0,1)
E   a   a   (0,1)
E   f   f   (0,1)
E   n   n   (0,1)
E   r   r   (0,1)
E   t   t   (0,1)
E   v   v   (0,1)
E   c   c   (0,1)
E   x   x   (0,1)
E   :   :   (0,1)
E   (\.[[:alnum:]]+){2} w.a.b   (1,5)(3,5)

# - match_default normal REG_EXTENDED REG_ICASE
Ei  a   A   (0,1)
Ei  A   a   (0,1)
Ei  [abc]+  abcABC  (0,6)
Ei  [ABC]+  abcABC  (0,6)
Ei  [a-z]+  abcABC  (0,6)
Ei  [A-Z]+  abzANZ  (0,6)
Ei  [a-Z]+  abzABZ  ERANGE
Eix [a-Z]+  abzABZ  NOMATCH
Ei  [A-z]+  abzABZ  (0,6)
Ei  [[:lower:]]+    abyzABYZ    (0,8)
Ei  [[:upper:]]+    abzABZ  (0,6)
Ei  [[:word:]]+ abcZZZ  (0,6)
Ei  [[:alpha:]]+    abyzABYZ    (0,8)
Ei  [[:alnum:]]+    09abyzABYZ  (0,10)

# updated tests for version 2:
# - match_default normal REG_EXTENDED
E$  \x41    A   (0,1)
E$  \xff    \xFF    (0,1)
E$  \xFF    \xff    (0,1)
# - match_default normal REG_EXTENDED REG_NO_POSIX_TEST
{Exz$   \\c[    \e  (0,1)           perl \c not supported
# - match_default normal REG_EXTENDED
E$  \\cA    \001    (0,1)
E$  \\cz    \032    (0,1)
E$  \\c=    !   NOMATCH
E$  \\c?    !   NOMATCH
}
E   =:  =:  (0,2)

# word start:
E   [[:<:]]abcd   abcd  (2,6)
E   [[:<:]]ab   cab NOMATCH
E$  [[:<:]]ab   \nab    (1,3)
E   [[:<:]]tag  ::tag   (2,5)
#word end:
E   abc[[:>:]]  abc (0,3)
E   abc[[:>:]]  abcd    NOMATCH
E$  abc[[:>:]]  abc\n   (0,3)
E   abc[[:>:]]  abc::   (0,3)

# collating elements and rewritten set code:
# - match_default normal REG_EXTENDED REG_STARTEND
{E  [[.zero.]]  0   (0,1)       [[.element-name.]] not supported
E   [[.one.]]   1   (0,1)
E   [[.two.]]   2   (0,1)
E   [[.three.]] 3   (0,1)
E   [[.a.]] baa (1,2)
#E  [[.NUL.]]   NULL    (0,1)
E   [[.right-curly-bracket.]]   }   (0,1)
E   [[=right-curly-bracket=]]   }   (0,1)
}
E   [[:<:]z]    !   ECTYPE
E   [a[:>:]]    !   ECTYPE
E   [[=a=]] a   (0,1)
# - match_default normal REG_EXTENDED REG_STARTEND REG_ICASE
Ei  [[.A.]] A   (0,1)
Ei  [[.A.]] a   (0,1)
Ei  [[.A.]-b]+  AaBb    (0,4)
Ei  [A-[.b.]]+  AaBb    (0,4)
Ei  [[.a.]-B]+  AaBb    ERANGE
Eix [[.a.]-B]+  AaBb    NOMATCH
Ei  [a-[.B.]]+  AaBb    ERANGE
Eix [a-[.B.]]+  AaBb    NOMATCH
# - match_default normal REG_EXTENDED REG_NO_POSIX_TEST
Exz$    [\x61]  a   (0,1)
Exz$    [\x61-c]+   abcd    (0,3)
Exz$    [a-\x63]+   abcd    (0,3)
# - match_default normal REG_EXTENDED REG_STARTEND
E   [[.a.]-c]+  abcd    (0,3)
E   [a-[.c.]]+  abcd    (0,3)
E   [[:alpha:]-a]   !   ERANGE
E   [a-[:alpha:]]   !   NOMATCH

# try multi-character ligatures:
{E  [[.ae.]]    ae  (0,2)       [[.ligature.]] not supported
E   [[.ae.]]    aE  NOMATCH
E   [[.AE.]]    AE  (0,2)
E   [[.Ae.]]    Ae  (0,2)
E   [[.ae.]-b]  a   NOMATCH
E   [[.ae.]-b]  b   (0,1)
E   [[.ae.]-b]  ae  (0,2)
E   [a-[.ae.]]  a   (0,1)
E   [a-[.ae.]]  b   NOMATCH
E   [a-[.ae.]]  ae  (0,2)
# - match_default normal REG_EXTENDED REG_STARTEND REG_ICASE
Ei  [[.ae.]]    AE  (0,2)
Ei  [[.ae.]]    Ae  (0,2)
Ei  [[.AE.]]    Ae  (0,2)
Ei  [[.Ae.]]    aE  (0,2)
Ei  [[.AE.]-B]  a   NOMATCH
Ei  [[.Ae.]-b]  b   (0,1)
Ei  [[.Ae.]-b]  B   (0,1)
Ei  [[.ae.]-b]  AE  (0,2)
}

# - match_default normal REG_EXTENDED REG_STARTEND
#extended perl style escape sequences:
{E$ \\e \033    (0,1)       perl \e not supported
}
{E$ \\x1b   \033    (0,1)       perl \x not supported
E$  \\x{1b} \033    (0,1)
E   \x{}    !   NOMATCH
E   \x{ !   NOMATCH
E   \x} !   NOMATCH
E   \x  !   NOMATCH
E   \x{yy   !   NOMATCH
E   \x{1b   !   NOMATCH
}

# - match_default normal REG_EXTENDED REG_STARTEND REG_NO_POSIX_TEST
{Exz    \l+ ABabcAB (2,5)       regex++ \l not supported
Exz [\l]+   ABabcAB (2,5)
Exz [a-\l]  !   NOMATCH
E   [a-\l]  !   ERANGE
E   [\l-a]  !   ERANGE
Exz [\L]    !   (0,1)
Exz \L+ abABCab (2,5)
Exz \L+ ab,.-ab (2,5)
Exz \u+ abABCab (2,5)
Exz [\u]+   abABCab (2,5)
Exz [\U]    !   (0,1)
Exz \U+ ABabcAB (2,5)
}
{Exz    \d+ ab012ab (2,5)       perl \d not supported
Exz [a-\d]  !   NOMATCH
E   [a-\d]  !   ERANGE
E   [\d-a]  !   ERANGE
Exz [\d]+   ab012ad (6,7)
Evxz    [\d]+   ab012ad (2,5)
Exz [\D]    !D  (1,2)
Evxz    [\D]    !D  (0,1)
Exz \D+ 01abc01 (2,5)
Exz \s+ ab   ab (2,5)
Exz [\s]+   as   as (1,2)
Evxz    [\s]+   as   as (2,5)
Exz [\S]    !S  (1,2)
Evxz    [\S]    !S  (0,1)
Exz \S+   abc   (2,5)
}
# - match_default normal REG_EXTENDED REG_STARTEND
{E  \Qabc   abc (0,3)       regex++ \Q not supported
E   \Qabc\E abcd    (0,3)
E   \Qabc\Ed    abcde   (0,4)
E   \Q+*?\\E    +*?\\   (0,4)
}

{E  \C+ abcde   (0,5)       regex++ \C not supported
}
{E  \X+ abcde   (0,5)       regex++ \X not supported

# - match_default normal REG_EXTENDED REG_STARTEND REG_UNICODE_ONLY
E   \X+ a\768\769   (0,3)
E   \X+ \2309\2307  (0,2)
E   \X+ \2489\2494  (0,2)
}

# - match_default normal REG_EXTENDED REG_STARTEND
{E  \Aabc   abc (0,3)       regex++ \A not supported
E   \Aabc   aabc    NOMATCH
E   a\Aab   abc NOMATCH
E   abc\z   abc (0,3)
E   abc\z   abcd    NOMATCH
E$  abc\\z  abc\n\n NOMATCH
E$  abc\\Z  abc\n   (0,3)
E$  abc\\Z  abc\n\n (0,3)
E   abc\Z   abc (0,3)
E   \Aabc   abc   abc   (0,3)
}

{E  \Gabc   abc (0,3)       perl \G not supported
E   \G\w+\W+    abc  abc a cbbb     (0,5)
E   \Ga+b+  aaababb  abb    (0,4)
E   \Gabc   dabcd   NOMATCH
E   a\Gbc   abc NOMATCH
}

#
# now test grep,
# basically check all our restart types - line, word, etc
# checking each one for null and non-null matches.
#
# - match_default normal REG_EXTENDED REG_STARTEND REG_GREP
E   a    a a a aa   (1,2)
E   a+b+    aabaabbb ab (0,3)
E   a(b*|c|e)d  adabbdacd   (0,2)(1,1)
E$  a   \na\na\na\naa   (1,2)

E$  ^      \n\n  \n\n\n (0,0)
E$  ^ab ab  \nab  ab\n  (0,2)
E$  ^[^\n]*\n      \n  \n\n  \n (0,4)

E   abc abc (0,3)
E   abc  abc abcabc     (1,4)
E$  \n\n     \n\n\n       \n      \n\n\n\n      (1,3)

E$  $      \n\n  \n\n\n (10,10)
En$ $      \n\n  \n\n\n (3,3)

# - match_default normal REG_EXTENDED REG_STARTEND REG_GREP REG_ICASE
Ei  A    a a a aa   (1,2)
Ei  A+B+    aabaabbb ab (0,3)
Ei  A(B*|c|e)D  adabbdacd   (0,2)(1,1)
Ei$ A   \na\na\na\naa   (1,2)

Ei$ ^aB Ab  \nab  Ab\n  (0,2)
Ei$ \\<abc  Abcabc aBc\n\nabc   (0,3)

Ei  ABC abc (0,3)
Ei  abc  ABC ABCABC     (1,4)


#
# now test merge,
#
# - match_default normal REG_EXTENDED REG_STARTEND REG_MERGE format_no_copy
# start by testing subs:
#   a+  ...aaa,,,   ($`,...)
#   a+  ...aaa,,,   ($',,,,)
#   a+  ...aaa,,,   ($&,aaa)
#   a+  ...aaa,,,   ($0,aaa)
#   a+  ...aaa,,,   ($1,NULL)
#   a+  ...aaa,,,   ($15,NULL)
#   (a+)b+  ...aaabbb,,,    ($1,aaa)
#   [[:digit:]]*    123ab   (<$0>,<123><><><>)
#   [[:digit:]]*    123ab1  (<$0>,<123><><><1>)

# and now escapes:
#   a+  ...aaa,,,   ($x,$x)
#   a+  ...aaa,,,   (\a,\a)
#   a+  ...aaa,,,   (\f,\f)
#   a+  ...aaa,,,   (\n,\n)
#   a+  ...aaa,,,   (\r,\r)
#   a+  ...aaa,,,   (\t,\t)
#   a+  ...aaa,,,   (\v,\v)

#   a+  ...aaa,,,   (\x21,!)
#   a+  ...aaa,,,   (\x{21},!)
#   a+  ...aaa,,,   (\c@,\0)
#   a+  ...aaa,,,   (\e,\27)
#   a+  ...aaa,,,   (\0101,A)
#   a+  ...aaa,,,   ((\0101),A)

# - match_default normal REG_EXTENDED REG_STARTEND REG_MERGE format_sed format_no_copy
#   (a+)(b+)    ...aabb,,   (\0,aabb)
#   (a+)(b+)    ...aabb,,   (\1,aa)
#   (a+)(b+)    ...aabb,,   (\2,bb)
#   (a+)(b+)    ...aabb,,   (&,aabb)
#   (a+)(b+)    ...aabb,,   ($,$)
#   (a+)(b+)    ...aabb,,   ($1,$1)
#   (a+)(b+)    ...aabb,,   (()?:,()?:)
#   (a+)(b+)    ...aabb,,   (\\,\\)
#   (a+)(b+)    ...aabb,,   (\&,&)


# - match_default normal REG_EXTENDED REG_STARTEND REG_MERGE format_perl format_no_copy
#   (a+)(b+)    ...aabb,,   ($0,aabb)
#   (a+)(b+)    ...aabb,,   ($1,aa)
#   (a+)(b+)    ...aabb,,   ($2,bb)
#   (a+)(b+)    ...aabb,,   ($&,aabb)
#   (a+)(b+)    ...aabb,,   (&,&)
#   (a+)(b+)    ...aabb,,   (\0,\0)
#   (a+)(b+)    ...aabb,,   (()?:,()?:)

# - match_default normal REG_EXTENDED REG_STARTEND REG_MERGE
# move to copying unmatched data:
#   a+  ...aaa,,,   (bbb,...bbb,,,)
#   a+(b+)  ...aaabb,,, ($1,...bb,,,)
#   a+(b+)  ...aaabb,,,ab*abbb? ($1,...bb,,,b*bbb?)

#   (a+)|(b+)   ...aaabb,,,ab*abbb? ((?1A)(?2B),...AB,,,AB*AB?)
#   (a+)|(b+)   ...aaabb,,,ab*abbb? (?1A:B,...AB,,,AB*AB?)
#   (a+)|(b+)   ...aaabb,,,ab*abbb? ((?1A:B)C,...ACBC,,,ACBC*ACBC?)
#   (a+)|(b+)   ...aaabb,,,ab*abbb? (?1:B,...B,,,B*B?)

#
# changes to newline handling with 2.11:
#

# - match_default normal REG_EXTENDED REG_STARTEND REG_GREP

E$  ^.    \n  \r\n      (0,1)
E$  .$    \n  \r\n      (8,9)
En$ .$    \n  \r\n      (1,2)

# - match_default normal REG_EXTENDED REG_STARTEND REG_GREP REG_UNICODE_ONLY
#E  ^.  \8232 \8233     (0,1)
#E  .$  \8232 \8233     (1,2)

#
# non-greedy repeats added 21/04/00
# - match_default normal REG_EXTENDED
E   a{1,3}{1}   !   BADRPT
{E  a*? aa  (0,0)       non-greedy repeats not supported
E   a** aaa (0,3)
E   a?? aa  (0,0)
E   a++ !   BADRPT
E   a+? aa  (0,1)
E   a{1,3}? aaa (0,1)
E   \w+?w   ...ccccccwcccccw    (3,10)
E   \W+\w+?w    ...ccccccwcccccw    (0,10)
E   abc|\w+?    abd (0,1)
E   abc|\w+?    abcd    (0,3)
E   <\s*tag[^>]*>(.*?)<\s*/tag\s*>   <tag>here is some text</tag> <tag></tag>   (1,29)(6,23)
E   <\s*tag[^>]*>(.*?)<\s*/tag\s*>   < tag attr=\"something\">here is some text< /tag > <tag></tag> (1,51)(26,43)
}

#
# non-marking parentheses added 25/04/00
# - match_default normal REG_EXTENDED
{E  (?:abc)+    xxabcabcxx  (2,8)       non-marking parens not supported
E   (?:a+)(b+)  xaaabbbx    (1,7)(4,7)
E   (a+)(?:b+)  xaaabbba    (1,7)(1,4)
E   (?:(a+)b+)  xaaabbba    (1,7)(1,4)
E   (?:a+(b+))  xaaabbba    (1,7)(4,7)
E   a+(?#b+)b+  xaaabbba    (1,7)
}

#
# try some partial matches:
# - match_partial match_default normal REG_EXTENDED REG_NO_POSIX_TEST
Exz (xyz)(.*)abc    xyzaaab NOMATCH
Exz (xyz)(.*)abc    xyz NOMATCH
Exz (xyz)(.*)abc    xy  NOMATCH