1N/A# This is a home for regular expression tests that don't fit into 1N/A chdir 't' if -d 't'; 1N/Aeval 'use Config';
# Defaults assumed if this fails 1N/Aif ($x =~ /^abc/) {
print "ok 1\n";}
else {
print "not ok 1\n";}
1N/Aif ($x !~ /^def/) {
print "ok 2\n";}
else {
print "not ok 2\n";}
1N/Aif ($x =~ /^def/) {
print "ok 3\n";}
else {
print "not ok 3\n";}
1N/Aif (/^([0-9][0-9]*)/) {
print "ok 4\n";}
else {
print "not ok 4\n";}
1N/Aif ($x =~ /^xxx/) {
print "not ok 5\n";}
else {
print "ok 5\n";}
1N/Aif ($x !~ /^abc/) {
print "not ok 6\n";}
else {
print "ok 6\n";}
1N/Aif ($x =~ /def/) {
print "ok 7\n";}
else {
print "not ok 7\n";}
1N/Aif ($x !~ /def/) {
print "not ok 8\n";}
else {
print "ok 8\n";}
1N/Aif ($x !~ /.def/) {
print "ok 9\n";}
else {
print "not ok 9\n";}
1N/Aif ($x =~ /.def/) {
print "not ok 10\n";}
else {
print "ok 10\n";}
1N/Aif ($x =~ /\ndef/) {
print "ok 11\n";}
else {
print "not ok 11\n";}
1N/Aif ($x !~ /\ndef/) {
print "not ok 12\n";}
else {
print "ok 12\n";}
1N/Aif (/(a*b*)(c*)/ && $1 eq 'aaabbb' && $2 eq 'ccc') { 1N/A print "not ok 13\n"; 1N/Aif (/(a+b+c+)/ && $1 eq 'aaabbbccc') { 1N/A print "not ok 14\n"; 1N/Aif (/a+b?c+/) {
print "not ok 15\n";}
else {
print "ok 15\n";}
1N/Aif (/a+b?c+/) {
print "ok 16\n";}
else {
print "not ok 16\n";}
1N/Aif (/a*b+c*/) {
print "ok 17\n";}
else {
print "not ok 17\n";}
1N/Aif (/a*b?c*/) {
print "ok 18\n";}
else {
print "not ok 18\n";}
1N/Aif (/a*b+c*/) {
print "not ok 19\n";}
else {
print "ok 19\n";}
1N/Aif (/bcd|xyz/) {
print "ok 20\n";}
else {
print "not ok 20\n";}
1N/Aif (/xyz|bcd/) {
print "ok 21\n";}
else {
print "not ok 21\n";}
1N/Aif (m|bc/*d|) {
print "ok 22\n";}
else {
print "not ok 22\n";}
1N/Aif (/^$_$/) {
print "ok 23\n";}
else {
print "not ok 23\n";}
1N/A$* =
1;
# test 3 only tested the optimized version--this one is for real 1N/Aif ("ab\ncd\n" =~ /^cd/) {
print "ok 24\n";}
else {
print "not ok 24\n";}
1N/A@
XXX =
('ok 25','not ok 25', 'ok 26','not ok 26','not ok 27');
1N/A ?(.*)? && (print $1,"\n"); 1N/A print "not ok 27\n"; 1N/Aif ($& eq 'xyz') {
print "ok 28\n";}
else {
print "not ok 28\n";}
1N/Aif ($& eq 'xyz') {
print "ok 29\n";}
else {
print "not ok 29\n";}
1N/Aif ($& eq 'xyz') {
print "ok 30\n";}
else {
print "not ok 30\n";}
1N/Aif ("$`:$&:$'" eq 'abc:def:ghi') {print "ok 31\n";} else {print "not ok 31\n";} 1N/A/cde/ + 0; # optimized only to spat 1N/Aif ("$`:$&:$'" eq 'ab:cde:fghi') {print "ok 32\n";} else {print "not ok 32\n";} 1N/A/[d][e][f]/; # not optimized 1N/Aif ("$`:$&:$'" eq 'abc:def:ghi') {
print "ok 33\n";}
else {
print "not ok 33\n";}
1N/A$_ =
'now is the {time for all} good men to come to.';
1N/Aif ($1 eq 'time for all') {
print "ok 34\n";}
else {
print "not ok 34 $1\n";}
1N/A$_ =
'xxx {3,4} yyy zzz';
1N/Aprint /
( {3,4})/ ?
"ok 35\n" :
"not ok 35\n";
1N/Aprint $
1 eq ' ' ?
"ok 36\n" :
"not ok 36\n";
1N/Aprint /
( {4,})/ ?
"not ok 37\n" :
"ok 37\n";
1N/Aprint /
( {2,3}.)/ ?
"ok 38\n" :
"not ok 38\n";
1N/Aprint $
1 eq ' y' ?
"ok 39\n" :
"not ok 39\n";
1N/Aprint /
(y{2,3}.)/ ?
"ok 40\n" :
"not ok 40\n";
1N/Aprint $
1 eq 'yyy ' ?
"ok 41\n" :
"not ok 41\n";
1N/Aprint /x {
3,
4}/ ?
"not ok 42\n" :
"ok 42\n";
1N/Aprint /^
xxx {
3,
4}/ ?
"not ok 43\n" :
"ok 43\n";
1N/A$_ =
"now is the time for all good men to come to.";
1N/Aprint join(':',@words) eq "now:is:the:time:for:all:good:men:to:come:to" 1N/Aprint join(':',@words) eq "now:is:the:time:for:all:good:men:to:come:to" 1N/Aprint join(':',@words) eq "to:to" 1N/A : "not ok 47 `@words'\n"; 1N/Aprint $x eq '505550555' ? "ok 48\n" : "not ok 48 $x\n"; 1N/Aprint $` eq "" ? "ok 52\n" : "not ok 52\n" if $x; 1N/Aprint $` eq "" ? "ok 55\n" : "not ok 55\n" if $x; 1N/Aprint "not " if defined pos $_; 1N/A$_ = 'foobar1 bar2 foobar3 barfoobar5 foobar6'; 1N/A@out = /(?<!foo)bar./g; 1N/A# Tests which depend on REG_INFTY 1N/A# As well as failing if the pattern matches do unexpected things, the 1N/A# next three tests will fail if you should have picked up a lower-than- 1N/Aprint "not " if $@ !~ m%^\QQuantifier in {,} bigger than%; 1N/A if $@ !~ m%^\QQuantifier in {,} bigger than%; 1N/A# Poke a couple more parse failures 1N/Aprint "not " if $@ !~ m%^\QLookbehind longer than 255 not%; 1N/Afor $l (125, 140, 250, 270, 300000, 30) { # Ordered to free memory 1N/A print "# length=$l\nnot " unless "ba$a=" =~ /a$a=/; 1N/A print "not " if "b$a=" =~ /a$a=/; 1N/A# 20000 nodes, each taking 3 words per string, and 1 per branch 1N/A%ans = ( 'ax13876y25677lbc' => 1, 1N/A 'ax13876y25677mcb' => 0, # not b. 1N/A 'ax13876y35677nbc' => 0, # Num too big 1N/A 'ax13876y25677y21378obc' => 1, 1N/A 'ax13876y25677y21378zbc' => 0, # Not followed by [k-o] 1N/A 'ax13876y25677y21378y21378kbc' => 1, 1N/A 'ax13876y25677y21378y21378kcb' => 0, # Not b. 1N/A 'ax13876y25677y21378y21378y21378kbc' => 0, # 5 runs 1N/A print "# const-len `$_' not => $ans{$_}\nnot " 1N/A if $ans{$_} xor /a(?=([yx]($long_constant_len)){2,4}[k-o]).*b./o; 1N/A (?{ $c = 1 }) # Initialize 1N/A (?(?{ $c == 0 }) # PREVIOUS iteration was OK, stop the loop 1N/A ) # Fail: will unwind one iteration back 1N/A [^()]+ # Match a big chunk 1N/A ) # Do not try to match subchunks 1N/A )+ # This may not match with different subblocks 1N/A ) # Otherwise the chunk 1 may succeed with $c>0 1N/Aprint "# ans='@ans'\n# expect='$expect'\nnot " if "@ans" ne "1 1 1"; 1N/Aprint "# ans='@ans'\n# expect='$expect'\nnot " if "@ans" ne "1 1 1"; 1N/A@ans = ('a/b' =~ m%(.*/)?(.*)%); # Stack may be bad 1N/Aprint "not " if "@ans" ne 'a/ b'; 1N/A'foot' =~ /foo(?{$x = 12; 75})[t]/; 1N/A'foot' =~ /foo(?{ $^R + 12 })((?{ $x = 12; $^R + 17 })[xy])?/; 1N/A /(?<=(?=a)..)((?=c)|.)/g; 1N/A '' =~ /(?{ $c = 4 })/; 1N/A eval 'BEGIN { use warnings; $SIG{__WARN__} = $warn_pat };' . $code; 1N/A#&$for_future('q(a=[b]=) =~ /[x[=foo=]]/'); 1N/A#&$for_future('q(a.[b].) =~ /[x[.foo.]]/'); 1N/A# test if failure of patterns returns empty list 1N/Aprint "not " if $#+ != 0 or $#- != 0; 1N/Aprint "not " if $+[0] != 2 or $-[0] != 1; 1N/Aprint "not " if $#+ != 2 or $#- != 2; 1N/Aprint "not " if $+[0] != 3 or $-[0] != 0; 1N/Aprint "not " if $+[1] != 2 or $-[1] != 1; 1N/Aprint "not " if $+[2] != 3 or $-[2] != 2; 1N/Aprint "not " if $#+ != 3 or $#- != 3; 1N/Aprint "not " if $+[0] != 3 or $-[0] != 0; 1N/Aprint "not " if $+[1] != 2 or $-[1] != 1; 1N/Aprint "not " if $+[3] != 3 or $-[3] != 2; 1N/Aprint "not " if $#+ != 1 or $#- != 1; 1N/Aprint "not " if $+[0] != 2 or $-[0] != 0; 1N/Aprint "not " if $+[1] != 2 or $-[1] != 1; 1N/Aeval { $+[0] = 13; }; 1N/Aeval { $-[0] = 13; }; 1N/Aeval { @+ = (7, 6, 5); }; 1N/Aprint "#$#-..$#+\nnot " if $#+ != 2 or $#- != 1; 1N/Aprint "#'@res' '$_'\nnot " 1N/A unless "@res" eq "'a' undef 'a' 'c' 'e' undef 'a' undef 'a' 'c'"; 1N/Aprint "#'@res' '$_'\nnot " 1N/A "'' 'ab' 'cde|abcde' " . 1N/A "'' 'abc' 'de|abcde' " . 1N/A "'abcd' 'e|' 'abcde' " . 1N/A "'abcde|' 'ab' 'cde' " . 1N/A "'abcde|' 'abc' 'de'" ; 1N/A#Some more \G anchor checks 1N/A# see if matching against temporaries (created via pp_helem()) is safe 1N/A# See if $i work inside (?{}) in the presense of saved substrings and 1N/As/(\w)(?{push @b, $1})/,$1,/g for @a; 1N/Aprint "# \@b='@b', expect 'f o o b a r'\nnot " unless("@b" eq "f o o b a r"); 1N/Aprint "not " unless("@a" eq ",f,,o,,o, ,b,,a,,r,"); 1N/Aprint "not " if "@b" ne "@c"; 1N/Aprint "not " if "@b" ne "@c"; 1N/Aprint "not " if "@b" ne "@c"; 1N/Aprint "not " if "@b" ne "@c"; 1N/Aprint "not " if "@b" ne "@c"; 1N/Aprint "not " if "@b" ne "@c"; 1N/Aprint "not " if "@b" ne "@c"; 1N/Aprint "not " if "@b" ne "@c"; 1N/Aprint "not " if "@b" ne "@c"; 1N/Aprint "not " if "@b" ne "@c"; 1N/Aprint "not " if "@b" ne "@c"; 1N/Aprint "not " if "@b" ne "@c"; 1N/A# see if backtracking optimization works correctly 1N/A"\n\n" =~ /\n $ \n/x or print "not "; 1N/A"\n\n" =~ /\n* $ \n/x or print "not "; 1N/A"\n\n" =~ /\n+ $ \n/x or print "not "; 1N/A# test result of match used as match (!) 1N/A'a1b' =~ ('xyz' =~ /y/) and $` eq 'a' or print "not "; 1N/A'a1b' =~ ('xyz' =~ /t/) and $` eq 'a' or print "not "; 1N/A $w = 1 if ("1\n" x 102) =~ /^\s*\n/m; 1N/A# There's no \v but the vertical tabulator seems miraculously 1N/A# be 11 both in ASCII and EBCDIC. 1N/A# bugid 20001021.005 - this caused a SEGV 1N/Aprint "not " unless "A \x{263a} B z C" =~ /A . B (??{ "z" }) C/; 1N/Aif (/(.)(\C)(\C)(.)/) { 1N/A print "not ok 233\n"; 1N/A if ($ordA == 65) { # ASCII (or equivalent), should be UTF-8 1N/A print "not ok 234\n"; 1N/A print "not ok 235\n"; 1N/A } elsif ($ordA == 193) { # EBCDIC (or equivalent), should be UTF-EBCDIC 1N/A print "not ok 234\n"; 1N/A print "not ok 235\n"; 1N/A print "not ok $_ # ord('A') == $ordA\n"; 1N/A print "not ok 236\n"; 1N/A print "not ok $_\n"; 1N/A # currently \C are still tagged as UTF-8 1N/A print "not ok 238\n"; 1N/A print "not ok 238\n"; 1N/A print "not ok 238 # ord('A') == $ordA\n"; 1N/A print "not ok $_\n"; 1N/A # currently \C are still tagged as UTF-8 1N/A print "not ok 240\n"; 1N/A print "not ok 240\n"; 1N/A print "not ok 240 # ord('A') == $ordA\n"; 1N/A print "not ok $_\n"; 1N/A # japhy -- added 03/03/2001 1N/A print "not " if $1 ne "abc"; 1N/A# The 242 and 243 go with the 244 and 245. 1N/A# The trick is that in EBCDIC the explicit numeric range should match 1N/A# (as also in non-EBCDIC) but the explicit alphabetic range should not match. 1N/A print "not ok 242\n"; 1N/A print "not ok 243\n"; 1N/A# In most places these tests would succeed since \x8e does not 1N/A# in most character sets match 'i' or 'j' nor would \xce match 1N/A# 'I' or 'J', but strictly speaking these tests are here for 1N/A# the good of EBCDIC, so let's test these only there. 1N/Aif (ord('i') == 0x89 && ord('J') == 0xd1) { # EBCDIC 1N/A if ("\x8e" !~ /[i-j]/) { 1N/A print "not ok 244\n"; 1N/A if ("\xce" !~ /[I-J]/) { 1N/A print "not ok 245\n"; 1N/A print "ok $_ # Skip: only in EBCDIC\n"; 1N/A # bug id 20001008.001 1N/A my @x = ("stra\337e 138","stra\337e 138"); 1N/A s/(\d+)\s*([\w\-]+)/$1 . uc $2/e; 1N/A use utf8; # needed for the raw UTF-8 1N/A print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; 1N/A print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; 1N/A print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; 1N/A print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; 1N/A # the first half of 20001028.003 1N/A my ($Y) = $X =~ /(.*)/; 1N/A my $X = "Szab\x{f3},Bal\x{e1}zs"; 1N/A $Y =~ s/(B)/$1/ for 0..3; 1N/A # the second half of 20001028.003 1N/A # bug id 20001230.002 1N/A print "not " unless "École" =~ /^\C\C(.)/ && $1 eq 'c'; 1N/A # This is far from complete testing, there are dozens of character 1N/A # classes in Unicode. The mixing of literals and \N{...} is 1N/A # intentional so that in non-Latin-1 places we test the native 1N/A # characters, not the Unicode code points. 1N/A "\N{CYRILLIC SMALL LETTER A}" => 'Ll', 1N/A "\N{GREEK CAPITAL LETTER ALPHA}" => 'Lu', 1N/A "\N{HIRAGANA LETTER SMALL A}" => 'Lo', 1N/A "\N{COMBINING GRAVE ACCENT}" => 'Mn', 1N/A "\N{ARABIC-INDIC DIGIT ZERO}" => 'Nd', 1N/A print "# IsAlpha\n"; 1N/A print "# IsAlnum\n"; 1N/A print "# IsASCII\n"; 1N/A print "# IsCntrl\n"; 1N/A print "# IsBlank\n"; 1N/A print "# IsDigit\n"; 1N/A print "# IsGraph\n"; 1N/A print "# IsLower\n"; 1N/A print "# IsPrint\n"; 1N/A print "# IsPunct\n"; 1N/A print "# IsSpace\n"; 1N/A print "# IsUpper\n"; 1N/A $_ = "abc\x{100}\x{200}\x{300}\x{380}\x{400}defg"; 1N/A if (/(.\x{300})./) { 1N/A print "not " unless $' eq "\x{400}defg" && length($') == 5; 1N/A for (576..580) { print "not ok $_\n" } 1N/A # bug id 20010306.008 1N/A # The original bug report had 'no utf8' here but that was irrelevant. 1N/A $a =~ m/\w/; # used to core dump 1N/A # bugid 20010410.006 1N/A '/(.*?)\{(.*?)\}/csg', 1N/A '/(.*?)\{(.*?)\}/cg', 1N/A '/(.*?)\{(.*?)\}/sg', 1N/A '/(.*?)\{(.*?)\}/g', 1N/A '/(.+?)\{(.+?)\}/csg', 1N/A while (eval \$input =~ $rx) { 1N/A print "# \\\$1 = '\$1' \\\$2 = '\$2'\n"; 1N/A # from Robin Houston 1N/A my $x = "\x{10FFFD}"; 1N/A print "not " if $x =~ /[\x80-\x{100}]/; 1N/A print "not " if $x =~ /[\x{100}]/; 1N/A print "not " if $x =~ /[\x{100}]/; 1N/A print "not " if $x =~ /[\x{100}]/; 1N/A # the next two tests must be ignored on EBCDIC 1N/A # now test multi-error regexes 1N/A eval 'qr/(?o-cg)/'; # (?c) means (?g) error won't be thrown 1N/A# More Unicode "class" tests 1N/A /x && print "ok 631\n"; 1N/A /x && print "ok 632\n"; 1N/A print "not ok 633\n"; 1N/A print "not ok 634\n"; 1N/A# 635..639: ID 20010619.003 (only the space character is 1N/A# supposed to be [:print:], not the whole isprint()). 1N/Aprint "not " if "\n" =~ /[[:print:]]/; 1N/Aprint "not " if "\t" =~ /[[:print:]]/; 1N/A# Amazingly vertical tabulator is the same in ASCII and EBCDIC. 1N/Aprint "not " if "\014" =~ /[[:print:]]/; 1N/Aprint "not " if "\r" =~ /[[:print:]]/; 1N/A## Test basic $^N usage outside of a regex 1N/A$T="ok 641\n";if ($x =~ /(cde)/ and $^N eq "cde") {print $T} else {print "not $T"}; 1N/A$T="ok 642\n";if ($x =~ /(c)(d)(e)/ and $^N eq "e") {print $T} else {print "not $T"}; 1N/A$T="ok 643\n";if ($x =~ /(c(d)e)/ and $^N eq "cde") {print $T} else {print "not $T"}; 1N/A$T="ok 644\n";if ($x =~ /(foo)|(c(d)e)/ and $^N eq "cde") {print $T} else {print "not $T"}; 1N/A$T="ok 645\n";if ($x =~ /(c(d)e)|(foo)/ and $^N eq "cde") {print $T} else {print "not $T"}; 1N/A$T="ok 646\n";if ($x =~ /(c(d)e)|(abc)/ and $^N eq "abc") {print $T} else {print "not $T"}; 1N/A$T="ok 647\n";if ($x =~ /(c(d)e)|(abc)x/ and $^N eq "cde") {print $T} else {print "not $T"}; 1N/A$T="ok 648\n";if ($x =~ /(c(d)e)(abc)?/ and $^N eq "cde") {print $T} else {print "not $T"}; 1N/A$T="ok 649\n";if ($x =~ /(?:c(d)e)/ and $^N eq "d" ) {print $T} else {print "not $T"}; 1N/A$T="ok 650\n";if ($x =~ /(?:c(d)e)(?:f)/ and $^N eq "d" ) {print $T} else {print "not $T"}; 1N/A$T="ok 651\n";if ($x =~ /(?:([abc])|([def]))*/ and $^N eq "f" ){print $T} else {print "not $T"}; 1N/A$T="ok 652\n";if ($x =~ /(?:([ace])|([bdf]))*/ and $^N eq "f" ){print $T} else {print "not $T"}; 1N/A$T="ok 653\n";if ($x =~ /(([ace])|([bd]))*/ and $^N eq "e" ){print $T} else {print "not $T"}; 1N/A $T="ok 654\n";if($x =~ /(([ace])|([bdf]))*/ and $^N eq "f" ){print $T} else {print "not $T"}; 1N/A## test to see if $^N is automatically localized -- it should now 1N/A## have the value set in test 653 1N/A$T="ok 655\n";if ($^N eq "e" ){print $T} else {print "not $T"}; 1N/A## Now test inside (?{...}) 1N/A$T="ok 656\n";if ($x =~ /a([abc])(?{$y=$^N})c/ and $y eq "b" ){print $T} else {print "not $T"}; 1N/A$T="ok 657\n";if ($x =~ /a([abc]+)(?{$y=$^N})d/ and $y eq "bc"){print $T} else {print "not $T"}; 1N/A$T="ok 658\n";if ($x =~ /a([abcdefg]+)(?{$y=$^N})d/ and $y eq "bc"){print $T} else {print "not $T"}; 1N/A {print $T} else {print "not $T"}; 1N/A {print $T} else {print "not $T"}; 1N/A# Test the Unicode script classes 1N/A# # singleton (not in a range, this test must be ignored on EBCDIC) 1N/A# print "not " unless chr(0xb5) =~ /\p{IsGreek}/ or ord("A") == 193; 1N/Aprint "ok 665 # 0xb5 moved from Greek to Common with Unicode 4.0.1\n"; 1N/A## Should probably put in tests for all the POSIX stuff, but not sure how to 1N/A## guarantee a specific locale...... 1N/A print "ok $_ # Skip: EBCDIC\n" for 673..674; 1N/A# With /s modifier UTF8 chars were interpreted as bytes 1N/A my $a = "Hello \x{263A} World"; 1N/A@a = ("foo\nbar" =~ /./g); 1N/Aprint "ok 676\n" if @a == 6 && "@a" eq "f o o b a r"; 1N/A@a = ("foo\nbar" =~ /./gs); 1N/Aprint "ok 677\n" if @a == 7 && "@a" eq "f o o \n b a r"; 1N/A@a = ("foo\nbar" =~ /\C/g); 1N/Aprint "ok 678\n" if @a == 7 && "@a" eq "f o o \n b a r"; 1N/Aprint "ok 679\n" if @a == 7 && "@a" eq "f o o \n b a r"; 1N/A@a = ("foo\n\x{100}bar" =~ /./g); 1N/Aprint "ok 680\n" if @a == 7 && "@a" eq "f o o \x{100} b a r"; 1N/A@a = ("foo\n\x{100}bar" =~ /./gs); 1N/Aprint "ok 681\n" if @a == 8 && "@a" eq "f o o \n \x{100} b a r"; 1N/A($a, $b) = map { chr } ord('A') == 65 ? (0xc4, 0x80) : (0x8c, 0x41); 1N/A@a = ("foo\n\x{100}bar" =~ /\C/g); 1N/Aprint "ok 682\n" if @a == 9 && "@a" eq "f o o \n $a $b b a r"; 1N/A@a = ("foo\n\x{100}bar" =~ /\C/gs); 1N/Aprint "ok 683\n" if @a == 9 && "@a" eq "f o o \n $a $b b a r"; 1N/A # [ID 20010814.004] pos() doesn't work when using =~m// in list context 1N/A print "$a $b $c" eq 'ba:ba ad:ae 10' ? "ok 684\n" : "not ok 684\t# $a $b $c\n"; 1N/A # [ID 20010407.006] matching utf8 return values from functions does not work 1N/A $x =~ /(..)/; $y = $1; 1N/A x =~ /(..)/; $y = $1; 1N/A# Force scalar context on the patern match 1N/A # Check that \x## works. 5.6.1 and 5.005_03 fail some of these. 1N/A ok ($x =~ /^\x4EE$/, "Check only 2 bytes of hex are matched."); 1N/A ok ($x =~ /^\x4Ei$/, "Check that invalid hex digit stops it (2)"); 1N/A ok ($x =~ /^\x4j$/, "Check that invalid hex digit stops it (1)"); 1N/A ok ($x =~ /^\xk$/, "Check that invalid hex digit stops it (0)"); 1N/A ok ($x =~ /^\xx$/, "\\xx isn't to be treated as \\0"); 1N/A ok ($x =~ /^\xxa$/, "\\xxa isn't to be treated as \\xa"); 1N/A ok ($x =~ /^\x9_b$/, "\\x9_b isn't to be treated as \\x9b"); 1N/A print "# and now again in [] ranges\n"; 1N/A ok ($x =~ /^[\x4EE]{2}$/, "Check only 2 bytes of hex are matched."); 1N/A ok ($x =~ /^[\x4Ei]{2}$/, "Check that invalid hex digit stops it (2)"); 1N/A ok ($x =~ /^[\x4j]{2}$/, "Check that invalid hex digit stops it (1)"); 1N/A ok ($x =~ /^[\xk]{2}$/, "Check that invalid hex digit stops it (0)"); 1N/A ok ($x =~ /^[\xx]{2}$/, "\\xx isn't to be treated as \\0"); 1N/A ok ($x =~ /^[\xxa]{3}$/, "\\xxa isn't to be treated as \\xa"); 1N/A ok ($x =~ /^[\x9_b]{3}$/, "\\x9_b isn't to be treated as \\x9b"); 1N/A # Check that \x{##} works. 5.6.1 fails quite a few of these. 1N/A ok ($x =~ /^\x{9_b}$/, "\\x{9_b} is to be treated as \\x9b"); 1N/A ok ($x =~ /^\x{9_b}y$/, "\\x{9_b} is to be treated as \\x9b (again)"); 1N/A ok ($x =~ /^\x{9b_}y$/, "\\x{9b_} is to be treated as \\x9b"); 1N/A ok ($x =~ /^\x{9_bq}y$/, "\\x{9_bc} is to be treated as \\x9b"); 1N/A ok ($x =~ /^\x{x9b}y$/, "\\x{x9b} is to be treated as \\x0"); 1N/A ok ($x =~ /^\x{0x9b}y$/, "\\x{0x9b} is to be treated as \\x0"); 1N/A ok ($x =~ /^\x{09b}y$/, "\\x{09b} is to be treated as \\x9b"); 1N/A print "# and now again in [] ranges\n"; 1N/A ok ($x =~ /^[\x{9_b}]$/, "\\x{9_b} is to be treated as \\x9b"); 1N/A ok ($x =~ /^[\x{9_b}y]{2}$/, "\\x{9_b} is to be treated as \\x9b (again)"); 1N/A ok ($x =~ /^[\x{9b_}y]{2}$/, "\\x{9b_} is to be treated as \\x9b"); 1N/A ok ($x =~ /^[\x{9_bq}y]{2}$/, "\\x{9_bc} is to be treated as \\x9b"); 1N/A ok ($x =~ /^[\x{x9b}y]{2}$/, "\\x{x9b} is to be treated as \\x0"); 1N/A ok ($x =~ /^[\x{0x9b}y]{2}$/, "\\x{0x9b} is to be treated as \\x0"); 1N/A ok ($x =~ /^[\x{09b}y]{2}$/, "\\x{09b} is to be treated as \\x9b"); 1N/A # high bit bug -- japhy 1N/A $x =~ /.*?\200/ or print "not "; 1N/Aprint "# some Unicode properties\n"; 1N/A # Dashes, underbars, case. 1N/A # Complement, leading and trailing whitespace. 1N/A # No ^In, dashes, case, dash, any intervening (word-break) whitespace. 1N/A # (well, newlines don't work...) 1N/A print "not " if "a" =~ /\p{Lu}/; 1N/A print "not " if "A" =~ /\p{Ll}/; 1N/A print "not " if "a" =~ /\PL/; 1N/A print "not " if "a" =~ /\P{Ll}/; 1N/A print "not " if "A" =~ /\PL/; 1N/A print "not " if "A" =~ /\P{Lu}/; 1N/A print "not " if "1" =~ /\p{L&}/; 1N/A # Script=, Block=, Category= 1N/A print "# the basic character classes and Unicode \n"; 1N/A # 0100;LATIN CAPITAL LETTER A WITH MACRON;Lu;0;L;0041 0304;;;;N;LATIN CAPITAL LETTER A MACRON;;;0101; 1N/A # 0660;ARABIC-INDIC DIGIT ZERO;Nd;0;AN;;0;0;0;N;;;;; 1N/A # 1680;OGHAM SPACE MARK;Zs;0;WS;;;;;N;;;;; 1N/A print "# folding matches and Unicode\n"; 1N/A print "not " unless "\x{101}a" =~ /\x{100}/i; 1N/A print "not " unless "\x{100}a" =~ /\x{100}/i; 1N/A print "not " unless "\x{101}a" =~ /\x{101}/i; 1N/A print "not " unless "\x{100}a" =~ /\x{101}/i; 1N/A print "not " unless "a\x{100}" =~ /A\x{100}/i; 1N/A print "not " unless "A\x{100}" =~ /a\x{100}/i; 1N/A print "not " unless "a\x{100}" =~ /a\x{100}/i; 1N/A print "not " unless "A\x{100}" =~ /A\x{100}/i; 1N/A print "not " unless "\x{101}a" =~ /[\x{100}]/i; 1N/A print "not " unless "\x{100}a" =~ /[\x{100}]/i; 1N/A print "not " unless "\x{101}a" =~ /[\x{101}]/i; 1N/A print "not " unless "\x{100}a" =~ /[\x{101}]/i; 1N/A print "# LATIN LETTER A WITH GRAVE\n"; 1N/A my $lower = "\N{LATIN SMALL LETTER A WITH GRAVE}"; 1N/A my $UPPER = "\N{LATIN CAPITAL LETTER A WITH GRAVE}"; 1N/A print "# GREEK LETTER ALPHA WITH VRACHY\n"; 1N/A $lower = "\N{GREEK CAPITAL LETTER ALPHA WITH VRACHY}"; 1N/A $UPPER = "\N{GREEK SMALL LETTER ALPHA WITH VRACHY}"; 1N/A print "# LATIN LETTER Y WITH DIAERESIS\n"; 1N/A $lower = "\N{LATIN CAPITAL LETTER Y WITH DIAERESIS}"; 1N/A $UPPER = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}"; 1N/A print "# GREEK CAPITAL LETTER SIGMA vs COMBINING GREEK PERISPOMENI\n"; 1N/A my $SIGMA = "\N{GREEK CAPITAL LETTER SIGMA}"; 1N/A my $char = "\N{COMBINING GREEK PERISPOMENI}"; 1N/A # Before #13843 this was failing by matching falsely. 1N/A print "a!" =~ /^(\X)!/ && $1 eq "a" ? 1N/A "ok 787\n" : "not ok 787 # $1\n"; 1N/A print "\xDF!" =~ /^(\X)!/ && $1 eq "\xDF" ? 1N/A "ok 788\n" : "not ok 788 # $1\n"; 1N/A print "\x{100}!" =~ /^(\X)!/ && $1 eq "\x{100}" ? 1N/A "ok 789\n" : "not ok 789 # $1\n"; 1N/A print "\x{100}\x{300}!" =~ /^(\X)!/ && $1 eq "\x{100}\x{300}" ? 1N/A "ok 790\n" : "not ok 790 # $1\n"; 1N/A print "\N{LATIN CAPITAL LETTER E}!" =~ /^(\X)!/ && 1N/A $1 eq "\N{LATIN CAPITAL LETTER E}" ? 1N/A "ok 791\n" : "not ok 791 # $1\n"; 1N/A print "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}!" =~ 1N/A $1 eq "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}" ? 1N/A "ok 792\n" : "not ok 792 # $1\n"; 1N/A print "#\\C and \\X\n"; 1N/A print "!abc!" =~ /a\Cc/ ? "ok 793\n" : "not ok 793\n"; 1N/A print "!abc!" =~ /a\Xc/ ? "ok 794\n" : "not ok 794\n"; 1N/A print "# FINAL SIGMA\n"; 1N/A print "# parlez-vous?\n"; 1N/A print "fran\N{LATIN SMALL LETTER C}ais" =~ 1N/A "ok 813\n" : "not ok 813\n"; 1N/A print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ 1N/A $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ? 1N/A "ok 814\n" : "not ok 814\n"; 1N/A print "fran\N{LATIN SMALL LETTER C}ais" =~ 1N/A "ok 815\n" : "not ok 815\n"; 1N/A print "franc\N{COMBINING CEDILLA}ais" =~ 1N/A /franc\C\Cais/ ? # COMBINING CEDILLA is two bytes when encoded 1N/A "ok 816\n" : "not ok 816\n"; 1N/A print "fran\N{LATIN SMALL LETTER C}ais" =~ 1N/A "ok 817\n" : "not ok 817\n"; 1N/A print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ 1N/A $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ? 1N/A "ok 818\n" : "not ok 818\n"; 1N/A print "franc\N{COMBINING CEDILLA}ais" =~ 1N/A $& eq "franc\N{COMBINING CEDILLA}ais" ? 1N/A "ok 819\n" : "not ok 819\n"; 1N/A print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ 1N/A $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ? 1N/A "ok 820\n" : "not ok 820\n"; 1N/A print "franc\N{COMBINING CEDILLA}ais" =~ 1N/A $& eq "franc\N{COMBINING CEDILLA}ais" ? 1N/A "ok 821\n" : "not ok 821\n"; 1N/A print "fran\N{LATIN SMALL LETTER C}ais" =~ 1N/A "ok 822\n" : "not ok 822\n"; 1N/A print "fran\N{LATIN SMALL LETTER C}ais" =~ 1N/A "ok 823\n" : "not ok 823\n"; 1N/A print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ 1N/A $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ? 1N/A "ok 824\n" : "not ok 824\n"; 1N/A print "franc\N{COMBINING CEDILLA}ais" =~ 1N/A $& eq "franc\N{COMBINING CEDILLA}ais" ? 1N/A "ok 825\n" : "not ok 825\n"; 1N/A print "# Does lingering (and useless) UTF8 flag mess up /i matching?\n"; 1N/A print "not ok 826\n"; 1N/A print "not ok 827\n"; 1N/A print "not ok 828\n"; 1N/A print "# more SIGMAs\n"; 1N/A "ok 829\n" : "not ok 829\n"; 1N/A "ok 830\n" : "not ok 830\n"; 1N/A "ok 831\n" : "not ok 831\n"; 1N/A "ok 832\n" : "not ok 832\n"; 1N/A "ok 833\n" : "not ok 833\n"; 1N/A "ok 834\n" : "not ok 834\n"; 1N/A print "# LATIN SMALL LETTER SHARP S\n"; 1N/A print "\N{LATIN SMALL LETTER SHARP S}" =~ 1N/A print "\N{LATIN SMALL LETTER SHARP S}" =~ 1N/A print "\N{LATIN SMALL LETTER SHARP S}" =~ 1N/A print "\N{LATIN SMALL LETTER SHARP S}" =~ 1N/A print "\N{LATIN SMALL LETTER SHARP S}" =~ /ss/i ? 1N/A "ok 843\n" : "not ok 843\n"; 1N/A print "\N{LATIN SMALL LETTER SHARP S}" =~ /SS/i ? 1N/A "ok 844\n" : "not ok 844\n"; 1N/A print "# more whitespace: U+0085, U+2028, U+2029\n"; 1N/A # U+0085 needs to be forced to be Unicode, the \x{100} does that. 1N/A print "<\x{100}\x{0085}>" =~ /<\x{100}\s>/ ? "ok 845\n" : "not ok 845\n"; 1N/A print "<\x{2028}>" =~ /<\s>/ ? "ok 846\n" : "not ok 846\n"; 1N/A print "<\x{2029}>" =~ /<\s>/ ? "ok 847\n" : "not ok 847\n"; 1N/A print "# . with /s should work on characters, as opposed to bytes\n"; 1N/A my $s = "\x{e4}\x{100}"; 1N/A # This is not expected to match: the point is that 1N/A # neither should we get "Malformed UTF-8" warnings. 1N/A "not ok 848\n" : "ok 848\n"; 1N/A while ($s =~ /\G(.)/gs) { 1N/A print join("", @c) eq $s ? "ok 849\n" : "not ok 849\n"; 1N/A my $t1 = "Q003\n\n\x{e4}\x{f6}\n\nQ004\n\n\x{e7}"; # test only chars < 256 1N/A while ($t1 =~ / \G ( .+? ) \n\s+ ( .+? ) ( $ | \n\s+ ) /xgcs) { 1N/A my $t2 = $t1 . "\x{100}"; # repeat with a larger char 1N/A while ($t2 =~ / \G ( .+? ) \n\s+ ( .+? ) ( $ | \n\s+ ) /xgcs) { 1N/A print "# Unicode lookbehind\n"; 1N/A print "A\x{100}B" =~ /(?<=A.)B/ ? "ok 851\n" : "not ok 851\n"; 1N/A print "A\x{200}\x{300}B" =~ /(?<=A..)B/ ? "ok 852\n" : "not ok 852\n"; 1N/A print "\x{400}AB" =~ /(?<=\x{400}.)B/ ? "ok 853\n" : "not ok 853\n"; 1N/A print "\x{500\x{600}}B" =~ /(?<=\x{500}.)B/ ? "ok 854\n" : "not ok 854\n"; 1N/A print "# UTF-8 hash keys and /\$/\n"; 1N/A my %u = ( $u => $u, $v => $v, $w => $w ); 1N/A my $m2 = $u{$_}=~/^\w*$/ ? 1 : 0; 1N/A print "# [ID 20020124.005]\n"; 1N/A for my $char ("a", "\x{df}", "\x{100}"){ 1N/A "ok $i\n" : "not ok $i # debug: $x\n"; 1N/A print "# SEGV in s/// and UTF-8\n"; 1N/A $s = "s#\x{100}" x 4; 1N/A print $s eq "s \x{100}" x 4 ? "ok 861\n" : "not ok 861\n"; 1N/A print "# UTF-8 bug (maybe alreayd known?)\n"; 1N/A $u =~ s/./\x{100}/g; 1N/A print $u eq "\x{100}\x{100}\x{100}" ? "ok 862\n" : "not ok 862\n"; 1N/A print $u eq "f\x{100}\x{100}b\x{100}r" ? "ok 863\n" : "not ok 863\n"; 1N/A $u =~ s/\x{100}/e/g; 1N/A print $u eq "feeber" ? "ok 864\n" : "not ok 864\n"; 1N/A print "# UTF-8 bug with s///\n"; 1N/A "xx.*$c", "x.*$c$c", "$c.*xx", "$c$c.*x", "xx.*(?=$c)", "(?=$c).*xx", 1N/A for my $re ("xx.*$c*", "$c*.*xx") { 1N/A for my $re ("xy$c*z", "x$c*yz") { 1N/A print "# qr/.../x\n"; 1N/A my $R = qr/ A B C # D E/x; 1N/A print eval {"ABCDE" =~ m/($R)/} ? "ok $test\n" : "not ok $test\n"; 1N/A print "# illegal Unicode properties\n"; 1N/A print "# [ID 20020412.005] wrong pmop flags checked when empty pattern\n"; 1N/A # requires reuse of last successful pattern 1N/A $match ? 'succeeded' : 'failed', $_ ? 'second' : 'first'; 1N/Aprint "# user-defined character properties\n"; 1N/A print "# [ID 20020630.002] utf8 regex only matches 32k\n"; 1N/A for ([ 'byte', "\x{ff}" ], [ 'utf8', "\x{1ff}" ]) { 1N/A for my $len (32000, 32768, 33000) { 1N/Aprint(('goodfood' =~ $a ? '' : 'not '), 1N/A "ok $test\t# reblessed qr// matches\n"); 1N/Aprint(($a eq '(?-xism:foo)' ? '' : 'not '), 1N/A "ok $test\t# reblessed qr// stringizes\n"); 1N/A$z=$y = "\317\276"; # $y is byte representation of $x 1N/Aprint(($x =~ $a ? '' : 'not '), "ok $test - utf8 interpolation in qr//\n"); 1N/Aprint(("a$a" =~ $x ? '' : 'not '), 1N/A "ok $test - stringifed qr// preserves utf8\n"); 1N/Aprint(("a$x" =~ /^a$a\z/ ? '' : 'not '), 1N/A "ok $test - interpolated qr// preserves utf8\n"); 1N/Aprint(("a$x" =~ /^a(??{$a})\z/ ? '' : 'not '), 1N/A "ok $test - postponed interpolation of qr// preserves utf8\n"); 1N/A "ok $test - ## in qr// doesn't corrupt memory [perl #17776]\n"); 1N/Aprint(("$x$x" =~ /^$x(??{$x})\z/ ? '' : 'not '), 1N/A "ok $test - postponed utf8 string in utf8 re matches utf8\n"); 1N/Aprint(("$y$x" =~ /^$y(??{$x})\z/ ? '' : 'not '), 1N/A "ok $test - postponed utf8 string in non-utf8 re matches utf8\n"); 1N/Aprint(("$y$x" !~ /^$y(??{$y})\z/ ? '' : 'not '), 1N/A "ok $test - postponed non-utf8 string in non-utf8 re doesn't match utf8\n"); 1N/Aprint(("$x$x" !~ /^$x(??{$y})\z/ ? '' : 'not '), 1N/A "ok $test - postponed non-utf8 string in utf8 re doesn't match utf8\n"); 1N/Aprint(("$y$y" =~ /^$y(??{$y})\z/ ? '' : 'not '), 1N/A "ok $test - postponed non-utf8 string in non-utf8 re matches non-utf8\n"); 1N/Aprint(("$x$y" =~ /^$x(??{$y})\z/ ? '' : 'not '), 1N/A "ok $test - postponed non-utf8 string in utf8 re matches non-utf8\n"); 1N/A$y = $z; # reset $y after upgrade 1N/Aprint(("$x$y" !~ /^$x(??{$x})\z/ ? '' : 'not '), 1N/A "ok $test - postponed utf8 string in utf8 re doesn't match non-utf8\n"); 1N/A$y = $z; # reset $y after upgrade 1N/Aprint(("$y$y" !~ /^$y(??{$x})\z/ ? '' : 'not '), 1N/A "ok $test - postponed utf8 string in non-utf8 re doesn't match non-utf8\n"); 1N/Aprint "# more user-defined character properties\n"; 1N/A print "# Change #18179\n"; 1N/A # previously failed with "panic: end_shift 1N/A my $s = "\x{100}" x 5; 1N/A my $ok = $s =~ /(\x{100}{4})/; 1N/A print "# [perl #15763]\n"; 1N/A chop $a; # but leaves the UTF-8 flag 1N/A $a .= "y"; # 1 byte before "y" 1N/A ok($a =~ /^\C/, 'match one \C on 1-byte UTF-8'); 1N/A ok($a =~ /^\C{1}/, 'match \C{1}'); 1N/A ok($a =~ /^\C{1}y/, 'match \C{1}y'); 1N/A $a = "\x{100}y"; # 2 bytes before "y" 1N/A ok($a =~ /^\C/, 'match one \C on 2-byte UTF-8'); 1N/A ok($a =~ /^\C{1}/, 'match \C{1}'); 1N/A ok($a =~ /^\C\C/, 'match two \C'); 1N/A ok($a =~ /^\C{2}/, 'match \C{2}'); 1N/A ok($a =~ /^\C\C\C/, 'match three \C on 2-byte UTF-8 and a byte'); 1N/A ok($a =~ /^\C{3}/, 'match \C{3}'); 1N/A ok($a =~ /^\C\Cy/, 'match two \C'); 1N/A ok($a =~ /^\C{2}y/, 'match \C{2}'); 1N/A ok($a !~ /^\C{2}\Cy/, q{don't match \C{3}y}); 1N/A $a = "\x{1000}y"; # 3 bytes before "y" 1N/A ok($a =~ /^\C/, 'match one \C on three-byte UTF-8'); 1N/A ok($a =~ /^\C{1}/, 'match \C{1}'); 1N/A ok($a =~ /^\C\C/, 'match two \C'); 1N/A ok($a =~ /^\C{2}/, 'match \C{2}'); 1N/A ok($a =~ /^\C\C\C/, 'match three \C'); 1N/A ok($a =~ /^\C{3}/, 'match \C{3}'); 1N/A ok($a =~ /^\C\C\C\C/, 'match four \C on three-byte UTF-8 and a byte'); 1N/A ok($a =~ /^\C{4}/, 'match \C{4}'); 1N/A ok($a =~ /^\C\C\Cy/, 'match three \Cy'); 1N/A ok($a =~ /^\C{3}y/, 'match \C{3}y'); 1N/A ok($a !~ /^\C\C\C\C\y/, q{don't match four \Cy}); 1N/A ok($a !~ /^\C{4}y/, q{don't match \C{4}y}); 1N/Aok(/[^\s]+/, "m/[^\s]/ utf8"); 1N/Aok(/[^\d]+/, "m/[^\d]/ utf8"); 1N/Aok(($a = $_, $_ =~ s/[^\s]+/./g), "s/[^\s]/ utf8"); 1N/Aok(($a = $_, $a =~ s/[^\d]+/./g), "s/[^\s]/ utf8"); 1N/Aok("\x{100}" =~ /\x{100}/, "[perl #15397]"); 1N/Aok("\x{100}" =~ /(\x{100})/, "[perl #15397]"); 1N/Aok("\x{100}" =~ /(\x{100}){1}/, "[perl #15397]"); 1N/Aok("\x{100}\x{100}" =~ /(\x{100}){2}/, "[perl #15397]"); 1N/Aok("\x{100}\x{100}" =~ /(\x{100})(\x{100})/, "[perl #15397]"); 1N/A ok( $1 eq "\x{100}", '$1 is utf-8 [perl #18232]' ); 1N/A ok( $1 eq "\x{100}", '$1 is still utf-8' ); 1N/A ok( $1 ne "\xC4\x80", '$1 is not non-utf-8' ); 1N/A ok ( "0" =~ /\p{N}+\z/, "[perl #19767] variant test" ); 1N/A $p++ if /(??{ $p })/ 1N/A ok ($p == 5, "[perl #20683] (??{ }) returns stale values"); 1N/A ok ( $p == 5, "(??{ }) returns stale values"); 1N/A # Subject: Odd regexp behavior 1N/A # From: Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk> 1N/A # Date: Wed, 26 Feb 2003 16:53:12 +0000 1N/A # Message-Id: <E18o4nw-0008Ly-00@wisbech.cl.cam.ac.uk> 1N/A # To: perl-unicode@perl.org 1N/A $x = "\x{2019}\nk"; $x =~ s/(\S)\n(\S)/$1 $2/sg; 1N/A ok($x eq "\x{2019} k", "Markus Kuhn 2003-02-26"); 1N/A $x = "b\nk"; $x =~ s/(\S)\n(\S)/$1 $2/sg; 1N/A ok($x eq "b k", "Markus Kuhn 2003-02-26"); 1N/A ok("\x{2019}" =~ /\S/, "Markus Kuhn 2003-02-26"); 1N/A "[perl #21411] (??{ .. }) corrupts split's stack"); 1N/A "[perl #21411] (?{ .. }) version of the above"); 1N/A ok(1,'cache_re & "(?{": it dumps core in 5.6.1 & 5.8.0'); 1N/A ok("\x{100}\n" =~ /\x{100}\n$/, "UTF8 length cache and fbm_compile"); 1N/A ok(join(":", /\b(.)\x{100}/g) eq "a:/", "re_intuit_start and PL_bostr"); 1N/A "[perl #17757] Parse::RecDescent triggers infinite loop"); 1N/A ok("\x{100}X" =~ /$re/, "S_cl_and ANYOF_UNICODE & ANYOF_INVERTED"); 1N/Aok(1, 'skip - $* not deprecated in Perl 5.8') for 1..6; 1N/Aok("abcde" eq "$`", '# TODO #19049 - global match not setting $`'); 1N/Aok(" \x{101}" =~ qr/\x{100}/i, 1N/A "<20030808193656.5109.1@llama.ni-s.u-net.com>"); 1N/Aok(" \x{1E01}" =~ qr/\x{1E00}/i, 1N/A "<20030808193656.5109.1@llama.ni-s.u-net.com>"); 1N/Aok(" \x{10428}" =~ qr/\x{10400}/i, 1N/A "<20030808193656.5109.1@llama.ni-s.u-net.com>"); 1N/Aok(" \x{1E01}x" =~ qr/\x{1E00}X/i, 1N/A "<20030808193656.5109.1@llama.ni-s.u-net.com>"); 1N/A # [perl #23769] Unicode regex broken on simple example 1N/A my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s; 1N/A ok($s =~ /\x{a0}/, "[perl #23769]"); 1N/A ok($s =~ /\x{a0}+/, "[perl #23769]"); 1N/A ok("aaa\x{100}" =~ /(a+)/, "[perl #23769] easy invariant"); 1N/A ok("\xa0\xa0\xa0\x{100}" =~ /(\xa0+)/, "[perl #23769] regrepeat invariant"); 1N/A ok($1 eq "\xa0\xa0\xa0", "[perl #23769]"); 1N/A ok("ababab\x{100} " =~ /((?:ab)+)/, "[perl #23769] hard invariant"); 1N/A ok($1 eq "ababab", "[perl #23769]"); 1N/A ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+)/, "[perl #23769] hard variant"); 1N/A ok($1 eq "\xa0\xa1\xa0\xa1\xa0\xa1", "[perl #23769]"); 1N/A ok("aaa\x{100} " =~ /(a+?)/, "[perl #23769] easy invariant"); 1N/A ok("\xa0\xa0\xa0\x{100} " =~ /(\xa0+?)/, "[perl #23769] regrepeat variant"); 1N/A ok("ababab\x{100} " =~ /((?:ab)+?)/, "[perl #23769] hard invariant"); 1N/A ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+?)/, "[perl #23769] hard variant"); 1N/A ok($1 eq "\xa0\xa1", "[perl #23769]"); 1N/A ok("\xc4\xc4\xc4" !~ /(\x{100}+)/, "[perl #23769] don't match first byte of utf8 representation"); 1N/A ok("\xc4\xc4\xc4" !~ /(\x{100}+?)/, "[perl #23769] don't match first byte of utf8 representation"); 1N/A '\x{...} misparsed in regexp near 127 char EXACT limit' 1N/A ok($c=~/${c}|\x{100}/, "ASCII pattern that really is utf8"); 1N/A ok(@w==0, "No warnings"); 1N/A my($x, $y) = "bca" =~ /^(?=.*(a)).*(bc)/; 1N/A}, 'captures can move backwards in string');