This patch is modified version of a fix from upstream. This is already fixed in
perl 5.22.
--- perl-5.12.5/regexec.c 2016-05-16 14:45:14.336482535 +0200
+++ perl-5.12.5/regexec.c 2016-05-16 14:40:11.221253416 +0200
@@ -6124,6 +6124,10 @@ S_reghop3(U8 *s, I32 off, const U8* lim)
if (UTF8_IS_CONTINUED(*s)) {
while (s > lim && UTF8_IS_CONTINUATION(*s))
s--;
+ if (! UTF8_IS_START(*s)) {
+ dTHX;
+ Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
+ }
}
/* XXX could check well-formedness here */
}
@@ -6155,6 +6159,10 @@ S_reghop4(U8 *s, I32 off, const U8* llim
if (UTF8_IS_CONTINUED(*s)) {
while (s > llim && UTF8_IS_CONTINUATION(*s))
s--;
+ if (! UTF8_IS_START(*s)) {
+ dTHX;
+ Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
+ }
}
/* XXX could check well-formedness here */
}
@@ -6184,6 +6192,10 @@ S_reghopmaybe3(U8* s, I32 off, const U8*
if (UTF8_IS_CONTINUED(*s)) {
while (s > lim && UTF8_IS_CONTINUATION(*s))
s--;
+ if (! UTF8_IS_START(*s)) {
+ dTHX;
+ Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
+ }
}
/* XXX could check well-formedness here */
}
--- perl-5.12.5/t/re/pat.t Sun Nov 4 00:26:03 2012
+++ perl-5.12.5/t/re/pat.t Mon May 16 19:37:07 2016
@@ -23,7 +23,7 @@ BEGIN {
}
-plan tests => 309; # Update this when adding/deleting tests.
+plan tests => 310; # Update this when adding/deleting tests.
run_tests() unless caller;
@@ -32,6 +32,40 @@ run_tests() unless caller;
#
sub run_tests {
+ { # Test that we handle some malformed UTF-8 without looping [perl
+ # #123562]
+
+ # This test uses routines from test.pl. Sadly the routines differ
+ # from what is used by the rest of the file (from ReTest.pl). To make
+ # this test work I load test.pl and run it in my new namespace
+ # MyLocal. The problem is that the function _ok has static variable
+ # holding number of tests executed. We need to have the MyLocal::_ok
+ # replaced by main::_ok so that it's properly accounted for.
+
+ my $code='
+ package MyLocal;
+ do "test.pl"; # needed to load watchdog()
+
+ use Encode qw(_utf8_on);
+ my $malformed = "a\x80\n";
+ _utf8_on($malformed);
+ watchdog(3);
+ $malformed =~ /(\n\r|\r)$/;
+ print q(No infinite loop here!);
+ ';
+ {
+ package MyLocal;
+ do "test.pl";
+ my $ref = \*_ok;
+ *$ref = \*main::_ok;
+ }
+
+ MyLocal::fresh_perl_like($code, qr/Malformed UTF-8 character/, {},
+ "test that we handle some UTF-8 malformations without looping" );
+ }
+
+
+
{
my $x = "abc\ndef\n";