#!./perl
# File-scoped flag, declared ahead of BEGIN so the block below can see it.
# Nothing in the visible lines assigns or reads it.
my $has_perlio;
# Compile-time setup: switch into the t directory when one exists, then print
# a diagnostic line (prefixed with a hash sign) warning that UTF-8 locales may
# cause failures without perlio.
# NOTE(review): as shown, the warning is printed unconditionally and one more
# brace is closed than opened; the conditional that should guard the message
# (presumably the one that sets $has_perlio) is not visible -- confirm.
BEGIN {
# Run from inside the t directory if we were started from its parent.
chdir 't' if -d 't';
print <<EOF;
# Since you don't have perlio you might get failures with UTF-8 locales.
EOF
}
}
# NOTE!
#
# Think carefully before adding tests here. In general this should be
# used only for about three categories of tests:
#
# (1) tests that absolutely require 'use utf8'; since that in general
# shouldn't be needed, as the utf8 pragma is being obsoleted, this should
# have rather few tests. If you want to test Unicode together with some
# other feature, such as regexes, put the test in that feature's own script.
#
# (2) tests that have to do with Unicode tokenizing (though it's likely
# that all the other Unicode tests sprinkled around the t/**/*.t are
# going to catch that)
#
# (3) complicated tests that simultaneously stress so many Unicode features
# that deciding into which other test script the tests should go to
# is hard -- maybe consider breaking up the complicated test
#
#
{
# bug id 20001009.001
# Scratch lexicals for this stanza; the bare block limits their scope.
# Inside the block, $a and $b hide the package variables of the same name
# (the ones sort uses).
# NOTE(review): none of the three variables is used in the visible lines;
# $test is presumably a test number -- confirm against the full stanza.
my ($a, $b);
my $test = 68;
}
{
# bug id 20000730.004
# First pass: three spellings of a one-character string holding U+263A
# (the bare literal, and the literal with an empty string concatenated on
# either side).
for my $s ("\x{263a}",
"" . "\x{263a}",
"\x{263a}" . "",
) {
# Declared but never assigned in the visible lines.
my $length_bytes;
# Element count of @regex_chars.
# NOTE(review): @regex_chars is not declared or filled in the visible lines.
my $regex_chars = @regex_chars;
# Split on the empty pattern (one element per character), then count them.
my @split_chars = split //, $s;
my $split_chars = @split_chars;
# NOTE(review): tail of a call whose start is not visible; presumably the
# expected slash-separated summary of the counts gathered above -- confirm.
"1/1/1/3");
}
# Second pass: four spellings of a string made of two U+263A characters.
# NOTE(review): $smiley is not declared or assigned in the visible lines.
for my $s ("\x{263a}" . "\x{263a}",
"\x{263a}\x{263a}",
"\x{263a}" x 2,
$smiley x 2,
) {
# Declared but never assigned in the visible lines.
my $length_bytes;
# Element count of @regex_chars (see the review note in the first loop).
my $regex_chars = @regex_chars;
# Split on the empty pattern (one element per character), then count them.
my @split_chars = split //, $s;
my $split_chars = @split_chars;
# NOTE(review): tail of a call whose start is not visible; presumably the
# expected summary for the two-character case -- confirm.
"2/2/2/6");
}
}
{
# Counter initialised to zero; it is not modified in the visible lines.
my $w = 0;
# Assemble a piece of Perl source at run time and string-evaluate it.
# The first q// piece is an opening double quote followed by one backslash,
# the middle piece is the single character U+0100, and the last q// piece is
# the closing double quote. The evaluated code is therefore a double-quoted
# string whose only content is a backslash in front of U+0100.
my $x = eval q/"\\/ . "\x{100}" . q/"/;;
}
{
use warnings;
use strict;
# Perl source text held in a string rather than compiled here; the visible
# lines never use $show.
# NOTE(review): the text of sub show looks incomplete (a foreach modifier with
# no statement in front of it) -- confirm against the full file.
my $show = q(
sub show {
my $result;
foreach @_;
}
1;
);
# 60 is '<' in ASCII (an earlier version of this note said '>', which is 62).
# The earlier note also said 60 is ' ' in EBCDIC -- not verified here.
# 173 is not punctuation in either ASCII or EBCDIC
# One row is pushed per code point below, laid out as
# [number, $char, $charsubst, $char_as_ord].
my (@char);
foreach (60, 173, 257, 65532) {
# I don't want to use map {ord} and I've no need to hardcode the UTF
# version
# NOTE(review): $char is read here but is not declared or assigned in the
# visible lines.
my $charsubst = $char;
# Not testing this one against map {ord}
# NOTE(review): the initialiser and closing semicolon of the declaration
# below are not visible.
my $char_as_ord
push @char, [$_, $char, $charsubst, $char_as_ord];
}
# Now we've done all the UTF8 munching hopefully we're safe
# Every entry of the table starts with a human-readable description.
# NOTE(review): the entries are cut short in this view, and the start of the
# construct that is closed on the @char line is not visible.
my @tests = (
['check our detection program works',
['check literal 8 bit input',
['check no utf8; makes no change',
# Now we do the real byte sequences that are valid UTF8
["the utf8 sequence for chr $_->[0]",
["no utf8; for the utf8 sequence for chr $_->[0]",
["use utf8; for the utf8 sequence for chr $_->[0]",
} @char),
# Interpolation of hex characters needs to take place now, as we're
# testing feeding malformed utf8 into perl. Bug now fixed was an
# "out of memory" error. We really need the "" [rather than qq()
# or q()] to get the best explosion.
# NOTE(review): the here-document opened on the next line has no visible
# body or terminator.
["!Feed malformed utf8 into perl.", <<"BANG",
],
);
print "# Possible delay...\n";
} else {
}
}
# Progress note announcing the second pass, which runs the tests through eval.
print
"# Again! Again! [but this time as eval, and not the explosive one]\n";
# and now we've safely done them all as separate files, check that the
# evals do the same thing. Hopefully doing it later successfully decouples
# the previous tests from anything messy that may go wrong with the evals.
# Entries whose description begins with an exclamation mark are skipped.
# NOTE(review): the loop this statement belongs to and the assignment of $why
# are not visible.
next if $why =~ m/^!/; # Goes bang.
# NOTE(review): the branch taken when eval left an error is empty in the
# visible lines.
if ($@) {
}
}
# See what the tokeniser does with hash keys.
print "# What does the tokeniser do with utf8 hash keys?\n";
# Each entry below pairs a description with a compiled pattern, presumably the
# pattern the output of the corresponding program has to match -- confirm.
# The plain and no-utf8 variants expect element 2 of the current @char row
# between the > and < markers; the use-utf8 variants expect element 0.
# NOTE(review): the opening of this list, the program text of each entry and
# the loop that runs the entries are not visible.
# This is the control - I don't expect it to fail
["assign utf8 for chr $_->[0] to a hash",
qr/^>$_->[2]<$/],
["no utf8; assign utf8 for chr $_->[0] to a hash",
qr/^>$_->[2]<$/],
["use utf8; assign utf8 for chr $_->[0] to a hash",
qr/^>$_->[0]<$/],
# Now check literal $h{"x"} constructions.
["\$h{\"x\"} construction, where x is utf8 for chr $_->[0]",
qr/^>$_->[2]<$/],
["no utf8; \$h{\"x\"} construction, where x is utf8 for chr $_->[0]",
qr/^>$_->[2]<$/],
["use utf8; \$h{\"x\"} construction, where x is utf8 for chr $_->[0]",
qr/^>$_->[0]<$/],
# Now check "x" => constructions.
["assign \"x\"=>1 to a hash, where x is utf8 for chr $_->[0]",
qr/^>$_->[2]<$/],
["no utf8; assign \"x\"=>1 to a hash, where x is utf8 for chr $_->[0]",
qr/^>$_->[2]<$/],
["use utf8; assign \"x\"=>1 to a hash, where x is utf8 for chr $_->[0]",
qr/^>$_->[0]<$/],
# Check copies of hashes made from literal utf8 keys
["assign utf8 for chr $_->[0] to a hash, then copy it",
qr/^>$_->[2]<$/],
["no utf8; assign utf8 for chr $_->[0] to a hash, then copy it",
qr/^>$_->[2]<$/],
["use utf8; assign utf8 for chr $_->[0] to a hash, then copy it",
qr/^>$_->[0]<$/],
} @char);
# print "# $prog\n";
}
}
#
# bug fixed by change #17928
# separate perl used because we rely on 'strict' not yet loaded;
# before the patch, the eval died with an error like:
# "my" variable $strict::VERSION can't be in a package
#
SKIP: {
{
# The utf8 pragma is in effect for the rest of this inner block, including
# the string eval below.
use utf8;
# String-evaluate $code and print the error text if the eval failed.
# NOTE(review): $code is not assigned in the visible lines, and nothing here
# starts the separate perl that the comment above this stanza mentions.
eval $code;
print $@ if $@;
}
}
{
use utf8;
# The next statement assigns a single-quoted (non-interpolating) here-document
# to the package variable $a.
# NOTE(review): no END terminator is visible, so where the here-document body
# stops cannot be determined from these lines; the statements that follow are
# presumably ordinary code from the full file -- confirm.
$a = <<'END';
0 ....... 1 ....... 2 ....... 3 ....... 4 ....... 5 ....... 6 ....... 7 .......
my (@i, $s);
@i = ();
@i = ();
@i = ();
}
SKIP: {
use utf8;
# NOTE(review): only the tail of a statement is visible here: a description
# string followed by closing punctuation. The code it describes (utf8 quote
# delimiters, perl #16823) does not appear in these lines.
"utf8 quote delimiters [perl #16823]");};
}
# Test the "internals".
{
# One-character ASCII string; the only statement visible in this stanza.
my $a = "A";
# NOTE(review): the calls that the remark below refers to are not visible.
# encode() clears the UTF-8 flag (unlike upgrade()).
}
{
# NOTE(review): only the description string and closing punctuation of a
# statement are visible; the code exercising utf8::encode on a read-only
# value does not appear in these lines.
"utf8::encode should refuse to touch read-only values");
}
{
# Reference string: the characters 4, 5, 6 followed by character 0xB6.
my $a = "456\xb6";
# Same string with three extra leading characters.
my $b = "123456\xb6";
# Remove the first three characters in place, leaving $b with the same four
# characters as $a.
# NOTE(review): the comparison between $a and $b is not visible.
$b =~ s/^...//;
}