locale.t revision 7c478bd95313f5f23a4c958a745db2134aa03244
#!./perl -wT
BEGIN {
chdir 't' if -d 't';
print "1..0\n";
exit;
}
$| = 1;
}
use strict;
my $debug = 1;
use Dumpvalue;
quoteHighBit => 0,
);
# Debug helper: does nothing unless the file-level $debug flag is true.
# All arguments are concatenated into one message and its final character is
# removed with chop (the callers in this file pass strings ending in "\n").
# NOTE(review): in the body visible here the message is built but never
# printed -- confirm whether an output statement is missing.
sub debug {
return unless $debug;
my($mess) = join "", @_;
chop $mess;
}
# printf-style debug helper: formats and prints its arguments (format string
# first) on the default output handle, but only while the file-level $debug
# flag is true.
sub debugf {
    $debug and printf @_;
}
# Flag telling whether the setlocale() based tests may run; starts out false.
my $have_setlocale = 0;
# Try to load POSIX and import its ':locale_h' symbols.  The eval traps a
# failure so that the script can carry on without them.
# NOTE(review): nothing visible in this block ever sets $have_setlocale to a
# true value -- confirm whether a statement is missing from the eval.
eval {
require POSIX;
import POSIX ':locale_h';
};
# Force the flag off on MSWin32/NetWare when the compiler name starts with
# "cl" or "gcc": the CRT is silly there, and mingw32 uses that same CRT.
$have_setlocale = 0 if (($^O eq 'MSWin32' || $^O eq 'NetWare') && $Config{cc} =~ /^(cl|gcc)/i);
# UWIN seems to loop after test 98, just skip for now
$have_setlocale = 0 if ($^O =~ /^uwin/);
# Number of the last test that will be run.  The '&' sigil is required to
# call last() because a bare "last" is Perl's loop-control keyword.
my $last = $have_setlocale ? &last : &last_without_setlocale;
# Predeclare &LC_ALL as a package symbol.
use vars qw(&LC_ALL);
# Sample string probed by the taint checks that follow.
$a = 'abc %';
# Report the outcome of one test as a TAP result line on the default output.
#   $n      - number of the test being reported
#   $result - true when the test passed, false when it failed
# Prints "ok $n" for a pass and "not ok $n" for a failure.  The "ok $n" line
# must always be emitted: without it a pass produces no output at all and a
# failure leaves a dangling "not " with no test number or newline.
sub ok {
    my ($n, $result) = @_;

    print 'not ' unless $result;
    print "ok $n\n";
}
# First we'll do a lot of taint checking for locales.
# This is the easiest to test, actually, as any locale,
# even the default locale will taint under 'use locale'.
# Taint probe -- hello, camel two.
# The arguments are joined and, within that same statement, kill is invoked
# with signal 0 and no process list; the surrounding eval traps any exception.
# Returns true when the eval block did not reach its final "1" (this is how
# taint is detected) and false when it ran to completion.
sub is_tainted {
    # Undefined arguments (for example an unset $2) are expected here.
    no warnings 'uninitialized';

    my $joined;
    my $completed = eval {
        # The join and the kill deliberately stay in ONE comma-separated
        # statement, exactly as the idiom requires.
        $joined = join('', @_), kill 0;
        1;
    };

    return !$completed;
}
# Assert that a value IS tainted.
#   $_[0] - test number, handed on to ok()
#   $_[1] - value to probe with is_tainted()
# The ($$) prototype is part of the interface: the paren-less call sites in
# this file depend on it.
sub check_taint ($$) {
    my $test_number = $_[0];

    # Pass the original argument (not a copy) to the probe.
    return ok($test_number, is_tainted($_[1]));
}
# Assert that a value is NOT tainted.
#   $_[0] - test number, handed on to ok()
#   $_[1] - value to probe with is_tainted()
# The ($$) prototype is part of the interface: the paren-less call sites in
# this file depend on it.
sub check_taint_not ($$) {
    my $test_number = $_[0];

    # Pass the original argument (not a copy) to the probe and negate it.
    return ok($test_number, !is_tainted($_[1]));
}
# Tests 1..98: taint behaviour under 'use locale'.
# Every check below states the expected outcome: check_taint expects its
# second argument to be tainted, check_taint_not expects it to be clean.
# The statements are order-dependent (they repeatedly taint and untaint $_,
# $b and the match variables), so do not reorder them.
use locale; # engage locale and therefore locale taint.
# Baseline: the plain string literal in $a is not tainted.
check_taint_not 1, $a;
# Case-changing functions and the matching \U \u \L \l string escapes must
# all return tainted results.
check_taint 2, uc($a);
check_taint 3, "\U$a";
check_taint 4, ucfirst($a);
check_taint 5, "\u$a";
check_taint 6, lc($a);
check_taint 7, "\L$a";
check_taint 8, lcfirst($a);
check_taint 9, "\l$a";
# sprintf with these numeric formats must not taint its result.
check_taint_not 10, sprintf('%e', 123.456);
check_taint_not 11, sprintf('%f', 123.456);
check_taint_not 12, sprintf('%g', 123.456);
check_taint_not 13, sprintf('%d', 123.456);
check_taint_not 14, sprintf('%x', 123.456);
# With a tainted $_, matches using \w, \W, \s or \S are expected to taint the
# match variables, while /(.)/ is expected to leave them clean.  $2 is never
# set by these one-group patterns and must stay untainted throughout.
$_ = $a; # untaint $_
$_ = uc($a); # taint $_
check_taint 15, $_;
/(\w)/; # taint $&, $`, $', $+, $1.
check_taint 16, $&;
check_taint 17, $`;
check_taint 18, $';
check_taint 19, $+;
check_taint 20, $1;
check_taint_not 21, $2;
/(.)/; # untaint $&, $`, $', $+, $1.
check_taint_not 22, $&;
check_taint_not 23, $`;
check_taint_not 24, $';
check_taint_not 25, $+;
check_taint_not 26, $1;
check_taint_not 27, $2;
/(\W)/; # taint $&, $`, $', $+, $1.
check_taint 28, $&;
check_taint 29, $`;
check_taint 30, $';
check_taint 31, $+;
check_taint 32, $1;
check_taint_not 33, $2;
/(\s)/; # taint $&, $`, $', $+, $1.
check_taint 34, $&;
check_taint 35, $`;
check_taint 36, $';
check_taint 37, $+;
check_taint 38, $1;
check_taint_not 39, $2;
/(\S)/; # taint $&, $`, $', $+, $1.
check_taint 40, $&;
check_taint 41, $`;
check_taint 42, $';
check_taint 43, $+;
check_taint 44, $1;
check_taint_not 45, $2;
# A literal pattern matched against a clean $_ must leave everything clean.
$_ = $a; # untaint $_
check_taint_not 46, $_;
/(b)/; # this must not taint
check_taint_not 47, $&;
check_taint_not 48, $`;
check_taint_not 49, $';
check_taint_not 50, $+;
check_taint_not 51, $1;
check_taint_not 52, $2;
# Substituting in a tainted replacement string taints the target ($_) only,
# not the match variables.
$_ = $a; # untaint $_
check_taint_not 53, $_;
$b = uc($a); # taint $b
s/(.+)/$b/; # this must taint only the $_
check_taint 54, $_;
check_taint_not 55, $&;
check_taint_not 56, $`;
check_taint_not 57, $';
check_taint_not 58, $+;
check_taint_not 59, $1;
check_taint_not 60, $2;
# A substitution with a literal replacement must not taint anything.
$_ = $a; # untaint $_
s/(.+)/b/; # this must not taint
check_taint_not 61, $_;
check_taint_not 62, $&;
check_taint_not 63, $`;
check_taint_not 64, $';
check_taint_not 65, $+;
check_taint_not 66, $1;
check_taint_not 67, $2;
# Substituting on a copy: the copy ($b) becomes tainted, the source ($a)
# stays clean.
$b = $a; # untaint $b
($b = $a) =~ s/\w/$&/;
check_taint 68, $b; # $b should be tainted.
check_taint_not 69, $a; # $a should be not.
# Substitutions matching \w with a case-modifying escape (\l, \L, \u, \U) in
# the replacement: $_ and the match variables are all expected to be tainted.
$_ = $a; # untaint $_
s/(\w)/\l$1/; # this must taint
check_taint 70, $_;
check_taint 71, $&;
check_taint 72, $`;
check_taint 73, $';
check_taint 74, $+;
check_taint 75, $1;
check_taint_not 76, $2;
$_ = $a; # untaint $_
s/(\w)/\L$1/; # this must taint
check_taint 77, $_;
check_taint 78, $&;
check_taint 79, $`;
check_taint 80, $';
check_taint 81, $+;
check_taint 82, $1;
check_taint_not 83, $2;
$_ = $a; # untaint $_
s/(\w)/\u$1/; # this must taint
check_taint 84, $_;
check_taint 85, $&;
check_taint 86, $`;
check_taint 87, $';
check_taint 88, $+;
check_taint 89, $1;
check_taint_not 90, $2;
$_ = $a; # untaint $_
s/(\w)/\U$1/; # this must taint
check_taint 91, $_;
check_taint 92, $&;
check_taint 93, $`;
check_taint 94, $';
check_taint 95, $+;
check_taint 96, $1;
check_taint_not 97, $2;
# After all this tainting $a should be cool.
check_taint_not 98, $a;
# Number of the final test when setlocale() is unavailable: the script then
# stops after the taint checks, the last of which is test 98.
sub last_without_setlocale {
    return 98;
}
# I think we've seen quite enough of taint.
# Let us do some *real* locale work now,
# unless setlocale() is missing (i.e. minitest).
exit unless $have_setlocale;
# Find locales.
debug "# Scanning for locales...\n";
# Note that it's okay that some languages have their native names
# capitalized here even though that's not "right". They are lowercased
# anyway later during the scanning process (and besides, some clueless
# vendor might have them capitalized erroneously anyway).
#
# The table holds one entry per line with four colon-separated fields; a
# field may list several space-separated alternatives or be empty.
# NOTE(review): the fields look like language names, language codes, country
# codes and encoding identifiers -- confirm against the code that parses them.
my $locales = <<EOF;
Afrikaans:af:za:1 15
Arabic:ar:dz eg sa:6 arabic8
Brezhoneg Breton:br:fr:1 15
Bulgarski Bulgarian:bg:bg:5
Chinese:zh:cn tw:cn.EUC eucCN eucTW euc.CN euc.TW Big5 GB2312 tw.EUC
Hrvatski Croatian:hr:hr:2
Cymraeg Welsh:cy:cy:1 14 15
Czech:cs:cz:2
Dansk Danish:dk:da:1 15
Nederlands Dutch:nl:be nl:1 15
English American British:en:au ca gb ie nz us uk zw:1 15 cp850
Esperanto:eo:eo:3
Eesti Estonian:et:ee:4 6 13
Suomi Finnish:fi:fi:1 15
Flamish::fl:1 15
Deutsch German:de:at be ch de lu:1 15
Euskaraz Basque:eu:es fr:1 15
Galego Galician:gl:es:1 15
Ellada Greek:el:gr:7 g8
Frysk:fy:nl:1 15
Greenlandic:kl:gl:4 6
Hebrew:iw:il:8 hebrew8
Hungarian:hu:hu:2
Indonesian:in:id:1 15
Gaeilge Irish:ga:IE:1 14 15
Italiano Italian:it:ch it:1 15
Nihongo Japanese:ja:jp:euc eucJP jp.EUC sjis
Korean:ko:kr:
Latine Latin:la:va:1 15
Latvian:lv:lv:4 6 13
Lithuanian:lt:lt:4 6 13
Macedonian:mk:mk:1 15
Maltese:mt:mt:3
Moldovan:mo:mo:2
Norsk Norwegian:no no\@nynorsk:no:1 15
Occitan:oc:es:1 15
Polski Polish:pl:pl:2
Rumanian:ro:ro:2
Russki Russian:ru:ru su ua:5 koi8 koi8r KOI8-R koi8u cp1251 cp866
Serbski Serbian:sr:yu:5
Slovak:sk:sk:2
Slovene Slovenian:sl:si:2
Sqhip Albanian:sq:sq:1 15
Svenska Swedish:sv:fi se:1 15
Thai:th:th:11 tis620
Turkish:tr:tr:9 turkish8
Yiddish:yi::1 15
EOF
# These cause heartburn. Broken locales?
}
} else {
}
my @Locale;
my $Locale;
my @Alnum_;
my @utf8locale;
my %utf8skip;
sub getalnum_ {
}
sub trylocale {
my $locale = shift;
}
}
sub decode_encodings {
my @enc;
if (/^(\d+)$/) {
}
} else {
}
}
}
return @enc;
}
foreach (0..15) {
}
# Sanitize the environment so that we can run the external 'locale'
# program without the taint mode getting grumpy.
# $ENV{PATH} is special in VMS.
# Other subversive stuff.
}
# The SYS$I18N_LOCALE logical name search list was not present on
# VAX VMS V5.5-12, but was on AXP && VAX VMS V6.2 as well as later versions.
}
} else {
# This is going to be slow.
}
}
}
}
}
}
}
}
}
}
my %Problem;
my %Okay;
my %Testing;
my @Neoalpha;
my %Neoalpha;
sub tryneoalpha {
} else {
}
}
foreach (99..103) {
}
next;
}
# Sieve the uppercase and the lowercase.
$UPPER{$_} = $_;
}
$lower{$_} = $_;
}
}
}
}
}
}
# Find the alphabets that are not alphabets in the default locale.
{
$Neoalpha{$_} = $_;
}
}
# If we have no Neoalphas the remaining tests are no-ops.
foreach (99..102) {
}
} else {
# Test \w.
my $badutf8;
{
};
}
debug "# unknown whether locale and Unicode have the same \\w, skipping test 99 for locale '$Locale'\n";
} else {
} else {
}
}
# Cross-check the whole 8-bit character set.
tryneoalpha($Locale, 100,
}
# Test for read-only scalars' locale vs non-locale comparisons.
{
$a = "qwerty";
{
use locale;
}
}
{
for (0..9) {
# Select a slice.
# Select a slice one character on.
? (" ", "not ", 1)
: ("not ", " ", -1));
# all these tests should FAIL (return 0).
# Exact lt or gt cannot be tested because
# in some locales, say, eacute and E may test equal.
@test =
(
'not ($lesser ne $greater)', # 2
' ($lesser eq $greater)', # 3
'not ($greater ne $lesser )', # 8
' ($greater eq $lesser )', # 9
'not (($lesser cmp $greater) == -($sign))' # 11
);
@test{@test} = 0 x @test;
$test = 0;
}
}
}
last;
}
}
}
}
use locale;
my ($x, $y) = (1.23, 1.23);
$a = "$x";
$b = "$y";
my $c = "$x";
my $d = "$y";
{
use warnings;
my $w = 0;
sub {
print "# @_\n";
$w++;
};
# The == (among other ops) used to warn for locales
# that had something else than "." as the radix character.
{
# no locale; # XXX did this ever work correctly?
my $e = "$x";
}
my $f = "1.23";
my $g = 2.34;
}
# Does taking lc separately differ from taking
# the lc "in-line"? (This was the bug 19990704.002, change #3568.)
# The bug was in the caching of the 'o'-magic.
{
use locale;
sub lcA {
}
sub lcB {
}
my $x = "ab";
my $y = "aa";
my $z = "AB";
tryneoalpha($Locale, 116,
}
# Does lc of an UPPER (if different from the UPPER) match
# case-insensitively the UPPER, and does the UPPER match
# case-insensitively the lc of the UPPER. And vice versa.
{
use locale;
my @f = ();
my $y = lc $x;
print "# UPPER $x lc $y ",
$x =~ /$y/i ? 1 : 0, " ",
$y =~ /$x/i ? 1 : 0, "\n" if 0;
#
# If $x and $y contain regular expression characters
# AND THEY lowercase (/i) to regular expression characters,
# regcomp() will be mightily confused. No, the \Q doesn't
# help here (maybe regex engine internal lowercasing
# is done after the \Q?) An example of this happening is
# the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS):
# the chr(173) (the "[") is the lowercase of the chr(235).
#
# Similarly losing EBCDIC locales include cs_cz, cs_CZ,
# et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT,
# pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU,
# sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR.
#
# Similar things can happen even under (bastardised)
# non-EBCDIC locales: in many European countries before the
# advent of ISO 8859-x nationally customised versions of
# ISO 646 were devised, reusing certain punctuation
# characters for modified characters needed by the
# stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS.
#
next;
}
# With utf8 both will fail since the locale concept
my $y = uc $x;
print "# lower $x uc $y ",
$x =~ /$y/i ? 1 : 0, " ",
$y =~ /$x/i ? 1 : 0, "\n" if 0;
next;
}
# With utf8 both will fail since the locale concept
}
if (@f) {
print "# failed 117 locale '$Locale' characters @f\n"
}
}
}
}
# Recount the errors.
if ($_ == 102) {
print "# The failure of test 102 is not necessarily fatal.\n";
print "# It usually indicates a problem in the environment,\n";
print "# not in Perl itself.\n";
}
print "not ";
}
print "ok $_\n";
}
# Give final advice.
my $didwarn = 0;
$f =~ s/(.{50,60}) /$1\n#\t/g;
print
"#\n",
"# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n",
"#\t", $f, "\n#\n",
"# on your system may have errors because the locale test $_\n",
"# failed in ", (@f == 1 ? "that locale" : "those locales"),
".\n";
print <<EOW;
#
# If your users are not using these locales you are safe for the moment,
# but please report this failure first to perlbug\@perl.com using the
# perlbug script (as described in the INSTALL file) so that the exact
# details of the failures can be sorted out first and then your operating
# system supplier can be alerted about these anomalies.
#
EOW
$didwarn = 1;
}
}
# Tell which locales were okay and which were not.
my (@s, @F);
my $p = 0;
$p++ if $Problem{$t}{$l};
}
push @s, $l if $p == 0;
}
if (@s) {
$s =~ s/(.{50,60}) /$1\n#\t/g;
"# The following locales\n#\n",
"#\t", $s, "\n#\n",
"# tested okay.\n#\n",
} else {
}
if (@F) {
$F =~ s/(.{50,60}) /$1\n#\t/g;
"# The following locales\n#\n",
"#\t", $F, "\n#\n",
"# had problems.\n#\n",
} else {
}
$S =~ s/(.{50,60}) /$1\n#\t/g;
"#\t", $S, "\n#\n",
"# were skipped for the tests ",
"# because UTF-8 and locales do not work together in Perl.\n#\n";
}
}
# Number of the final test when the setlocale() based tests are run.
# Because "last" is also a Perl keyword, call this sub with the '&' sigil.
sub last {
    return 117;
}
# eof