Alias.pm revision 7c478bd95313f5f23a4c958a745db2134aa03244
use strict;
no warnings 'redefine';
use Encode;
# Debug switch. While this returns 0, every "DEBUG and warn ..." statement
# in this file short-circuits and prints nothing; change the 0 to a true
# value to get the trace messages.
sub DEBUG () { 0 }
use base qw(Exporter);
# Public, encouraged API is exported by default
# NOTE(review): the export list below is empty in this copy of the file,
# although the SYNOPSIS calls define_alias() unqualified -- confirm whether
# names are missing here.
our @EXPORT =
qw (
);
our @Alias; # ordered matching list
our %Alias; # cached known aliases
# find_alias(CLASS, NAME)
#
# Takes the invocant ($class) and the alias name to look up ($find).
# When DEBUG is true, the outcome of the lookup is reported with warn().
#
# NOTE(review): this copy of the sub is incomplete. $val, $alias, $enc and
# $name are used without a visible declaration, an "elsif" follows a bare
# block, the braces do not balance, and no access to @Alias/%Alias and no
# return statement are visible. Restore the missing statements before
# relying on this code.
sub find_alias
{
my $class = shift;
my $find = shift;
{
{
# Candidate result for the alias entry currently being examined.
my $new;
{
# Trace the value that is eval-ed and, if the eval left an error in $@,
# report that as well (both only when DEBUG is on).
DEBUG and warn "eval $val";
DEBUG and $@ and warn "$val, $@";
}
# Alias given as a code reference.
# NOTE(review): the body of this branch is empty in this copy.
elsif (ref($alias) eq 'CODE')
{
}
{
}
# Only a defined candidate is considered further.
if (defined($new))
{
DEBUG and warn "$alias, $new";
# Stop searching as soon as $enc is true.
if ($enc)
{
last;
}
}
}
}
# Debug-only report of the name that was found.
if (DEBUG){
my $name;
}else{
$name = "";
}
warn "find_alias($class, $find)->name = $name";
}
}
# define_alias(ALIAS => ENCODING, ...)
#
# Loops while arguments remain in @_ and removes cached entries from %Alias
# so that a new definition can override an earlier one.
#
# NOTE(review): this copy of the sub is incomplete. $alias is never
# declared, nothing visible consumes @_ (so the loop as shown would not
# terminate), an "elsif" follows a bare block, and nothing is added to
# @Alias. Restore the missing statements before relying on this code.
sub define_alias
{
while (@_)
{
# clear %Alias cache to allow overrides
if (ref($alias)){
# The alias is a reference: walk the cached names. The keys are copied
# into @a first, so deleting from %Alias inside the loop is safe.
my @a = keys %Alias;
for my $k (@a){
{
DEBUG and warn "delete \$Alias\{$k\}";
delete $Alias{$k};
}
elsif (ref($alias) eq 'CODE')
{
# NOTE(review): the debug message announces a delete, but no delete
# statement is visible in this branch.
DEBUG and warn "delete \$Alias\{$k\}";
}
}
}else{
# The alias is a plain string.
# NOTE(review): as above, only the debug message is visible here; the
# delete itself is not.
DEBUG and warn "delete \$Alias\{$alias\}";
}
}
}
# Allow latin-1 style names as well
# 0 1 2 3 4 5 6 7 8 9 10
# Allow winlatin1 style names as well.
# Keys are lower-case script names, values are code page numbers; the alias
# rule in init_aliases() looks a name up with lc() and prepends "cp" to the
# number found here. Entries are listed in code page order.
our %Winlatin2cp = (
    latin2     => 1250,
    cyrillic   => 1251,
    latin1     => 1252,
    greek      => 1253,
    turkish    => 1254,
    hebrew     => 1255,
    arabic     => 1256,
    baltic     => 1257,
    vietnamese => 1258,
);
# Install the predefined aliases as soon as this file is loaded.
init_aliases();
# undef_aliases()
#
# Forgets every alias: empties the ordered matching list (@Alias) and the
# cache of known aliases (%Alias). Any arguments, including the class name
# when called as a class method, are ignored.
sub undef_aliases {
    $#Alias = -1;          # truncate the ordered list to zero elements
    return %Alias = ();    # drop every cached lookup
}
# init_aliases()
#
# Installs the predefined ("factory") aliases by calling define_alias().
# The POD documents Encode::Alias->init_aliases as the way to get these
# settings back after undef_aliases().
#
# NOTE(review): most of the define_alias() calls are missing from this copy
# of the sub. What remains is the comments that introduced each group plus
# a few dangling argument fragments, so the body does not parse as shown.
# Restore the missing calls before relying on this code.
sub init_aliases
{
# Try the all-lower-case version should all else fail
define_alias( qr/^(.*)$/ => '"\L$1"' );
);
# ASCII
# Allow variants of iso-8859-1 etc.
# At least HP-UX has these.
# More HP stuff.
# The Official name of ASCII.
# This is a font issue, not an encoding issue.
# (The currency symbol of the Latin 1 upper half
# has been redefined as the euro symbol.)
# Replacement string for a latin-N style name: yields "iso-8859-M" when
# @Latin2iso has an entry for N, undef otherwise.
=> 'defined $Encode::Alias::Latin2iso[$1] ? "iso-8859-$Encode::Alias::Latin2iso[$1]" : undef' );
# Replacement string for a winlatin style name: "cp" followed by the number
# found in %Winlatin2cp for the lower-cased name.
'"cp" . $Encode::Alias::Winlatin2cp{lc($1)}' );
# Common names for non-latin preferred MIME names
'cyrillic' => 'iso-8859-5',
'arabic' => 'iso-8859-6',
'greek' => 'iso-8859-7',
'hebrew' => 'iso-8859-8',
'thai' => 'iso-8859-11',
'tis620' => 'iso-8859-11',
);
# At least AIX has IBM-NNN (surprisingly...) instead of cpNNN.
# And Microsoft has their own naming (again, surprisingly).
# And windows-* is registered in IANA!
# Sometimes seen with a leading zero.
# define_alias( qr/\bcp037\b/i => '"cp37"');
# Mac Mappings
# predefined in *.ucm; unneeded
# define_alias( qr/\bmacIcelandic$/i => '"macIceland"');
# Gone: they are different!
# define_alias( qr/\bmacRomanian$/i => '"macRumanian"');
# Standardize on the dashed versions.
# define_alias( qr/\butf8$/i => '"utf-8"' );
# for Encode::CN
# define_alias( qr/\bGB[- ]?(\d+)$/i => '"euc-cn"' )
# CP936 doesn't have vendor-addon for GBK, so they're identical.
# This fixes gb2312 vs. euc-cn confusion, practically
# for Encode::JP
# for Encode::KR
# This fixes ksc5601 vs. euc-kr confusion, practically
# for Encode::TW
}
# utf8 is blessed :)
# At last, Map white space and _ to '-'
}
1;
# TODO: HP-UX '8' encodings arabic8 greek8 hebrew8 kana8 thai8 turkish8
# TODO: HP-UX '15' encodings japanese15 korean15 roi15
# TODO: Cyrillic encoding ISO-IR-111 (useful?)
# TODO: Armenian encoding ARMSCII-8
# TODO: Hebrew encoding ISO-8859-8-1
# TODO: Thai encoding TCVN
# TODO: Vietnamese encodings VPS
# TODO: Mac Asian+African encodings: Arabic Armenian Bengali Burmese
# ChineseSimp ChineseTrad Devanagari Ethiopic ExtArabic
# Farsi Georgian Gujarati Gurmukhi Hebrew Japanese
# Kannada Khmer Korean Laotian Malayalam Mongolian
# Oriya Sinhalese Symbol Tamil Telugu Tibetan Vietnamese
=head1 NAME
Encode::Alias - alias definitions to encodings
=head1 SYNOPSIS
use Encode;
use Encode::Alias;
define_alias( newName => ENCODING);
=head1 DESCRIPTION
Allows newName to be used as an alias for ENCODING. ENCODING may be
either the name of an encoding or an encoding object (as described
in L<Encode>).
Currently I<newName> can be specified in the following ways:
=over 4
=item As a simple string.
=item As a qr// compiled regular expression, e.g.:
define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' );
In this case, if I<ENCODING> is not a reference, it is C<eval>-ed
in order to allow C<$1> etc. to be substituted. The example is one
way to alias names as used in X11 fonts to the MIME names for the
iso-8859-* family. Note the double quotes inside the single quotes.
(You don't have to do this yourself, though, because this example is predefined.)
If you are using a regex here, you have to use the quotes as shown or
it won't work. Also note that regex handling is tricky even for the
experienced. Use this feature with caution.
=item As a code reference, e.g.:
define_alias( sub {shift =~ /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } );
This has the same effect as the example above, achieved in a different way. The coderef
takes the alias name as an argument and returns a canonical name on
success or undef if not. Note the second argument is not required.
Use this with even more caution than the regex version.
=back
=head3 Changes in code reference aliasing
As of Encode 1.87, the older form
define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } );
no longer works.
Encode up to 1.86 internally used "local $_" to implement this older
form. But consider the code below:
use Encode;
$_ = "eeeee" ;
while (/(e)/g) {
my $utf = decode('aliased-encoding-name', $1);
print "position:",pos,"\n";
}
With Encode 1.86 and earlier this fails because of "local $_".
=head2 Alias overloading
You can override predefined aliases by simply applying define_alias().
The new alias is always evaluated first, and when necessary,
define_alias() flushes the internal cache to make the new definition
available.
# superset of SHIFT_JIS
If you want to zap all predefined aliases, you can use
Encode::Alias->undef_aliases;
to do so. And
Encode::Alias->init_aliases;
gets the factory settings back.
=head1 SEE ALSO
L<Encode>, L<Encode::Supported>
=cut