1N/Apackage Encode::Alias;
1N/Ause strict;
1N/Ano warnings 'redefine';
1N/Ause Encode;
1N/Aour $VERSION = do { my @r = (q$Revision: 1.38 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
1N/Asub DEBUG () { 0 }
1N/A
1N/Ause base qw(Exporter);
1N/A
# Public, encouraged API is exported by default
our @EXPORT = qw(define_alias find_alias);

# Alias table: a flat list of (alias => target) pairs, searched in order.
our @Alias;

# Cache of alias names that have already been looked up.
our %Alias;
1N/A
# find_alias( CLASS, NAME )
#
# Resolve NAME through the alias table and return the matching encoding
# object, or undef when no alias leads to a known encoding.  The outcome
# (including a failed lookup) is cached in %Alias.
#
# The entries of @Alias are tried in order (newest definition first):
#   * a qr// pattern that matches NAME yields its target: a reference
#     (an encoding object) is used as is, anything else is string-eval'ed
#     so that $1 etc. can be substituted;
#   * a code reference is called with NAME and returns a name or undef;
#   * anything else is compared with NAME case-insensitively.
# The first candidate that resolves to an encoding wins.
sub find_alias
{
    my $class = shift;
    my $find  = shift;

    unless (exists $Alias{$find}) {
        $Alias{$find} = undef;    # Recursion guard

        # Index loop on purpose: Encode::find_encoding() below may re-enter
        # this sub while the table is being walked.
        for (my $i = 0; $i < @Alias; $i += 2) {
            my $alias = $Alias[$i];
            my $val   = $Alias[$i + 1];
            my $kind  = ref($alias);
            my $new;

            if ($kind eq 'Regexp' && $find =~ $alias) {
                if (ref($val)) {
                    # An encoding object was given as the target; it must
                    # not be stringified and eval'ed.
                    $new = $val;
                }
                else {
                    # $val is a code string supplied by define_alias()
                    # callers; $find only reaches it through $1, $2, ...
                    local $@;    # do not clobber the caller's $@
                    DEBUG and warn "eval $val";
                    $new = eval $val;
                    DEBUG and $@ and warn "$val, $@";
                }
            }
            elsif ($kind eq 'CODE') {
                DEBUG and warn "$alias", "->", "($find)";
                $new = $alias->($find);
            }
            elsif (lc($find) eq lc($alias)) {
                $new = $val;
            }

            next unless defined($new);
            next if $new eq $find;    # avoid (direct) recursion on bugs
            DEBUG and warn "$alias, $new";

            my $enc = ref($new) ? $new : Encode::find_encoding($new);
            if ($enc) {
                $Alias{$find} = $enc;
                last;
            }
        }
    }

    if (DEBUG) {
        my $e    = $Alias{$find};
        my $name = $e ? $e->name : "";
        warn "find_alias($class, $find)->name = $name";
    }
    return $Alias{$find};
}
1N/A
# define_alias( ALIAS => ENCODING, ... )
#
# Register one or more aliases.  ALIAS may be a plain string, a qr//
# pattern or a code reference; ENCODING is the target handed back to
# find_alias() (it may be omitted for a code reference).  Pairs are put
# at the front of @Alias, so the newest definition has precedence.
#
# Every cached lookup in %Alias that the new alias would now answer is
# dropped, so that the override takes effect on the next find_alias().
sub define_alias
{
    while (@_) {
        my ($alias, $name) = splice(@_, 0, 2);
        unless (defined $alias) {
            # Nothing can ever match an undefined alias; do not register it.
            DEBUG and warn "define_alias: undefined alias ignored";
            next;
        }
        unshift(@Alias, $alias => $name);    # newer one has precedence

        # clear %Alias cache to allow overrides
        my $kind     = ref($alias);
        my $lc_alias = $kind ? undef : lc($alias);
        my @cached   = keys %Alias;          # snapshot: we delete while looping
        for my $k (@cached) {
            my $stale;
            if ($kind eq 'Regexp') {
                $stale = ($k =~ $alias);
            }
            elsif ($kind eq 'CODE') {
                # Ask the coderef about the cached name itself, the same
                # way find_alias() would.
                $stale = defined $alias->($k);
            }
            elsif (!$kind) {
                # find_alias() compares plain aliases case-insensitively,
                # so the flush has to do the same.
                $stale = (lc($k) eq $lc_alias);
            }
            next unless $stale;
            DEBUG and warn "delete \$Alias\{$k\}";
            delete $Alias{$k};
        }
    }
}
1N/A
# Allow latin-1 style names as well.
# The array index is the N of "latinN"; the value is the number used to
# build the "iso-8859-<value>" name.
#   index:         0  1  2  3  4  5   6   7   8   9  10
our @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 );

# Allow winlatin1 style names as well.
# Maps the part after "win" to the number used to build the "cp<value>" name.
our %Winlatin2cp = (
    'latin2'     => 1250,
    'cyrillic'   => 1251,
    'latin1'     => 1252,
    'greek'      => 1253,
    'turkish'    => 1254,
    'hebrew'     => 1255,
    'arabic'     => 1256,
    'baltic'     => 1257,
    'vietnamese' => 1258,
);

# Install the predefined aliases at load time.
init_aliases();
1N/A
# Forget every alias definition and every cached lookup.
sub undef_aliases
{
    splice(@Alias);    # empty the ordered alias table
    %Alias = ();       # empty the lookup cache
}
1N/A
# init_aliases()
#
# Reset the alias table to the predefined ("factory") aliases.
#
# define_alias() puts each new pair at the FRONT of the table, so the
# aliases defined last in this sub are tried first.  Do not reorder the
# calls below without taking that into account.
sub init_aliases
{
    undef_aliases();

    # Try the all-lower-case version should all else fail
    define_alias( qr/^(.*)$/ => '"\L$1"' );

    # UTF/UCS stuff
    define_alias( qr/^UTF-?7$/i     => '"UTF-7"' );
    define_alias( qr/^UCS-?2-?LE$/i => '"UCS-2LE"' );
    define_alias(
        qr/^UCS-?2-?(BE)?$/i    => '"UCS-2BE"',
        qr/^UCS-?4-?(BE|LE)?$/i => 'uc("UTF-32$1")',
        qr/^iso-10646-1$/i      => '"UCS-2BE"',
    );
    define_alias(
        qr/^UTF(16|32)-?BE$/i => '"UTF-$1BE"',
        qr/^UTF(16|32)-?LE$/i => '"UTF-$1LE"',
        qr/^UTF(16|32)$/i     => '"UTF-$1"',
    );

    # ASCII
    define_alias( qr/^(?:US-?)ascii$/i => '"ascii"' );
    define_alias( 'C' => 'ascii' );
    define_alias( qr/\bISO[-_]?646[-_]?US$/i => '"ascii"' );

    # Allow variants of iso-8859-1 etc.
    define_alias( qr/\biso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' );

    # At least HP-UX has these.
    define_alias( qr/\biso8859(\d+)$/i => '"iso-8859-$1"' );

    # More HP stuff.
    define_alias(
        qr/\b(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i =>
          '"${1}8"' );

    # The Official name of ASCII.
    define_alias( qr/\bANSI[-_]?X3\.4[-_]?1968$/i => '"ascii"' );

    # This is a font issue, not an encoding issue.
    # (The currency symbol of the Latin 1 upper half
    # has been redefined as the euro symbol.)
    define_alias( qr/^(.+)\@euro$/i => '"$1"' );

    define_alias( qr/\b(?:iso[-_]?)?latin[-_]?(\d+)$/i
        => 'defined $Encode::Alias::Latin2iso[$1] ? "iso-8859-$Encode::Alias::Latin2iso[$1]" : undef' );

    # Every alternative here must have an entry in %Winlatin2cp.
    define_alias( qr/\bwin(latin[12]|cyrillic|baltic|greek|turkish|
                          hebrew|arabic|vietnamese)$/ix =>
          '"cp" . $Encode::Alias::Winlatin2cp{lc($1)}' );

    # Common names for non-latin preferred MIME names
    define_alias(
        'ascii'    => 'US-ascii',
        'cyrillic' => 'iso-8859-5',
        'arabic'   => 'iso-8859-6',
        'greek'    => 'iso-8859-7',
        'hebrew'   => 'iso-8859-8',
        'thai'     => 'iso-8859-11',
        'tis620'   => 'iso-8859-11',
    );

    # At least AIX has IBM-NNN (surprisingly...) instead of cpNNN.
    # And Microsoft has their own naming (again, surprisingly).
    # And windows-* is registered in IANA!
    define_alias( qr/\b(?:cp|ibm|ms|windows)[-_ ]?(\d{2,4})$/i => '"cp$1"' );

    # Sometimes seen with a leading zero.
    # define_alias( qr/\bcp037\b/i => '"cp37"');

    # Mac Mappings
    # predefined in *.ucm; unneeded
    # define_alias( qr/\bmacIcelandic$/i => '"macIceland"');
    define_alias( qr/^mac_(.*)$/i => '"mac$1"' );
    # Gone: macRomanian and macRumanian are different.
    # define_alias( qr/\bmacRomanian$/i => '"macRumanian"');

    # Standardize on the dashed versions.
    # define_alias( qr/\butf8$/i => '"utf-8"' );
    # The '-' is escaped so that it is a literal hyphen rather than a
    # (false) range between \s and '_'.
    define_alias( qr/\bkoi8[\s\-_]*([ru])$/i => '"koi8-$1"' );

    unless ($Encode::ON_EBCDIC) {
        # for Encode::CN
        define_alias( qr/\beuc.*cn$/i => '"euc-cn"' );
        define_alias( qr/\bcn.*euc$/i => '"euc-cn"' );
        # define_alias( qr/\bGB[- ]?(\d+)$/i => '"euc-cn"' )
        # CP936 doesn't have vendor-addon for GBK, so they're identical.
        define_alias( qr/^gbk$/i => '"cp936"' );
        # This fixes gb2312 vs. euc-cn confusion, practically
        define_alias( qr/\bGB[-_ ]?2312(?!-?raw)/i => '"euc-cn"' );

        # for Encode::JP
        define_alias( qr/\bjis$/i        => '"7bit-jis"' );
        define_alias( qr/\beuc.*jp$/i    => '"euc-jp"' );
        define_alias( qr/\bjp.*euc$/i    => '"euc-jp"' );
        define_alias( qr/\bujis$/i       => '"euc-jp"' );
        define_alias( qr/\bshift.*jis$/i => '"shiftjis"' );
        define_alias( qr/\bsjis$/i       => '"shiftjis"' );

        # for Encode::KR
        define_alias( qr/\beuc.*kr$/i => '"euc-kr"' );
        define_alias( qr/\bkr.*euc$/i => '"euc-kr"' );
        # This fixes ksc5601 vs. euc-kr confusion, practically
        define_alias( qr/(?:x-)?uhc$/i         => '"cp949"' );
        define_alias( qr/(?:x-)?windows-949$/i => '"cp949"' );
        define_alias( qr/\bks_c_5601-1987$/i   => '"cp949"' );

        # for Encode::TW
        define_alias( qr/\bbig-?5$/i              => '"big5-eten"' );
        define_alias( qr/\bbig5-?et(?:en)?$/i     => '"big5-eten"' );
        define_alias( qr/\btca[-_]?big5$/i        => '"big5-eten"' );
        define_alias( qr/\bbig5-?hk(?:scs)?$/i    => '"big5-hkscs"' );
        define_alias( qr/\bhk(?:scs)?[-_]?big5$/i => '"big5-hkscs"' );
    }

    # utf8 is blessed :)
    define_alias( qr/^UTF-8$/i => '"utf8"', );

    # At last, map white space and _ to '-'
    define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' );
}
1N/A
1N/A1;
1N/A__END__
1N/A
1N/A# TODO: HP-UX '8' encodings arabic8 greek8 hebrew8 kana8 thai8 turkish8
1N/A# TODO: HP-UX '15' encodings japanese15 korean15 roi15
1N/A# TODO: Cyrillic encoding ISO-IR-111 (useful?)
1N/A# TODO: Armenian encoding ARMSCII-8
1N/A# TODO: Hebrew encoding ISO-8859-8-1
1N/A# TODO: Thai encoding TCVN
1N/A# TODO: Vietnamese encodings VPS
1N/A# TODO: Mac Asian+African encodings: Arabic Armenian Bengali Burmese
1N/A# ChineseSimp ChineseTrad Devanagari Ethiopic ExtArabic
1N/A# Farsi Georgian Gujarati Gurmukhi Hebrew Japanese
1N/A# Kannada Khmer Korean Laotian Malayalam Mongolian
1N/A# Oriya Sinhalese Symbol Tamil Telugu Tibetan Vietnamese
1N/A
1N/A=head1 NAME
1N/A
1N/AEncode::Alias - alias definitions to encodings
1N/A
1N/A=head1 SYNOPSIS
1N/A
1N/A use Encode;
1N/A use Encode::Alias;
1N/A define_alias( newName => ENCODING);
1N/A
1N/A=head1 DESCRIPTION
1N/A
1N/AAllows newName to be used as an alias for ENCODING. ENCODING may be
1N/Aeither the name of an encoding or an encoding object (as described
1N/Ain L<Encode>).
1N/A
1N/ACurrently I<newName> can be specified in the following ways:
1N/A
1N/A=over 4
1N/A
1N/A=item As a simple string.
1N/A
1N/A=item As a qr// compiled regular expression, e.g.:
1N/A
1N/A define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' );
1N/A
1N/AIn this case, if I<ENCODING> is not a reference, it is C<eval>-ed
1N/Ain order to allow C<$1> etc. to be substituted. The example is one
1N/Away to alias names as used in X11 fonts to the MIME names for the
1N/Aiso-8859-* family. Note the double quotes inside the single quotes.
1N/A
(You do not have to define this one yourself, because this alias is
already predefined.)
1N/A
1N/AIf you are using a regex here, you have to use the quotes as shown or
1N/Ait won't work. Also note that regex handling is tricky even for the
1N/Aexperienced. Use this feature with caution.
1N/A
1N/A=item As a code reference, e.g.:
1N/A
1N/A define_alias( sub {shift =~ /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } );
1N/A
1N/AThe same effect as the example above in a different way. The coderef
1N/Atakes the alias name as an argument and returns a canonical name on
1N/Asuccess or undef if not. Note the second argument is not required.
1N/AUse this with even more caution than the regex version.
1N/A
1N/A=back
1N/A
1N/A=head3 Changes in code reference aliasing
1N/A
1N/AAs of Encode 1.87, the older form
1N/A
1N/A define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } );
1N/A
1N/Ano longer works.
1N/A
Encode up to 1.86 internally used "local $_" to implement the older
form. But consider the code below:
1N/A
1N/A use Encode;
1N/A $_ = "eeeee" ;
1N/A while (/(e)/g) {
1N/A my $utf = decode('aliased-encoding-name', $1);
1N/A print "position:",pos,"\n";
1N/A }
1N/A
With Encode 1.86 and earlier this fails because of "local $_".
1N/A
1N/A=head2 Alias overloading
1N/A
You can override predefined aliases by simply applying define_alias().
The new alias is always evaluated first, and when necessary,
define_alias() flushes the internal cache to make the new definition
available.
1N/A
1N/A # redirect SHIFT_JIS to MS/IBM Code Page 932, which is a
1N/A # superset of SHIFT_JIS
1N/A
1N/A define_alias( qr/shift.*jis$/i => '"cp932"' );
1N/A define_alias( qr/sjis$/i => '"cp932"' );
1N/A
1N/AIf you want to zap all predefined aliases, you can use
1N/A
1N/A Encode::Alias->undef_aliases;
1N/A
1N/Ato do so. And
1N/A
1N/A Encode::Alias->init_aliases;
1N/A
1N/Agets the factory settings back.
1N/A
1N/A=head1 SEE ALSO
1N/A
1N/AL<Encode>, L<Encode::Supported>
1N/A
1N/A=cut
1N/A