Cross Reference: Alias.pm
xref
: /
osnet-11
/
usr
/
src
/
cmd
/
perl
/
5.8.4
/
distrib
/
ext
/
Encode
/
lib
/
Encode
/
Alias.pm
Home
History
Annotate
Line#
Navigate
Download
Search
only in
./
1
N/A
package
Encode
::
Alias
;
1
N/A
use
strict
;
1
N/A
no
warnings
'redefine'
;
1
N/A
use
Encode
;
1
N/A
our
$
VERSION
=
do
{
my
@r = (q$
Revision
:
1.38
$ =~ /\d+/g);
sprintf
"%d."
.
"%02d"
x $
#r, @r };
1
N/A
sub
DEBUG
() {
0
}
1
N/A
1
N/A
use
base
qw
(
Exporter
);
1
N/A
1
N/A
# Public, encouraged API is exported by default
1
N/A
1
N/A
our
@
EXPORT
=
1
N/A
qw
(
1
N/A
define_alias
1
N/A
find_alias
1
N/A
);
1
N/A
1
N/A
our
@
Alias
;
# ordered matching list
1
N/A
our
%
Alias
;
# cached known aliases
1
N/A
1
N/A
sub
find_alias
1
N/A
{
1
N/A
my
$
class
=
shift
;
1
N/A
my
$
find
=
shift
;
1
N/A
unless
(
exists
$
Alias
{$
find
})
1
N/A
{
1
N/A
$
Alias
{$
find
} =
undef
;
# Recursion guard
1
N/A
for
(
my
$i=
0
; $i < @
Alias
; $i +=
2
)
1
N/A
{
1
N/A
my
$
alias
= $
Alias
[$i];
1
N/A
my
$
val
= $
Alias
[$i+
1
];
1
N/A
my
$
new
;
1
N/A
if
(
ref
($
alias
)
eq
'Regexp'
&& $
find
=~ $
alias
)
1
N/A
{
1
N/A
DEBUG
and
warn
"eval $val"
;
1
N/A
$
new
=
eval
$
val
;
1
N/A
DEBUG
and
$@
and
warn
"$val, $@"
;
1
N/A
}
1
N/A
elsif
(
ref
($
alias
)
eq
'CODE'
)
1
N/A
{
1
N/A
DEBUG
and
warn
"$alias"
,
"->"
,
"($find)"
;
1
N/A
$
new
= $
alias
->($
find
);
1
N/A
}
1
N/A
elsif
(
lc
($
find
)
eq
lc
($
alias
))
1
N/A
{
1
N/A
$
new
= $
val
;
1
N/A
}
1
N/A
if
(
defined
($
new
))
1
N/A
{
1
N/A
next
if
$
new
eq
$
find
;
# avoid (direct) recursion on bugs
1
N/A
DEBUG
and
warn
"$alias, $new"
;
1
N/A
my
$
enc
= (
ref
($
new
)) ? $
new
:
Encode
::
find_encoding
($
new
);
1
N/A
if
($
enc
)
1
N/A
{
1
N/A
$
Alias
{$
find
} = $
enc
;
1
N/A
last
;
1
N/A
}
1
N/A
}
1
N/A
}
1
N/A
}
1
N/A
if
(
DEBUG
){
1
N/A
my
$
name
;
1
N/A
if
(
my
$e = $
Alias
{$
find
}){
1
N/A
$
name
= $e->
name
;
1
N/A
}
else
{
1
N/A
$
name
=
""
;
1
N/A
}
1
N/A
warn
"find_alias($class, $find)->name = $name"
;
1
N/A
}
1
N/A
return
$
Alias
{$
find
};
1
N/A
}
1
N/A
1
N/A
sub
define_alias
1
N/A
{
1
N/A
while
(@_)
1
N/A
{
1
N/A
my
($
alias
,$
name
) =
splice
(@_,
0
,
2
);
1
N/A
unshift
(@
Alias
, $
alias
=> $
name
);
# newer one has precedence
1
N/A
# clear %Alias cache to allow overrides
1
N/A
if
(
ref
($
alias
)){
1
N/A
my
@a =
keys
%
Alias
;
1
N/A
for
my
$k (@a){
1
N/A
if
(
ref
($
alias
)
eq
'Regexp'
&& $k =~ $
alias
)
1
N/A
{
1
N/A
DEBUG
and
warn
"delete \$Alias\{$k\}"
;
1
N/A
delete
$
Alias
{$k};
1
N/A
}
1
N/A
elsif
(
ref
($
alias
)
eq
'CODE'
)
1
N/A
{
1
N/A
DEBUG
and
warn
"delete \$Alias\{$k\}"
;
1
N/A
delete
$
Alias
{$
alias
->($
name
)};
1
N/A
}
1
N/A
}
1
N/A
}
else
{
1
N/A
DEBUG
and
warn
"delete \$Alias\{$alias\}"
;
1
N/A
delete
$
Alias
{$
alias
};
1
N/A
}
1
N/A
}
1
N/A
}
1
N/A
1
N/A
# Allow latin-1 style names as well
1
N/A
# 0 1 2 3 4 5 6 7 8 9 10
1
N/A
our
@
Latin2iso
= (
0
,
1
,
2
,
3
,
4
,
9
,
10
,
13
,
14
,
15
,
16
);
1
N/A
# Allow winlatin1 style names as well
1
N/A
our
%
Winlatin2cp
= (
1
N/A
'latin1'
=>
1252
,
1
N/A
'latin2'
=>
1250
,
1
N/A
'cyrillic'
=>
1251
,
1
N/A
'greek'
=>
1253
,
1
N/A
'turkish'
=>
1254
,
1
N/A
'hebrew'
=>
1255
,
1
N/A
'arabic'
=>
1256
,
1
N/A
'baltic'
=>
1257
,
1
N/A
'vietnamese'
=>
1258
,
1
N/A
);
1
N/A
1
N/A
init_aliases
();
1
N/A
1
N/A
sub
undef_aliases
{
1
N/A
@
Alias
= ();
1
N/A
%
Alias
= ();
1
N/A
}
1
N/A
1
N/A
sub
init_aliases
1
N/A
{
1
N/A
undef_aliases
();
1
N/A
1
N/A
# Try all-lower-case version should all else fails
1
N/A
define_alias
(
qr
/^(.*)$/ =>
'"\L$1"'
);
1
N/A
1
N/A
#
UTF
/
UCS
stuff
1
N/A
define_alias
(
qr
/^
UTF
-?
7
$/i =>
'"UTF-7"'
);
1
N/A
define_alias
(
qr
/^
UCS
-?
2
-?
LE
$/i =>
'"UCS-2LE"'
);
1
N/A
define_alias
(
qr
/^
UCS
-?
2
-?(
BE
)?$/i =>
'"UCS-2BE"'
,
1
N/A
qr
/^
UCS
-?
4
-?(
BE
|
LE
)?$/i =>
'uc("UTF-32$1")'
,
1
N/A
qr
/^
iso
-
10646
-
1
$/i =>
'"UCS-2BE"'
);
1
N/A
define_alias
(
qr
/^
UTF
(
16
|
32
)-?
BE
$/i =>
'"UTF-$1BE"'
,
1
N/A
qr
/^
UTF
(
16
|
32
)-?
LE
$/i =>
'"UTF-$1LE"'
,
1
N/A
qr
/^
UTF
(
16
|
32
)$/i =>
'"UTF-$1"'
,
1
N/A
);
1
N/A
# ASCII
1
N/A
define_alias
(
qr
/^(?:
US
-?)
ascii
$/i =>
'"ascii"'
);
1
N/A
define_alias
(
'C'
=>
'ascii'
);
1
N/A
define_alias
(
qr
/\
bISO
[-_]?
646
[-_]?
US
$/i =>
'"ascii"'
);
1
N/A
# Allow variants of iso-8859-1 etc.
1
N/A
define_alias
(
qr
/\
biso
[-_]?(\d+)[-_](\d+)$/i =>
'"iso-$1-$2"'
);
1
N/A
1
N/A
# At least HP-UX has these.
1
N/A
define_alias
(
qr
/\
biso8859
(\d+)$/i =>
'"iso-8859-$1"'
);
1
N/A
1
N/A
# More HP stuff.
1
N/A
define_alias
(
qr
/\b(?:
hp
-)?(
arabic
|
greek
|
hebrew
|
kana
|
roman
|
thai
|
turkish
)
8
$/i =>
'"${1}8"'
);
1
N/A
1
N/A
# The Official name of ASCII.
1
N/A
define_alias
(
qr
/\
bANSI
[-_]?
X3
\.
4
[-_]?
1968
$/i =>
'"ascii"'
);
1
N/A
1
N/A
# This is a font issue, not an encoding issue.
1
N/A
# (The currency symbol of the Latin 1 upper half
1
N/A
# has been redefined as the euro symbol.)
1
N/A
define_alias
(
qr
/^(.+)\@
euro
$/i =>
'"$1"'
);
1
N/A
1
N/A
define_alias
(
qr
/\b(?:
iso
[-_]?)?
latin
[-_]?(\d+)$/i
1
N/A
=>
'defined $Encode::Alias::Latin2iso[$1] ? "iso-8859-$Encode::Alias::Latin2iso[$1]" : undef'
);
1
N/A
1
N/A
define_alias
(
qr
/\
bwin
(
latin
[
12
]|
cyrillic
|
baltic
|
greek
|
turkish
|
1
N/A
hebrew
|
arabic
|
baltic
|
vietnamese
)$/
ix
=>
1
N/A
'"cp" . $Encode::Alias::Winlatin2cp{lc($1)}'
);
1
N/A
1
N/A
# Common names for non-latin prefered MIME names
1
N/A
define_alias
(
'ascii'
=>
'US-ascii'
,
1
N/A
'cyrillic'
=>
'iso-8859-5'
,
1
N/A
'arabic'
=>
'iso-8859-6'
,
1
N/A
'greek'
=>
'iso-8859-7'
,
1
N/A
'hebrew'
=>
'iso-8859-8'
,
1
N/A
'thai'
=>
'iso-8859-11'
,
1
N/A
'tis620'
=>
'iso-8859-11'
,
1
N/A
);
1
N/A
1
N/A
# At least AIX has IBM-NNN (surprisingly...) instead of cpNNN.
1
N/A
# And Microsoft has their own naming (again, surprisingly).
1
N/A
# And windows-* is registered in IANA!
1
N/A
define_alias
(
qr
/\b(?:
cp
|
ibm
|
ms
|
windows
)[-_ ]?(\d{
2
,
4
})$/i =>
'"cp$1"'
);
1
N/A
1
N/A
# Sometimes seen with a leading zero.
1
N/A
# define_alias( qr/\bcp037\b/i => '"cp37"');
1
N/A
1
N/A
# Mac Mappings
1
N/A
# predefined in *.ucm; unneeded
1
N/A
# define_alias( qr/\bmacIcelandic$/i => '"macIceland"');
1
N/A
define_alias
(
qr
/^
mac_
(.*)$/i =>
'"mac$1"'
);
1
N/A
# Ououououou. gone. They are differente!
1
N/A
# define_alias( qr/\bmacRomanian$/i => '"macRumanian"');
1
N/A
1
N/A
# Standardize on the dashed versions.
1
N/A
# define_alias( qr/\butf8$/i => '"utf-8"' );
1
N/A
define_alias
(
qr
/\
bkoi8
[\s-_]*([
ru
])$/i =>
'"koi8-$1"'
);
1
N/A
1
N/A
unless
($
Encode
::
ON_EBCDIC
){
1
N/A
# for Encode::CN
1
N/A
define_alias
(
qr
/\
beuc
.*
cn
$/i =>
'"euc-cn"'
);
1
N/A
define_alias
(
qr
/\
bcn
.*
euc
$/i =>
'"euc-cn"'
);
1
N/A
# define_alias( qr/\bGB[- ]?(\d+)$/i => '"euc-cn"' )
1
N/A
# CP936 doesn't have vendor-addon for GBK, so they're identical.
1
N/A
define_alias
(
qr
/^
gbk
$/i =>
'"cp936"'
);
1
N/A
# This fixes gb2312 vs. euc-cn confusion, practically
1
N/A
define_alias
(
qr
/\
bGB
[-_ ]?
2312
(?!-?
raw
)/i =>
'"euc-cn"'
);
1
N/A
# for Encode::JP
1
N/A
define_alias
(
qr
/\
bjis
$/i =>
'"7bit-jis"'
);
1
N/A
define_alias
(
qr
/\
beuc
.*
jp
$/i =>
'"euc-jp"'
);
1
N/A
define_alias
(
qr
/\
bjp
.*
euc
$/i =>
'"euc-jp"'
);
1
N/A
define_alias
(
qr
/\
bujis
$/i =>
'"euc-jp"'
);
1
N/A
define_alias
(
qr
/\
bshift
.*
jis
$/i =>
'"shiftjis"'
);
1
N/A
define_alias
(
qr
/\
bsjis
$/i =>
'"shiftjis"'
);
1
N/A
# for Encode::KR
1
N/A
define_alias
(
qr
/\
beuc
.*
kr
$/i =>
'"euc-kr"'
);
1
N/A
define_alias
(
qr
/\
bkr
.*
euc
$/i =>
'"euc-kr"'
);
1
N/A
# This fixes ksc5601 vs. euc-kr confusion, practically
1
N/A
define_alias
(
qr
/(?:x-)?
uhc
$/i =>
'"cp949"'
);
1
N/A
define_alias
(
qr
/(?:x-)?
windows
-
949
$/i =>
'"cp949"'
);
1
N/A
define_alias
(
qr
/\
bks_c_5601
-
1987
$/i =>
'"cp949"'
);
1
N/A
# for Encode::TW
1
N/A
define_alias
(
qr
/\
bbig
-?
5
$/i =>
'"big5-eten"'
);
1
N/A
define_alias
(
qr
/\
bbig5
-?
et
(?:
en
)?$/i =>
'"big5-eten"'
);
1
N/A
define_alias
(
qr
/\
btca
[-_]?
big5
$/i =>
'"big5-eten"'
);
1
N/A
define_alias
(
qr
/\
bbig5
-?
hk
(?:
scs
)?$/i =>
'"big5-hkscs"'
);
1
N/A
define_alias
(
qr
/\
bhk
(?:
scs
)?[-_]?
big5
$/i =>
'"big5-hkscs"'
);
1
N/A
}
1
N/A
# utf8 is blessed :)
1
N/A
define_alias
(
qr
/^
UTF
-
8
$/i =>
'"utf8"'
,);
1
N/A
# At last, Map white space and _ to '-'
1
N/A
define_alias
(
qr
/^(\S+)[\
s_
]+(.*)$/i =>
'"$1-$2"'
);
1
N/A
}
1
N/A
1
N/A
1
;
1
N/A
__END__
1
N/A
1
N/A
# TODO: HP-UX '8' encodings arabic8 greek8 hebrew8 kana8 thai8 turkish8
1
N/A
# TODO: HP-UX '15' encodings japanese15 korean15 roi15
1
N/A
# TODO: Cyrillic encoding ISO-IR-111 (useful?)
1
N/A
# TODO: Armenian encoding ARMSCII-8
1
N/A
# TODO: Hebrew encoding ISO-8859-8-1
1
N/A
# TODO: Thai encoding TCVN
1
N/A
# TODO: Vietnamese encodings VPS
1
N/A
# TODO: Mac Asian+African encodings: Arabic Armenian Bengali Burmese
1
N/A
# ChineseSimp ChineseTrad Devanagari Ethiopic ExtArabic
1
N/A
# Farsi Georgian Gujarati Gurmukhi Hebrew Japanese
1
N/A
# Kannada Khmer Korean Laotian Malayalam Mongolian
1
N/A
# Oriya Sinhalese Symbol Tamil Telugu Tibetan Vietnamese
1
N/A
1
N/A
=head1 NAME
1
N/A
1
N/A
Encode::Alias - alias definitions to encodings
1
N/A
1
N/A
=head1 SYNOPSIS
1
N/A
1
N/A
use Encode;
1
N/A
use Encode::Alias;
1
N/A
define_alias( newName => ENCODING);
1
N/A
1
N/A
=head1 DESCRIPTION
1
N/A
1
N/A
Allows newName to be used as an alias for ENCODING. ENCODING may be
1
N/A
either the name of an encoding or an encoding object (as described
1
N/A
in L<Encode>).
1
N/A
1
N/A
Currently I<newName> can be specified in the following ways:
1
N/A
1
N/A
=over 4
1
N/A
1
N/A
=item As a simple string.
1
N/A
1
N/A
=item As a qr// compiled regular expression, e.g.:
1
N/A
1
N/A
define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' );
1
N/A
1
N/A
In this case, if I<ENCODING> is not a reference, it is C<eval>-ed
1
N/A
in order to allow C<$1> etc. to be substituted. The example is one
1
N/A
way to alias names as used in X11 fonts to the MIME names for the
1
N/A
iso-8859-* family. Note the double quotes inside the single quotes.
1
N/A
1
N/A
(or, you don't have to do this yourself because this example is predefined)
1
N/A
1
N/A
If you are using a regex here, you have to use the quotes as shown or
1
N/A
it won't work. Also note that regex handling is tricky even for the
1
N/A
experienced. Use this feature with caution.
1
N/A
1
N/A
=item As a code reference, e.g.:
1
N/A
1
N/A
define_alias( sub {shift =~ /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } );
1
N/A
1
N/A
The same effect as the example above in a different way. The coderef
1
N/A
takes the alias name as an argument and returns a canonical name on
1
N/A
success or undef if not. Note the second argument is not required.
1
N/A
Use this with even more caution than the regex version.
1
N/A
1
N/A
=back
1
N/A
1
N/A
=head3 Changes in code reference aliasing
1
N/A
1
N/A
As of Encode 1.87, the older form
1
N/A
1
N/A
define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } );
1
N/A
1
N/A
no longer works.
1
N/A
1
N/A
Encode up to 1.86 internally used "local $_" to implement ths older
1
N/A
form. But consider the code below;
1
N/A
1
N/A
use Encode;
1
N/A
$_ = "eeeee" ;
1
N/A
while (/(e)/g) {
1
N/A
my $utf = decode('aliased-encoding-name', $1);
1
N/A
print "position:",pos,"\n";
1
N/A
}
1
N/A
1
N/A
Prior to Encode 1.86 this fails because of "local $_".
1
N/A
1
N/A
=head2 Alias overloading
1
N/A
1
N/A
You can override predefined aliases by simply applying define_alias().
1
N/A
The new alias is always evaluated first, and when neccessary,
1
N/A
define_alias() flushes the internal cache to make the new definition
1
N/A
available.
1
N/A
1
N/A
# redirect SHIFT_JIS to
MS
/
IBM
Code Page 932, which is a
1
N/A
# superset of SHIFT_JIS
1
N/A
1
N/A
define_alias(
qr
/
shift
.*jis$/i => '"cp932"' );
1
N/A
define_alias(
qr
/
sjis
$/i => '"cp932"' );
1
N/A
1
N/A
If you want to zap all predefined aliases, you can use
1
N/A
1
N/A
Encode::Alias->undef_aliases;
1
N/A
1
N/A
to do so. And
1
N/A
1
N/A
Encode::Alias->init_aliases;
1
N/A
1
N/A
gets the factory settings back.
1
N/A
1
N/A
=head1 SEE ALSO
1
N/A
1
N/A
L<Encode>, L<Encode::Supported>
1
N/A
1
N/A
=cut
1
N/A