Cross Reference: Header.pm
xref
: /
osnet-11
/
usr
/
src
/
cmd
/
perl
/
5.8.4
/
distrib
/
ext
/
Encode
/
lib
/
Encode
/
MIME
/
Header.pm
Home
History
Annotate
Line#
Navigate
Download
Search
only in
./
1
N/A
package
Encode
::
MIME
::
Header
;
1
N/A
use
strict
;
1
N/A
# use warnings;
1
N/A
our
$
VERSION
=
do
{
my
@r = (q$
Revision
:
1.9
$ =~ /\d+/g);
sprintf
"%d."
.
"%02d"
x $
#r, @r };
1
N/A
use
Encode
qw
(
find_encoding
encode_utf8
decode_utf8
);
1
N/A
use
MIME
::
Base64
;
1
N/A
use
Carp
;
1
N/A
1
N/A
my
%
seed
=
1
N/A
(
1
N/A
decode_b
=>
'1'
,
# decodes 'B' encoding ?
1
N/A
decode_q
=>
'1'
,
# decodes 'Q' encoding ?
1
N/A
encode
=>
'B'
,
# encode with 'B' or 'Q' ?
1
N/A
bpl
=>
75
,
# bytes per line
1
N/A
);
1
N/A
1
N/A
$
Encode
::
Encoding
{
'MIME-Header'
} =
1
N/A
bless
{
1
N/A
%
seed
,
1
N/A
Name
=>
'MIME-Header'
,
1
N/A
} =>
__PACKAGE__
;
1
N/A
1
N/A
$
Encode
::
Encoding
{
'MIME-B'
} =
1
N/A
bless
{
1
N/A
%
seed
,
1
N/A
decode_q
=>
0
,
1
N/A
Name
=>
'MIME-B'
,
1
N/A
} =>
__PACKAGE__
;
1
N/A
1
N/A
$
Encode
::
Encoding
{
'MIME-Q'
} =
1
N/A
bless
{
1
N/A
%
seed
,
1
N/A
decode_q
=>
1
,
1
N/A
encode
=>
'Q'
,
1
N/A
Name
=>
'MIME-Q'
,
1
N/A
} =>
__PACKAGE__
;
1
N/A
1
N/A
use
base
qw
(
Encode
::
Encoding
);
1
N/A
1
N/A
sub
needs_lines
{
1
}
1
N/A
sub
perlio_ok
{
0
};
1
N/A
1
N/A
sub
decode
($$;$){
1
N/A
use
utf8
;
1
N/A
my
($
obj
, $
str
, $
chk
) = @_;
1
N/A
# zap spaces between encoded words
1
N/A
$
str
=~ s/\?=\s+=\?/\?==\?/
gos
;
1
N/A
# multi-line header to single line
1
N/A
$
str
=~ s/(:?\r|\n|\r\n)[ \t]//
gos
;
1
N/A
$
str
=~
1
N/A
s{
1
N/A
=\?
# begin encoded word
1
N/A
([
0
-
9
A-
Za
-z\-_]+)
# charset (encoding)
1
N/A
\?([
QqBb
])\?
# delimiter
1
N/A
(.*?)
# Base64-encodede contents
1
N/A
\?=
# end encoded word
1
N/A
}{
1
N/A
if
(
uc
($
2
)
eq
'B'
){
1
N/A
$
obj
->{
decode_b
}
or
croak
qq
(
MIME
"B"
unsupported
);
1
N/A
decode_b
($
1
, $
3
);
1
N/A
}
elsif
(
uc
($
2
)
eq
'Q'
){
1
N/A
$
obj
->{
decode_q
}
or
croak
qq
(
MIME
"Q"
unsupported
);
1
N/A
decode_q
($
1
, $
3
);
1
N/A
}
else
{
1
N/A
croak
qq
(
MIME
"$2"
encoding
is
nonexistent
!);
1
N/A
}
1
N/A
}
egox
;
1
N/A
$_[
1
] =
''
if
$
chk
;
1
N/A
return
$
str
;
1
N/A
}
1
N/A
1
N/A
sub
decode_b
{
1
N/A
my
$
enc
=
shift
;
1
N/A
my
$d =
find_encoding
($
enc
)
or
croak
qq
(
Unknown
encoding
"$enc"
);
1
N/A
my
$
db64
=
decode_base64
(
shift
);
1
N/A
return
$d->
name
eq
'utf8'
?
1
N/A
Encode
::
decode_utf8
($
db64
) : $d->
decode
($
db64
,
Encode
::
FB_PERLQQ
);
1
N/A
}
1
N/A
1
N/A
sub
decode_q
{
1
N/A
my
($
enc
, $q) = @_;
1
N/A
my
$d =
find_encoding
($
enc
)
or
croak
qq
(
Unknown
encoding
"$enc"
);
1
N/A
$q =~ s/_/ /
go
;
1
N/A
$q =~ s/=([
0
-
9
A-
Fa
-f]{
2
})/
pack
(
"C"
,
hex
($
1
))/
ego
;
1
N/A
return
$d->
name
eq
'utf8'
?
1
N/A
Encode
::
decode_utf8
($q) : $d->
decode
($q,
Encode
::
FB_PERLQQ
);
1
N/A
}
1
N/A
1
N/A
my
$
especials
=
1
N/A
join
(
'|'
=>
1
N/A
map
{
quotemeta
(
chr
($_))}
1
N/A
unpack
(
"C*"
,
qq
{()<>@,;:\
"\'/[]?.=}));
1
N/A
1
N/A
my
$
re_encoded_word
=
1
N/A
qr
{
1
N/A
(?:
1
N/A
=\?
# begin encoded word
1
N/A
(?:[
0
-
9
A-
Za
-z\-_]+)
# charset (encoding)
1
N/A
\?(?:[
QqBb
])\?
# delimiter
1
N/A
(?:.*?)
# Base64-encodede contents
1
N/A
\?=
# end encoded word
1
N/A
)
1
N/A
}
xo
;
1
N/A
1
N/A
my
$
re_especials
=
qr
{$
re_encoded_word
|$
especials
}
xo
;
1
N/A
1
N/A
sub
encode
($$;$){
1
N/A
my
($
obj
, $
str
, $
chk
) = @_;
1
N/A
my
@
line
= ();
1
N/A
for
my
$
line
(
split
/\r|\n|\r\n/o, $
str
){
1
N/A
my
(@
word
, @
subline
);
1
N/A
for
my
$
word
(
split
/($
re_especials
)/o, $
line
){
1
N/A
if
($
word
=~ /[^\
x00
-\
x7f
]/o
or
$
word
=~ /^$
re_encoded_word
$/o){
1
N/A
push
@
word
, $
obj
->
_encode
($
word
);
1
N/A
}
else
{
1
N/A
push
@
word
, $
word
;
1
N/A
}
1
N/A
}
1
N/A
my
$
subline
=
''
;
1
N/A
for
my
$
word
(@
word
){
1
N/A
use
bytes
();
1
N/A
if
(
bytes
::
length
($
subline
) +
bytes
::
length
($
word
) > $
obj
->{
bpl
}){
1
N/A
push
@
subline
, $
subline
;
1
N/A
$
subline
=
''
;
1
N/A
}
1
N/A
$
subline
.= $
word
;
1
N/A
}
1
N/A
$
subline
and
push
@
subline
, $
subline
;
1
N/A
push
@
line
,
join
(
"\n "
=> @
subline
);
1
N/A
}
1
N/A
$_[
1
] =
''
if
$
chk
;
1
N/A
return
join
(
"\n"
, @
line
);
1
N/A
}
1
N/A
1
N/A
use
constant
HEAD
=>
'=?UTF-8?'
;
1
N/A
use
constant
TAIL
=>
'?='
;
1
N/A
use
constant
SINGLE
=> { B => \&
_encode_b
, Q => \&
_encode_q
, };
1
N/A
1
N/A
sub
_encode
{
1
N/A
my
($o, $
str
) = @_;
1
N/A
my
$
enc
= $o->{
encode
};
1
N/A
my
$
llen
= ($o->{
bpl
} -
length
(
HEAD
) -
2
-
length
(
TAIL
));
1
N/A
# to coerce a floating-point arithmetics, the following contains
1
N/A
# .0 in numbers -- dankogai
1
N/A
$
llen
*= $
enc
eq
'B'
?
3.0
/
4.0
:
1.0
/
3.0
;
1
N/A
my
@
result
= ();
1
N/A
my
$
chunk
=
''
;
1
N/A
while
(
length
(
my
$
chr
=
substr
($
str
,
0
,
1
,
''
))){
1
N/A
use
bytes
();
1
N/A
if
(
bytes
::
length
($
chunk
) +
bytes
::
length
($
chr
) > $
llen
){
1
N/A
push
@
result
,
SINGLE
->{$
enc
}($
chunk
);
1
N/A
$
chunk
=
''
;
1
N/A
}
1
N/A
$
chunk
.= $
chr
;
1
N/A
}
1
N/A
$
chunk
and
push
@
result
,
SINGLE
->{$
enc
}($
chunk
);
1
N/A
return
@
result
;
1
N/A
}
1
N/A
1
N/A
sub
_encode_b
{
1
N/A
HEAD
.
'B?'
.
encode_base64
(
encode_utf8
(
shift
),
''
) .
TAIL
;
1
N/A
}
1
N/A
1
N/A
sub
_encode_q
{
1
N/A
my
$
chunk
=
shift
;
1
N/A
$
chunk
=~ s{
1
N/A
([^
0
-
9
A-
Za
-z])
1
N/A
}{
1
N/A
join
(
""
=>
map
{
sprintf
"=%02X"
, $_}
unpack
(
"C*"
, $
1
))
1
N/A
}
egox
;
1
N/A
return
decode_utf8
(
HEAD
.
'Q?'
. $
chunk
.
TAIL
);
1
N/A
}
1
N/A
1
N/A
1
;
1
N/A
__END__
1
N/A
1
N/A
=head1 NAME
1
N/A
1
N/A
Encode::MIME::Header -- MIME 'B' and 'Q' header encoding
1
N/A
1
N/A
=head1 SYNOPSIS
1
N/A
1
N/A
use Encode
qw
/
encode
decode/;
1
N/A
$utf8 = decode('MIME-Header', $header);
1
N/A
$header = encode('MIME-Header', $utf8);
1
N/A
1
N/A
=head1 ABSTRACT
1
N/A
1
N/A
This module implements RFC 2047 Mime Header Encoding. There are 3
1
N/A
variant encoding names; C<MIME-Header>, C<MIME-B> and C<MIME-Q>. The
1
N/A
difference is described below
1
N/A
1
N/A
decode() encode()
1
N/A
----------------------------------------------
1
N/A
MIME-Header Both B and Q =?UTF-8?B?....?=
1
N/A
MIME-B B only; Q croaks =?UTF-8?B?....?=
1
N/A
MIME-Q Q only; B croaks =?UTF-8?Q?....?=
1
N/A
1
N/A
=head1 DESCRIPTION
1
N/A
1
N/A
When you decode(=?I<encoding>?I<X>?I<ENCODED WORD>?=), I<ENCODED WORD>
1
N/A
is extracted and decoded for I<X> encoding (B for Base64, Q for
1
N/A
Quoted-Printable). Then the decoded chunk is fed to
1
N/A
decode(I<encoding>). So long as I<encoding> is supported by Encode,
1
N/A
any source encoding is fine.
1
N/A
1
N/A
When you encode, it just encodes UTF-8 string with I<X> encoding then
1
N/A
quoted with =?UTF-8?I<X>?....?= . The parts that RFC 2047 forbids to
1
N/A
encode are left as is and long lines are folded within 76 bytes per
1
N/A
line.
1
N/A
1
N/A
=head1 BUGS
1
N/A
1
N/A
It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP?
1
N/A
and =?ISO-8859-1?= but that makes the implementation too complicated.
1
N/A
These days major mail agents all support =?UTF-8? so I think it is
1
N/A
just good enough.
1
N/A
1
N/A
=head1 SEE ALSO
1
N/A
1
N/A
L<Encode>
1
N/A
1
N/A
RFC 2047, L<
http://www.faqs.org/rfcs/rfc2047.html
> and many other
1
N/A
locations.
1
N/A
1
N/A
=cut