Encode/Unicode/UTF7.pm

1N/A#
1N/A# $Id: UTF7.pm,v 0.2 2003/05/19 04:56:03 dankogai Exp $
1N/A#
1N/Apackage Encode::Unicode::UTF7;
1N/Ause strict;
1N/Ano warnings 'redefine';
1N/Ause base qw(Encode::Encoding);
1N/A__PACKAGE__->Define('UTF-7');
1N/Aour $VERSION = do { my @r = (q$Revision: 0.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
1N/Ause MIME::Base64;
1N/Ause Encode;
1N/A
1N/A#
1N/A# Algorithms taken from Unicode::String by Gisle Aas
1N/A#
1N/A
1N/Aour $OPTIONAL_DIRECT_CHARS = 1;
1N/Amy $specials =   quotemeta "\'(),-./:?";
1N/A$OPTIONAL_DIRECT_CHARS and
1N/A    $specials .= quotemeta "!\"#$%&*;<=>@[]^_`{|}";
1N/A# \s will not work because it matches U+3000 DEOGRAPHIC SPACE
1N/A# We use qr/[\n\r\t\ ] instead
1N/Amy $re_asis =     qr/(?:[\n\r\t\ A-Za-z0-9$specials])/;
1N/Amy $re_encoded = qr/(?:[^\n\r\t\ A-Za-z0-9$specials])/;
1N/Amy $e_utf16 = find_encoding("UTF-16BE");
1N/A
1N/Asub needs_lines { 1 };
1N/A
1N/Asub encode($$;$){
1N/A    my ($obj, $str, $chk) = @_;
1N/A    my $len = length($str);
1N/A    pos($str) = 0;
1N/A    my $bytes = '';
1N/A    while (pos($str) < $len){
1N/A    if    ($str =~ /\G($re_asis+)/ogc){
1N/A        $bytes .= $1;
1N/A    }elsif($str =~ /\G($re_encoded+)/ogsc){
1N/A        if ($1 eq "+"){
1N/A        $bytes .= "+-";
1N/A        }else{
1N/A        my $base64 = encode_base64($e_utf16->encode($1), '');
1N/A        $base64 =~ s/=+$//;
1N/A        $bytes .= "+$base64-";
1N/A        }
1N/A    }else{
1N/A        die "This should not happen! (pos=" . pos($str) . ")";
1N/A    }
1N/A    }
1N/A    $_[1] = '' if $chk;
1N/A    return $bytes;
1N/A}
1N/A
1N/Asub decode{
1N/A    my ($obj, $bytes, $chk) = @_;
1N/A    my $len = length($bytes);
1N/A    my $str = "";
1N/A    while (pos($bytes) < $len) {
1N/A    if    ($bytes =~ /\G([^+]+)/ogc) {
1N/A        $str .= $1;
1N/A    }elsif($bytes =~ /\G\+-/ogc) {
1N/A        $str .= "+";
1N/A    }elsif($bytes =~ /\G\+([A-Za-z0-9+\/]+)-?/ogsc) {
1N/A        my $base64 = $1;
1N/A        my $pad = length($base64) % 4;
1N/A        $base64 .= "=" x (4 - $pad) if $pad;
1N/A        $str .= $e_utf16->decode(decode_base64($base64));
1N/A    }elsif($bytes =~ /\G\+/ogc) {
1N/A        $^W and warn "Bad UTF7 data escape";
1N/A        $str .= "+";
1N/A    }else{
1N/A        die "This should not happen " . pos($bytes);
1N/A    }
1N/A    }
1N/A    $_[1] = '' if $chk;
1N/A    return $str;
1N/A}
1N/A1;
1N/A__END__
1N/A
1N/A=head1 NAME
1N/A
1N/AEncode::Unicode::UTF7 -- UTF-7 encoding
1N/A
1N/A=head1 SYNOPSIS
1N/A
1N/A    use Encode qw/encode decode/;
1N/A    $utf7 = encode("UTF-7", $utf8);
1N/A    $utf8 = decode("UTF-7", $ucs2);
1N/A
1N/A=head1 ABSTRACT
1N/A
1N/AThis module implements UTF-7 encoding documented in RFC 2152.  UTF-7,
1N/Aas its name suggests, is a 7-bit re-encoded version of UTF-16BE.  It
1N/Ais designed to be MTA-safe and expected to be a standard way to
1N/Aexchange Unicoded mails via mails.  But with the advent of UTF-8 and
1N/A8-bit compliant MTAs, UTF-7 is hardly ever used.
1N/A
1N/AUTF-7 was not supported by Encode until version 1.95 because of that.
1N/ABut Unicode::String, a module by Gisle Aas which adds Unicode supports
1N/Ato non-utf8-savvy perl did support UTF-7, the UTF-7 support was added
1N/Aso Encode can supersede Unicode::String 100%.
1N/A
1N/A=head1 In Practice
1N/A
1N/AWhen you want to encode Unicode for mails and web pages, however, do
1N/Anot use UTF-7 unless you are sure your recipients and readers can
1N/Ahandle it.  Very few MUAs and WWW Browsers support these days (only
1N/AMozilla seems to support one).  For general cases, use UTF-8 for
1N/Amessage body and MIME-Header for header instead.
1N/A
1N/A=head1 SEE ALSO
1N/A
1N/AL<Encode>, L<Encode::Unicode>, L<Unicode::String>
1N/A
1N/ARFC 2781 L<http://www.ietf.org/rfc/rfc2152.txt>
1N/A
1N/A=cut