1N/Apackage I18N::Collate;
1N/A
1N/Ause strict;
1N/Aour $VERSION = '1.00';
1N/A
1N/A=head1 NAME
1N/A
1N/AI18N::Collate - compare 8-bit scalar data according to the current locale
1N/A
1N/A=head1 SYNOPSIS
1N/A
1N/A use I18N::Collate;
1N/A setlocale(LC_COLLATE, 'locale-of-your-choice');
1N/A $s1 = new I18N::Collate "scalar_data_1";
1N/A $s2 = new I18N::Collate "scalar_data_2";
1N/A
1N/A=head1 DESCRIPTION
1N/A
1N/A ***
1N/A
1N/A WARNING: starting from the Perl version 5.003_06
1N/A the I18N::Collate interface for comparing 8-bit scalar data
1N/A according to the current locale
1N/A
1N/A HAS BEEN DEPRECATED
1N/A
1N/A That is, please do not use it anymore for any new applications
1N/A and please migrate the old applications away from it because its
1N/A functionality was integrated into the Perl core language in the
1N/A release 5.003_06.
1N/A
1N/A See the perllocale manual page for further information.
1N/A
1N/A ***
1N/A
1N/AThis module provides you with objects that will collate
1N/Aaccording to your national character set, provided that the
1N/APOSIX setlocale() function is supported on your system.
1N/A
1N/AYou can compare $s1 and $s2 above with
1N/A
1N/A $s1 le $s2
1N/A
To extract the data itself, you'll need a dereference: $$s1
1N/A
This module uses POSIX::setlocale(). The basic collation conversion is
done by strxfrm(), which, being a decent C routine, terminates at NUL
characters. collate_xfrm() handles embedded NUL characters gracefully.
1N/A
The available locales depend on your operating system; try running
C<locale -a>, reading the man pages for "locale" or "nlsinfo", or the
direct approach: C<ls /usr/lib/nls/loc>, C<ls /usr/lib/nls> or
C<ls /usr/lib/locale>. Not all the locales that your vendor supports
1N/Aare necessarily installed: please consult your operating system's
1N/Adocumentation and possibly your local system administration. The
1N/Alocale names are probably something like C<xx_XX.(ISO)?8859-N> or
1N/AC<xx_XX.(ISO)?8859N>, for example C<fr_CH.ISO8859-1> is the Swiss (CH)
1N/Avariant of French (fr), ISO Latin (8859) 1 (-1) which is the Western
1N/AEuropean character set.
1N/A
1N/A=cut
1N/A
1N/A# I18N::Collate.pm
1N/A#
1N/A# Author: Jarkko Hietaniemi <F<jhi@iki.fi>>
1N/A# Helsinki University of Technology, Finland
1N/A#
1N/A# Acks: Guy Decoux <F<decoux@moulon.inra.fr>> understood
1N/A# overloading magic much deeper than I and told
1N/A# how to cut the size of this code by more than half.
1N/A# (my first version did overload all of lt gt eq le ge cmp)
1N/A#
1N/A# Purpose: compare 8-bit scalar data according to the current locale
1N/A#
1N/A# Requirements: Perl5 POSIX::setlocale() and POSIX::strxfrm()
1N/A#
1N/A# Exports: setlocale 1)
1N/A# collate_xfrm 2)
1N/A#
1N/A# Overloads: cmp # 3)
1N/A#
1N/A# Usage: use I18N::Collate;
1N/A# setlocale(LC_COLLATE, 'locale-of-your-choice'); # 4)
1N/A# $s1 = new I18N::Collate "scalar_data_1";
1N/A# $s2 = new I18N::Collate "scalar_data_2";
1N/A#
1N/A# now you can compare $s1 and $s2: $s1 le $s2
1N/A# to extract the data itself, you need to deref: $$s1
1N/A#
1N/A# Notes:
1N/A# 1) this uses POSIX::setlocale
# 2) the basic collation conversion is done by strxfrm(), which,
# being a decent C routine, terminates at NUL characters.
# collate_xfrm handles embedded NUL characters gracefully.
1N/A# 3) due to cmp and overload magic, lt le eq ge gt work also
1N/A# 4) the available locales depend on your operating system;
1N/A# try whether "locale -a" shows them or man pages for
1N/A# "locale" or "nlsinfo" work or the more direct
1N/A# approach "ls /usr/lib/nls/loc" or "ls /usr/lib/nls".
1N/A# Not all the locales that your vendor supports
1N/A# are necessarily installed: please consult your
1N/A# operating system's documentation.
1N/A# The locale names are probably something like
1N/A# 'xx_XX.(ISO)?8859-N' or 'xx_XX.(ISO)?8859N',
1N/A# for example 'fr_CH.ISO8859-1' is the Swiss (CH)
1N/A# variant of French (fr), ISO Latin (8859) 1 (-1)
1N/A# which is the Western European character set.
1N/A#
1N/A# Updated: 19961005
1N/A#
1N/A# ---
1N/A
1N/Ause POSIX qw(strxfrm LC_COLLATE);
1N/Ause warnings::register;
1N/A
1N/Arequire Exporter;
1N/A
# Exporter setup: collate_xfrm, setlocale and LC_COLLATE are exported by
# default; nothing is available on request only.
our @ISA       = ('Exporter');
our @EXPORT    = qw(collate_xfrm setlocale LC_COLLATE);
our @EXPORT_OK = ();

# Only cmp is overloaded (by method name); with fallback enabled the
# remaining string comparison operators are derived from it.
use overload
    fallback => 1,
    cmp      => 'collate_cmp';

# $LOCALE - key of the locale currently used for the transformation cache
# $C      - the cache itself: $C->{locale}->{string} = transformed string
our ($LOCALE, $C);

# Becomes true once the deprecation warning has been issued; set it to a
# true value beforehand to suppress the warning.
our $please_use_I18N_Collate_even_if_deprecated = 0;
# new(CLASS, DATA)
#
# Constructor.  Returns a reference to a private copy of DATA, blessed into
# CLASS (or into the class of CLASS when it is an object, or into this
# package when no usable invocant is given), so that subclasses get objects
# of their own class.
#
# The first call made while this package's warnings category is enabled
# emits the deprecation notice below; the notice is issued at most once
# because $please_use_I18N_Collate_even_if_deprecated is incremented
# afterwards.  Setting that variable to a true value beforehand suppresses
# the notice altogether.
sub new {
    my ($class, $data) = @_;
    $class = ref($class) || $class || __PACKAGE__;

    if (   warnings::enabled()
        && $] >= 5.003_06
        && !$please_use_I18N_Collate_even_if_deprecated)
    {
        warnings::warn <<___EOD___;
***

 WARNING: starting from the Perl version 5.003_06
 the I18N::Collate interface for comparing 8-bit scalar data
 according to the current locale

 HAS BEEN DEPRECATED

 That is, please do not use it anymore for any new applications
 and please migrate the old applications away from it because its
 functionality was integrated into the Perl core language in the
 release 5.003_06.

 See the perllocale manual page for further information.

***
___EOD___
        $please_use_I18N_Collate_even_if_deprecated++;
    }

    # Two-argument bless: honour the invocant instead of hard-wiring the
    # current package.
    return bless \$data, $class;
}
1N/A
# setlocale(CATEGORY, LOCALE)
#
# Wrapper around POSIX::setlocale() that also maintains $LOCALE, the key
# under which C() caches transformed strings.
#
# When CATEGORY is defined the request is passed on to POSIX::setlocale().
# $LOCALE is then set to the collation locale POSIX::setlocale() reports
# for LC_COLLATE, so that a failed request, or a request for a category
# that does not influence collation, cannot file cached transformations
# under the name of a locale that is not actually in effect.  If no locale
# name is reported, the requested name, then $ENV{LC_COLLATE}, then
# $ENV{LC_ALL}, then the empty string are used instead.
#
# Returns the new value of $LOCALE.
sub setlocale {
    my ($category, $locale) = @_[0,1];

    POSIX::setlocale($category, $locale) if (defined $category);

    # One-argument form: query only, the locale is left untouched.
    my $active = POSIX::setlocale(LC_COLLATE());

    $active = $locale || $ENV{'LC_COLLATE'} || $ENV{'LC_ALL'} || ''
        unless (defined $active && length $active);

    return $LOCALE = $active;
}
1N/A
# C(OBJECT)
#
# Returns the collate_xfrm() form of the string OBJECT refers to, taking it
# from the per-locale cache $C->{$LOCALE} when a defined entry exists and
# computing and storing it otherwise.
sub C {
    my $text = ${ $_[0] };

    my $for_locale = $C->{$LOCALE} ||= {};

    if (!defined $for_locale->{$text}) {
        $for_locale->{$text} = collate_xfrm($text);    # cache when met
    }

    return $for_locale->{$text};
}
1N/A
# collate_xfrm(STRING)
#
# Returns STRING transformed for collation.  STRING is cut at runs of NUL
# characters; each run is copied through unchanged and every other piece is
# handed to strxfrm() with a single NUL appended.  The results are
# concatenated in their original order.
sub collate_xfrm {
    my ($input) = @_;

    return join '',
        map { (/^\000/) ? $_ : strxfrm("$_\000") }
        split(/(\000+)/, $input);
}
1N/A
# collate_cmp(LEFT, RIGHT, SWAPPED)
#
# Handler for the overloaded cmp operator.  Compares the cached
# transformations (see C()) of the two operands and returns -1, 0 or 1.
#
# An operand that is not a reference (a plain string on the other side of
# the comparison) is wrapped in a scalar reference so that C() can
# dereference it.  When SWAPPED is true the operands arrived in reverse
# order, so the result is negated to describe the comparison as written.
sub collate_cmp {
    my ($left, $right, $swapped) = @_;

    my $order = C(ref $left  ? $left  : \$left)
            cmp C(ref $right ? $right : \$right);

    return $swapped ? -$order : $order;
}
1N/A
# Give $LOCALE its initial value as soon as the module is loaded.

I18N::Collate::setlocale();

1;    # a true value, as require expects
1N/A