5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# $Id: UCD.pm,v 1.1 2003/06/04 00:27:53 marka Exp $
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# Copyright (c) 2000,2001 Japan Network Information Center.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# All rights reserved.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# By using this file, you agree to the terms and conditions set forth bellow.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# LICENSE TERMS AND CONDITIONS
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# The following License Terms and Conditions apply, unless a different
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# license is obtained from Japan Network Information Center ("JPNIC"),
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# Chiyoda-ku, Tokyo 101-0047, Japan.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# 1. Use, Modification and Redistribution (including distribution of any
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# modified or derived work) in source and/or binary forms is permitted
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# under this License Terms and Conditions.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# 2. Redistribution of source code must retain the copyright notices as they
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# appear in each source code file, this License Terms and Conditions.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# 3. Redistribution in binary form must reproduce the Copyright Notice,
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# this License Terms and Conditions, in the documentation and/or other
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# materials provided with the distribution. For the purposes of binary
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# distribution the "Copyright Notice" refers to the following language:
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# 4. The name of JPNIC may not be used to endorse or promote products
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# derived from this Software without specific prior written approval of
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# JPNIC.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewspackage UCD;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# UCD.pm -- parser for Unicode Character Database files.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# This file is an aggregation of the following modules, each of which
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# provides a parser for a specific data file of UCD.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# UCD::UnicodeData -- for UnicodeData.txt
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# UCD::CaseFolding -- for CaseFolding.txt
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# UCD::SpecialCasing -- for SpecialCasing.txt
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# UCD::CompositionExclusions -- for CompositionExclusions-1.txt
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# Each module provides two subroutines:
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# $line = getline(\*HANDLE);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# reads next non-comment line from HANDLE, and returns it.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# undef will be returned upon EOF.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# %fields = parse($line);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# parses a line and extract fields, and returns a list of
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews# field name and its value, suitable for assignment to a hash.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewspackage UCD::UnicodeData;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewsuse strict;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewsuse Carp;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewssub getline {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews my $fh = shift;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews my $s = <$fh>;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews $s =~ s/\r?\n$// if $s;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews $s;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews}
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewssub parseline {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews my $s = shift;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews my @f = split /;/, $s, -1;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return (CODE => hex($f[0]),
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews NAME => $f[1],
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews CATEGORY => $f[2],
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews CLASS => $f[3]+0,
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews BIDI => $f[4],
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews DECOMP => dcmap($f[5]),
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews DECIMAL => dvalue($f[6]),
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews DIGIT => dvalue($f[7]),
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews NUMERIC => dvalue($f[8]),
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews MIRRORED => $f[9] eq 'Y',
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews NAME10 => $f[10],
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews COMMENT => $f[11],
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews UPPER => ucode($f[12]),
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews LOWER => ucode($f[13]),
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews TITLE => ucode($f[14]));
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews}
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewssub dcmap {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews my $v = shift;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return undef if $v eq '';
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews $v =~ /^(?:(<[^>]+>)\s*)?(\S.*)/
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews or croak "invalid decomposition mapping \"$v\"";
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews my $tag = $1 || '';
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews [$tag, map {hex($_)} split(' ', $2)];
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews}
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewssub ucode {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews my $v = shift;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return undef if $v eq '';
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews hex($v);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews}
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewssub dvalue {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews my $v = shift;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return undef if $v eq '';
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews $v;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews}
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewspackage UCD::CaseFolding;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewsuse strict;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewssub getline {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews my $fh = shift;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews while (defined(my $s = <$fh>)) {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews next if $s =~ /^\#/;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews next if $s =~ /^\s*$/;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews $s =~ s/\r?\n$//;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return $s;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews }
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews undef;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews}
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewssub parseline {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews my $s = shift;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews my @f = split /;\s*/, $s, -1;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return (CODE => hex($f[0]),
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews TYPE => $f[1],
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews MAP => [map(hex, split ' ', $f[2])],
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews );
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews}
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewspackage UCD::SpecialCasing;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewsuse strict;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewssub getline {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews my $fh = shift;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews while (defined(my $s = <$fh>)) {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews next if $s =~ /^\#/;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews next if $s =~ /^\s*$/;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews $s =~ s/\r?\n$//;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return $s;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews }
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews undef;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews}
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewssub parseline {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews my $s = shift;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews my @f = split /;\s*/, $s, -1;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews my $cond = (@f > 5) ? $f[4] : undef;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return (CODE => hex($f[0]),
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews LOWER => [map(hex, split ' ', $f[1])],
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews TITLE => [map(hex, split ' ', $f[2])],
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews UPPER => [map(hex, split ' ', $f[3])],
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews CONDITION => $cond);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews}
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewspackage UCD::CompositionExclusions;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewsuse strict;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewssub getline {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews my $fh = shift;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews while (defined(my $s = <$fh>)) {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews next if $s =~ /^\#/;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews next if $s =~ /^\s*$/;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews $s =~ s/\r?\n$//;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return $s;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews }
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews undef;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews}
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewssub parseline {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews my $s = shift;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews m/^[0-9A-Fa-f]+/;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return (CODE => hex($&));
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews}
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews1;