6b5e5868e7ebf1aff3a5abd7d0c4ef0e5fbf3648Garrett D'Amore#! /usr/perl5/bin/perl
6b5e5868e7ebf1aff3a5abd7d0c4ef0e5fbf3648Garrett D'Amore#
6b5e5868e7ebf1aff3a5abd7d0c4ef0e5fbf3648Garrett D'Amore# This file and its contents are supplied under the terms of the
6b5e5868e7ebf1aff3a5abd7d0c4ef0e5fbf3648Garrett D'Amore# Common Development and Distribution License ("CDDL"), version 1.0.
5aec55eb0591d2fcdd38d7dd5408a6ff3456e596Garrett D'Amore# You may only use this file in accordance with the terms of version
6b5e5868e7ebf1aff3a5abd7d0c4ef0e5fbf3648Garrett D'Amore# 1.0 of the CDDL.
6b5e5868e7ebf1aff3a5abd7d0c4ef0e5fbf3648Garrett D'Amore#
6b5e5868e7ebf1aff3a5abd7d0c4ef0e5fbf3648Garrett D'Amore# A full copy of the text of the CDDL should have accompanied this
6b5e5868e7ebf1aff3a5abd7d0c4ef0e5fbf3648Garrett D'Amore# source. A copy is of the CDDL is also available via the Internet
6b5e5868e7ebf1aff3a5abd7d0c4ef0e5fbf3648Garrett D'Amore# at http://www.illumos.org/license/CDDL.
6b5e5868e7ebf1aff3a5abd7d0c4ef0e5fbf3648Garrett D'Amore#
6b5e5868e7ebf1aff3a5abd7d0c4ef0e5fbf3648Garrett D'Amore
6b5e5868e7ebf1aff3a5abd7d0c4ef0e5fbf3648Garrett D'Amore#
6b5e5868e7ebf1aff3a5abd7d0c4ef0e5fbf3648Garrett D'Amore# Copyright 2010 Nexenta Systems, Inc. All rights reserved.
6b5e5868e7ebf1aff3a5abd7d0c4ef0e5fbf3648Garrett D'Amore#
6b5e5868e7ebf1aff3a5abd7d0c4ef0e5fbf3648Garrett D'Amore
6b5e5868e7ebf1aff3a5abd7d0c4ef0e5fbf3648Garrett D'Amore# This converts MAPPING files to localedef character maps
6b5e5868e7ebf1aff3a5abd7d0c4ef0e5fbf3648Garrett D'Amore# suitable for use with the UTF-8 derived localedef data.
6b5e5868e7ebf1aff3a5abd7d0c4ef0e5fbf3648Garrett D'Amore
#
# Convert a UCS code point into the textual form of its UTF-8 encoding as
# used in localedef character maps: one "\xNN" escape (upper-case hex) per
# encoded byte, most significant byte first.
#
#   $ucs     - numeric code point
#   returns  - string such as '\xE2\x82\xAC' (literal backslashes)
#
# The original (pre-RFC 3629) one- to six-byte scheme is implemented, so
# values beyond U+10FFFF are still encoded rather than rejected.
#
sub ucs_to_utf8
{
    my $ucs = shift;

    # Plain ASCII is encoded as a single byte equal to the code point.
    return (sprintf("\\x%02X", $ucs)) if ($ucs <= 0x7f);

    # Each row: [ largest code point, lead-byte marker, continuation bytes ]
    my @forms = (
        [ 0x7ff,      0xc0, 1 ],
        [ 0xffff,     0xe0, 2 ],
        [ 0x1fffff,   0xf0, 3 ],
        [ 0x03ffffff, 0xf8, 4 ],
    );

    # Anything larger than the table covers needs the six-byte form, whose
    # lead byte marker is 0xfc (not 0xf8, which belongs to the five-byte form).
    my ($lead, $ntrail) = (0xfc, 5);
    foreach my $form (@forms) {
        if ($ucs <= $form->[0]) {
            ($lead, $ntrail) = ($form->[1], $form->[2]);
            last;
        }
    }

    # Emit continuation bytes from least to most significant, six payload
    # bits at a time, prepending so the final string is in byte order.
    my $utf8 = "";
    foreach (1 .. $ntrail) {
        $utf8 = sprintf("\\x%02X", ($ucs & 0x3f) | 0x80) . $utf8;
        $ucs >>= 6;
    }

    # Whatever bits remain belong in the lead byte.
    return (sprintf("\\x%02X", $ucs | $lead) . $utf8);
}
6b5e5868e7ebf1aff3a5abd7d0c4ef0e5fbf3648Garrett D'Amore
# Tables filled in by load_utf8_cm():
#   %unames  - UTF-8 value (second column) => symbolic name(s) from the first
#              column; several names for one value are joined with "\n"
#   %uvalues - symbolic name => UTF-8 value (populated here; nothing else in
#              this script reads it)
my %unames;
my %uvalues;

#
# This is not a general purpose Character Map parser, but it's good enough
# for the stock one supplied with CLDR.
#
# Reads the character map $file, skipping comment lines, blank lines and the
# CHARMAP / END CHARMAP markers, and records every "name value" pair in
# %unames and %uvalues.  Dies if no file name is given or the file cannot be
# opened.  Returns the result of closing the file.
#
sub load_utf8_cm
{
    my $file = shift;

    # Three-argument open() on an undefined name would quietly open an
    # anonymous temporary file, so reject a missing name up front.
    die "load_utf8_cm: no character map file given\n"
        unless (defined($file) && length($file));

    open(my $cm, '<', $file) or die "open $file: $!";

    while (my $line = <$cm>) {
        next if ($line =~ /^#/);
        next if ($line =~ /^\s*$/);
        next if ($line =~ /^\s*CHARMAP\s*$/);
        next if ($line =~ /^\s*END\s*CHARMAP\s*$/);
        chomp($line);

        # split(' ') ignores leading whitespace, so an indented entry does
        # not end up with an empty string as its name.
        my ($name, $utf8val) = split(' ', $line);

        # A line with only one field carries no value to record.
        next unless (defined($utf8val));

        if (defined($unames{$utf8val})) {
            $unames{$utf8val} .= "\n" . $name;
        } else {
            $unames{$utf8val} = $name;
        }
        $uvalues{$name} = $utf8val;
    }

    return (close($cm));
}
6b5e5868e7ebf1aff3a5abd7d0c4ef0e5fbf3648Garrett D'Amore
# Filled in by load_map(): legacy code value (first column of the mapping
# file, kept as the literal text) => UTF-8 escape string(s) produced by
# ucs_to_utf8(); several strings for one value are joined with a space.
my %map;

#
# Reads the mapping file $file.  Each data line holds a legacy code value
# followed by a hexadecimal UCS code point (an optional leading "\x" and
# leading zeros are stripped before conversion).  Comment and blank lines
# are skipped.  Dies if no file name is given or the file cannot be opened.
# Returns the result of closing the file.
#
sub load_map
{
    my $file = shift;

    # Three-argument open() on an undefined name would quietly open an
    # anonymous temporary file, so reject a missing name up front.
    die "load_map: no mapping file given\n"
        unless (defined($file) && length($file));

    open(my $mapfh, '<', $file) or die "open $file: $!";

    while (my $line = <$mapfh>) {
        next if ($line =~ /^#/);
        next if ($line =~ /^\s*$/);
        chomp($line);

        my ($val, $ucs) = split(' ', $line);

        # A line whose second field is absent, or is the start of a trailing
        # comment, has no code point; converting it would wrongly map the
        # legacy value to U+0000.
        next if (!defined($ucs) || $ucs =~ /^#/);

        $ucs =~ s/^\\x[0]*//;
        my $utf8 = ucs_to_utf8(hex($ucs));

        if (defined($map{$val})) {
            $map{$val} .= " " . $utf8;
        } else {
            $map{$val} = $utf8;
        }
    }

    # The original never closed this handle.
    return (close($mapfh));
}
6b5e5868e7ebf1aff3a5abd7d0c4ef0e5fbf3648Garrett D'Amore
#
# Render a hexadecimal code value as a string of "\xNN" escapes (lower-case
# hex), one per byte, most significant byte first.  Leading zero bytes are
# not emitted, except that a value of zero yields the single byte "\x00".
#
sub mb_str
{
    my $number = hex(shift);
    my @escapes;

    # Peel bytes off the low end; the body always runs once, which covers
    # the zero case.
    do {
        unshift(@escapes, sprintf("\\x%02x", $number & 0xff));
        $number >>= 8;
    } while ($number);

    return (join("", @escapes));
}
6b5e5868e7ebf1aff3a5abd7d0c4ef0e5fbf3648Garrett D'Amore
#
# Main program: the single command-line argument names the mapping file.
# UTF-8.cm is read from the current directory.  The resulting character map
# is written to standard output.
#
my $mf = shift(@ARGV);
die "usage: $0 mapping-file\n" unless (defined($mf));

load_utf8_cm("UTF-8.cm");
load_map($mf);

print("CHARMAP\n");
foreach my $val (sort(keys(%map))) {
    foreach my $utf8 (split(/ /, $map{$val})) {
        # A UTF-8 value with no symbolic name in UTF-8.cm yields no output.
        my $names = $unames{$utf8};
        next unless (defined($names));

        foreach my $name (sort(split(/\n/, $names))) {
            # Pad the name with tabs toward column 64 (8-column tab stops).
            # Always emit at least one tab: without the clamp a name longer
            # than 64 characters would run straight into its value, and a
            # name of 79 or more would give a negative count that the old
            # count-down loop never finished.
            my $nt = int((64 - length($name) + 7) / 8);
            $nt = 1 if ($nt < 1);

            print($name . ("\t" x $nt) . mb_str($val) . "\n");
        }
    }
}
print "END CHARMAP\n";