cpmap.pl revision 4b9d6701570cb98fd36e209314239d104ec584d3
#
# Generate code page .c files from ftp.unicode.org descriptions
#
# Copyright 2000 Alexandre Julliard
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
# Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
# other than GPL or LGPL is available it will apply instead, Oracle elects to use only
# the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
# a choice of LGPL license versions is made available with the language indicating
# that LGPLv2 or any later version may be used, or where a choice of which version
# of the LGPL is applied is otherwise unspecified.
#
#
use strict;
# ---- input locations and global defaults ----
# base directory for ftp.unicode.org files
my $BASEDIR = "ftp.unicode.org/Public/";
# UnicodeData file
# NOTE(review): no assignment follows this comment in this view, yet
# READ_DEFAULTS interpolates $UNICODEDATA; under "use strict" that requires
# a declaration -- confirm it has not been lost.
# Sort keys file
my $SORTKEYS = "www.unicode.org/reports/tr10/allkeys.txt";
# Defaults mapping
my $DEFAULTS = "./defaults";
# Default char for undefined mappings
my $DEF_CHAR = ord '?';
# List of code pages to generate, one array ref per code page:
#   [ code page number, mapping file name (undef for 20127), flag, description ]
# The third field is 0 for every entry visible here -- presumably a
# per-codepage option; TODO confirm against the consumer of this list.
# The size of this list is used by OUTPUT_CPTABLE for the cptables[] dimension.
my @allfiles =
(
[ 20127, undef, 0, "US-ASCII (7bit)" ],
[ 28591, "ISO8859/8859-1.TXT", 0, "ISO 8859-1 Latin 1" ],
[ 28592, "ISO8859/8859-2.TXT", 0, "ISO 8859-2 Latin 2 (East European)" ],
[ 28593, "ISO8859/8859-3.TXT", 0, "ISO 8859-3 Latin 3 (South European)" ],
[ 28594, "ISO8859/8859-4.TXT", 0, "ISO 8859-4 Latin 4 (Baltic old)" ],
[ 28595, "ISO8859/8859-5.TXT", 0, "ISO 8859-5 Cyrillic" ],
[ 28596, "ISO8859/8859-6.TXT", 0, "ISO 8859-6 Arabic" ],
[ 28597, "ISO8859/8859-7.TXT", 0, "ISO 8859-7 Greek" ],
[ 28598, "ISO8859/8859-8.TXT", 0, "ISO 8859-8 Hebrew" ],
[ 28599, "ISO8859/8859-9.TXT", 0, "ISO 8859-9 Latin 5 (Turkish)" ],
[ 28600, "ISO8859/8859-10.TXT", 0, "ISO 8859-10 Latin 6 (Nordic)" ],
[ 28603, "ISO8859/8859-13.TXT", 0, "ISO 8859-13 Latin 7 (Baltic)" ],
[ 28604, "ISO8859/8859-14.TXT", 0, "ISO 8859-14 Latin 8 (Celtic)" ],
[ 28605, "ISO8859/8859-15.TXT", 0, "ISO 8859-15 Latin 9 (Euro)" ],
[ 28606, "ISO8859/8859-16.TXT", 0, "ISO 8859-16 Latin 10 (Balkan)" ]
);
# Character class names mapped to single-bit flags (each value is a
# distinct power of two, so classes can be OR-ed together).
my %ctype =
(
"upper" => 0x0001,
"lower" => 0x0002,
"digit" => 0x0004,
"space" => 0x0008,
"punct" => 0x0010,
"cntrl" => 0x0020,
"blank" => 0x0040,
"xdigit" => 0x0080,
"alpha" => 0x0100
);
# Unicode general category codes mapped to a numeric value; every category
# visible here maps to 0.
# NOTE(review): presumably the values are combinations of the %ctype bits
# above and further categories exist -- confirm against the full table.
my %categories =
(
"Cf" => 0, # Other, Format
"Cs" => 0, # Other, Surrogate
"Co" => 0, # Other, Private Use
"Cn" => 0, # Other, Not Assigned
);
# a few characters need additional categories that cannot be determined automatically
# Keys are class names that also appear in %ctype; each value is a list of
# code points. The keys are iterated at the end of READ_DEFAULTS
# ("patch the category of some special characters").
my %special_categories =
(
# ASCII hex digits plus the ranges 0xff10..0xff19, 0xff21..0xff26, 0xff41..0xff46
"xdigit" => [ ord('0')..ord('9'),ord('A')..ord('F'),ord('a')..ord('f'),
0xff10..0xff19, 0xff21..0xff26, 0xff41..0xff46 ],
"space" => [ 0x09..0x0d, 0x85 ],
"blank" => [ 0x09, 0x20, 0xa0, 0x3000, 0xfeff ],
"cntrl" => [ 0x070f, 0x180b, 0x180c, 0x180d, 0x180e, 0x200c, 0x200d,
0x200e, 0x200f, 0x202a, 0x202b, 0x202c, 0x202d, 0x202e,
0x206a, 0x206b, 0x206c, 0x206d, 0x206e, 0x206f, 0xfeff,
0xfff9, 0xfffa, 0xfffb ]
);
# Bidirectional class abbreviations mapped to small integer codes.
# All values are in the range 1..15; the embedding/override/pop classes
# (LRE, LRO, RLE, RLO, PDF) deliberately share the single code 15.
my %directions =
(
"L" => 1, # Left-to-Right
"LRE" => 15, # Left-to-Right Embedding
"LRO" => 15, # Left-to-Right Override
"R" => 2, # Right-to-Left
"AL" => 12, # Right-to-Left Arabic
"RLE" => 15, # Right-to-Left Embedding
"RLO" => 15, # Right-to-Left Override
"PDF" => 15, # Pop Directional Format
"EN" => 3, # European Number
"ES" => 4, # European Number Separator
"ET" => 5, # European Number Terminator
"AN" => 6, # Arabic Number
"CS" => 7, # Common Number Separator
"NSM" => 13, # Non-Spacing Mark
"BN" => 14, # Boundary Neutral
"B" => 8, # Paragraph Separator
"S" => 9, # Segment Separator
"WS" => 10, # Whitespace
"ON" => 11 # Other Neutrals
);
# ---- file-wide working tables, shared by the subs below ----
# Per-codepage state; these three are cleared at the start of HANDLE_FILE.
my @cp2uni = ();     # indexed by code page value
my @lead_bytes = (); # list of DBCS lead byte values
my @uni2cp = ();     # indexed by Unicode value
# Indexed by Unicode value; READ_DEFAULTS follows chains through this table
# when it checks for loops.
my @unicode_defaults = ();
# List of array refs; READ_DEFAULTS pushes one ref per multi-code-point source set.
my @unicode_aliases = ();
# NOTE(review): the tables below are not filled by any code in this view;
# their roles are presumably what the names suggest -- confirm.
my @tolower_table = ();
my @toupper_table = ();
my @digitmap_table = ();
my @compatmap_table = ();
my @direction_table = ();
# Indexed by Unicode value; tested with defined() in DUMP_COMPOSE_TABLES.
my @decomp_table = ();
# List of array refs; each is dereferenced in DUMP_COMPOSE_TABLES.
my @compose_table = ();
################################################################
# read in the defaults file
#
# Argument: $filename -- name shown in the "Loading" progress message.
# Reads lines from the DEFAULTS handle, then from the UNICODEDATA handle,
# and finally walks the keys of %special_categories.
# NOTE(review): in this view neither handle is opened, the pattern matches
# that should guard the bare blocks are absent, $UNICODEDATA, $lower,
# $upper, $dec and $dig are never declared, and the "else" below has no
# matching "if". Treat the comments here as a description of the visible
# skeleton only.
sub READ_DEFAULTS($)
{
my $filename = shift;
my $start;
# first setup a few default mappings
print "Loading $filename\n";
while (<DEFAULTS>)
{
next if /^\#/; # skip comments
next if /^$/; # skip empty lines
{
# $1 is split on commas and every piece is parsed as a hex number
my @src = map hex, split /,/,$1;
my $dst = $4;
my $comment = $5;
# more than one source code point: remember the whole set as an alias group
if ($#src > 0) { push @unicode_aliases, \@src; }
# a destination of "none" leaves the rest of the block unexecuted
next if ($dst eq "none");
{
}
next;
}
die "Unrecognized line $_\n";
}
# now build mappings from the decomposition field of the Unicode database
print "Loading $UNICODEDATA\n";
while (<UNICODEDATA>)
{
# Decode the fields ...
my $dst;
if ($lower ne "")
{
}
if ($upper ne "")
{
}
if ($dec ne "")
{
}
if ($dig ne "")
{
}
{
{
$start++;
}
}
{
# decomposition of the form "<foo> 1234" -> use char if type is known
{
# Single char decomposition in the compatibility range
}
# only the decomposition tags listed here are accepted; $1 is the tag
next unless ($1 eq "font" ||
$1 eq "noBreak" ||
$1 eq "circle" ||
$1 eq "super" ||
$1 eq "sub" ||
$1 eq "wide" ||
$1 eq "narrow" ||
$1 eq "compat" ||
$1 eq "small");
$dst = hex $2;
}
{
# decomposition "<compat> 0020 1234" -> combining accent
$dst = hex $1;
}
{
# decomposition contains only char values without prefix -> use first char
$dst = hex $1;
# store decomposition if it contains two chars
{
}
{
# Single char decomposition in the compatibility range
}
}
else
{
next;
}
# check for loops
# follow the chain of defaults starting at $dst until an entry with no
# further default is reached
for (my $i = $dst; ; $i = $unicode_defaults[$i])
{
last unless defined($unicode_defaults[$i]);
}
}
# patch the category of some special characters
foreach my $cat (keys %special_categories)
{
}
}
################################################################
# parse the input file
#
# Argument: $name -- used only as the prefix of the error message.
# Reads lines from the INPUT handle (not opened inside this sub), skipping
# comments, empty lines and lines containing ^Z.
# NOTE(review): the two bare blocks use $1/$2, but no pattern match is
# visible in front of them -- the guarding conditions appear to be missing
# from this view.
sub READ_FILE($)
{
my $name = shift;
while (<INPUT>)
{
next if /^\#/; # skip comments
next if /^$/; # skip empty lines
next if /\x1a/; # skip ^Z
{
# $1, parsed as hex, is appended to the lead byte list
my $cp = hex $1;
push @lead_bytes,$cp;
next;
}
{
# both captures are parsed as hex; judging by the variable names, $1 is
# the code page value and $2 the Unicode value
my $cp = hex $1;
my $uni = hex $2;
{
}
next;
}
# reached only by lines that none of the cases above handled
die "$name: Unrecognized line $_\n";
}
}
################################################################
# fill input data for the 20127 (us-ascii) codepage
#
# Takes no arguments.
# NOTE(review): the body is empty in this view, so nothing is filled in here.
sub fill_20127_codepage()
{
}
################################################################
# get a mapping including glyph chars for MB_USEGLYPHCHARS
#
# Arguments: a list of code point values, indexed by byte value.
# Returns:   a copy of that list in which slots 0x01..0x1f and 0x7f hold
#            the glyph code points listed below; all other slots are kept.
#            A list shorter than 0x80 entries is extended as needed.
#
# The arguments are copied before being patched: the elements of @_ are
# aliases of the caller's variables, so assigning to them directly would
# overwrite the caller's table (and die on read-only arguments).
sub get_glyphs_mapping(@)
{
    my @mapping = @_;

    $mapping[0x01] = 0x263a; # (WHITE SMILING FACE)
    $mapping[0x02] = 0x263b; # (BLACK SMILING FACE)
    $mapping[0x03] = 0x2665; # (BLACK HEART SUIT)
    $mapping[0x04] = 0x2666; # (BLACK DIAMOND SUIT)
    $mapping[0x05] = 0x2663; # (BLACK CLUB SUIT)
    $mapping[0x06] = 0x2660; # (BLACK SPADE SUIT)
    $mapping[0x07] = 0x2022; # (BULLET)
    $mapping[0x08] = 0x25d8; # (INVERSE BULLET)
    $mapping[0x09] = 0x25cb; # (WHITE CIRCLE)
    $mapping[0x0a] = 0x25d9; # (INVERSE WHITE CIRCLE)
    $mapping[0x0b] = 0x2642; # (MALE SIGN)
    $mapping[0x0c] = 0x2640; # (FEMALE SIGN)
    $mapping[0x0d] = 0x266a; # (EIGHTH NOTE)
    $mapping[0x0e] = 0x266b; # (BEAMED EIGHTH NOTES)
    $mapping[0x0f] = 0x263c; # (WHITE SUN WITH RAYS)
    $mapping[0x10] = 0x25ba; # (BLACK RIGHT-POINTING POINTER)
    $mapping[0x11] = 0x25c4; # (BLACK LEFT-POINTING POINTER)
    $mapping[0x12] = 0x2195; # (UP DOWN ARROW)
    $mapping[0x13] = 0x203c; # (DOUBLE EXCLAMATION MARK)
    $mapping[0x14] = 0x00b6; # (PILCROW SIGN)
    $mapping[0x15] = 0x00a7; # (SECTION SIGN)
    $mapping[0x16] = 0x25ac; # (BLACK RECTANGLE)
    $mapping[0x17] = 0x21a8; # (UP DOWN ARROW WITH BASE)
    $mapping[0x18] = 0x2191; # (UPWARDS ARROW)
    $mapping[0x19] = 0x2193; # (DOWNWARDS ARROW)
    $mapping[0x1a] = 0x2192; # (RIGHTWARDS ARROW)
    $mapping[0x1b] = 0x2190; # (LEFTWARDS ARROW)
    $mapping[0x1c] = 0x221f; # (RIGHT ANGLE)
    $mapping[0x1d] = 0x2194; # (LEFT RIGHT ARROW)
    $mapping[0x1e] = 0x25b2; # (BLACK UP-POINTING TRIANGLE)
    $mapping[0x1f] = 0x25bc; # (BLACK DOWN-POINTING TRIANGLE)
    $mapping[0x7f] = 0x2302; # (HOUSE)

    return @mapping;
}
################################################################
# build EUC-JP table from the JIS 0208 file
# FIXME: for proper EUC-JP we should probably read JIS 0212 too
# but this would require 3-byte DBCS characters
#
# Argument: $name -- used only as the prefix of the error message.
# Fills @cp2uni / @uni2cp / @lead_bytes and then reads lines from the INPUT
# handle (not opened inside this sub).
# NOTE(review): several loop bodies are empty and the pattern match that
# should set $2 is not visible in this view.
sub READ_JIS0208_FILE($)
{
my $name = shift;
# ASCII chars
# identity mapping in both directions for 0x00..0x7f
for (my $i = 0x00; $i <= 0x7f; $i++)
{
$cp2uni[$i] = $i;
$uni2cp[$i] = $i;
}
# JIS X 0201 right plane
for (my $i = 0xa1; $i <= 0xdf; $i++)
{
}
# lead bytes
# 0x8e, 0x8f and 0xa1..0xfe are recorded as lead bytes; their single-byte
# cp2uni entry is set to 0
foreach my $i (0x8e, 0x8f, 0xa1 .. 0xfe)
{
push @lead_bytes,$i;
$cp2uni[$i] = 0;
}
# undefined chars
foreach my $i (0x80 .. 0x8d, 0x90 .. 0xa0, 0xff)
{
}
# Shift-JIS compatibility
# Fix backslash conversion
while (<INPUT>)
{
next if /^\#/; # skip comments
next if /^$/; # skip empty lines
next if /\x1a/; # skip ^Z
{
my $uni = hex $2;
next;
}
die "$name: Unrecognized line $_\n";
}
}
################################################################
# build the sort keys table
#
# Takes no arguments. Reads collation entries from the INPUT handle (not
# opened inside this sub), closes it, and returns the list @flatkeys, which
# is indexed by code point and holds one packed integer per entry.
# NOTE(review): in this view the statements that store parsed keys, the
# opening of the sort call, and the declarations of $uni, $n2, $n3 and
# @current are missing, and two "else" branches have no matching "if".
sub READ_SORTKEYS_FILE()
{
my @sortkeys = ();
print "Loading $SORTKEYS\n";
while (<INPUT>)
{
next if /^\#/; # skip comments
next if /^$/; # skip empty lines
next if /\x1a/; # skip ^Z
next if /^\@version/; # skip @version header
# single code point followed by one collation element:
#   <hex> ; [<* or .><4 hex>.<4 hex>.<4 hex>.<hex>]
if (/^([0-9a-fA-F]+)\s+;\s+\[([*.])([0-9a-fA-F]{4})\.([0-9a-fA-F]{4})\.([0-9a-fA-F]{4})\.([0-9a-fA-F]+)\]/)
{
# code points above 0xffff are ignored
next if $uni > 65535;
next;
}
# same layout, but with more than one code point before the ';'
if (/^([0-9a-fA-F]+\s+)+;\s+\[[*.]([0-9a-fA-F]{4})\.([0-9a-fA-F]{4})\.([0-9a-fA-F]{4})\.([0-9a-fA-F]+)\]/)
{
# multiple character sequence, ignored for now
next;
}
die "$SORTKEYS: Unrecognized line $_\n";
}
close INPUT;
# compress the keys to 32 bit:
# key 1 to 16 bits, key 2 to 8 bits, key 3 to 4 bits, key 4 to 1 bit
# NOTE(review): the lines below are the tail of a sort comparison over
# @sortkeys (numeric on elements 2, 3, 4, then a string comparison); its
# beginning is not present in this view.
${$a}[2] <=> ${$b}[2] or
${$a}[3] <=> ${$b}[3] or
${$a}[4] <=> ${$b}[4] or
$a cmp $b; } @sortkeys;
my @flatkeys = ();
for (my $i = 0; $i < 65536; $i++)
{
{
{
{
# nothing
}
else
{
# the third-level counter must stay below 16 (it is packed into 4 bits below)
$n3++;
die if ($n3 >= 16);
}
}
else
{
# new second-level value: restart the third level; must stay below 256
$n2++;
$n3 = 1;
die if ($n2 >= 256);
}
}
else
{
$n2 = 1;
$n3 = 1;
}
# pack the four keys into one integer, indexed by the code point in element 0
$flatkeys[$current[0]] = ($current[1] << 16) | ($current[2] << 8) | ($current[3] << 4) | $current[4];
}
return @flatkeys;
}
################################################################
# dump the sort keys table
#
# Writes the collation element table as C source to the OUTPUT handle and
# closes it.
# NOTE(review): the prototype is ($@), but the argument unpacking is not
# present in this view -- $filename and @keys are used without a visible
# declaration, and OUTPUT is not opened here.
# NOTE(review): printf is given the interpolated $filename as its format
# string, so a '%' in the name would be taken as a conversion; print would
# be safer.
sub DUMP_SORTKEYS($@)
{
# count the number of 256-key ranges that contain something
my @offsets = ();
my $ranges = 2;
for (my $i = 0; $i < 65536; $i++)
{
next unless defined $keys[$i];
$ranges++;
# jump to the last index of this 256-entry range so it is counted only once
$i |= 255;
}
# output the range offsets
printf "Building $filename\n";
printf OUTPUT "/* Unicode collation element table */\n";
printf OUTPUT "/* DO NOT EDIT!! */\n\n";
printf OUTPUT " /* index */\n";
# output the default values
printf OUTPUT " /* defaults */\n";
# output all the key ranges
for (my $i = 0; $i < 256; $i++)
{
# ranges whose offset is 256 are not emitted -- presumably 256 is the
# offset of the shared defaults range; TODO confirm
next if $offsets[$i] == 256;
printf OUTPUT ",\n /* 0x%02x00 .. 0x%02xff */\n", $i, $i;
}
printf OUTPUT "\n};\n";
close OUTPUT;
}
################################################################
# add default mappings once the file has been read
#
# Takes no arguments; works on the file-wide tables @unicode_aliases,
# @cp2uni and @uni2cp.
# NOTE(review): the loops that search an alias set, copy the value, and walk
# the defaults table are reduced to empty blocks in this view.
sub ADD_DEFAULT_MAPPINGS()
{
# Apply aliases
foreach my $alias (@unicode_aliases)
{
my $target = undef;
{
{
last;
}
}
# alias sets for which no target was found are left alone
next unless defined($target);
# At least one char of the alias set is defined, set the others to the same value
{
}
}
# For every src -> target mapping in the defaults table,
# make uni2cp[src] = uni2cp[target] if uni2cp[target] is defined
{
# do a recursive mapping until we find a target char that is defined
}
# Add an identity mapping for all undefined chars
# only values 0..255 that are undefined in both tables get past the two guards
for (my $i = 0; $i < 256; $i++)
{
next if defined($cp2uni[$i]);
next if defined($uni2cp[$i]);
}
}
################################################################
# dump an array of integers
#
# Returns the string $ret, which starts out as a single space.
# NOTE(review): the prototype is ($$@), but the argument unpacking is not
# present in this view -- @array is used without a visible declaration and
# the loop body is empty.
sub DUMP_ARRAY($$@)
{
my $i;
my $ret = " ";
# the bound is $#array, so the loop stops before the last element --
# presumably the last element is appended separately; TODO confirm
for ($i = 0; $i < $#array; $i++)
{
}
return $ret;
}
################################################################
# dump an SBCS mapping table
#
# Writes the C tables for a single-byte code page to the OUTPUT handle:
# cp2uni (256 entries, or 512 when $has_glyphs is true), the unicode->ascii
# subtables, uni2cp_high and the code page descriptor.
# NOTE(review): the prototype is ($$$$$), but the argument unpacking is not
# present in this view ($has_glyphs is undeclared); several printf argument
# lists are cut off and the "else" near the end has no matching "if".
sub dump_sbcs_table($$$$$)
{
my $i;
# output the ascii->unicode table
if ($has_glyphs)
{
# with glyphs the table holds two 256-entry halves
printf OUTPUT "static const WCHAR cp2uni[512] =\n";
printf OUTPUT ",\n /* glyphs */\n%s\n};\n\n",
}
else
{
printf OUTPUT "static const WCHAR cp2uni[256] =\n";
}
# count the number of unicode->ascii subtables that contain something
my @filled = ();
my $subtables = 1;
for (my $i = 0; $i < 65536; $i++)
{
next unless defined $uni2cp[$i];
$subtables++;
# jump to the last index of this 256-entry block so it is counted only once
$i |= 255;
}
# output all the subtables into a single array
for (my $i = 0; $i < 256; $i++)
{
next unless $filled[$i];
printf OUTPUT " /* 0x%02x00 .. 0x%02xff */\n", $i, $i;
}
printf OUTPUT " /* defaults */\n";
# output a table of the offsets of the subtables in the previous array
my $pos = 0;
my @offsets = ();
for (my $i = 0; $i < 256; $i++)
{
}
printf OUTPUT "static const unsigned short uni2cp_high[256] =\n";
# output the code page descriptor
printf OUTPUT " { %d, 1, 0x%04x, 0x%04x, \"%s\" },\n",
printf OUTPUT " cp2uni,\n";
else { printf OUTPUT " cp2uni,\n"; }
printf OUTPUT " uni2cp_low,\n";
printf OUTPUT " uni2cp_high\n};\n";
}
################################################################
# dump a DBCS mapping table
#
# Writes the C tables for a double-byte code page to the OUTPUT handle:
# the per-lead-byte cp2uni tables, cp2uni_leadbytes, the unicode->ascii
# subtables, uni2cp_high and the code page descriptor.
# NOTE(review): the prototype is ($$$$@), but the argument unpacking is not
# present in this view; the LBLOOP label and the $y used while building
# @lblist are not declared here, and the descriptor printf has lost its
# arguments.
sub dump_dbcs_table($$$$@)
{
# build a list of lead bytes that are actually used
my @lblist = ();
{
for (my $x = 0; $x < 256; $x++)
{
{
push @lblist,$lead_bytes[$y];
next LBLOOP;
}
}
}
# true when at least one declared lead byte is not in the used list
my $unused = ($#lead_bytes > $#lblist);
# output the ascii->unicode table for the single byte chars
# output the default table for unused lead bytes
if ($unused)
{
printf OUTPUT " /* unused lead bytes */\n";
}
# output the ascii->unicode table for each DBCS lead byte
for (my $y = 0; $y <= $#lblist; $y++)
{
# a comma after every table except the last, which closes the array
printf OUTPUT ($y < $#lblist) ? ",\n" : "\n};\n\n";
}
# output the lead byte subtables offsets
my @offsets = ();
# used lead bytes get 1-based offsets in list order
for (my $x = 0; $x <= $#lblist; $x++) { $offsets[$lblist[$x]] = $x + 1; }
if ($unused)
{
# increment all lead bytes offset to take into account the unused table
for (my $x = 0; $x <= $#lead_bytes; $x++) { $offsets[$lead_bytes[$x]]++; }
}
printf OUTPUT "static const unsigned char cp2uni_leadbytes[256] =\n";
# count the number of unicode->ascii subtables that contain something
my @filled = ();
my $subtables = 1;
for (my $i = 0; $i < 65536; $i++)
{
next unless defined $uni2cp[$i];
$subtables++;
# jump to the last index of this 256-entry block so it is counted only once
$i |= 255;
}
# output all the subtables into a single array
for (my $y = 0; $y < 256; $y++)
{
next unless $filled[$y];
printf OUTPUT " /* 0x%02x00 .. 0x%02xff */\n", $y, $y;
}
printf OUTPUT " /* defaults */\n";
# output a table of the offsets of the subtables in the previous array
my $pos = 0;
# @offsets is reused here for the unicode->ascii subtable offsets
@offsets = ();
for (my $y = 0; $y < 256; $y++)
{
}
printf OUTPUT "static const unsigned short uni2cp_high[256] =\n";
# output the code page descriptor
printf OUTPUT " { %d, 2, 0x%04x, 0x%04x, \"%s\" },\n",
printf OUTPUT " cp2uni,\n";
printf OUTPUT " cp2uni_leadbytes,\n";
printf OUTPUT " uni2cp_low,\n";
printf OUTPUT " uni2cp_high,\n";
printf OUTPUT "};\n";
}
################################################################
# get the list of defined lead byte ranges
#
# Takes no arguments and returns the list @ranges.
# NOTE(review): both branches of the loop are empty in this view, so
# @ranges is returned as it was initialised (empty); @list is never used
# and the outer $i is shadowed by the loop variable.
sub get_lb_ranges()
{
my @list = ();
my @ranges = ();
my $i = 0;
# $on starts false and selects which branch runs for each byte value --
# presumably it tracks whether the scan is inside a range; TODO confirm
my $on = 0;
for (my $i = 0; $i < 256; $i++)
{
if ($on)
{
}
else
{
}
}
return @ranges;
}
################################################################
# dump the case mapping tables
#
# Argument: $filename -- name reported in the progress message.
# Prints a "Building ..." line on standard output, writes the header
# comments of the generated file to the OUTPUT handle and closes that
# handle. Returns the result of the close.
# NOTE(review): OUTPUT is not opened inside this sub.
sub DUMP_CASE_MAPPINGS($)
{
    my $filename = shift;

    # print rather than printf: the interpolated file name must not be used
    # as a format string, or a '%' in it would be taken as a conversion
    print "Building $filename\n";

    print OUTPUT "/* Unicode case mappings */\n";
    print OUTPUT "/* Automatically generated; DO NOT EDIT!! */\n\n";
    close OUTPUT;
}
################################################################
# dump a case mapping table
#
# Writes one table as a C initializer (index, defaults, then the populated
# 256-entry blocks) to the OUTPUT handle.
# NOTE(review): the prototype is ($@), but the argument unpacking is not
# present in this view (@table is undeclared); most branch bodies are empty
# and the "else" in the collapse loop has no matching "if".
sub DUMP_CASE_TABLE($@)
{
# count the number of sub tables that contain something
# also compute the low and upper populated bounds
my $index = 0;
my @filled = ();
for (my $i = 0; $i < 65536; $i++)
{
next unless defined $table[$i];
# @filled is indexed by the high byte of the code point; the first defined
# entry seen in a 256-entry block bumps the sub table count
if (!defined $filled[$i >> 8])
{
$index++;
}
else
{
}
}
# Collapse blocks upwards if possible
my $removed = 0;
$index = 0;
for (my $i = 0; $i < 256; $i++)
{
next unless defined $filled[$i];
{
}
else
{
}
$index++;
}
# dump the table
printf OUTPUT "{\n /* index */\n";
printf OUTPUT " /* defaults */\n";
$index = 0;
for (my $i = 0; $i < 256; $i++)
{
next unless $filled[$i];
$index++;
}
printf OUTPUT "\n};\n";
}
################################################################
# dump the ctype tables
#
# Argument: $filename -- name reported in the progress message.
# Prints a "Building ..." line, writes the header comments of the generated
# file to the OUTPUT handle and closes that handle.
# NOTE(review): OUTPUT is not opened here, the loop bodies are empty, and
# the "else" in the row-merging step has no matching "if" in this view.
# NOTE(review): printf is given the interpolated $filename as its format
# string, so a '%' in the name would be taken as a conversion; print would
# be safer.
sub DUMP_CTYPE_TABLES($)
{
my $filename = shift;
printf "Building $filename\n";
printf OUTPUT "/* Unicode ctype tables */\n";
printf OUTPUT "/* Automatically generated; DO NOT EDIT!! */\n\n";
# presumably maps a row's contents to an already emitted row so that
# identical rows can be shared -- TODO confirm, it is never used in this view
my %sequences;
# add the direction in the high 4 bits of the category
for (my $i = 0; $i < 65536; $i++)
{
}
# try to merge table rows
{
{
# reuse an existing row
}
else
{
# create a new row
}
}
close OUTPUT;
}
################################################################
# dump the char composition tables
#
# Argument: $filename -- name reported in the progress message.
# Works from the file-wide @compose_table and @decomp_table and writes the
# composition and decomposition tables as C source to the OUTPUT handle,
# which it closes at the end.
# NOTE(review): OUTPUT is not opened here; $pos, $null_offset, @filled_idx
# and @list are used without a visible declaration, and most loop bodies
# are empty in this view.
# NOTE(review): printf is given the interpolated $filename as its format
# string, so a '%' in the name would be taken as a conversion; print would
# be safer.
sub DUMP_COMPOSE_TABLES($)
{
my $filename = shift;
printf "Building $filename\n";
printf OUTPUT "/* Unicode char composition */\n";
printf OUTPUT "/* Automatically generated; DO NOT EDIT!! */\n\n";
######### composition table
my @filled = ();
# every entry of @compose_table is an array ref
foreach my $i (@compose_table)
{
my @comp = @$i;
}
# count how many different second chars we have
my $count = 0;
for (my $i = 0; $i < 65536; $i++)
{
next unless defined $filled[$i];
$count++;
}
# build the table of second chars and offsets
my @table = ();
for (my $i = 0; $i < 65536; $i++)
{
next unless defined $filled[$i];
}
# terminator with last position
# build the table of first chars and mappings
for (my $i = 0; $i < 65536; $i++)
{
next unless defined $filled[$i];
# per-character table; shadows the outer @table
my @table = ();
for (my $j = 0; $j <= $#list; $j++)
{
}
}
######### decomposition table
# first determine all the 16-char subsets that contain something
for (my $i = 0; $i < 65536; $i++)
{
next unless defined $decomp_table[$i];
# jump to the last index of this 16-entry subset so it is visited only once
$i |= 15;
}
# now count the 256-char subsets that contain something
# (@filled is indexed by 16-char subset here, hence 4096 entries)
for (my $i = 0; $i < 4096; $i++)
{
next unless $filled[$i];
$pos += 16;
# skip the remaining subsets of this group of 16 (256 chars)
$i |= 15;
}
# add the index offsets to the subsets positions
for (my $i = 0; $i < 4096; $i++)
{
next unless $filled[$i];
$filled[$i] += $null_offset;
}
# dump the main index
printf OUTPUT "{\n /* index */\n";
# dump the second-level indexes
for (my $i = 0; $i < 256; $i++)
{
# only entries greater than 256 get a sub-index -- presumably smaller
# values refer to the shared null sub-index; TODO confirm
next unless ($filled_idx[$i] > 256);
printf OUTPUT ",\n /* sub-index %02x */\n", $i;
}
# dump the 16-char subsets
printf OUTPUT ",\n /* null mapping */\n";
for (my $i = 0; $i < 4096; $i++)
{
next unless $filled[$i];
for (my $j = 0; $j < 16; $j++)
{
# ($i<<4) + $j is the code point of entry $j within subset $i
if (defined $decomp_table[($i<<4) + $j])
{
}
}
printf OUTPUT ",\n /* 0x%03x0 .. 0x%03xf */\n", $i, $i;
}
printf OUTPUT "\n};\n";
close OUTPUT;
}
################################################################
# handle a "bestfit" Windows mapping file
#
# Reads a sectioned mapping file from the INPUT handle until an ENDCODEPAGE
# line, closes it, then writes a header to the OUTPUT handle and closes that.
# Lines starting with ';', blank lines and lines containing ^Z are skipped.
# NOTE(review): the prototype is ($$$), but the argument unpacking is not
# present in this view; neither handle is opened here, $codepage, $width,
# $def, $defw, $count, $filename and $i are used without a visible
# declaration, and the pattern matches guarding most bare blocks are missing.
sub handle_bestfit_file($$$)
{
# name of the section currently being read (compared below against
# MBTABLE, WCTABLE, DBCSRANGE and DBCSTABLE)
my $state = "";
my @lb_ranges = ();
while (<INPUT>)
{
next if /^;/; # skip comments
next if /^\s*$/; # skip empty lines
next if /\x1a/; # skip ^Z
last if /^ENDCODEPAGE/;
if (/^CODEPAGE\s+(\d+)/)
{
$codepage = $1;
next;
}
{
# $1 is stored as the width; $2 and $3 are parsed as hex defaults
$width = $1;
$def = hex $2;
$defw = hex $3;
next;
}
{
# section header: $1 becomes the current state, $2 the entry count
$state = $1;
$count = $2;
next;
}
{
# data line: both captures are hex numbers whose meaning depends on the
# current section
if ($state eq "MBTABLE")
{
my $cp = hex $1;
my $uni = hex $2;
next;
}
if ($state eq "WCTABLE")
{
# note the order: Unicode value first, code page value second
my $uni = hex $1;
my $cp = hex $2;
next;
}
if ($state eq "DBCSRANGE")
{
my $start = hex $1;
my $end = hex $2;
{
# record $i as a lead byte with a single-byte mapping of 0
push @lead_bytes, $i;
$cp2uni[$i] = 0;
}
next;
}
if ($state eq "DBCSTABLE")
{
my $mb = hex $1;
my $uni = hex $2;
# the counter is decremented per entry; reaching zero triggers the
# (empty in this view) block below
if (!--$count)
{
}
next;
}
}
die "$filename: Unrecognized line $_\n";
}
close INPUT;
# dump all tables
printf OUTPUT "/* DO NOT EDIT!! */\n\n";
close OUTPUT;
}
################################################################
# read an input file and generate the corresponding .c file
#
# Clears the per-codepage tables @cp2uni, @lead_bytes and @uni2cp, then
# writes a "DO NOT EDIT" header to the OUTPUT handle and closes it.
# NOTE(review): the prototype is (@), but the argument unpacking is not
# present in this view ($filename is undeclared); OUTPUT is not opened here,
# and the condition guarding the early return is missing, so as shown the
# sub always returns right after the reset.
sub HANDLE_FILE(@)
{
@cp2uni = ();
@lead_bytes = ();
@uni2cp = ();
# symbol codepage file is special
{
return;
}
# dump all tables
# the header wording differs depending on whether a mapping file name is set
if ($filename)
{
printf OUTPUT "/* DO NOT EDIT!! */\n\n";
}
else
{
printf OUTPUT "/* Automatically generated; DO NOT EDIT!! */\n\n";
}
close OUTPUT;
}
################################################################
# save a file if modified
#
# Argument: $file -- target file name; the candidate is "$file.new".
# One branch deletes "$file.new", the other renames it over "$file".
# NOTE(review): the condition selecting between the two branches is not
# present in this view (the "else" has no matching "if"); going by the
# header comment it presumably compares the two files -- confirm.
# NOTE(review): the results of unlink and rename are not checked.
sub save_file($)
{
my $file = shift;
{
unlink "$file.new";
}
else
{
rename "$file.new", "$file";
}
}
################################################################
# output the list of codepage tables into the cptable.c file
#
# Takes no arguments. Builds the list of C source lines @tables_decl: the
# opening of the cptables[] array, sized to the number of entries in
# @allfiles, and the closing "};".
# NOTE(review): the loops that add the per-codepage lines are empty blocks
# in this view, and nothing here writes @tables_decl to a file.
sub OUTPUT_CPTABLE()
{
my @tables_decl = ();
{
}
# $#allfiles+1 is the number of code pages
push @tables_decl, sprintf("\nstatic const union cptable * const cptables[%d] =\n{\n",$#allfiles+1);
{
}
push @tables_decl, "};";
}
################################################################
# replace the contents of a file between ### cpmap ### marks
#
# Arguments: $name -- file name; remaining arguments -- replacement lines.
# Reads from the FILE handle: lines up to and including the
# "### cpmap begin ###" marker are collected in @lines, the following lines
# are dropped until "### cpmap end ###", which is kept preceded by a
# newline. Reading stops there and FILE is closed.
# NOTE(review): FILE is not opened here, @data is never inserted, and
# @lines is never written back in this view.
sub REPLACE_IN_FILE($@)
{
my $name = shift;
my @data = @_;
my @lines = ();
# copy everything up to and including the begin marker
while (<FILE>)
{
push @lines, $_;
last if /\#\#\# cpmap begin \#\#\#/;
}
# skip the old contents; keep only the end marker line
while (<FILE>)
{
if (/\#\#\# cpmap end \#\#\#/) { push @lines, "\n", $_; last; }
}
close(FILE);
}
################################################################
# main routine
# The defaults are loaded first; the three dump steps that follow write
# casemap.c, compose.c and wctype.c in that order.
READ_DEFAULTS( $DEFAULTS );
DUMP_CASE_MAPPINGS( "casemap.c" );
DUMP_COMPOSE_TABLES( "compose.c" );
DUMP_CTYPE_TABLES( "wctype.c" );
exit 0;
# Local Variables:
# compile-command: "./cpmap.pl && make -k"
# End: