#!/usr/bin/python -u
#
# Original script modified in November 2003 to take advantage of
# the character-validation range routines, and updated to the
# current Unicode information (Version 4.0.1)
#
# NOTE: there is an 'alias' facility for blocks which are not present in
# the current release, but are needed for ABI compatibility. This
# must be accomplished MANUALLY! Please see the comments below under
# 'blockAliases'
#
import string
import sys
import time
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
# URL of the Unicode Character Database release; it is copied verbatim into
# the banner comment of both generated files.
webpage = "http://www.unicode.org/Public/4.0-Update1/UCD-4.0.1.html"
# Whitespace-separated names of the two input files: the block list first,
# then the per-character data file.
sources = "Blocks-4.0.1.txt UnicodeData-4.0.1.txt"

#
# blockAliases is a small hack - it is used for mapping block names which
# were used in the 3.1 release, but are missing or changed in the current
# release. The format is "OldBlockName:NewBlockName1[,NewBlockName2[,...]]"
#
blockAliases = [
    "CombiningMarksforSymbols:CombiningDiacriticalMarksforSymbols",
    "Greek:GreekandCoptic",
    "PrivateUse:PrivateUseArea,SupplementaryPrivateUseArea-A," +
    "SupplementaryPrivateUseArea-B",
]

# minTableSize is the threshold for emitting a range table: a category gets
# a table only when it has MORE than this many ranges; with this many or
# fewer, inline comparisons are generated instead.
minTableSize = 8

# str.split() replaces string.split(), which no longer exists in Python 3.
(blockfile, catfile) = sources.split()
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
#
# Now process the "blocks" file, reducing it to a dictionary indexed by
# block name (with spaces removed).  Each entry is a list of (start, end)
# tuples; both bounds are kept as "0x"-prefixed hex strings because they
# are pasted unchanged into the generated C comparisons.
#
BlockNames = {}
try:
    blocks = open(blockfile, "r")
except IOError:
    print("Missing %s, aborting ..." % blockfile)
    sys.exit(1)

# The context manager closes the file even if parsing raises unexpectedly.
with blocks:
    for line in blocks:
        line = line.strip()
        # Skip blank lines and comments.
        if not line or line.startswith('#'):
            continue
        try:
            # Expected shape: "<start>..<end>; <Block Name>"
            fields = line.split(';')
            (start, end) = fields[0].strip().split("..")
            name = fields[1].strip().replace(' ', '')
        except (IndexError, ValueError):
            # Missing ';' field or a range that is not exactly "a..b".
            print("Failed to process line: %s" % (line))
            continue
        BlockNames.setdefault(name, []).append(("0x" + start, "0x" + end))
print("Parsed %d blocks descriptions" % (len(BlockNames)))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
# Expand the aliases: each old block name receives the concatenated ranges
# of every current block it maps to.  Components missing from the parsed
# block list are reported and skipped.
for block in blockAliases:
    alias = block.split(':')
    old_name = alias[0]
    for comp in alias[1].split(','):
        # "in" replaces dict.has_key(), which Python 3 removed.
        if comp in BlockNames:
            BlockNames.setdefault(old_name, []).extend(BlockNames[comp])
        else:
            print("Alias %s: %s not in Blocks" % (old_name, comp))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
#
# Next process the Categories file. This is more complex, since
# the file is in code sequence, and we need to invert it. We use
# a dictionary with index category-name, with each entry containing
# all the code points of that category. Note that category
# names comprise two parts - the general category, and the "subclass"
# within that category. Therefore, both "general category" (which is
# the first character of the 2-character category-name) and the full
# (2-character) name are entered into this dictionary.
#
try:
    data = open(catfile, "r")
except IOError:
    print("Missing %s, aborting ..." % catfile)
    sys.exit(1)

nbchar = 0
Categories = {}
# The context manager closes the category file itself; the previous code
# closed the (already closed) blocks file here and leaked this handle.
with data:
    for line in data:
        line = line.strip()
        # Skip blank lines and comments.
        if not line or line.startswith('#'):
            continue
        try:
            fields = line.split(';')
            # Field 0 is the code point in hex, field 2 the category name.
            value = int(fields[0].strip(), 16)
            name = fields[2]
            if not name:
                raise ValueError("empty category name")
        except (IndexError, ValueError):
            print("Failed to process line: %s" % (line))
            continue

        # NOTE(review): every line contributes exactly one code point.  If
        # the data file describes a large range by only its first and last
        # entries, just those two code points are recorded - confirm
        # against the UnicodeData.txt format whether that is intended.
        nbchar = nbchar + 1
        # update entry for "full name"
        Categories.setdefault(name, []).append(value)
        # update "general category" name
        Categories.setdefault(name[0], []).append(value)

print("Parsed %d char generating %d categories" % (nbchar, len(Categories)))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
#
# The data is now all read. Time to process it into a more useful form.
#
def _collapse_to_ranges(values):
    """Collapse a list of code points into inclusive (low, high) runs.

    A run is extended only while each value is exactly one more than the
    previous one, so the input is expected to be in ascending order (the
    category lists are filled in file order).  A lone value becomes
    (value, value).  An empty input yields an empty list.
    """
    runs = []
    start = prev = None
    for val in values:
        if start is None:
            start = prev = val
        elif val == prev + 1:
            prev = val
        else:
            # Gap found: close the current run and open a new one.
            runs.append((start, prev))
            start = prev = val
    if start is not None:
        runs.append((start, prev))
    return runs


# reduce the number list into ranges (keys are unchanged, only the values
# are replaced, so iterating the dictionary while assigning is safe)
for cat in Categories:
    Categories[cat] = _collapse_to_ranges(Categories[cat])
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
#
# Assure all data is in alphabetic order, since we will be doing binary
# searches on the tables.  sorted() returns a real list on both Python 2
# and Python 3, whereas dict.keys() has no sort() method on Python 3.
#
bkeys = sorted(BlockNames)

ckeys = sorted(Categories)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
#
# Generate the resulting files: open both outputs (relative to the current
# directory) and write their fixed prologues.
#
try:
    header = open("include/libxml/xmlunicode.h", "w")
except IOError:
    print("Failed to open include/libxml/xmlunicode.h")
    sys.exit(1)

try:
    output = open("xmlunicode.c", "w")
except IOError:
    print("Failed to open xmlunicode.c")
    # Do not leave the already opened header handle dangling.
    header.close()
    sys.exit(1)

# Human-readable local timestamp recorded in both banners.
date = time.asctime(time.localtime(time.time()))

# Prologue of the public header: banner, include guard, feature guard and
# the opening of the extern "C" section.
header.write(
"""/*
 * Summary: Unicode character APIs
 * Description: API for the Unicode character APIs
 *
 * This file is automatically generated from the
 * UCS description files of the Unicode Character Database
 * %s
 * using the genUnicode.py Python script.
 *
 * Generation date: %s
 * Sources: %s
 * Author: Daniel Veillard
 */

#ifndef __XML_UNICODE_H__
#define __XML_UNICODE_H__

#include <libxml/xmlversion.h>

#ifdef LIBXML_UNICODE_ENABLED

#ifdef __cplusplus
extern "C" {
#endif

""" % (webpage, date, sources))

# Prologue of the C source: banner, includes, the lookup-table types and
# the opening of the xmlUnicodeBlocks initializer (its entries and closing
# brace are written by the code that follows).
output.write(
"""/*
 * xmlunicode.c: this module implements the Unicode character APIs
 *
 * This file is automatically generated from the
 * UCS description files of the Unicode Character Database
 * %s
 * using the genUnicode.py Python script.
 *
 * Generation date: %s
 * Sources: %s
 * Daniel Veillard <veillard@redhat.com>
 */

#define IN_LIBXML
#include "libxml.h"

#ifdef LIBXML_UNICODE_ENABLED

#include <string.h>
#include <libxml/xmlversion.h>
#include <libxml/xmlunicode.h>
#include <libxml/chvalid.h>

typedef int (xmlIntFunc)(int); /* just to keep one's mind untwisted */

typedef struct {
 const char *rangename;
 xmlIntFunc *func;
} xmlUnicodeRange;

typedef struct {
 xmlUnicodeRange *table;
 int numentries;
} xmlUnicodeNameTable;


static xmlIntFunc *xmlUnicodeLookup(xmlUnicodeNameTable *tptr, const char *tname);

static xmlUnicodeRange xmlUnicodeBlocks[] = {
""" % (webpage, date, sources))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
# Entries of the xmlUnicodeBlocks table (its opening line was written with
# the prologue).  The C function name is the block name with '-' removed;
# the lookup string keeps the '-'.  str.replace() stands in for
# string.replace(), which Python 3 removed.
block_entries = [' {"%s", xmlUCSIs%s}' % (block, block.replace('-', ''))
                 for block in bkeys]
output.write(',\n'.join(block_entries))
output.write('};\n\n')

# Same layout for the category table: one {name, predicate} pair per key.
output.write('static xmlUnicodeRange xmlUnicodeCats[] = {\n')
cat_entries = [' {"%s", xmlUCSIsCat%s}' % (name, name) for name in ckeys]
output.write(',\n'.join(cat_entries))
output.write('};\n\n')
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
#
# For any categories with more than minTableSize ranges we generate
# a range table suitable for xmlCharInRange
#
for name in ckeys:
    ranges = Categories[name]
    if len(ranges) <= minTableSize:
        # Few enough ranges: handled with inline comparisons elsewhere.
        continue
    numshort = numlong = 0
    sptr = lptr = "NULL"
    for (low, high) in ranges:
        if high < 0x10000:
            # Range ends below 0x10000: goes to the "short" table.
            if numshort:
                pline += ", "
            else:
                pline = "static const xmlChSRange xml%sS[] = {" % name
                sptr = "xml%sS" % name
            numshort += 1
        else:
            # Everything else goes to the "long" table.
            if numlong:
                pline += ", "
            else:
                # First long entry: terminate the short table if one was
                # started, then open the long one.
                if numshort > 0:
                    output.write(pline + " };\n")
                pline = "static const xmlChLRange xml%sL[] = {" % name
                lptr = "xml%sL" % name
            numlong += 1
        # Wrap the generated initializer once the pending line grows long.
        if len(pline) > 60:
            output.write(pline + "\n")
            pline = " "
        pline += "{%s, %s}" % (hex(low), hex(high))
    # Close whichever table is still open and emit the group descriptor.
    output.write(pline + " };\n")
    output.write("static xmlChRangeGroup xml%sG = {%s,%s,%s,%s};\n\n"
                 % (name, numshort, numlong, sptr, lptr))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
# C text for the two name-table descriptors and the binary-search helper.
# The two %s slots receive the entry counts of the block and category
# tables respectively.
lookup_template = """static xmlUnicodeNameTable xmlUnicodeBlockTbl = {xmlUnicodeBlocks, %s};
static xmlUnicodeNameTable xmlUnicodeCatTbl = {xmlUnicodeCats, %s};

/**
 * xmlUnicodeLookup:
 * @tptr: pointer to the name table
 * @name: name to be found
 *
 * binary table lookup for user-supplied name
 *
 * Returns pointer to range function if found, otherwise NULL
 */
static xmlIntFunc
*xmlUnicodeLookup(xmlUnicodeNameTable *tptr, const char *tname) {
 int low, high, mid, cmp;
 xmlUnicodeRange *sptr;

 if ((tptr == NULL) || (tname == NULL)) return(NULL);

 low = 0;
 high = tptr->numentries - 1;
 sptr = tptr->table;
 while (low <= high) {
 mid = (low + high) / 2;
 if ((cmp=strcmp(tname, sptr[mid].rangename)) == 0)
 return (sptr[mid].func);
 if (cmp < 0)
 high = mid - 1;
 else
 low = mid + 1;
 }
 return (NULL);
}

"""
block_count = len(BlockNames)
category_count = len(Categories)
output.write(lookup_template % (block_count, category_count))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
# One predicate per block: a prototype in the header and, in the C file, a
# documented function that ORs together one inclusive comparison per
# (start, end) range of the block.
for block in bkeys:
    # C identifiers cannot contain '-'.  str.replace() stands in for
    # string.replace(), which Python 3 removed.
    name = block.replace('-', '')
    header.write("XMLPUBFUN int XMLCALL xmlUCSIs%s\t(int code);\n" % name)
    output.write("/**\n * xmlUCSIs%s:\n * @code: UCS code point\n" % (name))
    output.write(" *\n * Check whether the character is part of %s UCS Block\n"
                 % (block))
    output.write(" *\n * Returns 1 if true 0 otherwise\n */\n")
    output.write("int\nxmlUCSIs%s(int code) {\n return(" % name)
    # start/end are already "0x..." strings, so they are pasted as-is.
    comparisons = ["((code >= %s) && (code <= %s))" % (start, end)
                   for (start, end) in BlockNames[block]]
    output.write(" ||\n ".join(comparisons))
    output.write(");\n}\n\n")
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
# Prototype and implementation of the by-name block test, which looks the
# block name up in xmlUnicodeBlockTbl and calls the predicate it finds.
is_block_prototype = "\nXMLPUBFUN int XMLCALL xmlUCSIsBlock\t(int code, const char *block);\n\n"
is_block_source = """/**
 * xmlUCSIsBlock:
 * @code: UCS code point
 * @block: UCS block name
 *
 * Check whether the character is part of the UCS Block
 *
 * Returns 1 if true, 0 if false and -1 on unknown block
 */
int
xmlUCSIsBlock(int code, const char *block) {
 xmlIntFunc *func;

 func = xmlUnicodeLookup(&xmlUnicodeBlockTbl, block);
 if (func == NULL)
 return (-1);
 return (func(code));
}

"""
header.write(is_block_prototype)
output.write(is_block_source)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
# One predicate per category: a prototype in the header and a documented
# function in the C file.  Categories with more than minTableSize ranges
# defer to xmlCharInRange with their generated range group; the others get
# an inline chain of comparisons.
for name in ckeys:
    ranges = Categories[name]
    header.write("XMLPUBFUN int XMLCALL xmlUCSIsCat%s\t(int code);\n" % name)
    output.write("/**\n * xmlUCSIsCat%s:\n * @code: UCS code point\n" % (name))
    output.write(" *\n * Check whether the character is part of %s UCS Category\n"
                 % (name))
    output.write(" *\n * Returns 1 if true 0 otherwise\n */\n")
    output.write("int\nxmlUCSIsCat%s(int code) {\n" % name)
    if len(ranges) <= minTableSize:
        tests = []
        for (begin, end) in ranges:
            if begin != end:
                tests.append("((code >= %s) && (code <= %s))"
                             % (hex(begin), hex(end)))
            else:
                # Single code point: a plain equality test is enough.
                tests.append("(code == %s)" % (hex(begin)))
        # "return(" is only emitted when there is at least one test.
        if tests:
            output.write(" return(" + " ||\n ".join(tests))
    else:
        output.write(" return(xmlCharInRange((unsigned int)code, &xml%sG)"
                     % name)
    output.write(");\n}\n\n")
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
# Prototype and implementation of the by-name category test, followed by
# the closing lines of the C file (including the end of its feature guard).
is_cat_prototype = "\nXMLPUBFUN int XMLCALL xmlUCSIsCat\t(int code, const char *cat);\n"
is_cat_source = """/**
 * xmlUCSIsCat:
 * @code: UCS code point
 * @cat: UCS Category name
 *
 * Check whether the character is part of the UCS Category
 *
 * Returns 1 if true, 0 if false and -1 on unknown category
 */
int
xmlUCSIsCat(int code, const char *cat) {
 xmlIntFunc *func;

 func = xmlUnicodeLookup(&xmlUnicodeCatTbl, cat);
 if (func == NULL)
 return (-1);
 return (func(code));
}

#define bottom_xmlunicode
#include "elfgcchack.h"
#endif /* LIBXML_UNICODE_ENABLED */
"""
header.write(is_cat_prototype)
output.write(is_cat_source)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
# Close the extern "C" section, the feature guard and the include guard of
# the generated header, then release both output files.
header_epilog = """
#ifdef __cplusplus
}
#endif

#endif /* LIBXML_UNICODE_ENABLED */

#endif /* __XML_UNICODE_H__ */
"""
header.write(header_epilog)

for handle in (header, output):
    handle.close()