genUnicode.py revision d46ee884c41b808b239563b1978468aae12e33a2
#
# Original script modified in November 2003 to take advantage of
# the character-validation range routines, and updated to the
# current Unicode information (Version 4.0.1)
#
# NOTE: there is an 'alias' facility for blocks which are not present in
# the current release, but are needed for ABI compatibility. This
# must be accomplished MANUALLY! Please see the comments below under
# 'blockAliases'
#
import sys
import string
import time
#
# blockAliases is a small hack - it is used for mapping block names which
# were were used in the 3.1 release, but are missing or changed in the current
# release. The format is "OldBlockName:NewBlockName1[,NewBlockName2[,...]]"
blockAliases = []
"SupplementaryPrivateUseArea-B")
# minTableSize gives the minimum number of ranges which must be present
# before a range table is produced. If there are less than this
# number, inline comparisons are generated
minTableSize = 8
#
# Now process the "blocks" file, reducing it to a dictionary
# indexed by blockname, containing a tuple with the applicable
# block range
#
BlockNames = {}
try:
except:
print "Missing %s, aborting ..." % blockfile
continue
if line == '':
continue
try:
except:
print "Failed to process line: %s" % (line)
continue
try:
except:
for block in blockAliases:
for r in BlockNames[comp]:
else:
continue
#
# Next process the Categories file. This is more complex, since
# the file is in code sequence, and we need to invert it. We use
# a dictionary with index category-name, with each entry containing
# all the ranges (codepoints) of that category. Note that category
# names comprise two parts - the general category, and the "subclass"
# within that category. Therefore, both "general category" (which is
# the first character of the 2-character category-name) and the full
# (2-character) name are entered into this dictionary.
#
try:
except:
print "Missing %s, aborting ..." % catfile
nbchar = 0;
Categories = {}
continue
if line == '':
continue
try:
value = 0
while point != '':
except:
print "Failed to process line: %s" % (line)
continue
# update entry for "full name"
try:
except:
try:
except:
print "Failed to process line: %s" % (line)
# update "general category" name
try:
except:
try:
except:
print "Failed to process line: %s" % (line)
#
# The data is now all read. Time to process it into a more useful form.
#
# reduce the number list into ranges
start = -1
prev = -1
end = -1
ranges = []
if start == -1:
continue
continue
continue
else:
continue
else:
#
# Assure all data is in alphabetic order, since we will be doing binary
# searches on the tables.
#
#
# Generate the resulting files
#
try:
except:
print "Failed to open include/libxml/xmlunicode.h"
try:
except:
print "Failed to open xmlunicode.c"
"""/*
* Summary: Unicode character APIs
* Description: API for the Unicode character APIs
*
* This file is automatically generated from the
* UCS description files of the Unicode Character Database
* %s
* using the genUnicode.py Python script.
*
* Generation date: %s
* Sources: %s
* Author: Daniel Veillard
*/
#ifndef __XML_UNICODE_H__
#define __XML_UNICODE_H__
#include <libxml/xmlversion.h>
#ifdef LIBXML_UNICODE_ENABLED
#ifdef __cplusplus
extern "C" {
#endif
"""/*
* xmlunicode.c: this module implements the Unicode character APIs
*
* This file is automatically generated from the
* UCS description files of the Unicode Character Database
* %s
* using the genUnicode.py Python script.
*
* Generation date: %s
* Sources: %s
* Daniel Veillard <veillard@redhat.com>
*/
#define IN_LIBXML
#include "libxml.h"
#ifdef LIBXML_UNICODE_ENABLED
#include <string.h>
#include <libxml/xmlversion.h>
#include <libxml/xmlunicode.h>
typedef int (xmlIntFunc)(int); /* just to keep one's mind untwisted */
typedef struct {
const char *rangename;
xmlIntFunc *func;
} xmlUnicodeRange;
typedef struct {
xmlUnicodeRange *table;
int numentries;
} xmlUnicodeNameTable;
static xmlIntFunc *xmlUnicodeLookup(xmlUnicodeNameTable *tptr, const char *tname);
static xmlUnicodeRange xmlUnicodeBlocks[] = {
flag = 0
if flag:
else:
flag = 1
flag = 0;
if flag:
else:
flag = 1
#
# For any categories with more than minTableSize ranges we generate
# a range table suitable for xmlCharInRange
#
numshort = 0
numlong = 0
sptr = "NULL"
lptr = "NULL"
if high < 0x10000:
if numshort == 0:
else:
pline += ", "
numshort += 1
else:
if numlong == 0:
if numshort > 0:
else:
pline += ", "
numlong += 1
pline = " "
"""static xmlUnicodeNameTable xmlUnicodeBlockTbl = {xmlUnicodeBlocks, %s};
static xmlUnicodeNameTable xmlUnicodeCatTbl = {xmlUnicodeCats, %s};
/**
* xmlUnicodeLookup:
* @tptr: pointer to the name table
* @name: name to be found
*
* binary table lookup for user-supplied name
*
* Returns pointer to range function if found, otherwise NULL
*/
static xmlIntFunc
*xmlUnicodeLookup(xmlUnicodeNameTable *tptr, const char *tname) {
int low, high, mid, cmp;
xmlUnicodeRange *sptr;
if ((tptr == NULL) || (tname == NULL)) return(NULL);
low = 0;
high = tptr->numentries - 1;
sptr = tptr->table;
while (low <= high) {
mid = (low + high) / 2;
if ((cmp=strcmp(tname, sptr[mid].rangename)) == 0)
return (sptr[mid].func);
if (cmp < 0)
high = mid - 1;
else
low = mid + 1;
}
return (NULL);
}
(block))
flag = 0
if flag:
else:
flag = 1
"""/**
* xmlUCSIsBlock:
* @code: UCS code point
* @block: UCS block name
*
* Check whether the character is part of the UCS Block
*
* Returns 1 if true, 0 if false and -1 on unknown block
*/
int
xmlUCSIsBlock(int code, const char *block) {
xmlIntFunc *func;
func = xmlUnicodeLookup(&xmlUnicodeBlockTbl, block);
if (func == NULL)
return (-1);
return (func(code));
}
""")
(name))
% name)
else:
start = 1
if start:
start = 0
else:
else:
"""/**
* xmlUCSIsCat:
* @code: UCS code point
* @cat: UCS Category name
*
* Check whether the character is part of the UCS Category
*
* Returns 1 if true, 0 if false and -1 on unknown category
*/
int
xmlUCSIsCat(int code, const char *cat) {
xmlIntFunc *func;
func = xmlUnicodeLookup(&xmlUnicodeCatTbl, cat);
if (func == NULL)
return (-1);
return (func(code));
}
#define bottom_xmlunicode
#include "elfgcchack.h"
#endif /* LIBXML_UNICODE_ENABLED */
""")
#ifdef __cplusplus
}
#endif
#endif /* LIBXML_UNICODE_ENABLED */
#endif /* __XML_UNICODE_H__ */
""");