mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-07-13 09:01:53 +03:00
merged the current state of XML Schemas implementation, it is not
* Makefile.am TODO_SCHEMAS configure.in genUnicode.py testAutomata.c testRegexp.c testSchemas.c xmlregexp.c xmlschemas.c xmlschemastypes.c xmlunicode.c include/libxml/Makefile.am include/libxml/schemasInternals.h include/libxml/xmlautomata.h include/libxml/xmlregexp.h include/libxml/xmlschemas.h include/libxml/xmlschemastypes.h include/libxml/xmlunicode.h include/libxml/xmlversion.h.in : merged the current state of XML Schemas implementation, it is not configured in by default, a specific --schemas configure option has been added. * test/automata test/regexp test/schemas Makefile.am result/automata result/regexp result/schemas: merged automata/regexp/schemas regression tests Daniel
This commit is contained in:
256
genUnicode.py
Executable file
256
genUnicode.py
Executable file
@ -0,0 +1,256 @@
|
||||
#!/usr/bin/python -u
|
||||
import sys
|
||||
import string
|
||||
import time
|
||||
|
||||
# Names of the input files this generator reads; the string is substituted
# into the "Sources:" line of the banner written to both generated files.
sources = "Blocks-4.txt UnicodeData-3.1.0.txt"

# Open the block description file.  Nothing can be generated without it, so
# report the problem and exit with a non-zero status.
try:
    blocks = open("Blocks-4.txt", "r")
except IOError:
    # Only I/O failures are expected here; a bare "except:" would also trap
    # KeyboardInterrupt and SystemExit.  The parenthesised single-argument
    # print is valid under both Python 2 and Python 3.
    print("Missing Blocks-4.txt, aborting ...")
    sys.exit(1)
|
||||
|
||||
# Map each block name (with spaces removed) to a ("0x<start>", "0x<end>")
# pair of strings; the bounds are kept as text because they are pasted
# verbatim into the generated C comparisons.
BlockNames = {}
try:
    for line in blocks:
        # Lines whose first character is '#' are comments.
        if line.startswith('#'):
            continue
        line = line.strip()
        if not line:
            continue
        try:
            # Each entry is split as "<start>..<end>;<name>".
            fields = line.split(';')
            (start, end) = fields[0].strip().split("..")
            name = fields[1].strip().replace(' ', '')
        except (ValueError, IndexError):
            # ValueError: the range did not split into exactly two parts.
            # IndexError: there was no ';'-separated name field.
            print("Failed to process line: %s" % (line))
            continue
        BlockNames[name] = ("0x" + start, "0x" + end)
finally:
    # Release the handle even if an unexpected error interrupts parsing.
    blocks.close()
print("Parsed %d blocks descriptions" % (len(BlockNames)))
|
||||
|
||||
# Open the per-character description file.  It is a required input, so
# report the problem and exit with a non-zero status when it cannot be read.
try:
    data = open("UnicodeData-3.1.0.txt", "r")
except IOError:
    # Narrowed from a bare "except:" so that only I/O failures are reported
    # as a missing file.  The print form works on Python 2 and Python 3.
    print("Missing UnicodeData-3.1.0.txt, aborting ...")
    sys.exit(1)
|
||||
|
||||
# Number of input lines successfully parsed into a code point.
nbchar = 0
# Map a category key to the list of code points filed under it, in input
# order.  Every code point is stored twice: once under its full category
# string and once under that string's first character.
Categories = {}
try:
    for line in data:
        # Lines whose first character is '#' are comments.
        if line.startswith('#'):
            continue
        line = line.strip()
        if not line:
            continue
        try:
            fields = line.split(';')
            # Field 0 is the code point written in hexadecimal.  int()
            # rejects malformed digits instead of silently skipping them.
            value = int(fields[0].strip(), 16)
            # Field 2 is used as the category name.
            name = fields[2]
            if not name:
                # An empty name would yield a generated function called
                # "xmlUCSIsCat", clashing with the dispatcher of that name.
                raise ValueError("empty category field")
        except (ValueError, IndexError):
            print("Failed to process line: %s" % (line))
            continue

        nbchar = nbchar + 1
        Categories.setdefault(name, []).append(value)
        Categories.setdefault(name[0], []).append(value)
finally:
    # The original closed "blocks" a second time here and never closed the
    # data file; close the handle this loop actually reads from.
    data.close()
print("Parsed %d char generating %d categories" % (nbchar, len(Categories)))
|
||||
# Collapse each category's list of code points into (first, last) pairs,
# one pair per run of consecutive values, and store the pairs back in place
# of the raw list.
for cat, codes in Categories.items():
    spans = []
    first = -1      # -1 means no run has been started yet
    last = -1
    for code in codes:
        if first == -1:
            # The very first value opens the first run.
            first = last = code
        elif code == last + 1:
            # Directly follows the previous value: extend the current run.
            last = code
        else:
            # Gap: flush the finished run and start a new one here.
            spans.append((first, last))
            first = last = code
    # Flush the run still open when the input is exhausted.
    spans.append((first, last))
    Categories[cat] = spans
|
||||
|
||||
#
# Generate the resulting files
#
# Both output files are opened before anything is written; if either cannot
# be opened the script reports which one and exits with a non-zero status.
try:
    header = open("xmlunicode.h", "w")
except IOError:
    # Narrowed from a bare "except:" so only I/O failures are reported here.
    print("Failed to open xmlunicode.h")
    sys.exit(1)

try:
    output = open("xmlunicode.c", "w")
except IOError:
    # Do not leave the already-opened header handle dangling on the way out.
    header.close()
    print("Failed to open xmlunicode.c")
    sys.exit(1)
|
||||
|
||||
# Timestamp substituted into the banner comment of both generated files.
date = time.asctime(time.localtime(time.time()))

# Opening text of xmlunicode.h: banner comment, include guard and the start
# of the extern "C" wrapper.  The two %s slots receive the generation date
# and the list of source files.
header_prologue = """/*
* xmlunicode.h: this header exports interfaces for the Unicode character APIs
*
* This file is automatically generated from the
* UCS description files of the Unicode Character Database
* http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
* using the genUnicode.py Python script.
*
* Generation date: %s
* Sources: %s
* Daniel Veillard <veillard@redhat.com>
*/

#ifndef __XML_UNICODE_H__
#define __XML_UNICODE_H__

#ifdef __cplusplus
extern "C" {
#endif

"""

# Opening text of xmlunicode.c: banner comment, the includes, and the start
# of the LIBXML_UNICODE_ENABLED conditional.  Same two %s slots as above.
output_prologue = """/*
* xmlunicode.c: this module implements the Unicode character APIs
*
* This file is automatically generated from the
* UCS description files of the Unicode Character Database
* http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
* using the genUnicode.py Python script.
*
* Generation date: %s
* Sources: %s
* Daniel Veillard <veillard@redhat.com>
*/

#define IN_LIBXML
#include "libxml.h"

#ifdef LIBXML_UNICODE_ENABLED

#include <string.h>
#include <libxml/xmlversion.h>
#include <libxml/xmlunicode.h>

"""

header.write(header_prologue % (date, sources))
output.write(output_prologue % (date, sources))
|
||||
|
||||
# For every block, in sorted name order, emit a prototype into the header
# and a documented predicate xmlUCSIs<Name>() into the C file.  <Name> is
# the block name with '-' removed; the doc comment keeps the name as stored.
for block in sorted(BlockNames):
    (start, end) = BlockNames[block]
    name = block.replace('-', '')
    header.write("int\txmlUCSIs%s\t(int code);\n" % name)
    output.write("".join([
        "/**\n * xmlUCSIs%s:\n * @code: UCS code point\n" % (name),
        " *\n * Check whether the character is part of %s UCS Block\n" % (block),
        " *\n * Returns 1 if true 0 otherwise\n */\n",
        "int\nxmlUCSIs%s(int code) {\n" % name,
        # start/end are already "0x..." strings, so they are pasted as-is.
        " return((code >= %s) && (code <= %s));\n" % (start, end),
        "}\n\n",
    ]))
|
||||
|
||||
# Emit xmlUCSIsBlock(): its prototype goes to the header, its body to the C
# file.  The generated function compares the requested name against each
# known block name with strcmp() and forwards to the matching per-block
# predicate; it returns -1 when no name matches.
header.write("\nint\txmlUCSIsBlock\t(int code,\n\t\t\t const char *block);\n\n")
output.write("/**\n * xmlUCSIsBlock:\n * @code: UCS code point\n")
output.write(" * @block: UCS block name\n")
# The emitted comment used to read "caracter"; the spelling is corrected.
output.write(" *\n * Check whether the character is part of the UCS Block\n")
output.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown block\n */\n")
output.write("int\nxmlUCSIsBlock(int code, const char *block) {\n")
# sorted() replaces keys()/sort(), which fails on Python 3 dict views.
for block in sorted(BlockNames):
    # The string compared keeps any '-', while the function name drops it.
    name = block.replace('-', '')
    output.write(" if (!strcmp(block, \"%s\"))\n return(xmlUCSIs%s(code));\n" %
                 (block, name))
output.write(" return(-1);\n}\n\n")
|
||||
|
||||
|
||||
# For every category key, in sorted order, emit a prototype into the header
# and a documented predicate xmlUCSIsCat<Name>() into the C file.  The body
# is one return statement OR-ing together a test per stored range.
for name in sorted(Categories):
    ranges = Categories[name]
    header.write("int\txmlUCSIsCat%s\t(int code);\n" % name)
    output.write("/**\n * xmlUCSIsCat%s:\n * @code: UCS code point\n" % (name))
    output.write(" *\n * Check whether the character is part of %s UCS Category\n" %
                 (name))
    output.write(" *\n * Returns 1 if true 0 otherwise\n */\n")
    output.write("int\nxmlUCSIsCat%s(int code) {\n" % name)

    tests = []
    for (low, high) in ranges:
        if low == high:
            # A single code point needs only an equality test.
            tests.append("(code == %s)" % (hex(low)))
        else:
            tests.append("((code >= %s) && (code <= %s))" % (
                hex(low), hex(high)))
    # The "return(" opener is written only when there is at least one test.
    if tests:
        output.write(" return(" + " ||\n ".join(tests))
    output.write(");\n}\n\n")
|
||||
|
||||
# Emit xmlUCSIsCat(): its prototype goes to the header, its body to the C
# file.  The generated function compares the requested name against each
# category key with strcmp() and forwards to the matching per-category
# predicate; it returns -1 when no name matches.
header.write("\nint\txmlUCSIsCat\t(int code,\n\t\t\t const char *cat);\n")
output.write("/**\n * xmlUCSIsCat:\n * @code: UCS code point\n")
output.write(" * @cat: UCS Category name\n")
# The emitted comment used to read "caracter"; the spelling is corrected.
output.write(" *\n * Check whether the character is part of the UCS Category\n")
output.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown category\n */\n")
output.write("int\nxmlUCSIsCat(int code, const char *cat) {\n")
# sorted() replaces keys()/sort(), which fails on Python 3 dict views.
for name in sorted(Categories):
    output.write(" if (!strcmp(cat, \"%s\"))\n return(xmlUCSIsCat%s(code));\n" %
                 (name, name))
output.write(" return(-1);\n}\n\n")
|
||||
|
||||
# Header epilogue: close the extern "C" wrapper and the include guard that
# the header prologue opened.
header.write("\n#ifdef __cplusplus\n}\n#endif\n#endif /* __XML_UNICODE_H__ */\n")

# C file epilogue: close the LIBXML_UNICODE_ENABLED conditional.
output.write("\n#endif /* LIBXML_UNICODE_ENABLED */\n")

# Both generated files are complete; close them, header first.
for generated in (header, output):
    generated.close()
|
Reference in New Issue
Block a user