merged the current state of the XML Schemas implementation; it is not configured in by default

* Makefile.am TODO_SCHEMAS configure.in genUnicode.py testAutomata.c testRegexp.c testSchemas.c xmlregexp.c xmlschemas.c xmlschemastypes.c xmlunicode.c include/libxml/Makefile.am include/libxml/schemasInternals.h include/libxml/xmlautomata.h include/libxml/xmlregexp.h include/libxml/xmlschemas.h include/libxml/xmlschemastypes.h include/libxml/xmlunicode.h include/libxml/xmlversion.h.in: merged the current state of the XML Schemas implementation. It is not configured in by default; a specific --schemas configure option has been added. * test/automata test/regexp test/schemas Makefile.am result/automata result/regexp result/schemas: merged the automata/regexp/schemas regression tests. Daniel
2025-10-20 03:52:25 +03:00 · 2002-04-16 15:50:10 +00:00
parent f7c730f074
commit 4255d50415
43 changed files with 15353 additions and 9 deletions
--- a/genUnicode.py
+++ b/genUnicode.py
@@ -0,0 +1,256 @@
+#!/usr/bin/python -u
+import sys
+import string
+import time
+
+sources = "Blocks-4.txt UnicodeData-3.1.0.txt"
+
+try:
+    blocks = open("Blocks-4.txt", "r")
+except:
+    print "Missing Blocks-4.txt, aborting ..."
+    sys.exit(1)
+
+BlockNames = {}
+for line in blocks.readlines():
+    if line[0] == '#':
+        continue
+    line = string.strip(line)
+    if line == '':
+	continue
+    try:
+	fields = string.split(line, ';')
+	range = string.strip(fields[0])
+	(start, end) = string.split(range, "..")
+	name = string.strip(fields[1])
+	name = string.replace(name, ' ', '')
+    except:
+        print "Failed to process line: %s" % (line)
+	continue
+    BlockNames[name] = ("0x"+start, "0x"+end)
+blocks.close()
+print "Parsed %d blocks descriptions" % (len(BlockNames.keys()))
+
+try:
+    data = open("UnicodeData-3.1.0.txt", "r")
+except:
+    print "Missing UnicodeData-3.1.0.txt, aborting ..."
+    sys.exit(1)
+
+nbchar = 0;
+Categories = {}
+for line in data.readlines():
+    if line[0] == '#':
+        continue
+    line = string.strip(line)
+    if line == '':
+	continue
+    try:
+	fields = string.split(line, ';')
+	point = string.strip(fields[0])
+	value = 0
+	while point != '':
+	    value = value * 16
+	    if point[0] >= '0' and point[0] <= '9':
+	        value = value + ord(point[0]) - ord('0')
+	    elif point[0] >= 'A' and point[0] <= 'F':
+	        value = value + 10 + ord(point[0]) - ord('A')
+	    elif point[0] >= 'a' and point[0] <= 'f':
+	        value = value + 10 + ord(point[0]) - ord('a')
+	    point = point[1:]
+	name = fields[2]
+    except:
+        print "Failed to process line: %s" % (line)
+	continue
+    
+    nbchar = nbchar + 1
+    try:
+	Categories[name].append(value)
+    except:
+        try:
+	    Categories[name] = [value]
+	except:
+	    print "Failed to process line: %s" % (line)
+    try:
+	Categories[name[0]].append(value)
+    except:
+        try:
+	    Categories[name[0]] = [value]
+	except:
+	    print "Failed to process line: %s" % (line)
+	
+blocks.close()
+print "Parsed %d char generating %d categories" % (nbchar, len(Categories.keys()))
+#reduce the number list into ranges
+for cat in Categories.keys():
+    list = Categories[cat]
+    start = -1
+    prev = -1
+    end = -1
+    ranges = []
+    for val in list:
+        if start == -1:
+	    start = val
+	    prev = val
+	    continue
+	elif val == prev + 1:
+	    prev = val
+	    continue
+	elif prev == start:
+	    ranges.append((prev, prev))
+	    start = val
+	    prev = val
+	    continue
+	else:
+	    ranges.append((start, prev))
+	    start = val
+	    prev = val
+	    continue
+    if prev == start:
+        ranges.append((prev, prev))
+    else:
+        ranges.append((start, prev))
+    Categories[cat] = ranges
+        
+#
+# Generate the resulting files
+#
+try:
+    header = open("xmlunicode.h", "w")
+except:
+    print "Failed to open xmlunicode.h"
+    sys.exit(1)
+
+try:
+    output = open("xmlunicode.c", "w")
+except:
+    print "Failed to open xmlunicode.c"
+    sys.exit(1)
+
+date = time.asctime(time.localtime(time.time()))
+
+header.write(
+"""/*
+ * xmlunicode.h: this header exports interfaces for the Unicode character APIs
+ *
+ * This file is automatically generated from the
+ * UCS description files of the Unicode Character Database
+ * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
+ * using the genUnicode.py Python script.
+ *
+ * Generation date: %s
+ * Sources: %s
+ * Daniel Veillard <veillard@redhat.com>
+ */
+
+#ifndef __XML_UNICODE_H__
+#define __XML_UNICODE_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+""" % (date, sources));
+output.write(
+"""/*
+ * xmlunicode.c: this module implements the Unicode character APIs
+ *
+ * This file is automatically generated from the
+ * UCS description files of the Unicode Character Database
+ * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
+ * using the genUnicode.py Python script.
+ *
+ * Generation date: %s
+ * Sources: %s
+ * Daniel Veillard <veillard@redhat.com>
+ */
+
+#define IN_LIBXML
+#include "libxml.h"
+
+#ifdef LIBXML_UNICODE_ENABLED
+
+#include <string.h>
+#include <libxml/xmlversion.h>
+#include <libxml/xmlunicode.h>
+
+""" % (date, sources));
+
+keys = BlockNames.keys()
+keys.sort()
+for block in keys:
+    (start, end) = BlockNames[block]
+    name = string.replace(block, '-', '')
+    header.write("int\txmlUCSIs%s\t(int code);\n" % name)
+    output.write("/**\n * xmlUCSIs%s:\n * @code: UCS code point\n" % (name))
+    output.write(" *\n * Check whether the character is part of %s UCS Block\n"%
+                 (block))
+    output.write(" *\n * Returns 1 if true 0 otherwise\n */\n");
+    output.write("int\nxmlUCSIs%s(int code) {\n" % name)
+    output.write("    return((code >= %s) && (code <= %s));\n" % (start, end))
+    output.write("}\n\n")
+
+header.write("\nint\txmlUCSIsBlock\t(int code,\n\t\t\t const char *block);\n\n")
+output.write("/**\n * xmlUCSIsBlock:\n * @code: UCS code point\n")
+output.write(" * @block: UCS block name\n")
+output.write(" *\n * Check whether the caracter is part of the UCS Block\n")
+output.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown block\n */\n");
+output.write("int\nxmlUCSIsBlock(int code, const char *block) {\n")
+keys = BlockNames.keys()
+keys.sort()
+for block in keys:
+    name = string.replace(block, '-', '')
+    output.write("    if (!strcmp(block, \"%s\"))\n        return(xmlUCSIs%s(code));\n" %
+                 (block, name));
+output.write("    return(-1);\n}\n\n")
+
+
+keys = Categories.keys()
+keys.sort()
+for name in keys:
+    ranges = Categories[name]
+    header.write("int\txmlUCSIsCat%s\t(int code);\n" % name)
+    output.write("/**\n * xmlUCSIsCat%s:\n * @code: UCS code point\n" % (name))
+    output.write(" *\n * Check whether the character is part of %s UCS Category\n"%
+                 (name))
+    output.write(" *\n * Returns 1 if true 0 otherwise\n */\n");
+    output.write("int\nxmlUCSIsCat%s(int code) {\n" % name)
+    start = 1
+    for range in ranges:
+        (begin, end) = range;
+	if start:
+	    output.write("    return(");
+	    start = 0
+	else:
+	    output.write(" ||\n           ");
+	if (begin == end):
+	    output.write("(code == %s)" % (hex(begin)))
+	else:
+	    output.write("((code >= %s) && (code <= %s))" % (
+	                 hex(begin), hex(end)))
+    output.write(");\n}\n\n")
+
+header.write("\nint\txmlUCSIsCat\t(int code,\n\t\t\t const char *cat);\n")
+output.write("/**\n * xmlUCSIsCat:\n * @code: UCS code point\n")
+output.write(" * @cat: UCS Category name\n")
+output.write(" *\n * Check whether the caracter is part of the UCS Category\n")
+output.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown category\n */\n");
+output.write("int\nxmlUCSIsCat(int code, const char *cat) {\n")
+keys = Categories.keys()
+keys.sort()
+for name in keys:
+    output.write("    if (!strcmp(cat, \"%s\"))\n        return(xmlUCSIsCat%s(code));\n" %
+                 (name, name));
+output.write("    return(-1);\n}\n\n")
+
+header.write("""
+#ifdef __cplusplus
+}
+#endif
+#endif /* __XML_UNICODE_H__ */
+""");
+output.write("""
+#endif /* LIBXML_UNICODE_ENABLED */
+""");
+header.close()
+output.close()