mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-10-26 00:37:43 +03:00
codegen: Merge xmlunicode.c into xmlregexp.c
Include generated parts. Generate xmlChRangeGroups instead of functions for Unicode blocks.
This commit is contained in:
@@ -12,8 +12,6 @@
|
||||
import sys
|
||||
import string
|
||||
|
||||
webpage = "http://www.unicode.org/Public/4.0-Update1/UCD-4.0.1.html"
|
||||
|
||||
#
|
||||
# blockAliases is a small hack - it is used for mapping block names which
|
||||
# were used in the 3.1 release, but are missing or changed in the current
|
||||
@@ -60,8 +58,8 @@ for line in blocks.readlines():
|
||||
except:
|
||||
print("Failed to process line: %s" % (line))
|
||||
continue
|
||||
start = "0x" + start
|
||||
end = "0x" + end
|
||||
start = int(start, 16)
|
||||
end = int(end, 16)
|
||||
try:
|
||||
BlockNames[name].append((start, end))
|
||||
except:
|
||||
@@ -142,7 +140,7 @@ for line in data.readlines():
|
||||
except:
|
||||
print("Failed to process line: %s" % (line))
|
||||
|
||||
blocks.close()
|
||||
data.close()
|
||||
print("Parsed %d char generating %d categories" % (nbchar, len(Categories.keys())))
|
||||
|
||||
#
|
||||
@@ -191,55 +189,17 @@ ckeys = sorted(Categories.keys())
|
||||
# Generate the resulting files
|
||||
#
|
||||
try:
|
||||
output = open("xmlunicode.c", "w")
|
||||
output = open("codegen/unicode.inc", "w")
|
||||
except:
|
||||
print("Failed to open xmlunicode.c")
|
||||
print("Failed to open codegen/unicode.inc")
|
||||
sys.exit(1)
|
||||
|
||||
output.write(
|
||||
"""/*
|
||||
* xmlunicode.c: this module implements the Unicode character APIs
|
||||
*
|
||||
* This file is automatically generated from the
|
||||
* UCS description files of the Unicode Character Database
|
||||
* %s
|
||||
* using the genUnicode.py Python script.
|
||||
*/
|
||||
|
||||
#define IN_LIBXML
|
||||
#include "libxml.h"
|
||||
|
||||
#ifdef LIBXML_REGEXP_ENABLED
|
||||
|
||||
#include <string.h>
|
||||
#include <libxml/xmlversion.h>
|
||||
#include <libxml/chvalid.h>
|
||||
|
||||
#include "private/unicode.h"
|
||||
|
||||
typedef int (xmlIntFunc)(int); /* just to keep one's mind untwisted */
|
||||
|
||||
typedef struct {
|
||||
const char *rangename;
|
||||
xmlIntFunc *func;
|
||||
} xmlUnicodeRange;
|
||||
|
||||
typedef struct {
|
||||
const xmlUnicodeRange *table;
|
||||
int numentries;
|
||||
} xmlUnicodeNameTable;
|
||||
|
||||
|
||||
static xmlIntFunc *xmlUnicodeLookup(const xmlUnicodeNameTable *tptr, const char *tname);
|
||||
|
||||
""" % webpage);
|
||||
|
||||
#
|
||||
# For any categories with more than minTableSize ranges we generate
|
||||
# a range table suitable for xmlCharInRange
|
||||
#
|
||||
for name in ckeys:
|
||||
if len(Categories[name]) > minTableSize:
|
||||
if len(Categories[name]) > minTableSize and name != 'Cs':
|
||||
numshort = 0
|
||||
numlong = 0
|
||||
ranges = Categories[name]
|
||||
@@ -257,7 +217,7 @@ for name in ckeys:
|
||||
else:
|
||||
if numlong == 0:
|
||||
if numshort > 0:
|
||||
output.write(pline + " };\n")
|
||||
output.write(pline + "};\n")
|
||||
pline = "static const xmlChLRange xml%sL[] = {" % name
|
||||
lptr = "xml%sL" % name
|
||||
else:
|
||||
@@ -269,66 +229,14 @@ for name in ckeys:
|
||||
elif pline[-1:] == ",":
|
||||
pline += " "
|
||||
pline += "{%s, %s}" % (hex(low), hex(high))
|
||||
output.write(pline + " };\nstatic const xmlChRangeGroup xml%sG = {%s,%s,%s,%s};\n\n"
|
||||
output.write(pline + "};\nstatic const xmlChRangeGroup xml%sG = {%s,%s,%s,%s};\n\n"
|
||||
% (name, numshort, numlong, sptr, lptr))
|
||||
|
||||
|
||||
output.write(
|
||||
"""/**
|
||||
* binary table lookup for user-supplied name
|
||||
*
|
||||
* @param tptr pointer to the name table
|
||||
* @param tname name to be found
|
||||
* @returns pointer to range function if found, otherwise NULL
|
||||
*/
|
||||
static xmlIntFunc
|
||||
*xmlUnicodeLookup(const xmlUnicodeNameTable *tptr, const char *tname) {
|
||||
int low, high, mid, cmp;
|
||||
const xmlUnicodeRange *sptr;
|
||||
|
||||
if ((tptr == NULL) || (tname == NULL)) return(NULL);
|
||||
|
||||
low = 0;
|
||||
high = tptr->numentries - 1;
|
||||
sptr = tptr->table;
|
||||
while (low <= high) {
|
||||
mid = (low + high) / 2;
|
||||
cmp = strcmp(tname, sptr[mid].rangename);
|
||||
if (cmp == 0)
|
||||
return (sptr[mid].func);
|
||||
if (cmp < 0)
|
||||
high = mid - 1;
|
||||
else
|
||||
low = mid + 1;
|
||||
}
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
""")
|
||||
|
||||
for block in bkeys:
|
||||
name = block.replace('-', '')
|
||||
output.write("/**\n * Check whether the character is part of %s UCS Block\n"%
|
||||
(block))
|
||||
output.write(" *\n * @param code UCS code point\n")
|
||||
output.write(" * @returns 1 if true 0 otherwise\n */\n");
|
||||
output.write("static int\nxmlUCSIs%s(int code) {\n return(" % name)
|
||||
flag = 0
|
||||
for (start, end) in BlockNames[block]:
|
||||
if flag:
|
||||
output.write(" ||\n ")
|
||||
else:
|
||||
flag = 1
|
||||
output.write("((code >= %s) && (code <= %s))" % (start, end))
|
||||
output.write(");\n}\n\n")
|
||||
|
||||
for name in ckeys:
|
||||
if name == 'Cs':
|
||||
continue
|
||||
ranges = Categories[name]
|
||||
output.write("/**\n * Check whether the character is part of %s UCS Category\n"%
|
||||
(name))
|
||||
output.write(" *\n * @param code UCS code point\n")
|
||||
output.write(" * @returns 1 if true 0 otherwise\n */\n");
|
||||
output.write("int\nxmlUCSIsCat%s(int code) {\n" % name)
|
||||
output.write("static int\nxmlUCSIsCat%s(int code) {\n" % name)
|
||||
if len(Categories[name]) > minTableSize:
|
||||
output.write(" return(xmlCharInRange((unsigned int)code, &xml%sG)"
|
||||
% name)
|
||||
@@ -348,69 +256,53 @@ for name in ckeys:
|
||||
hex(begin), hex(end)))
|
||||
output.write(");\n}\n\n")
|
||||
|
||||
output.write(
|
||||
"""static const xmlUnicodeRange xmlUnicodeBlocks[] = {""")
|
||||
#
|
||||
# Range tables for blocks
|
||||
#
|
||||
|
||||
blockGroups = ''
|
||||
flag = 0
|
||||
for block in bkeys:
|
||||
name = block.replace('-', '')
|
||||
if flag:
|
||||
output.write(',\n')
|
||||
else:
|
||||
numshort = 0
|
||||
numlong = 0
|
||||
ranges = BlockNames[block]
|
||||
sptr = "NULL"
|
||||
lptr = "NULL"
|
||||
for range in ranges:
|
||||
(low, high) = range
|
||||
if high < 0x10000:
|
||||
if numshort == 0:
|
||||
pline = "static const xmlChSRange xml%sS[] = {" % name
|
||||
sptr = "xml%sS" % name
|
||||
else:
|
||||
pline += ","
|
||||
numshort += 1
|
||||
else:
|
||||
if numlong == 0:
|
||||
if numshort > 0:
|
||||
output.write(pline + "};\n")
|
||||
pline = "static const xmlChLRange xml%sL[] = {" % name
|
||||
lptr = "xml%sL" % name
|
||||
else:
|
||||
pline += ","
|
||||
numlong += 1
|
||||
if len(pline) > 60:
|
||||
output.write(pline + "\n")
|
||||
pline = " "
|
||||
elif pline[-1:] == ",":
|
||||
pline += " "
|
||||
pline += "{%s, %s}" % (hex(low), hex(high))
|
||||
output.write(pline + "};\n\n")
|
||||
if flag == 0:
|
||||
flag = 1
|
||||
output.write(' {"%s", xmlUCSIs%s}' % (block, name))
|
||||
output.write('};\n\n')
|
||||
|
||||
output.write('static const xmlUnicodeRange xmlUnicodeCats[] = {\n')
|
||||
flag = 0;
|
||||
for name in ckeys:
|
||||
if flag:
|
||||
output.write(',\n')
|
||||
else:
|
||||
flag = 1
|
||||
output.write(' {"%s", xmlUCSIsCat%s}' % (name, name))
|
||||
blockGroups += ",\n"
|
||||
blockGroups += ' {"%s",\n {%s,%s,%s,%s}}' % (block, numshort, numlong,
|
||||
sptr, lptr)
|
||||
|
||||
output.write(
|
||||
"""};
|
||||
|
||||
static const xmlUnicodeNameTable xmlUnicodeBlockTbl = {xmlUnicodeBlocks, %s};
|
||||
static const xmlUnicodeNameTable xmlUnicodeCatTbl = {xmlUnicodeCats, %s};
|
||||
|
||||
/**
|
||||
* Check whether the character is part of the UCS Block
|
||||
*
|
||||
* @param code UCS code point
|
||||
* @param block UCS block name
|
||||
* @returns 1 if true, 0 if false and -1 on unknown block
|
||||
*/
|
||||
int
|
||||
xmlUCSIsBlock(int code, const char *block) {
|
||||
xmlIntFunc *func;
|
||||
|
||||
func = xmlUnicodeLookup(&xmlUnicodeBlockTbl, block);
|
||||
if (func == NULL)
|
||||
return (-1);
|
||||
return (func(code));
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether the character is part of the UCS Category
|
||||
*
|
||||
* @param code UCS code point
|
||||
* @param cat UCS Category name
|
||||
* @returns 1 if true, 0 if false and -1 on unknown category
|
||||
*/
|
||||
int
|
||||
xmlUCSIsCat(int code, const char *cat) {
|
||||
xmlIntFunc *func;
|
||||
|
||||
func = xmlUnicodeLookup(&xmlUnicodeCatTbl, cat);
|
||||
if (func == NULL)
|
||||
return (-1);
|
||||
return (func(code));
|
||||
}
|
||||
|
||||
#endif /* LIBXML_REGEXP_ENABLED */
|
||||
""" % (len(BlockNames), len(Categories)))
|
||||
output.write("static const xmlUnicodeRange xmlUnicodeBlocks[] = {\n")
|
||||
output.write(blockGroups)
|
||||
output.write("\n};\n\n")
|
||||
|
||||
output.close()
|
||||
|
||||
Reference in New Issue
Block a user