1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-28 23:14:57 +03:00

codegen: Only generate tables for character ranges

The rest can be easily maintained manually.
This commit is contained in:
Nick Wellnhofer
2025-05-16 01:52:44 +02:00
parent 770c6decd8
commit 4cb767e96e
6 changed files with 141 additions and 459 deletions

View File

@@ -202,16 +202,17 @@ CLEANFILES = missing.lst runsuite.log runxmlconf.log test.out \
EXTRA_DIST = Copyright libxml2-config.cmake.in autogen.sh \ EXTRA_DIST = Copyright libxml2-config.cmake.in autogen.sh \
libxml.h \ libxml.h \
codegen/charset.inc \ codegen/charset.inc \
codegen/chvalid.def \
codegen/escape.inc \ codegen/escape.inc \
codegen/genCharset.py \ codegen/genCharset.py \
codegen/genChRanges.py \ codegen/genRanges.py \
codegen/genEscape.py \ codegen/genEscape.py \
codegen/genHtml5Ent.py \ codegen/genHtml5Ent.py \
codegen/genHtml5LibTests.py \ codegen/genHtml5LibTests.py \
codegen/genTestApi.py \ codegen/genTestApi.py \
codegen/genUnicode.py \ codegen/genUnicode.py \
codegen/html5ent.inc \ codegen/html5ent.inc \
codegen/ranges.def \
codegen/ranges.inc \
codegen/xmlmod.py \ codegen/xmlmod.py \
timsort.h \ timsort.h \
README.zOS README.md \ README.zOS README.md \

134
chvalid.c
View File

@@ -1,9 +1,6 @@
/* /*
* chvalid.c: this module implements the character range * chvalid.c: this module implements the character range
* validation APIs * validation APIs
*
* This file is automatically generated from the cvs source
* definition files using the genChRanges.py Python script
*/ */
#define IN_LIBXML #define IN_LIBXML
@@ -20,137 +17,8 @@
* allowed. * allowed.
* *
*/ */
const unsigned char xmlIsPubidChar_tab[256] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00,
0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x01,
0x01, 0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x00, 0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01,
0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00 };
static const xmlChSRange xmlIsBaseChar_srng[] = { {0x100, 0x131},
{0x134, 0x13e}, {0x141, 0x148}, {0x14a, 0x17e}, {0x180, 0x1c3},
{0x1cd, 0x1f0}, {0x1f4, 0x1f5}, {0x1fa, 0x217}, {0x250, 0x2a8},
{0x2bb, 0x2c1}, {0x386, 0x386}, {0x388, 0x38a}, {0x38c, 0x38c},
{0x38e, 0x3a1}, {0x3a3, 0x3ce}, {0x3d0, 0x3d6}, {0x3da, 0x3da},
{0x3dc, 0x3dc}, {0x3de, 0x3de}, {0x3e0, 0x3e0}, {0x3e2, 0x3f3},
{0x401, 0x40c}, {0x40e, 0x44f}, {0x451, 0x45c}, {0x45e, 0x481},
{0x490, 0x4c4}, {0x4c7, 0x4c8}, {0x4cb, 0x4cc}, {0x4d0, 0x4eb},
{0x4ee, 0x4f5}, {0x4f8, 0x4f9}, {0x531, 0x556}, {0x559, 0x559},
{0x561, 0x586}, {0x5d0, 0x5ea}, {0x5f0, 0x5f2}, {0x621, 0x63a},
{0x641, 0x64a}, {0x671, 0x6b7}, {0x6ba, 0x6be}, {0x6c0, 0x6ce},
{0x6d0, 0x6d3}, {0x6d5, 0x6d5}, {0x6e5, 0x6e6}, {0x905, 0x939},
{0x93d, 0x93d}, {0x958, 0x961}, {0x985, 0x98c}, {0x98f, 0x990},
{0x993, 0x9a8}, {0x9aa, 0x9b0}, {0x9b2, 0x9b2}, {0x9b6, 0x9b9},
{0x9dc, 0x9dd}, {0x9df, 0x9e1}, {0x9f0, 0x9f1}, {0xa05, 0xa0a},
{0xa0f, 0xa10}, {0xa13, 0xa28}, {0xa2a, 0xa30}, {0xa32, 0xa33},
{0xa35, 0xa36}, {0xa38, 0xa39}, {0xa59, 0xa5c}, {0xa5e, 0xa5e},
{0xa72, 0xa74}, {0xa85, 0xa8b}, {0xa8d, 0xa8d}, {0xa8f, 0xa91},
{0xa93, 0xaa8}, {0xaaa, 0xab0}, {0xab2, 0xab3}, {0xab5, 0xab9},
{0xabd, 0xabd}, {0xae0, 0xae0}, {0xb05, 0xb0c}, {0xb0f, 0xb10},
{0xb13, 0xb28}, {0xb2a, 0xb30}, {0xb32, 0xb33}, {0xb36, 0xb39},
{0xb3d, 0xb3d}, {0xb5c, 0xb5d}, {0xb5f, 0xb61}, {0xb85, 0xb8a},
{0xb8e, 0xb90}, {0xb92, 0xb95}, {0xb99, 0xb9a}, {0xb9c, 0xb9c},
{0xb9e, 0xb9f}, {0xba3, 0xba4}, {0xba8, 0xbaa}, {0xbae, 0xbb5},
{0xbb7, 0xbb9}, {0xc05, 0xc0c}, {0xc0e, 0xc10}, {0xc12, 0xc28},
{0xc2a, 0xc33}, {0xc35, 0xc39}, {0xc60, 0xc61}, {0xc85, 0xc8c},
{0xc8e, 0xc90}, {0xc92, 0xca8}, {0xcaa, 0xcb3}, {0xcb5, 0xcb9},
{0xcde, 0xcde}, {0xce0, 0xce1}, {0xd05, 0xd0c}, {0xd0e, 0xd10},
{0xd12, 0xd28}, {0xd2a, 0xd39}, {0xd60, 0xd61}, {0xe01, 0xe2e},
{0xe30, 0xe30}, {0xe32, 0xe33}, {0xe40, 0xe45}, {0xe81, 0xe82},
{0xe84, 0xe84}, {0xe87, 0xe88}, {0xe8a, 0xe8a}, {0xe8d, 0xe8d},
{0xe94, 0xe97}, {0xe99, 0xe9f}, {0xea1, 0xea3}, {0xea5, 0xea5},
{0xea7, 0xea7}, {0xeaa, 0xeab}, {0xead, 0xeae}, {0xeb0, 0xeb0},
{0xeb2, 0xeb3}, {0xebd, 0xebd}, {0xec0, 0xec4}, {0xf40, 0xf47},
{0xf49, 0xf69}, {0x10a0, 0x10c5}, {0x10d0, 0x10f6}, {0x1100, 0x1100},
{0x1102, 0x1103}, {0x1105, 0x1107}, {0x1109, 0x1109}, {0x110b, 0x110c},
{0x110e, 0x1112}, {0x113c, 0x113c}, {0x113e, 0x113e}, {0x1140, 0x1140},
{0x114c, 0x114c}, {0x114e, 0x114e}, {0x1150, 0x1150}, {0x1154, 0x1155},
{0x1159, 0x1159}, {0x115f, 0x1161}, {0x1163, 0x1163}, {0x1165, 0x1165},
{0x1167, 0x1167}, {0x1169, 0x1169}, {0x116d, 0x116e}, {0x1172, 0x1173},
{0x1175, 0x1175}, {0x119e, 0x119e}, {0x11a8, 0x11a8}, {0x11ab, 0x11ab},
{0x11ae, 0x11af}, {0x11b7, 0x11b8}, {0x11ba, 0x11ba}, {0x11bc, 0x11c2},
{0x11eb, 0x11eb}, {0x11f0, 0x11f0}, {0x11f9, 0x11f9}, {0x1e00, 0x1e9b},
{0x1ea0, 0x1ef9}, {0x1f00, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45},
{0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, {0x1f59, 0x1f59}, {0x1f5b, 0x1f5b},
{0x1f5d, 0x1f5d}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, {0x1fb6, 0x1fbc},
{0x1fbe, 0x1fbe}, {0x1fc2, 0x1fc4}, {0x1fc6, 0x1fcc}, {0x1fd0, 0x1fd3},
{0x1fd6, 0x1fdb}, {0x1fe0, 0x1fec}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffc},
{0x2126, 0x2126}, {0x212a, 0x212b}, {0x212e, 0x212e}, {0x2180, 0x2182},
{0x3041, 0x3094}, {0x30a1, 0x30fa}, {0x3105, 0x312c}, {0xac00, 0xd7a3}};
const xmlChRangeGroup xmlIsBaseCharGroup =
{197, 0, xmlIsBaseChar_srng, (xmlChLRangePtr)0};
static const xmlChSRange xmlIsChar_srng[] = { {0x100, 0xd7ff},
{0xe000, 0xfffd}};
static const xmlChLRange xmlIsChar_lrng[] = { {0x10000, 0x10ffff}};
const xmlChRangeGroup xmlIsCharGroup =
{2, 1, xmlIsChar_srng, xmlIsChar_lrng};
static const xmlChSRange xmlIsCombining_srng[] = { {0x300, 0x345},
{0x360, 0x361}, {0x483, 0x486}, {0x591, 0x5a1}, {0x5a3, 0x5b9},
{0x5bb, 0x5bd}, {0x5bf, 0x5bf}, {0x5c1, 0x5c2}, {0x5c4, 0x5c4},
{0x64b, 0x652}, {0x670, 0x670}, {0x6d6, 0x6dc}, {0x6dd, 0x6df},
{0x6e0, 0x6e4}, {0x6e7, 0x6e8}, {0x6ea, 0x6ed}, {0x901, 0x903},
{0x93c, 0x93c}, {0x93e, 0x94c}, {0x94d, 0x94d}, {0x951, 0x954},
{0x962, 0x963}, {0x981, 0x983}, {0x9bc, 0x9bc}, {0x9be, 0x9be},
{0x9bf, 0x9bf}, {0x9c0, 0x9c4}, {0x9c7, 0x9c8}, {0x9cb, 0x9cd},
{0x9d7, 0x9d7}, {0x9e2, 0x9e3}, {0xa02, 0xa02}, {0xa3c, 0xa3c},
{0xa3e, 0xa3e}, {0xa3f, 0xa3f}, {0xa40, 0xa42}, {0xa47, 0xa48},
{0xa4b, 0xa4d}, {0xa70, 0xa71}, {0xa81, 0xa83}, {0xabc, 0xabc},
{0xabe, 0xac5}, {0xac7, 0xac9}, {0xacb, 0xacd}, {0xb01, 0xb03},
{0xb3c, 0xb3c}, {0xb3e, 0xb43}, {0xb47, 0xb48}, {0xb4b, 0xb4d},
{0xb56, 0xb57}, {0xb82, 0xb83}, {0xbbe, 0xbc2}, {0xbc6, 0xbc8},
{0xbca, 0xbcd}, {0xbd7, 0xbd7}, {0xc01, 0xc03}, {0xc3e, 0xc44},
{0xc46, 0xc48}, {0xc4a, 0xc4d}, {0xc55, 0xc56}, {0xc82, 0xc83},
{0xcbe, 0xcc4}, {0xcc6, 0xcc8}, {0xcca, 0xccd}, {0xcd5, 0xcd6},
{0xd02, 0xd03}, {0xd3e, 0xd43}, {0xd46, 0xd48}, {0xd4a, 0xd4d},
{0xd57, 0xd57}, {0xe31, 0xe31}, {0xe34, 0xe3a}, {0xe47, 0xe4e},
{0xeb1, 0xeb1}, {0xeb4, 0xeb9}, {0xebb, 0xebc}, {0xec8, 0xecd},
{0xf18, 0xf19}, {0xf35, 0xf35}, {0xf37, 0xf37}, {0xf39, 0xf39},
{0xf3e, 0xf3e}, {0xf3f, 0xf3f}, {0xf71, 0xf84}, {0xf86, 0xf8b},
{0xf90, 0xf95}, {0xf97, 0xf97}, {0xf99, 0xfad}, {0xfb1, 0xfb7},
{0xfb9, 0xfb9}, {0x20d0, 0x20dc}, {0x20e1, 0x20e1}, {0x302a, 0x302f},
{0x3099, 0x3099}, {0x309a, 0x309a}};
const xmlChRangeGroup xmlIsCombiningGroup =
{95, 0, xmlIsCombining_srng, (xmlChLRangePtr)0};
static const xmlChSRange xmlIsDigit_srng[] = { {0x660, 0x669},
{0x6f0, 0x6f9}, {0x966, 0x96f}, {0x9e6, 0x9ef}, {0xa66, 0xa6f},
{0xae6, 0xaef}, {0xb66, 0xb6f}, {0xbe7, 0xbef}, {0xc66, 0xc6f},
{0xce6, 0xcef}, {0xd66, 0xd6f}, {0xe50, 0xe59}, {0xed0, 0xed9},
{0xf20, 0xf29}};
const xmlChRangeGroup xmlIsDigitGroup =
{14, 0, xmlIsDigit_srng, (xmlChLRangePtr)0};
static const xmlChSRange xmlIsExtender_srng[] = { {0x2d0, 0x2d0},
{0x2d1, 0x2d1}, {0x387, 0x387}, {0x640, 0x640}, {0xe46, 0xe46},
{0xec6, 0xec6}, {0x3005, 0x3005}, {0x3031, 0x3035}, {0x309d, 0x309e},
{0x30fc, 0x30fe}};
const xmlChRangeGroup xmlIsExtenderGroup =
{10, 0, xmlIsExtender_srng, (xmlChLRangePtr)0};
static const xmlChSRange xmlIsIdeographic_srng[] = { {0x3007, 0x3007},
{0x3021, 0x3029}, {0x4e00, 0x9fa5}};
const xmlChRangeGroup xmlIsIdeographicGroup =
{3, 0, xmlIsIdeographic_srng, (xmlChLRangePtr)0};
#include "codegen/ranges.inc"
/** /**
* Does a binary search of the range table to determine if char * Does a binary search of the range table to determine if char

View File

@@ -51,9 +51,9 @@ Functs = {}
state = 0 state = 0
try: try:
defines = open("codegen/chvalid.def", "r") defines = open("codegen/ranges.def", "r")
except: except:
print("Missing codegen/chvalid.def, aborting ...") print("Missing codegen/ranges.def, aborting ...")
sys.exit(1) sys.exit(1)
# #
@@ -173,160 +173,26 @@ for line in defines.readlines():
except: except:
print("Failed to process line: %s" % (line)) print("Failed to process line: %s" % (line))
raise raise
#
# At this point, the entire definition file has been processed. Now we
# enter the output phase, where we generate the two files chvalid.c and'
# chvalid.h
#
# To do this, we first output the 'static' data (heading, fixed
# definitions, etc.), then output the 'dynamic' data (the results
# of the above processing), and finally output closing 'static' data
# (e.g. the subroutine to process the ranges)
#
#
# Generate the headings:
#
try:
header = open("include/libxml/chvalid.h", "w")
except:
print("Failed to open include/libxml/chvalid.h")
sys.exit(1)
try: try:
output = open("chvalid.c", "w") output = open("codegen/ranges.inc", "w")
except: except:
print("Failed to open chvalid.c") print("Failed to open codegen/ranges.inc")
sys.exit(1) sys.exit(1)
fkeys = sorted(Functs.keys())
header.write(
"""/**
* @file
*
* @brief Unicode character range checking
*
* this module exports interfaces for the character
* range validation APIs
*
* This file is automatically generated from the cvs source
* definition files using the genChRanges.py Python script
*/
#ifndef __XML_CHVALID_H__
#define __XML_CHVALID_H__
#include <libxml/xmlversion.h>
#include <libxml/xmlstring.h>
#ifdef __cplusplus
extern "C" {
#endif
/** @cond ignore */
/*
* Define our typedefs and structures
*
*/
typedef struct _xmlChSRange xmlChSRange;
typedef xmlChSRange *xmlChSRangePtr;
struct _xmlChSRange {
unsigned short\tlow;
unsigned short\thigh;
};
typedef struct _xmlChLRange xmlChLRange;
typedef xmlChLRange *xmlChLRangePtr;
struct _xmlChLRange {
unsigned int\tlow;
unsigned int\thigh;
};
typedef struct _xmlChRangeGroup xmlChRangeGroup;
typedef xmlChRangeGroup *xmlChRangeGroupPtr;
struct _xmlChRangeGroup {
int\t\t\tnbShortRange;
int\t\t\tnbLongRange;
const xmlChSRange\t*shortRange;\t/* points to an array of ranges */
const xmlChLRange\t*longRange;
};
""");
for f in fkeys:
if len(Functs[f][1]) > 0:
header.write("XMLPUBVAR const xmlChRangeGroup %sGroup;\n" % f)
if max(Functs[f][0]) > 0: # only check if at least one entry
rangeTable = makeRange(Functs[f][0])
numRanges = len(rangeTable)
if numRanges >= minTableSize: # table is worthwhile
header.write("XMLPUBVAR const unsigned char %s_tab[256];\n" % f)
header.write("""
/**
* Range checking routine
*/
XMLPUBFUN int
\t\txmlCharInRange(unsigned int val, const xmlChRangeGroup *group);
/** @endcond */
""");
output.write(
"""/*
* chvalid.c:\tthis module implements the character range
*\t\tvalidation APIs
*
* This file is automatically generated from the cvs source
* definition files using the genChRanges.py Python script
*/
#define IN_LIBXML
#include "libxml.h"
#include <libxml/chvalid.h>
#include <stddef.h>
/*
* The initial tables ({func_name}_tab) are used to validate whether a
* single-byte character is within the specified group. Each table
* contains 256 bytes, with each byte representing one of the 256
* possible characters. If the table byte is set, the character is
* allowed.
*
*/
""");
# #
# Now output the generated data. # Now output the generated data.
# We try to produce the best execution times. Tests have shown that validation
# with direct table lookup is, when there are a "small" number of valid items,
# still not as fast as a sequence of inline compares. So, if the single-byte
# portion of a range has a "small" number of ranges, we output a macro for inline
# compares, otherwise we output a 256-byte table and a macro to use it.
# #
fkeys = sorted(Functs.keys())
for f in fkeys: for f in fkeys:
# First we convert the specified single-byte values into a group of ranges. # First we convert the specified single-byte values into a group of ranges.
# If the total number of such ranges is less than minTableSize, we generate
# an inline macro for direct comparisons; if greater, we generate a lookup
# table.
if max(Functs[f][0]) > 0: # only check if at least one entry if max(Functs[f][0]) > 0: # only check if at least one entry
rangeTable = makeRange(Functs[f][0]) rangeTable = makeRange(Functs[f][0])
numRanges = len(rangeTable) numRanges = len(rangeTable)
if numRanges >= minTableSize: # table is worthwhile if numRanges >= minTableSize: # table is worthwhile
header.write("""
/**
* Automatically generated by genChRanges.py
*
* @param c char to validate
*/
""")
header.write("#define %s_ch(c)\t(%s_tab[(c)])\n" % (f, f))
# write the constant data to the code file # write the constant data to the code file
output.write("const unsigned char %s_tab[256] = {\n" % f) output.write("const unsigned char %s_tab[256] = {\n" % f)
pline = " " pline = " "
@@ -337,94 +203,6 @@ for f in fkeys:
pline = " " pline = " "
output.write(pline + " 0x%02x };\n\n" % Functs[f][0][255]) output.write(pline + " 0x%02x };\n\n" % Functs[f][0][255])
else: # inline check is used
# first another little optimisation - if space is present,
# put it at the front of the list so it is checked first
try:
ix = rangeTable.remove((0x20, 0x20))
rangeTable.insert(0, (0x20, 0x20))
except:
pass
firstFlag = 1
header.write("""
/**
* Automatically generated by genChRanges.py
*
* @param c char to validate
*/
""")
# okay, I'm tired of the messy lineup - let's automate it!
pline = "#define %s_ch(c)" % f
# 'ntab' is number of tabs needed to position to col. 33 from name end
ntab = 4 - (len(pline)) // 8
if ntab < 0:
ntab = 0
just = ""
for i in range(ntab):
just += "\t"
pline = pline + just + "("
for rg in rangeTable:
if not firstFlag:
pline += " || \\\n\t\t\t\t "
else:
firstFlag = 0
if rg[0] == rg[1]: # single value - check equal
pline += "((c) == 0x%x)" % rg[0]
else: # value range
# since we are doing char, also change range ending in 0xff
if rg[1] != 0xff:
pline += "((0x%x <= (c)) &&" % rg[0]
pline += " ((c) <= 0x%x))" % rg[1]
else:
pline += " (0x%x <= (c))" % rg[0]
pline += ")\n"
header.write(pline)
header.write("""
/**
* Automatically generated by genChRanges.py
*
* @param c char to validate
*/
""")
pline = "#define %sQ(c)" % f
ntab = 4 - (len(pline)) // 8
if ntab < 0:
ntab = 0
just = ""
for i in range(ntab):
just += "\t"
header.write(pline + just + "(((c) < 0x100) ? \\\n\t\t\t\t ")
if max(Functs[f][0]) > 0:
header.write("%s_ch((c)) :" % f)
else:
header.write("0 :")
# if no ranges defined, value invalid if >= 0x100
numRanges = len(Functs[f][1])
if numRanges == 0:
header.write(" 0)\n\n")
else:
if numRanges >= minTableSize:
header.write(" \\\n\t\t\t\t xmlCharInRange((c), &%sGroup))\n" % f)
else: # if < minTableSize, generate inline code
firstFlag = 1
for rg in Functs[f][1]:
if not firstFlag:
pline += " || \\\n\t\t\t\t "
else:
firstFlag = 0
pline = "\\\n\t\t\t\t("
if rg[0] == rg[1]: # single value - check equal
pline += "((c) == 0x%x)" % rg[0]
else: # value range
pline += "((0x%x <= (c)) &&" % rg[0]
pline += " ((c) <= 0x%x))" % rg[1]
pline += "))\n"
header.write(pline)
# #
# Next we do the unicode ranges # Next we do the unicode ranges
# #
@@ -474,98 +252,5 @@ for f in fkeys:
output.write(pline + "};\n\n") output.write(pline + "};\n\n")
output.write(
"""
/**
* Does a binary search of the range table to determine if char
* is valid
*
* @param val character to be validated
* @param rptr pointer to range to be used to validate
* @returns true if character valid, false otherwise
*/
int
xmlCharInRange (unsigned int val, const xmlChRangeGroup *rptr) {
int low, high, mid;
const xmlChSRange *sptr;
const xmlChLRange *lptr;
if (rptr == NULL) return(0);
if (val < 0x10000) {\t/* is val in 'short' or 'long' array? */
\tif (rptr->nbShortRange == 0)
\t return 0;
\tlow = 0;
\thigh = rptr->nbShortRange - 1;
\tsptr = rptr->shortRange;
\twhile (low <= high) {
\t mid = (low + high) / 2;
\t if ((unsigned short) val < sptr[mid].low) {
\t\thigh = mid - 1;
\t } else {
\t\tif ((unsigned short) val > sptr[mid].high) {
\t\t low = mid + 1;
\t\t} else {
\t\t return 1;
\t\t}
\t }
\t}
} else {
\tif (rptr->nbLongRange == 0) {
\t return 0;
\t}
\tlow = 0;
\thigh = rptr->nbLongRange - 1;
\tlptr = rptr->longRange;
\twhile (low <= high) {
\t mid = (low + high) / 2;
\t if (val < lptr[mid].low) {
\t\thigh = mid - 1;
\t } else {
\t\tif (val > lptr[mid].high) {
\t\t low = mid + 1;
\t\t} else {
\t\t return 1;
\t\t}
\t }
\t}
}
return 0;
}
""");
#
# finally, generate the ABI compatibility functions
#
for f in fkeys:
output.write("""
/**
* This function is DEPRECATED.
""");
if max(Functs[f][0]) > 0:
output.write(" * Use %s_ch() or %sQ() instead" % (f, f))
else:
output.write(" * Use %sQ() instead" % f)
output.write("""
*
* @param ch character to validate
* @returns true if argument valid, false otherwise
*/
""")
output.write("int\n%s(unsigned int ch) {\n return(%sQ(ch));\n}\n\n" % (f,f))
header.write("XMLPUBFUN int\n\t\t%s(unsigned int ch);\n" % f);
#
# Run complete - write trailers and close the output files
#
header.write("""
#ifdef __cplusplus
}
#endif
#endif /* __XML_CHVALID_H__ */
""")
header.close()
output.close() output.close()

131
codegen/ranges.inc Normal file
View File

@@ -0,0 +1,131 @@
const unsigned char xmlIsPubidChar_tab[256] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00,
0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x01,
0x01, 0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x00, 0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01,
0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00 };
static const xmlChSRange xmlIsBaseChar_srng[] = { {0x100, 0x131},
{0x134, 0x13e}, {0x141, 0x148}, {0x14a, 0x17e}, {0x180, 0x1c3},
{0x1cd, 0x1f0}, {0x1f4, 0x1f5}, {0x1fa, 0x217}, {0x250, 0x2a8},
{0x2bb, 0x2c1}, {0x386, 0x386}, {0x388, 0x38a}, {0x38c, 0x38c},
{0x38e, 0x3a1}, {0x3a3, 0x3ce}, {0x3d0, 0x3d6}, {0x3da, 0x3da},
{0x3dc, 0x3dc}, {0x3de, 0x3de}, {0x3e0, 0x3e0}, {0x3e2, 0x3f3},
{0x401, 0x40c}, {0x40e, 0x44f}, {0x451, 0x45c}, {0x45e, 0x481},
{0x490, 0x4c4}, {0x4c7, 0x4c8}, {0x4cb, 0x4cc}, {0x4d0, 0x4eb},
{0x4ee, 0x4f5}, {0x4f8, 0x4f9}, {0x531, 0x556}, {0x559, 0x559},
{0x561, 0x586}, {0x5d0, 0x5ea}, {0x5f0, 0x5f2}, {0x621, 0x63a},
{0x641, 0x64a}, {0x671, 0x6b7}, {0x6ba, 0x6be}, {0x6c0, 0x6ce},
{0x6d0, 0x6d3}, {0x6d5, 0x6d5}, {0x6e5, 0x6e6}, {0x905, 0x939},
{0x93d, 0x93d}, {0x958, 0x961}, {0x985, 0x98c}, {0x98f, 0x990},
{0x993, 0x9a8}, {0x9aa, 0x9b0}, {0x9b2, 0x9b2}, {0x9b6, 0x9b9},
{0x9dc, 0x9dd}, {0x9df, 0x9e1}, {0x9f0, 0x9f1}, {0xa05, 0xa0a},
{0xa0f, 0xa10}, {0xa13, 0xa28}, {0xa2a, 0xa30}, {0xa32, 0xa33},
{0xa35, 0xa36}, {0xa38, 0xa39}, {0xa59, 0xa5c}, {0xa5e, 0xa5e},
{0xa72, 0xa74}, {0xa85, 0xa8b}, {0xa8d, 0xa8d}, {0xa8f, 0xa91},
{0xa93, 0xaa8}, {0xaaa, 0xab0}, {0xab2, 0xab3}, {0xab5, 0xab9},
{0xabd, 0xabd}, {0xae0, 0xae0}, {0xb05, 0xb0c}, {0xb0f, 0xb10},
{0xb13, 0xb28}, {0xb2a, 0xb30}, {0xb32, 0xb33}, {0xb36, 0xb39},
{0xb3d, 0xb3d}, {0xb5c, 0xb5d}, {0xb5f, 0xb61}, {0xb85, 0xb8a},
{0xb8e, 0xb90}, {0xb92, 0xb95}, {0xb99, 0xb9a}, {0xb9c, 0xb9c},
{0xb9e, 0xb9f}, {0xba3, 0xba4}, {0xba8, 0xbaa}, {0xbae, 0xbb5},
{0xbb7, 0xbb9}, {0xc05, 0xc0c}, {0xc0e, 0xc10}, {0xc12, 0xc28},
{0xc2a, 0xc33}, {0xc35, 0xc39}, {0xc60, 0xc61}, {0xc85, 0xc8c},
{0xc8e, 0xc90}, {0xc92, 0xca8}, {0xcaa, 0xcb3}, {0xcb5, 0xcb9},
{0xcde, 0xcde}, {0xce0, 0xce1}, {0xd05, 0xd0c}, {0xd0e, 0xd10},
{0xd12, 0xd28}, {0xd2a, 0xd39}, {0xd60, 0xd61}, {0xe01, 0xe2e},
{0xe30, 0xe30}, {0xe32, 0xe33}, {0xe40, 0xe45}, {0xe81, 0xe82},
{0xe84, 0xe84}, {0xe87, 0xe88}, {0xe8a, 0xe8a}, {0xe8d, 0xe8d},
{0xe94, 0xe97}, {0xe99, 0xe9f}, {0xea1, 0xea3}, {0xea5, 0xea5},
{0xea7, 0xea7}, {0xeaa, 0xeab}, {0xead, 0xeae}, {0xeb0, 0xeb0},
{0xeb2, 0xeb3}, {0xebd, 0xebd}, {0xec0, 0xec4}, {0xf40, 0xf47},
{0xf49, 0xf69}, {0x10a0, 0x10c5}, {0x10d0, 0x10f6}, {0x1100, 0x1100},
{0x1102, 0x1103}, {0x1105, 0x1107}, {0x1109, 0x1109}, {0x110b, 0x110c},
{0x110e, 0x1112}, {0x113c, 0x113c}, {0x113e, 0x113e}, {0x1140, 0x1140},
{0x114c, 0x114c}, {0x114e, 0x114e}, {0x1150, 0x1150}, {0x1154, 0x1155},
{0x1159, 0x1159}, {0x115f, 0x1161}, {0x1163, 0x1163}, {0x1165, 0x1165},
{0x1167, 0x1167}, {0x1169, 0x1169}, {0x116d, 0x116e}, {0x1172, 0x1173},
{0x1175, 0x1175}, {0x119e, 0x119e}, {0x11a8, 0x11a8}, {0x11ab, 0x11ab},
{0x11ae, 0x11af}, {0x11b7, 0x11b8}, {0x11ba, 0x11ba}, {0x11bc, 0x11c2},
{0x11eb, 0x11eb}, {0x11f0, 0x11f0}, {0x11f9, 0x11f9}, {0x1e00, 0x1e9b},
{0x1ea0, 0x1ef9}, {0x1f00, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45},
{0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, {0x1f59, 0x1f59}, {0x1f5b, 0x1f5b},
{0x1f5d, 0x1f5d}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, {0x1fb6, 0x1fbc},
{0x1fbe, 0x1fbe}, {0x1fc2, 0x1fc4}, {0x1fc6, 0x1fcc}, {0x1fd0, 0x1fd3},
{0x1fd6, 0x1fdb}, {0x1fe0, 0x1fec}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffc},
{0x2126, 0x2126}, {0x212a, 0x212b}, {0x212e, 0x212e}, {0x2180, 0x2182},
{0x3041, 0x3094}, {0x30a1, 0x30fa}, {0x3105, 0x312c}, {0xac00, 0xd7a3}};
const xmlChRangeGroup xmlIsBaseCharGroup =
{197, 0, xmlIsBaseChar_srng, (xmlChLRangePtr)0};
static const xmlChSRange xmlIsChar_srng[] = { {0x100, 0xd7ff},
{0xe000, 0xfffd}};
static const xmlChLRange xmlIsChar_lrng[] = { {0x10000, 0x10ffff}};
const xmlChRangeGroup xmlIsCharGroup =
{2, 1, xmlIsChar_srng, xmlIsChar_lrng};
static const xmlChSRange xmlIsCombining_srng[] = { {0x300, 0x345},
{0x360, 0x361}, {0x483, 0x486}, {0x591, 0x5a1}, {0x5a3, 0x5b9},
{0x5bb, 0x5bd}, {0x5bf, 0x5bf}, {0x5c1, 0x5c2}, {0x5c4, 0x5c4},
{0x64b, 0x652}, {0x670, 0x670}, {0x6d6, 0x6dc}, {0x6dd, 0x6df},
{0x6e0, 0x6e4}, {0x6e7, 0x6e8}, {0x6ea, 0x6ed}, {0x901, 0x903},
{0x93c, 0x93c}, {0x93e, 0x94c}, {0x94d, 0x94d}, {0x951, 0x954},
{0x962, 0x963}, {0x981, 0x983}, {0x9bc, 0x9bc}, {0x9be, 0x9be},
{0x9bf, 0x9bf}, {0x9c0, 0x9c4}, {0x9c7, 0x9c8}, {0x9cb, 0x9cd},
{0x9d7, 0x9d7}, {0x9e2, 0x9e3}, {0xa02, 0xa02}, {0xa3c, 0xa3c},
{0xa3e, 0xa3e}, {0xa3f, 0xa3f}, {0xa40, 0xa42}, {0xa47, 0xa48},
{0xa4b, 0xa4d}, {0xa70, 0xa71}, {0xa81, 0xa83}, {0xabc, 0xabc},
{0xabe, 0xac5}, {0xac7, 0xac9}, {0xacb, 0xacd}, {0xb01, 0xb03},
{0xb3c, 0xb3c}, {0xb3e, 0xb43}, {0xb47, 0xb48}, {0xb4b, 0xb4d},
{0xb56, 0xb57}, {0xb82, 0xb83}, {0xbbe, 0xbc2}, {0xbc6, 0xbc8},
{0xbca, 0xbcd}, {0xbd7, 0xbd7}, {0xc01, 0xc03}, {0xc3e, 0xc44},
{0xc46, 0xc48}, {0xc4a, 0xc4d}, {0xc55, 0xc56}, {0xc82, 0xc83},
{0xcbe, 0xcc4}, {0xcc6, 0xcc8}, {0xcca, 0xccd}, {0xcd5, 0xcd6},
{0xd02, 0xd03}, {0xd3e, 0xd43}, {0xd46, 0xd48}, {0xd4a, 0xd4d},
{0xd57, 0xd57}, {0xe31, 0xe31}, {0xe34, 0xe3a}, {0xe47, 0xe4e},
{0xeb1, 0xeb1}, {0xeb4, 0xeb9}, {0xebb, 0xebc}, {0xec8, 0xecd},
{0xf18, 0xf19}, {0xf35, 0xf35}, {0xf37, 0xf37}, {0xf39, 0xf39},
{0xf3e, 0xf3e}, {0xf3f, 0xf3f}, {0xf71, 0xf84}, {0xf86, 0xf8b},
{0xf90, 0xf95}, {0xf97, 0xf97}, {0xf99, 0xfad}, {0xfb1, 0xfb7},
{0xfb9, 0xfb9}, {0x20d0, 0x20dc}, {0x20e1, 0x20e1}, {0x302a, 0x302f},
{0x3099, 0x3099}, {0x309a, 0x309a}};
const xmlChRangeGroup xmlIsCombiningGroup =
{95, 0, xmlIsCombining_srng, (xmlChLRangePtr)0};
static const xmlChSRange xmlIsDigit_srng[] = { {0x660, 0x669},
{0x6f0, 0x6f9}, {0x966, 0x96f}, {0x9e6, 0x9ef}, {0xa66, 0xa6f},
{0xae6, 0xaef}, {0xb66, 0xb6f}, {0xbe7, 0xbef}, {0xc66, 0xc6f},
{0xce6, 0xcef}, {0xd66, 0xd6f}, {0xe50, 0xe59}, {0xed0, 0xed9},
{0xf20, 0xf29}};
const xmlChRangeGroup xmlIsDigitGroup =
{14, 0, xmlIsDigit_srng, (xmlChLRangePtr)0};
static const xmlChSRange xmlIsExtender_srng[] = { {0x2d0, 0x2d0},
{0x2d1, 0x2d1}, {0x387, 0x387}, {0x640, 0x640}, {0xe46, 0xe46},
{0xec6, 0xec6}, {0x3005, 0x3005}, {0x3031, 0x3035}, {0x309d, 0x309e},
{0x30fc, 0x30fe}};
const xmlChRangeGroup xmlIsExtenderGroup =
{10, 0, xmlIsExtender_srng, (xmlChLRangePtr)0};
static const xmlChSRange xmlIsIdeographic_srng[] = { {0x3007, 0x3007},
{0x3021, 0x3029}, {0x4e00, 0x9fa5}};
const xmlChRangeGroup xmlIsIdeographicGroup =
{3, 0, xmlIsIdeographic_srng, (xmlChLRangePtr)0};

View File

@@ -5,9 +5,6 @@
* *
* this module exports interfaces for the character * this module exports interfaces for the character
* range validation APIs * range validation APIs
*
* This file is automatically generated from the cvs source
* definition files using the genChRanges.py Python script
*/ */
#ifndef __XML_CHVALID_H__ #ifndef __XML_CHVALID_H__