1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-30 22:43:14 +03:00

codegen: Consolidate tools for code generation

Move tools, source files and output tables into codegen directory.

Rename some files.

Adjust tools to match modified files. Remove generation date and source
files from output.

Distribute all tools and sources.
This commit is contained in:
Nick Wellnhofer
2025-05-15 17:49:49 +02:00
parent 0d34d690c4
commit 258d870629
18 changed files with 256 additions and 262 deletions

View File

@ -2432,7 +2432,7 @@ htmlCodePointToUtf8(int c, xmlChar *out, int *osize) {
return(out);
}
#include "html5ent.inc"
#include "codegen/html5ent.inc"
#define ENT_F_SEMICOLON 0x80u
#define ENT_F_SUBTABLE 0x40u

View File

@ -154,7 +154,7 @@ testdso_la_LDFLAGS = $(AM_LDFLAGS) \
-module -no-undefined -avoid-version -rpath $(libdir)
rebuild_testapi:
cd $(srcdir) && python3 tools/gentest.py $(abs_builddir)
cd $(srcdir) && python3 codegen/genTestApi.py $(abs_builddir)
testapi_SOURCES=testapi.c
testapi_DEPENDENCIES = $(DEPS)
@ -201,9 +201,18 @@ CLEANFILES = missing.lst runsuite.log runxmlconf.log test.out \
EXTRA_DIST = Copyright libxml2-config.cmake.in autogen.sh \
libxml.h \
html5ent.inc iso8859x.inc \
tools/gentest.py tools/xmlmod.py \
tools/genChRanges.py tools/genEscape.py tools/genUnicode.py \
codegen/charset.inc \
codegen/chvalid.def \
codegen/escape.inc \
codegen/genCharset.py \
codegen/genChRanges.py \
codegen/genEscape.py \
codegen/genHtml5Ent.py \
codegen/genHtml5LibTests.py \
codegen/genTestApi.py \
codegen/genUnicode.py \
codegen/html5ent.inc \
codegen/xmlmod.py \
timsort.h \
README.zOS README.md \
CMakeLists.txt config.h.cmake.in libxml2-config.cmake.cmake.in \

66
codegen/escape.inc Normal file
View File

@ -0,0 +1,66 @@
/*
 * Packed replacement strings referenced by the xmlEscapeTab* and
 * htmlEscapeTab* lookup tables.
 *
 * Each record is one length byte followed by that many characters of
 * replacement text. Records start at these offsets:
 *
 *    0  "&#xFFFD;"     9  "&#9;"     14  "&#10;"    20  "&#13;"
 *   26  "&quot;"      33  "&amp;"    39  "&lt;"     44  "&gt;"
 */
static const char xmlEscapeContent[] = {
8, '&', '#', 'x', 'F', 'F', 'F', 'D', ';', 4, '&', '#',
'9', ';', 5, '&', '#', '1', '0', ';', 5, '&', '#', '1',
'3', ';', 6, '&', 'q', 'u', 'o', 't', ';', 5, '&', 'a',
'm', 'p', ';', 4, '&', 'l', 't', ';', 4, '&', 'g', 't',
';',
};
/*
 * Lookup table indexed by ASCII code (0..127).
 *
 * Entries >= 0 are offsets of a record in xmlEscapeContent; -1 marks a
 * character that has no replacement in this table (how the consumer treats
 * -1 is not visible in this file).
 *
 * In this table CR (13), '&' (38), '<' (60) and '>' (62) have their own
 * replacement. NUL and the remaining control characters other than TAB (9)
 * and LF (10) map to offset 0, the "&#xFFFD;" record.
 */
static const signed char xmlEscapeTab[128] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 20, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-1, -1, -1, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
/*
 * Lookup table indexed by ASCII code (0..127); entries >= 0 are offsets of
 * a record in xmlEscapeContent, -1 marks a character with no replacement
 * in this table.
 *
 * Same contents as xmlEscapeTab except that '"' (34) also has a
 * replacement (offset 26, "&quot;").
 */
static const signed char xmlEscapeTabQuot[128] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 20, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
/*
 * Lookup table indexed by ASCII code (0..127); entries >= 0 are offsets of
 * a record in xmlEscapeContent, -1 marks a character with no replacement
 * in this table.
 *
 * Same contents as xmlEscapeTabQuot except that TAB (9) and LF (10) also
 * have replacements (offsets 9 and 14, "&#9;" and "&#10;").
 */
static const signed char xmlEscapeTabAttr[128] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 14, 0, 0, 20, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
#ifdef LIBXML_HTML_ENABLED
/*
 * HTML lookup table indexed by ASCII code (0..127); entries >= 0 are
 * offsets of a record in xmlEscapeContent, -1 marks a character with no
 * replacement in this table.
 *
 * Only NUL (offset 0), '&' (38), '<' (60) and '>' (62) have entries; unlike
 * the xmlEscapeTab* tables, control characters 1..31 are all -1.
 */
static const signed char htmlEscapeTab[128] = {
0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
/*
 * HTML attribute lookup table, same layout as htmlEscapeTab.
 *
 * Only NUL (offset 0), '"' (34, "&quot;") and '&' (38, "&amp;") have
 * entries; '<' and '>' are -1 here.
 */
static const signed char htmlEscapeTabAttr[128] = {
0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
#endif /* LIBXML_HTML_ENABLED */

View File

@ -16,7 +16,6 @@
#
import sys
import time
#
# A routine to take a list of yes/no (1, 0) values and turn it
@ -41,8 +40,6 @@ def makeRange(lst):
pos = e + 1 # ready to check for next range
return ret
sources = "chvalid.def" # input filename
# minTableSize gives the minimum number of ranges which must be present
# before a 256-byte lookup table is produced. If there are less than this
# number, a macro with inline comparisons is generated
@ -54,9 +51,9 @@ Functs = {}
state = 0
try:
defines = open("chvalid.def", "r")
defines = open("codegen/chvalid.def", "r")
except:
print("Missing chvalid.def, aborting ...")
print("Missing codegen/chvalid.def, aborting ...")
sys.exit(1)
#
@ -202,19 +199,19 @@ except:
print("Failed to open chvalid.c")
sys.exit(1)
date = time.asctime(time.localtime(time.time()))
fkeys = sorted(Functs.keys())
header.write(
"""/*
* Summary: Unicode character range checking
* Description: this module exports interfaces for the character
"""/**
* @file
*
* @brief Unicode character range checking
*
* this module exports interfaces for the character
* range validation APIs
*
* This file is automatically generated from the cvs source
* definition files using the genChRanges.py Python script
*
* Generation date: %s
* Sources: %s
*/
#ifndef __XML_CHVALID_H__
@ -227,6 +224,8 @@ header.write(
extern "C" {
#endif
/** @cond ignore */
/*
* Define our typedefs and structures
*
@ -254,13 +253,27 @@ struct _xmlChRangeGroup {
const xmlChLRange\t*longRange;
};
""");
for f in fkeys:
if len(Functs[f][1]) > 0:
header.write("XMLPUBVAR const xmlChRangeGroup %sGroup;\n" % f)
if max(Functs[f][0]) > 0: # only check if at least one entry
rangeTable = makeRange(Functs[f][0])
numRanges = len(rangeTable)
if numRanges >= minTableSize: # table is worthwhile
header.write("XMLPUBVAR const unsigned char %s_tab[256];\n" % f)
header.write("""
/**
* Range checking routine
*/
XMLPUBFUN int
\t\txmlCharInRange(unsigned int val, const xmlChRangeGroup *group);
""" % (date, sources));
/** @endcond */
""");
output.write(
"""/*
* chvalid.c:\tthis module implements the character range
@ -268,9 +281,6 @@ output.write(
*
* This file is automatically generated from the cvs source
* definition files using the genChRanges.py Python script
*
* Generation date: %s
* Sources: %s
*/
#define IN_LIBXML
@ -287,7 +297,7 @@ output.write(
* allowed.
*
*/
""" % (date, sources));
""");
#
# Now output the generated data.
@ -298,8 +308,6 @@ output.write(
# compares, otherwise we output a 256-byte table and a macro to use it.
#
fkeys = sorted(Functs.keys())
for f in fkeys:
# First we convert the specified single-byte values into a group of ranges.
@ -310,15 +318,13 @@ for f in fkeys:
rangeTable = makeRange(Functs[f][0])
numRanges = len(rangeTable)
if numRanges >= minTableSize: # table is worthwhile
header.write("XMLPUBVAR const unsigned char %s_tab[256];\n" % f)
header.write("""
/**
* %s_ch:
* @c: char to validate
*
* Automatically generated by genChRanges.py
*
* @param c char to validate
*/
""" % f)
""")
header.write("#define %s_ch(c)\t(%s_tab[(c)])\n" % (f, f))
# write the constant data to the code file
@ -343,12 +349,11 @@ for f in fkeys:
header.write("""
/**
* %s_ch:
* @c: char to validate
*
* Automatically generated by genChRanges.py
*
* @param c char to validate
*/
""" % f)
""")
# okay, I'm tired of the messy lineup - let's automate it!
pline = "#define %s_ch(c)" % f
# 'ntab' is number of tabs needed to position to col. 33 from name end
@ -378,12 +383,11 @@ for f in fkeys:
header.write("""
/**
* %sQ:
* @c: char to validate
*
* Automatically generated by genChRanges.py
*
* @param c char to validate
*/
""" % f)
""")
pline = "#define %sQ(c)" % f
ntab = 4 - (len(pline)) // 8
if ntab < 0:
@ -403,7 +407,7 @@ for f in fkeys:
header.write(" 0)\n\n")
else:
if numRanges >= minTableSize:
header.write(" \\\n\t\t\t\t xmlCharInRange((c), &%sGroup))\n\n" % f)
header.write(" \\\n\t\t\t\t xmlCharInRange((c), &%sGroup))\n" % f)
else: # if < minTableSize, generate inline code
firstFlag = 1
for rg in Functs[f][1]:
@ -417,14 +421,10 @@ for f in fkeys:
else: # value range
pline += "((0x%x <= (c)) &&" % rg[0]
pline += " ((c) <= 0x%x))" % rg[1]
pline += "))\n\n"
pline += "))\n"
header.write(pline)
if len(Functs[f][1]) > 0:
header.write("XMLPUBVAR const xmlChRangeGroup %sGroup;\n" % f)
#
# Next we do the unicode ranges
#
@ -477,14 +477,12 @@ for f in fkeys:
output.write(
"""
/**
* xmlCharInRange:
* @val: character to be validated
* @rptr: pointer to range to be used to validate
*
* Does a binary search of the range table to determine if char
* is valid
*
* Returns: true if character valid, false otherwise
* @param val character to be validated
* @param rptr pointer to range to be used to validate
* @returns true if character valid, false otherwise
*/
int
xmlCharInRange (unsigned int val, const xmlChRangeGroup *rptr) {
@ -542,18 +540,16 @@ xmlCharInRange (unsigned int val, const xmlChRangeGroup *rptr) {
for f in fkeys:
output.write("""
/**
* %s:
* @ch: character to validate
*
* This function is DEPRECATED.
""" % f);
""");
if max(Functs[f][0]) > 0:
output.write(" * Use %s_ch or %sQ instead" % (f, f))
output.write(" * Use %s_ch() or %sQ() instead" % (f, f))
else:
output.write(" * Use %sQ instead" % f)
output.write(" * Use %sQ() instead" % f)
output.write("""
*
* Returns true if argument valid, false otherwise
* @param ch character to validate
* @returns true if argument valid, false otherwise
*/
""")
output.write("int\n%s(unsigned int ch) {\n return(%sQ(ch));\n}\n\n" % (f,f))

View File

@ -73,7 +73,7 @@ def genTranscodeTable(out, name, chars):
printHexTable(out, 2, data)
out.write('};\n\n')
out = open(f'iso8859x.inc', 'w')
out = open(f'codegen/charset.inc', 'w')
out.write('''/*
* Lookup tables for transcoding of 8-bit character sets.

69
codegen/genEscape.py Executable file
View File

@ -0,0 +1,69 @@
#!/usr/bin/env python3
# Replacement table: [character, replacement text].
#
# Order matters: gen_content() lays the replacements out in this order and
# records where each one starts.
#
# The first entry has no source character, so it gets no slot in `offset`;
# its replacement is emitted first and therefore sits at position 0 of
# xmlEscapeContent. gen_tab() writes 0 for NUL and, in the XML tables, for
# control characters other than TAB and LF that are not in the escape set.
entities = [
[ '', '&#xFFFD;' ],
[ '\t', '&#9;' ],
[ '\n', '&#10;' ],
[ '\r', '&#13;' ],
[ '"', '&quot;' ],
[ '&', '&amp;' ],
[ '<', '&lt;' ],
[ '>', '&gt;' ],
]
# offset[c] is the position in xmlEscapeContent of the record for ASCII code
# c. It is filled in by gen_content(); None means c has no entry in
# `entities`.
offset = [ None ] * 128
def gen_content(out):
    """Write the xmlEscapeContent array and record where each entity starts.

    Every entry of `entities` becomes one record: a length cell followed by
    one character-literal cell per character of the replacement text. For
    entries that have a source character, the index of the record's length
    cell is stored in `offset[ord(char)]`.

    Cells are laid out twelve per line.

    out -- writable text stream receiving the C source.
    """
    cells = []
    for char, repl in entities:
        if char:
            # The record for this character starts at the next free cell.
            offset[ord(char)] = len(cells)
        cells.append('%3d,' % len(repl))
        cells.extend("'%s'," % c for c in repl)

    body = ''.join(
        ('\n ' if idx % 12 == 0 else ' ') + cell
        for idx, cell in enumerate(cells)
    )
    out.write('static const char xmlEscapeContent[] = {%s\n};\n\n' % body)
def gen_tab(out, name, escape, is_xml):
    """Write one 128-entry lookup table indexed by ASCII code.

    out    -- writable text stream receiving the C source.
    name   -- identifier of the generated C array.
    escape -- string of the characters that get their own replacement; their
              table value is the position recorded in `offset`.
    is_xml -- when true, control characters other than TAB and LF that are
              not in `escape` get value 0 instead of -1.

    NUL always gets value 0 unless it is in `escape`; every other character
    gets -1. Values are laid out sixteen per line.
    """
    def value_for(code):
        if chr(code) in escape:
            return offset[code]
        if code == 0:
            return 0
        # XML tables: remaining control characters share the record at 0.
        if is_xml and code < 32 and code not in (9, 10):
            return 0
        return -1

    body = ''.join(
        ('\n ' if code % 16 == 0 else ' ') + '%2d,' % value_for(code)
        for code in range(0x80)
    )
    out.write('static const signed char %s[128] = {%s\n};\n\n' % (name, body))
# Generate codegen/escape.inc: the packed replacement strings first, then one
# lookup table per escaping mode. The HTML tables are wrapped in a
# LIBXML_HTML_ENABLED guard. The output path is relative to the current
# working directory.
with open('codegen/escape.inc', 'w') as out:
    gen_content(out)

    for tab_name, tab_escape in (
        ('xmlEscapeTab', '\r&<>'),
        ('xmlEscapeTabQuot', '\r"&<>'),
        ('xmlEscapeTabAttr', '\t\n\r"&<>'),
    ):
        gen_tab(out, tab_name, tab_escape, True)

    out.write('#ifdef LIBXML_HTML_ENABLED\n\n')
    for tab_name, tab_escape in (
        ('htmlEscapeTab', '&<>'),
        ('htmlEscapeTabAttr', '"&'),
    ):
        gen_tab(out, tab_name, tab_escape, False)
    out.write('#endif /* LIBXML_HTML_ENABLED */\n')

View File

@ -162,8 +162,9 @@ def gen_table(ctype, cname, values, fmt, elems_per_line):
else: r += ' '
r += fmt % values[i]
return f'static const {ctype} {cname}[{count}] = {{{r}\n}};\n'
return f'static const {ctype} {cname}[{count}] = {{{r}\n}};\n\n'
print(gen_table('unsigned char', 'htmlEntAlpha', alpha, '%3d', 15))
print(gen_table('unsigned short', 'htmlEntValues', values, '%5d', 10))
print(gen_table('unsigned char', 'htmlEntStrings', strings, '%3s', 15))
with open('codegen/html5ent.inc', 'w') as out:
out.write(gen_table('unsigned char', 'htmlEntAlpha', alpha, '%3d', 15))
out.write(gen_table('unsigned short', 'htmlEntValues', values, '%5d', 10))
out.write(gen_table('unsigned char', 'htmlEntStrings', strings, '%3s', 15))

View File

68
tools/gentest.py → codegen/genTestApi.py Normal file → Executable file
View File

@ -13,40 +13,40 @@ import xmlmod
# Globals
dtors = {
'htmlDocPtr': 'xmlFreeDoc',
'htmlParserCtxtPtr': 'htmlFreeParserCtxt',
'xmlAutomataPtr': 'xmlFreeAutomata',
'xmlBufferPtr': 'xmlBufferFree',
'xmlCatalogPtr': 'xmlFreeCatalog',
'htmlDoc *': 'xmlFreeDoc',
'htmlParserCtxt *': 'htmlFreeParserCtxt',
'xmlAutomata *': 'xmlFreeAutomata',
'xmlBuffer *': 'xmlBufferFree',
'xmlCatalog *': 'xmlFreeCatalog',
'xmlChar *': 'xmlFree',
'xmlDOMWrapCtxtPtr': 'xmlDOMWrapFreeCtxt',
'xmlDictPtr': 'xmlDictFree',
'xmlDocPtr': 'xmlFreeDoc',
'xmlDtdPtr': 'xmlFreeDtd',
'xmlEntitiesTablePtr': 'xmlFreeEntitiesTable',
'xmlEnumerationPtr': 'xmlFreeEnumeration',
'xmlListPtr': 'xmlListDelete',
'xmlModulePtr': 'xmlModuleFree',
'xmlMutexPtr': 'xmlFreeMutex',
'xmlNodePtr': 'xmlFreeNode',
'xmlNodeSetPtr': 'xmlXPathFreeNodeSet',
'xmlNsPtr': 'xmlFreeNs',
'xmlOutputBufferPtr': 'xmlOutputBufferClose',
'xmlParserCtxtPtr': 'xmlFreeParserCtxt',
'xmlParserInputBufferPtr': 'xmlFreeParserInputBuffer',
'xmlParserInputPtr': 'xmlFreeInputStream',
'xmlRMutexPtr': 'xmlFreeRMutex',
'xmlRelaxNGValidCtxtPtr': 'xmlRelaxNGFreeValidCtxt',
'xmlSaveCtxtPtr': 'xmlSaveClose',
'xmlSchemaFacetPtr': 'xmlSchemaFreeFacet',
'xmlSchemaValPtr': 'xmlSchemaFreeValue',
'xmlSchemaValidCtxtPtr': 'xmlSchemaFreeValidCtxt',
'xmlTextWriterPtr': 'xmlFreeTextWriter',
'xmlURIPtr': 'xmlFreeURI',
'xmlValidCtxtPtr': 'xmlFreeValidCtxt',
'xmlXPathContextPtr': 'xmlXPathFreeContext',
'xmlXPathParserContextPtr': 'xmlXPathFreeParserContext',
'xmlXPathObjectPtr': 'xmlXPathFreeObject',
'xmlDOMWrapCtxt *': 'xmlDOMWrapFreeCtxt',
'xmlDict *': 'xmlDictFree',
'xmlDoc *': 'xmlFreeDoc',
'xmlDtd *': 'xmlFreeDtd',
'xmlEntitiesTable *': 'xmlFreeEntitiesTable',
'xmlEnumeration *': 'xmlFreeEnumeration',
'xmlList *': 'xmlListDelete',
'xmlModule *': 'xmlModuleFree',
'xmlMutex *': 'xmlFreeMutex',
'xmlNode *': 'xmlFreeNode',
'xmlNodeSet *': 'xmlXPathFreeNodeSet',
'xmlNs *': 'xmlFreeNs',
'xmlOutputBuffer *': 'xmlOutputBufferClose',
'xmlParserCtxt *': 'xmlFreeParserCtxt',
'xmlParserInputBuffer *': 'xmlFreeParserInputBuffer',
'xmlParserInput *': 'xmlFreeInputStream',
'xmlRMutex *': 'xmlFreeRMutex',
'xmlRelaxNGValidCtxt *': 'xmlRelaxNGFreeValidCtxt',
'xmlSaveCtxt *': 'xmlSaveClose',
'xmlSchemaFacet *': 'xmlSchemaFreeFacet',
'xmlSchemaVal *': 'xmlSchemaFreeValue',
'xmlSchemaValidCtxt *': 'xmlSchemaFreeValidCtxt',
'xmlTextWriter *': 'xmlFreeTextWriter',
'xmlURI *': 'xmlFreeURI',
'xmlValidCtxt *': 'xmlFreeValidCtxt',
'xmlXPathContext *': 'xmlXPathFreeContext',
'xmlXPathParserContext *': 'xmlXPathFreeParserContext',
'xmlXPathObject *': 'xmlXPathFreeObject',
}
blockList = {
@ -194,7 +194,7 @@ for file in os.listdir(xmlDocDir):
dtor = dtors.get(rtype)
if dtor is not None:
code = f'{dtor}({code})'
elif rtype == 'xmlHashTablePtr':
elif rtype == 'xmlHashTable *':
code = f'xmlHashFree({code}, NULL)'
mmfunc[name] = f' {code};\n'

View File

@ -11,10 +11,8 @@
#
import sys
import string
import time
webpage = "http://www.unicode.org/Public/4.0-Update1/UCD-4.0.1.html"
sources = "Blocks-4.0.1.txt UnicodeData-4.0.1.txt"
#
# blockAliases is a small hack - it is used for mapping block names which
@ -31,7 +29,8 @@ blockAliases.append("PrivateUse:PrivateUseArea,SupplementaryPrivateUseArea-A," +
# number, inline comparisons are generated
minTableSize = 8
(blockfile, catfile) = sources.split()
blockfile = "Blocks-4.0.1.txt"
catfile = "UnicodeData-4.0.1.txt"
#
@ -197,8 +196,6 @@ except:
print("Failed to open xmlunicode.c")
sys.exit(1)
date = time.asctime(time.localtime(time.time()))
output.write(
"""/*
* xmlunicode.c: this module implements the Unicode character APIs
@ -207,9 +204,6 @@ output.write(
* UCS description files of the Unicode Character Database
* %s
* using the genUnicode.py Python script.
*
* Generation date: %s
* Sources: %s
*/
#define IN_LIBXML
@ -238,7 +232,7 @@ typedef struct {
static xmlIntFunc *xmlUnicodeLookup(const xmlUnicodeNameTable *tptr, const char *tname);
""" % (webpage, date, sources));
""" % webpage);
#
# For any categories with more than minTableSize ranges we generate
@ -281,13 +275,11 @@ for name in ckeys:
output.write(
"""/**
* xmlUnicodeLookup:
* @tptr: pointer to the name table
* @tname: name to be found
*
* binary table lookup for user-supplied name
*
* Returns pointer to range function if found, otherwise NULL
* @param tptr pointer to the name table
* @param tname name to be found
* @returns pointer to range function if found, otherwise NULL
*/
static xmlIntFunc
*xmlUnicodeLookup(const xmlUnicodeNameTable *tptr, const char *tname) {
@ -316,10 +308,10 @@ static xmlIntFunc
for block in bkeys:
name = block.replace('-', '')
output.write("/**\n * xmlUCSIs%s:\n * @code: UCS code point\n" % (name))
output.write(" *\n * Check whether the character is part of %s UCS Block\n"%
output.write("/**\n * Check whether the character is part of %s UCS Block\n"%
(block))
output.write(" *\n * Returns 1 if true 0 otherwise\n */\n");
output.write(" *\n * @param code UCS code point\n")
output.write(" * @returns 1 if true 0 otherwise\n */\n");
output.write("static int\nxmlUCSIs%s(int code) {\n return(" % name)
flag = 0
for (start, end) in BlockNames[block]:
@ -332,10 +324,10 @@ for block in bkeys:
for name in ckeys:
ranges = Categories[name]
output.write("/**\n * xmlUCSIsCat%s:\n * @code: UCS code point\n" % (name))
output.write(" *\n * Check whether the character is part of %s UCS Category\n"%
output.write("/**\n * Check whether the character is part of %s UCS Category\n"%
(name))
output.write(" *\n * Returns 1 if true 0 otherwise\n */\n");
output.write(" *\n * @param code UCS code point\n")
output.write(" * @returns 1 if true 0 otherwise\n */\n");
output.write("int\nxmlUCSIsCat%s(int code) {\n" % name)
if len(Categories[name]) > minTableSize:
output.write(" return(xmlCharInRange((unsigned int)code, &xml%sG)"
@ -385,13 +377,11 @@ static const xmlUnicodeNameTable xmlUnicodeBlockTbl = {xmlUnicodeBlocks, %s};
static const xmlUnicodeNameTable xmlUnicodeCatTbl = {xmlUnicodeCats, %s};
/**
* xmlUCSIsBlock:
* @code: UCS code point
* @block: UCS block name
*
* Check whether the character is part of the UCS Block
*
* Returns 1 if true, 0 if false and -1 on unknown block
* @param code UCS code point
* @param block UCS block name
* @returns 1 if true, 0 if false and -1 on unknown block
*/
int
xmlUCSIsBlock(int code, const char *block) {
@ -404,13 +394,11 @@ xmlUCSIsBlock(int code, const char *block) {
}
/**
* xmlUCSIsCat:
* @code: UCS code point
* @cat: UCS Category name
*
* Check whether the character is part of the UCS Category
*
* Returns 1 if true, 0 if false and -1 on unknown category
* @param code UCS code point
* @param cat UCS Category name
* @returns 1 if true, 0 if false and -1 on unknown category
*/
int
xmlUCSIsCat(int code, const char *cat) {

View File

@ -279,7 +279,7 @@ UTF8ToHtmlWrapper(void *vctxt, unsigned char *out, int *outlen,
#define UTF8ToHtmlWrapper NULL
#endif
#include "iso8859x.inc"
#include "codegen/charset.inc"
static xmlCharEncError
EightBitToUtf8(void *vctxt, unsigned char* out, int *outlen,

View File

@ -350,7 +350,7 @@ skipped_types = {
import os
import xml.etree.ElementTree as etree
sys.path.append(srcPref + '/../tools')
sys.path.append(srcPref + '/../codegen')
import xmlmod
xmlDocDir = dstPref + '/../doc/xml'

View File

@ -1,66 +0,0 @@
#!/usr/bin/env python3
entities = [
[ '', '&#xFFFD;' ],
[ '\t', '&#9;' ],
[ '\n', '&#10;' ],
[ '\r', '&#13;' ],
[ '"', '&quot;' ],
[ '&', '&amp;' ],
[ '<', '&lt;' ],
[ '>', '&gt;' ],
]
### xmlEscapeContent
offset = [ None ] * 128
pos = 0
r = ''
for rec in entities:
char, repl = rec
if char:
offset[ord(char)] = pos
if pos % 12 == 0: r += '\n '
else: r += ' '
r += '%3d,' % len(repl)
pos += 1
for c in repl:
if pos % 12 == 0: r += '\n '
else: r += ' '
r += "'%s'," % c
pos += 1
print('static const char xmlEscapeContent[] = {%s\n};\n' % r)
def gen_tab(name, escape, is_xml):
r = ''
for i in range(0x80):
if chr(i) in escape:
v = offset[i]
elif i == 0:
v = 0
elif is_xml and i < 32 and i != 9 and i != 10:
v = 0
else:
v = -1
if i % 16 == 0: r += '\n '
else: r += ' '
r += '%2d,' % v
print('static const signed char %s[128] = {%s\n};\n' % (name, r))
gen_tab('xmlEscapeTab', '\r&<>', True)
gen_tab('xmlEscapeTabQuot', '\r"&<>', True)
gen_tab('xmlEscapeTabAttr', '\t\n\r"&<>', True)
print('#ifdef LIBXML_HTML_ENABLED\n')
gen_tab('htmlEscapeTab', '&<>', False)
gen_tab('htmlEscapeTabAttr', '"&', False)
print('#endif /* LIBXML_HTML_ENABLED */')

71
xmlIO.c
View File

@ -159,76 +159,7 @@ xmlSerializeHexCharRef(char *buf, int val) {
return(out - buf);
}
/*
* Tables generated with tools/genEscape.py
*/
static const char xmlEscapeContent[] = {
8, '&', '#', 'x', 'F', 'F', 'F', 'D', ';', 4, '&', '#',
'9', ';', 5, '&', '#', '1', '0', ';', 5, '&', '#', '1',
'3', ';', 6, '&', 'q', 'u', 'o', 't', ';', 5, '&', 'a',
'm', 'p', ';', 4, '&', 'l', 't', ';', 4, '&', 'g', 't',
';',
};
static const signed char xmlEscapeTab[128] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 20, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-1, -1, -1, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
static const signed char xmlEscapeTabQuot[128] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 20, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
static const signed char xmlEscapeTabAttr[128] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 14, 0, 0, 20, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
#ifdef LIBXML_HTML_ENABLED
static const signed char htmlEscapeTab[128] = {
0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
static const signed char htmlEscapeTabAttr[128] = {
0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
#endif /* LIBXML_HTML_ENABLED */
#include "codegen/escape.inc"
/*
* @param text input text