1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-29 11:41:22 +03:00

codegen: Consolidate tools for code generation

Move tools, source files and output tables into codegen directory.

Rename some files.

Adjust tools to match modified files. Remove generation date and source
files from output.

Distribute all tools and sources.
This commit is contained in:
Nick Wellnhofer
2025-05-15 17:49:49 +02:00
parent 0d34d690c4
commit 258d870629
18 changed files with 256 additions and 262 deletions

View File

@ -2432,7 +2432,7 @@ htmlCodePointToUtf8(int c, xmlChar *out, int *osize) {
return(out); return(out);
} }
#include "html5ent.inc" #include "codegen/html5ent.inc"
#define ENT_F_SEMICOLON 0x80u #define ENT_F_SEMICOLON 0x80u
#define ENT_F_SUBTABLE 0x40u #define ENT_F_SUBTABLE 0x40u

View File

@ -154,7 +154,7 @@ testdso_la_LDFLAGS = $(AM_LDFLAGS) \
-module -no-undefined -avoid-version -rpath $(libdir) -module -no-undefined -avoid-version -rpath $(libdir)
rebuild_testapi: rebuild_testapi:
cd $(srcdir) && python3 tools/gentest.py $(abs_builddir) cd $(srcdir) && python3 codegen/genTestApi.py $(abs_builddir)
testapi_SOURCES=testapi.c testapi_SOURCES=testapi.c
testapi_DEPENDENCIES = $(DEPS) testapi_DEPENDENCIES = $(DEPS)
@ -201,9 +201,18 @@ CLEANFILES = missing.lst runsuite.log runxmlconf.log test.out \
EXTRA_DIST = Copyright libxml2-config.cmake.in autogen.sh \ EXTRA_DIST = Copyright libxml2-config.cmake.in autogen.sh \
libxml.h \ libxml.h \
html5ent.inc iso8859x.inc \ codegen/charset.inc \
tools/gentest.py tools/xmlmod.py \ codegen/chvalid.def \
tools/genChRanges.py tools/genEscape.py tools/genUnicode.py \ codegen/escape.inc \
codegen/genCharset.py \
codegen/genChRanges.py \
codegen/genEscape.py \
codegen/genHtml5Ent.py \
codegen/genHtml5LibTests.py \
codegen/genTestApi.py \
codegen/genUnicode.py \
codegen/html5ent.inc \
codegen/xmlmod.py \
timsort.h \ timsort.h \
README.zOS README.md \ README.zOS README.md \
CMakeLists.txt config.h.cmake.in libxml2-config.cmake.cmake.in \ CMakeLists.txt config.h.cmake.in libxml2-config.cmake.cmake.in \

66
codegen/escape.inc Normal file
View File

@ -0,0 +1,66 @@
/*
 * Packed replacement strings referenced by the escape tables below.
 *
 * Each record is a length byte followed by that many characters of
 * replacement text. Records start at these offsets:
 *    0 "&#xFFFD;"    9 "&#9;"    14 "&#10;"   20 "&#13;"
 *   26 "&quot;"     33 "&amp;"   39 "&lt;"    44 "&gt;"
 */
static const char xmlEscapeContent[] = {
8, '&', '#', 'x', 'F', 'F', 'F', 'D', ';', 4, '&', '#',
'9', ';', 5, '&', '#', '1', '0', ';', 5, '&', '#', '1',
'3', ';', 6, '&', 'q', 'u', 'o', 't', ';', 5, '&', 'a',
'm', 'p', ';', 4, '&', 'l', 't', ';', 4, '&', 'g', 't',
';',
};
/*
 * Lookup table indexed by ASCII code (0-127).
 *
 * A non-negative entry is the offset of a record in xmlEscapeContent;
 * -1 marks a character that has no replacement record (presumably it
 * is written unchanged; the consumer is not visible here).
 *
 * In this table '\r' (20), '&' (33), '<' (39) and '>' (44) map to
 * their own records. NUL and every other code below 0x20 except tab
 * and newline map to offset 0, the "&#xFFFD;" record.
 */
static const signed char xmlEscapeTab[128] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 20, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-1, -1, -1, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
/*
 * Same layout as xmlEscapeTab (index = ASCII code, value = offset into
 * xmlEscapeContent or -1), with one additional mapping: the double
 * quote '"' (index 34) points at offset 26, the "&quot;" record.
 */
static const signed char xmlEscapeTabQuot[128] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 20, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
/*
 * Same layout as xmlEscapeTabQuot (index = ASCII code, value = offset
 * into xmlEscapeContent or -1), with two additional mappings: tab
 * (index 9) points at offset 9, the "&#9;" record, and newline
 * (index 10) points at offset 14, the "&#10;" record.
 */
static const signed char xmlEscapeTabAttr[128] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 14, 0, 0, 20, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
#ifdef LIBXML_HTML_ENABLED
/*
 * HTML lookup table indexed by ASCII code (0-127); values are offsets
 * into xmlEscapeContent, or -1 for characters without a replacement
 * record.
 *
 * Only NUL (offset 0, "&#xFFFD;"), '&' (33), '<' (39) and '>' (44)
 * have records. Unlike the XML tables, the control characters
 * 0x01-0x1F are all -1 here.
 */
static const signed char htmlEscapeTab[128] = {
0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
/*
 * HTML attribute lookup table indexed by ASCII code (0-127); values
 * are offsets into xmlEscapeContent, or -1 for characters without a
 * replacement record.
 *
 * Only NUL (offset 0, "&#xFFFD;"), '"' (26) and '&' (33) have records;
 * '<' and '>' are -1 in this table.
 */
static const signed char htmlEscapeTabAttr[128] = {
0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
#endif /* LIBXML_HTML_ENABLED */

View File

@ -16,7 +16,6 @@
# #
import sys import sys
import time
# #
# A routine to take a list of yes/no (1, 0) values and turn it # A routine to take a list of yes/no (1, 0) values and turn it
@ -41,8 +40,6 @@ def makeRange(lst):
pos = e + 1 # ready to check for next range pos = e + 1 # ready to check for next range
return ret return ret
sources = "chvalid.def" # input filename
# minTableSize gives the minimum number of ranges which must be present # minTableSize gives the minimum number of ranges which must be present
# before a 256-byte lookup table is produced. If there are less than this # before a 256-byte lookup table is produced. If there are less than this
# number, a macro with inline comparisons is generated # number, a macro with inline comparisons is generated
@ -54,9 +51,9 @@ Functs = {}
state = 0 state = 0
try: try:
defines = open("chvalid.def", "r") defines = open("codegen/chvalid.def", "r")
except: except:
print("Missing chvalid.def, aborting ...") print("Missing codegen/chvalid.def, aborting ...")
sys.exit(1) sys.exit(1)
# #
@ -202,19 +199,19 @@ except:
print("Failed to open chvalid.c") print("Failed to open chvalid.c")
sys.exit(1) sys.exit(1)
date = time.asctime(time.localtime(time.time())) fkeys = sorted(Functs.keys())
header.write( header.write(
"""/* """/**
* Summary: Unicode character range checking * @file
* Description: this module exports interfaces for the character *
* @brief Unicode character range checking
*
* this module exports interfaces for the character
* range validation APIs * range validation APIs
* *
* This file is automatically generated from the cvs source * This file is automatically generated from the cvs source
* definition files using the genChRanges.py Python script * definition files using the genChRanges.py Python script
*
* Generation date: %s
* Sources: %s
*/ */
#ifndef __XML_CHVALID_H__ #ifndef __XML_CHVALID_H__
@ -227,6 +224,8 @@ header.write(
extern "C" { extern "C" {
#endif #endif
/** @cond ignore */
/* /*
* Define our typedefs and structures * Define our typedefs and structures
* *
@ -254,13 +253,27 @@ struct _xmlChRangeGroup {
const xmlChLRange\t*longRange; const xmlChLRange\t*longRange;
}; };
""");
for f in fkeys:
if len(Functs[f][1]) > 0:
header.write("XMLPUBVAR const xmlChRangeGroup %sGroup;\n" % f)
if max(Functs[f][0]) > 0: # only check if at least one entry
rangeTable = makeRange(Functs[f][0])
numRanges = len(rangeTable)
if numRanges >= minTableSize: # table is worthwhile
header.write("XMLPUBVAR const unsigned char %s_tab[256];\n" % f)
header.write("""
/** /**
* Range checking routine * Range checking routine
*/ */
XMLPUBFUN int XMLPUBFUN int
\t\txmlCharInRange(unsigned int val, const xmlChRangeGroup *group); \t\txmlCharInRange(unsigned int val, const xmlChRangeGroup *group);
""" % (date, sources)); /** @endcond */
""");
output.write( output.write(
"""/* """/*
* chvalid.c:\tthis module implements the character range * chvalid.c:\tthis module implements the character range
@ -268,9 +281,6 @@ output.write(
* *
* This file is automatically generated from the cvs source * This file is automatically generated from the cvs source
* definition files using the genChRanges.py Python script * definition files using the genChRanges.py Python script
*
* Generation date: %s
* Sources: %s
*/ */
#define IN_LIBXML #define IN_LIBXML
@ -287,7 +297,7 @@ output.write(
* allowed. * allowed.
* *
*/ */
""" % (date, sources)); """);
# #
# Now output the generated data. # Now output the generated data.
@ -298,8 +308,6 @@ output.write(
# compares, otherwise we output a 256-byte table and a macro to use it. # compares, otherwise we output a 256-byte table and a macro to use it.
# #
fkeys = sorted(Functs.keys())
for f in fkeys: for f in fkeys:
# First we convert the specified single-byte values into a group of ranges. # First we convert the specified single-byte values into a group of ranges.
@ -310,15 +318,13 @@ for f in fkeys:
rangeTable = makeRange(Functs[f][0]) rangeTable = makeRange(Functs[f][0])
numRanges = len(rangeTable) numRanges = len(rangeTable)
if numRanges >= minTableSize: # table is worthwhile if numRanges >= minTableSize: # table is worthwhile
header.write("XMLPUBVAR const unsigned char %s_tab[256];\n" % f)
header.write(""" header.write("""
/** /**
* %s_ch:
* @c: char to validate
*
* Automatically generated by genChRanges.py * Automatically generated by genChRanges.py
*
* @param c char to validate
*/ */
""" % f) """)
header.write("#define %s_ch(c)\t(%s_tab[(c)])\n" % (f, f)) header.write("#define %s_ch(c)\t(%s_tab[(c)])\n" % (f, f))
# write the constant data to the code file # write the constant data to the code file
@ -343,12 +349,11 @@ for f in fkeys:
header.write(""" header.write("""
/** /**
* %s_ch:
* @c: char to validate
*
* Automatically generated by genChRanges.py * Automatically generated by genChRanges.py
*
* @param c char to validate
*/ */
""" % f) """)
# okay, I'm tired of the messy lineup - let's automate it! # okay, I'm tired of the messy lineup - let's automate it!
pline = "#define %s_ch(c)" % f pline = "#define %s_ch(c)" % f
# 'ntab' is number of tabs needed to position to col. 33 from name end # 'ntab' is number of tabs needed to position to col. 33 from name end
@ -378,12 +383,11 @@ for f in fkeys:
header.write(""" header.write("""
/** /**
* %sQ:
* @c: char to validate
*
* Automatically generated by genChRanges.py * Automatically generated by genChRanges.py
*
* @param c char to validate
*/ */
""" % f) """)
pline = "#define %sQ(c)" % f pline = "#define %sQ(c)" % f
ntab = 4 - (len(pline)) // 8 ntab = 4 - (len(pline)) // 8
if ntab < 0: if ntab < 0:
@ -403,7 +407,7 @@ for f in fkeys:
header.write(" 0)\n\n") header.write(" 0)\n\n")
else: else:
if numRanges >= minTableSize: if numRanges >= minTableSize:
header.write(" \\\n\t\t\t\t xmlCharInRange((c), &%sGroup))\n\n" % f) header.write(" \\\n\t\t\t\t xmlCharInRange((c), &%sGroup))\n" % f)
else: # if < minTableSize, generate inline code else: # if < minTableSize, generate inline code
firstFlag = 1 firstFlag = 1
for rg in Functs[f][1]: for rg in Functs[f][1]:
@ -417,14 +421,10 @@ for f in fkeys:
else: # value range else: # value range
pline += "((0x%x <= (c)) &&" % rg[0] pline += "((0x%x <= (c)) &&" % rg[0]
pline += " ((c) <= 0x%x))" % rg[1] pline += " ((c) <= 0x%x))" % rg[1]
pline += "))\n\n" pline += "))\n"
header.write(pline) header.write(pline)
if len(Functs[f][1]) > 0:
header.write("XMLPUBVAR const xmlChRangeGroup %sGroup;\n" % f)
# #
# Next we do the unicode ranges # Next we do the unicode ranges
# #
@ -477,14 +477,12 @@ for f in fkeys:
output.write( output.write(
""" """
/** /**
* xmlCharInRange:
* @val: character to be validated
* @rptr: pointer to range to be used to validate
*
* Does a binary search of the range table to determine if char * Does a binary search of the range table to determine if char
* is valid * is valid
* *
* Returns: true if character valid, false otherwise * @param val character to be validated
* @param rptr pointer to range to be used to validate
* @returns true if character valid, false otherwise
*/ */
int int
xmlCharInRange (unsigned int val, const xmlChRangeGroup *rptr) { xmlCharInRange (unsigned int val, const xmlChRangeGroup *rptr) {
@ -542,18 +540,16 @@ xmlCharInRange (unsigned int val, const xmlChRangeGroup *rptr) {
for f in fkeys: for f in fkeys:
output.write(""" output.write("""
/** /**
* %s:
* @ch: character to validate
*
* This function is DEPRECATED. * This function is DEPRECATED.
""" % f); """);
if max(Functs[f][0]) > 0: if max(Functs[f][0]) > 0:
output.write(" * Use %s_ch or %sQ instead" % (f, f)) output.write(" * Use %s_ch() or %sQ() instead" % (f, f))
else: else:
output.write(" * Use %sQ instead" % f) output.write(" * Use %sQ() instead" % f)
output.write(""" output.write("""
* *
* Returns true if argument valid, false otherwise * @param ch character to validate
* @returns true if argument valid, false otherwise
*/ */
""") """)
output.write("int\n%s(unsigned int ch) {\n return(%sQ(ch));\n}\n\n" % (f,f)) output.write("int\n%s(unsigned int ch) {\n return(%sQ(ch));\n}\n\n" % (f,f))

View File

@ -73,7 +73,7 @@ def genTranscodeTable(out, name, chars):
printHexTable(out, 2, data) printHexTable(out, 2, data)
out.write('};\n\n') out.write('};\n\n')
out = open(f'iso8859x.inc', 'w') out = open(f'codegen/charset.inc', 'w')
out.write('''/* out.write('''/*
* Lookup tables for transcoding of 8-bit character sets. * Lookup tables for transcoding of 8-bit character sets.

69
codegen/genEscape.py Executable file
View File

@ -0,0 +1,69 @@
#!/usr/bin/env python3
# Each record is [character, replacement text]. The first record has an
# empty character, so gen_content() never enters it into `offset`; its
# replacement simply occupies position 0 of the generated content.
entities = [
[ '', '&#xFFFD;' ],
[ '\t', '&#9;' ],
[ '\n', '&#10;' ],
[ '\r', '&#13;' ],
[ '"', '&quot;' ],
[ '&', '&amp;' ],
[ '<', '&lt;' ],
[ '>', '&gt;' ],
]
# offset[code] is the position of that character's record inside the
# generated xmlEscapeContent array. Entries stay None until gen_content()
# has run, so gen_content() must be called before gen_tab().
offset = [ None ] * 128
def gen_content(out):
    """Write the xmlEscapeContent array and record each record's offset.

    For every entry of `entities` one length cell is emitted, followed by
    one cell per character of the replacement text. The position of the
    length cell is stored in `offset` under the code of the escaped
    character (entries with an empty character are not recorded).
    Twelve cells are written per output line.
    """
    cells = []
    for char, repl in entities:
        if char:
            # The record starts at the cell that holds its length.
            offset[ord(char)] = len(cells)
        cells.append('%3d,' % len(repl))
        cells.extend("'%s'," % c for c in repl)

    body = ''.join(
        ('\n ' if idx % 12 == 0 else ' ') + cell
        for idx, cell in enumerate(cells)
    )
    out.write('static const char xmlEscapeContent[] = {%s\n};\n\n' % body)
def gen_tab(out, name, escape, is_xml):
    """Write one 128-entry lookup table named `name` to `out`.

    `escape` is the string of characters that get their own record; their
    entries are taken from `offset`, which gen_content() must have filled
    in beforehand. NUL always maps to 0. When `is_xml` is true, every
    other code below 32 except tab (9) and newline (10) maps to 0 as well.
    All remaining codes map to -1. Sixteen entries are written per line.
    """
    def entry(code):
        if chr(code) in escape:
            return offset[code]
        if code == 0:
            return 0
        if is_xml and code < 32 and code not in (9, 10):
            return 0
        return -1

    rows = ''.join(
        ('\n ' if code % 16 == 0 else ' ') + '%2d,' % entry(code)
        for code in range(0x80)
    )
    out.write('static const signed char %s[128] = {%s\n};\n\n' % (name, rows))
# (table name, characters that receive their own replacement record)
_XML_TABLES = (
    ('xmlEscapeTab', '\r&<>'),
    ('xmlEscapeTabQuot', '\r"&<>'),
    ('xmlEscapeTabAttr', '\t\n\r"&<>'),
)
_HTML_TABLES = (
    ('htmlEscapeTab', '&<>'),
    ('htmlEscapeTabAttr', '"&'),
)

# The output path is relative to the current working directory.
with open('codegen/escape.inc', 'w') as out:
    # gen_content() goes first: it fills `offset`, which gen_tab() reads.
    gen_content(out)
    for tab_name, escaped in _XML_TABLES:
        gen_tab(out, tab_name, escaped, True)

    # The HTML tables are wrapped in a preprocessor guard.
    out.write('#ifdef LIBXML_HTML_ENABLED\n\n')
    for tab_name, escaped in _HTML_TABLES:
        gen_tab(out, tab_name, escaped, False)
    out.write('#endif /* LIBXML_HTML_ENABLED */\n')

View File

@ -162,8 +162,9 @@ def gen_table(ctype, cname, values, fmt, elems_per_line):
else: r += ' ' else: r += ' '
r += fmt % values[i] r += fmt % values[i]
return f'static const {ctype} {cname}[{count}] = {{{r}\n}};\n' return f'static const {ctype} {cname}[{count}] = {{{r}\n}};\n\n'
print(gen_table('unsigned char', 'htmlEntAlpha', alpha, '%3d', 15)) with open('codegen/html5ent.inc', 'w') as out:
print(gen_table('unsigned short', 'htmlEntValues', values, '%5d', 10)) out.write(gen_table('unsigned char', 'htmlEntAlpha', alpha, '%3d', 15))
print(gen_table('unsigned char', 'htmlEntStrings', strings, '%3s', 15)) out.write(gen_table('unsigned short', 'htmlEntValues', values, '%5d', 10))
out.write(gen_table('unsigned char', 'htmlEntStrings', strings, '%3s', 15))

View File

68
tools/gentest.py → codegen/genTestApi.py Normal file → Executable file
View File

@ -13,40 +13,40 @@ import xmlmod
# Globals # Globals
dtors = { dtors = {
'htmlDocPtr': 'xmlFreeDoc', 'htmlDoc *': 'xmlFreeDoc',
'htmlParserCtxtPtr': 'htmlFreeParserCtxt', 'htmlParserCtxt *': 'htmlFreeParserCtxt',
'xmlAutomataPtr': 'xmlFreeAutomata', 'xmlAutomata *': 'xmlFreeAutomata',
'xmlBufferPtr': 'xmlBufferFree', 'xmlBuffer *': 'xmlBufferFree',
'xmlCatalogPtr': 'xmlFreeCatalog', 'xmlCatalog *': 'xmlFreeCatalog',
'xmlChar *': 'xmlFree', 'xmlChar *': 'xmlFree',
'xmlDOMWrapCtxtPtr': 'xmlDOMWrapFreeCtxt', 'xmlDOMWrapCtxt *': 'xmlDOMWrapFreeCtxt',
'xmlDictPtr': 'xmlDictFree', 'xmlDict *': 'xmlDictFree',
'xmlDocPtr': 'xmlFreeDoc', 'xmlDoc *': 'xmlFreeDoc',
'xmlDtdPtr': 'xmlFreeDtd', 'xmlDtd *': 'xmlFreeDtd',
'xmlEntitiesTablePtr': 'xmlFreeEntitiesTable', 'xmlEntitiesTable *': 'xmlFreeEntitiesTable',
'xmlEnumerationPtr': 'xmlFreeEnumeration', 'xmlEnumeration *': 'xmlFreeEnumeration',
'xmlListPtr': 'xmlListDelete', 'xmlList *': 'xmlListDelete',
'xmlModulePtr': 'xmlModuleFree', 'xmlModule *': 'xmlModuleFree',
'xmlMutexPtr': 'xmlFreeMutex', 'xmlMutex *': 'xmlFreeMutex',
'xmlNodePtr': 'xmlFreeNode', 'xmlNode *': 'xmlFreeNode',
'xmlNodeSetPtr': 'xmlXPathFreeNodeSet', 'xmlNodeSet *': 'xmlXPathFreeNodeSet',
'xmlNsPtr': 'xmlFreeNs', 'xmlNs *': 'xmlFreeNs',
'xmlOutputBufferPtr': 'xmlOutputBufferClose', 'xmlOutputBuffer *': 'xmlOutputBufferClose',
'xmlParserCtxtPtr': 'xmlFreeParserCtxt', 'xmlParserCtxt *': 'xmlFreeParserCtxt',
'xmlParserInputBufferPtr': 'xmlFreeParserInputBuffer', 'xmlParserInputBuffer *': 'xmlFreeParserInputBuffer',
'xmlParserInputPtr': 'xmlFreeInputStream', 'xmlParserInput *': 'xmlFreeInputStream',
'xmlRMutexPtr': 'xmlFreeRMutex', 'xmlRMutex *': 'xmlFreeRMutex',
'xmlRelaxNGValidCtxtPtr': 'xmlRelaxNGFreeValidCtxt', 'xmlRelaxNGValidCtxt *': 'xmlRelaxNGFreeValidCtxt',
'xmlSaveCtxtPtr': 'xmlSaveClose', 'xmlSaveCtxt *': 'xmlSaveClose',
'xmlSchemaFacetPtr': 'xmlSchemaFreeFacet', 'xmlSchemaFacet *': 'xmlSchemaFreeFacet',
'xmlSchemaValPtr': 'xmlSchemaFreeValue', 'xmlSchemaVal *': 'xmlSchemaFreeValue',
'xmlSchemaValidCtxtPtr': 'xmlSchemaFreeValidCtxt', 'xmlSchemaValidCtxt *': 'xmlSchemaFreeValidCtxt',
'xmlTextWriterPtr': 'xmlFreeTextWriter', 'xmlTextWriter *': 'xmlFreeTextWriter',
'xmlURIPtr': 'xmlFreeURI', 'xmlURI *': 'xmlFreeURI',
'xmlValidCtxtPtr': 'xmlFreeValidCtxt', 'xmlValidCtxt *': 'xmlFreeValidCtxt',
'xmlXPathContextPtr': 'xmlXPathFreeContext', 'xmlXPathContext *': 'xmlXPathFreeContext',
'xmlXPathParserContextPtr': 'xmlXPathFreeParserContext', 'xmlXPathParserContext *': 'xmlXPathFreeParserContext',
'xmlXPathObjectPtr': 'xmlXPathFreeObject', 'xmlXPathObject *': 'xmlXPathFreeObject',
} }
blockList = { blockList = {
@ -194,7 +194,7 @@ for file in os.listdir(xmlDocDir):
dtor = dtors.get(rtype) dtor = dtors.get(rtype)
if dtor is not None: if dtor is not None:
code = f'{dtor}({code})' code = f'{dtor}({code})'
elif rtype == 'xmlHashTablePtr': elif rtype == 'xmlHashTable *':
code = f'xmlHashFree({code}, NULL)' code = f'xmlHashFree({code}, NULL)'
mmfunc[name] = f' {code};\n' mmfunc[name] = f' {code};\n'

View File

@ -11,10 +11,8 @@
# #
import sys import sys
import string import string
import time
webpage = "http://www.unicode.org/Public/4.0-Update1/UCD-4.0.1.html" webpage = "http://www.unicode.org/Public/4.0-Update1/UCD-4.0.1.html"
sources = "Blocks-4.0.1.txt UnicodeData-4.0.1.txt"
# #
# blockAliases is a small hack - it is used for mapping block names which # blockAliases is a small hack - it is used for mapping block names which
@ -31,7 +29,8 @@ blockAliases.append("PrivateUse:PrivateUseArea,SupplementaryPrivateUseArea-A," +
# number, inline comparisons are generated # number, inline comparisons are generated
minTableSize = 8 minTableSize = 8
(blockfile, catfile) = sources.split() blockfile = "Blocks-4.0.1.txt"
catfile = "UnicodeData-4.0.1.txt"
# #
@ -197,8 +196,6 @@ except:
print("Failed to open xmlunicode.c") print("Failed to open xmlunicode.c")
sys.exit(1) sys.exit(1)
date = time.asctime(time.localtime(time.time()))
output.write( output.write(
"""/* """/*
* xmlunicode.c: this module implements the Unicode character APIs * xmlunicode.c: this module implements the Unicode character APIs
@ -207,9 +204,6 @@ output.write(
* UCS description files of the Unicode Character Database * UCS description files of the Unicode Character Database
* %s * %s
* using the genUnicode.py Python script. * using the genUnicode.py Python script.
*
* Generation date: %s
* Sources: %s
*/ */
#define IN_LIBXML #define IN_LIBXML
@ -238,7 +232,7 @@ typedef struct {
static xmlIntFunc *xmlUnicodeLookup(const xmlUnicodeNameTable *tptr, const char *tname); static xmlIntFunc *xmlUnicodeLookup(const xmlUnicodeNameTable *tptr, const char *tname);
""" % (webpage, date, sources)); """ % webpage);
# #
# For any categories with more than minTableSize ranges we generate # For any categories with more than minTableSize ranges we generate
@ -281,13 +275,11 @@ for name in ckeys:
output.write( output.write(
"""/** """/**
* xmlUnicodeLookup:
* @tptr: pointer to the name table
* @tname: name to be found
*
* binary table lookup for user-supplied name * binary table lookup for user-supplied name
* *
* Returns pointer to range function if found, otherwise NULL * @param tptr pointer to the name table
* @param tname name to be found
* @returns pointer to range function if found, otherwise NULL
*/ */
static xmlIntFunc static xmlIntFunc
*xmlUnicodeLookup(const xmlUnicodeNameTable *tptr, const char *tname) { *xmlUnicodeLookup(const xmlUnicodeNameTable *tptr, const char *tname) {
@ -316,10 +308,10 @@ static xmlIntFunc
for block in bkeys: for block in bkeys:
name = block.replace('-', '') name = block.replace('-', '')
output.write("/**\n * xmlUCSIs%s:\n * @code: UCS code point\n" % (name)) output.write("/**\n * Check whether the character is part of %s UCS Block\n"%
output.write(" *\n * Check whether the character is part of %s UCS Block\n"%
(block)) (block))
output.write(" *\n * Returns 1 if true 0 otherwise\n */\n"); output.write(" *\n * @param code UCS code point\n")
output.write(" * @returns 1 if true 0 otherwise\n */\n");
output.write("static int\nxmlUCSIs%s(int code) {\n return(" % name) output.write("static int\nxmlUCSIs%s(int code) {\n return(" % name)
flag = 0 flag = 0
for (start, end) in BlockNames[block]: for (start, end) in BlockNames[block]:
@ -332,10 +324,10 @@ for block in bkeys:
for name in ckeys: for name in ckeys:
ranges = Categories[name] ranges = Categories[name]
output.write("/**\n * xmlUCSIsCat%s:\n * @code: UCS code point\n" % (name)) output.write("/**\n * Check whether the character is part of %s UCS Category\n"%
output.write(" *\n * Check whether the character is part of %s UCS Category\n"%
(name)) (name))
output.write(" *\n * Returns 1 if true 0 otherwise\n */\n"); output.write(" *\n * @param code UCS code point\n")
output.write(" * @returns 1 if true 0 otherwise\n */\n");
output.write("int\nxmlUCSIsCat%s(int code) {\n" % name) output.write("int\nxmlUCSIsCat%s(int code) {\n" % name)
if len(Categories[name]) > minTableSize: if len(Categories[name]) > minTableSize:
output.write(" return(xmlCharInRange((unsigned int)code, &xml%sG)" output.write(" return(xmlCharInRange((unsigned int)code, &xml%sG)"
@ -385,13 +377,11 @@ static const xmlUnicodeNameTable xmlUnicodeBlockTbl = {xmlUnicodeBlocks, %s};
static const xmlUnicodeNameTable xmlUnicodeCatTbl = {xmlUnicodeCats, %s}; static const xmlUnicodeNameTable xmlUnicodeCatTbl = {xmlUnicodeCats, %s};
/** /**
* xmlUCSIsBlock:
* @code: UCS code point
* @block: UCS block name
*
* Check whether the character is part of the UCS Block * Check whether the character is part of the UCS Block
* *
* Returns 1 if true, 0 if false and -1 on unknown block * @param code UCS code point
* @param block UCS block name
* @returns 1 if true, 0 if false and -1 on unknown block
*/ */
int int
xmlUCSIsBlock(int code, const char *block) { xmlUCSIsBlock(int code, const char *block) {
@ -404,13 +394,11 @@ xmlUCSIsBlock(int code, const char *block) {
} }
/** /**
* xmlUCSIsCat:
* @code: UCS code point
* @cat: UCS Category name
*
* Check whether the character is part of the UCS Category * Check whether the character is part of the UCS Category
* *
* Returns 1 if true, 0 if false and -1 on unknown category * @param code UCS code point
* @param cat UCS Category name
* @returns 1 if true, 0 if false and -1 on unknown category
*/ */
int int
xmlUCSIsCat(int code, const char *cat) { xmlUCSIsCat(int code, const char *cat) {

View File

@ -279,7 +279,7 @@ UTF8ToHtmlWrapper(void *vctxt, unsigned char *out, int *outlen,
#define UTF8ToHtmlWrapper NULL #define UTF8ToHtmlWrapper NULL
#endif #endif
#include "iso8859x.inc" #include "codegen/charset.inc"
static xmlCharEncError static xmlCharEncError
EightBitToUtf8(void *vctxt, unsigned char* out, int *outlen, EightBitToUtf8(void *vctxt, unsigned char* out, int *outlen,

View File

@ -350,7 +350,7 @@ skipped_types = {
import os import os
import xml.etree.ElementTree as etree import xml.etree.ElementTree as etree
sys.path.append(srcPref + '/../tools') sys.path.append(srcPref + '/../codegen')
import xmlmod import xmlmod
xmlDocDir = dstPref + '/../doc/xml' xmlDocDir = dstPref + '/../doc/xml'

View File

@ -1,66 +0,0 @@
#!/usr/bin/env python3
entities = [
[ '', '&#xFFFD;' ],
[ '\t', '&#9;' ],
[ '\n', '&#10;' ],
[ '\r', '&#13;' ],
[ '"', '&quot;' ],
[ '&', '&amp;' ],
[ '<', '&lt;' ],
[ '>', '&gt;' ],
]
### xmlEscapeContent
offset = [ None ] * 128
pos = 0
r = ''
for rec in entities:
char, repl = rec
if char:
offset[ord(char)] = pos
if pos % 12 == 0: r += '\n '
else: r += ' '
r += '%3d,' % len(repl)
pos += 1
for c in repl:
if pos % 12 == 0: r += '\n '
else: r += ' '
r += "'%s'," % c
pos += 1
print('static const char xmlEscapeContent[] = {%s\n};\n' % r)
def gen_tab(name, escape, is_xml):
r = ''
for i in range(0x80):
if chr(i) in escape:
v = offset[i]
elif i == 0:
v = 0
elif is_xml and i < 32 and i != 9 and i != 10:
v = 0
else:
v = -1
if i % 16 == 0: r += '\n '
else: r += ' '
r += '%2d,' % v
print('static const signed char %s[128] = {%s\n};\n' % (name, r))
gen_tab('xmlEscapeTab', '\r&<>', True)
gen_tab('xmlEscapeTabQuot', '\r"&<>', True)
gen_tab('xmlEscapeTabAttr', '\t\n\r"&<>', True)
print('#ifdef LIBXML_HTML_ENABLED\n')
gen_tab('htmlEscapeTab', '&<>', False)
gen_tab('htmlEscapeTabAttr', '"&', False)
print('#endif /* LIBXML_HTML_ENABLED */')

71
xmlIO.c
View File

@ -159,76 +159,7 @@ xmlSerializeHexCharRef(char *buf, int val) {
return(out - buf); return(out - buf);
} }
/* #include "codegen/escape.inc"
* Tables generated with tools/genEscape.py
*/
static const char xmlEscapeContent[] = {
8, '&', '#', 'x', 'F', 'F', 'F', 'D', ';', 4, '&', '#',
'9', ';', 5, '&', '#', '1', '0', ';', 5, '&', '#', '1',
'3', ';', 6, '&', 'q', 'u', 'o', 't', ';', 5, '&', 'a',
'm', 'p', ';', 4, '&', 'l', 't', ';', 4, '&', 'g', 't',
';',
};
static const signed char xmlEscapeTab[128] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 20, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-1, -1, -1, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
static const signed char xmlEscapeTabQuot[128] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 20, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
static const signed char xmlEscapeTabAttr[128] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 14, 0, 0, 20, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
#ifdef LIBXML_HTML_ENABLED
static const signed char htmlEscapeTab[128] = {
0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
static const signed char htmlEscapeTabAttr[128] = {
0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
#endif /* LIBXML_HTML_ENABLED */
/* /*
* @param text input text * @param text input text