diff --git a/HTMLparser.c b/HTMLparser.c
index 35e162a4..9a8e2c05 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -2432,7 +2432,7 @@ htmlCodePointToUtf8(int c, xmlChar *out, int *osize) {
return(out);
}
-#include "html5ent.inc"
+#include "codegen/html5ent.inc"
#define ENT_F_SEMICOLON 0x80u
#define ENT_F_SUBTABLE 0x40u
diff --git a/Makefile.am b/Makefile.am
index ae877617..c07147ea 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -154,7 +154,7 @@ testdso_la_LDFLAGS = $(AM_LDFLAGS) \
-module -no-undefined -avoid-version -rpath $(libdir)
rebuild_testapi:
- cd $(srcdir) && python3 tools/gentest.py $(abs_builddir)
+ cd $(srcdir) && python3 codegen/genTestApi.py $(abs_builddir)
testapi_SOURCES=testapi.c
testapi_DEPENDENCIES = $(DEPS)
@@ -201,9 +201,18 @@ CLEANFILES = missing.lst runsuite.log runxmlconf.log test.out \
EXTRA_DIST = Copyright libxml2-config.cmake.in autogen.sh \
libxml.h \
- html5ent.inc iso8859x.inc \
- tools/gentest.py tools/xmlmod.py \
- tools/genChRanges.py tools/genEscape.py tools/genUnicode.py \
+ codegen/charset.inc \
+ codegen/chvalid.def \
+ codegen/escape.inc \
+ codegen/genCharset.py \
+ codegen/genChRanges.py \
+ codegen/genEscape.py \
+ codegen/genHtml5Ent.py \
+ codegen/genHtml5LibTests.py \
+ codegen/genTestApi.py \
+ codegen/genUnicode.py \
+ codegen/html5ent.inc \
+ codegen/xmlmod.py \
timsort.h \
README.zOS README.md \
CMakeLists.txt config.h.cmake.in libxml2-config.cmake.cmake.in \
diff --git a/iso8859x.inc b/codegen/charset.inc
similarity index 100%
rename from iso8859x.inc
rename to codegen/charset.inc
diff --git a/chvalid.def b/codegen/chvalid.def
similarity index 100%
rename from chvalid.def
rename to codegen/chvalid.def
diff --git a/codegen/escape.inc b/codegen/escape.inc
new file mode 100644
index 00000000..a51660af
--- /dev/null
+++ b/codegen/escape.inc
@@ -0,0 +1,66 @@
+static const char xmlEscapeContent[] = {
+ 8, '&', '#', 'x', 'F', 'F', 'F', 'D', ';', 4, '&', '#',
+ '9', ';', 5, '&', '#', '1', '0', ';', 5, '&', '#', '1',
+ '3', ';', 6, '&', 'q', 'u', 'o', 't', ';', 5, '&', 'a',
+ 'm', 'p', ';', 4, '&', 'l', 't', ';', 4, '&', 'g', 't',
+ ';',
+};
+
+static const signed char xmlEscapeTab[128] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 20, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ -1, -1, -1, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
+static const signed char xmlEscapeTabQuot[128] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 20, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ -1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
+static const signed char xmlEscapeTabAttr[128] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 14, 0, 0, 20, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ -1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
+#ifdef LIBXML_HTML_ENABLED
+
+static const signed char htmlEscapeTab[128] = {
+ 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
+static const signed char htmlEscapeTabAttr[128] = {
+ 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
+#endif /* LIBXML_HTML_ENABLED */
diff --git a/tools/genChRanges.py b/codegen/genChRanges.py
similarity index 94%
rename from tools/genChRanges.py
rename to codegen/genChRanges.py
index 567b31b4..3e58b70d 100755
--- a/tools/genChRanges.py
+++ b/codegen/genChRanges.py
@@ -16,7 +16,6 @@
#
import sys
-import time
#
# A routine to take a list of yes/no (1, 0) values and turn it
@@ -41,8 +40,6 @@ def makeRange(lst):
pos = e + 1 # ready to check for next range
return ret
-sources = "chvalid.def" # input filename
-
# minTableSize gives the minimum number of ranges which must be present
# before a 256-byte lookup table is produced. If there are less than this
# number, a macro with inline comparisons is generated
@@ -54,9 +51,9 @@ Functs = {}
state = 0
try:
- defines = open("chvalid.def", "r")
+ defines = open("codegen/chvalid.def", "r")
except:
- print("Missing chvalid.def, aborting ...")
+ print("Missing codegen/chvalid.def, aborting ...")
sys.exit(1)
#
@@ -202,19 +199,19 @@ except:
print("Failed to open chvalid.c")
sys.exit(1)
-date = time.asctime(time.localtime(time.time()))
+fkeys = sorted(Functs.keys())
header.write(
-"""/*
- * Summary: Unicode character range checking
- * Description: this module exports interfaces for the character
+"""/**
+ * @file
+ *
+ * @brief Unicode character range checking
+ *
+ * this module exports interfaces for the character
* range validation APIs
*
* This file is automatically generated from the cvs source
* definition files using the genChRanges.py Python script
- *
- * Generation date: %s
- * Sources: %s
*/
#ifndef __XML_CHVALID_H__
@@ -227,6 +224,8 @@ header.write(
extern "C" {
#endif
+/** @cond ignore */
+
/*
* Define our typedefs and structures
*
@@ -254,13 +253,27 @@ struct _xmlChRangeGroup {
const xmlChLRange\t*longRange;
};
+""");
+
+for f in fkeys:
+ if len(Functs[f][1]) > 0:
+ header.write("XMLPUBVAR const xmlChRangeGroup %sGroup;\n" % f)
+ if max(Functs[f][0]) > 0: # only check if at least one entry
+ rangeTable = makeRange(Functs[f][0])
+ numRanges = len(rangeTable)
+ if numRanges >= minTableSize: # table is worthwhile
+ header.write("XMLPUBVAR const unsigned char %s_tab[256];\n" % f)
+
+header.write("""
/**
* Range checking routine
*/
XMLPUBFUN int
\t\txmlCharInRange(unsigned int val, const xmlChRangeGroup *group);
-""" % (date, sources));
+/** @endcond */
+""");
+
output.write(
"""/*
* chvalid.c:\tthis module implements the character range
@@ -268,9 +281,6 @@ output.write(
*
* This file is automatically generated from the cvs source
* definition files using the genChRanges.py Python script
- *
- * Generation date: %s
- * Sources: %s
*/
#define IN_LIBXML
@@ -287,7 +297,7 @@ output.write(
* allowed.
*
*/
-""" % (date, sources));
+""");
#
# Now output the generated data.
@@ -298,8 +308,6 @@ output.write(
# compares, otherwise we output a 256-byte table and a macro to use it.
#
-fkeys = sorted(Functs.keys())
-
for f in fkeys:
# First we convert the specified single-byte values into a group of ranges.
@@ -310,15 +318,13 @@ for f in fkeys:
rangeTable = makeRange(Functs[f][0])
numRanges = len(rangeTable)
if numRanges >= minTableSize: # table is worthwhile
- header.write("XMLPUBVAR const unsigned char %s_tab[256];\n" % f)
header.write("""
/**
- * %s_ch:
- * @c: char to validate
- *
* Automatically generated by genChRanges.py
+ *
+ * @param c char to validate
*/
-""" % f)
+""")
header.write("#define %s_ch(c)\t(%s_tab[(c)])\n" % (f, f))
# write the constant data to the code file
@@ -343,12 +349,11 @@ for f in fkeys:
header.write("""
/**
- * %s_ch:
- * @c: char to validate
- *
* Automatically generated by genChRanges.py
+ *
+ * @param c char to validate
*/
-""" % f)
+""")
# okay, I'm tired of the messy lineup - let's automate it!
pline = "#define %s_ch(c)" % f
# 'ntab' is number of tabs needed to position to col. 33 from name end
@@ -378,12 +383,11 @@ for f in fkeys:
header.write("""
/**
- * %sQ:
- * @c: char to validate
- *
* Automatically generated by genChRanges.py
+ *
+ * @param c char to validate
*/
-""" % f)
+""")
pline = "#define %sQ(c)" % f
ntab = 4 - (len(pline)) // 8
if ntab < 0:
@@ -403,7 +407,7 @@ for f in fkeys:
header.write(" 0)\n\n")
else:
if numRanges >= minTableSize:
- header.write(" \\\n\t\t\t\t xmlCharInRange((c), &%sGroup))\n\n" % f)
+ header.write(" \\\n\t\t\t\t xmlCharInRange((c), &%sGroup))\n" % f)
else: # if < minTableSize, generate inline code
firstFlag = 1
for rg in Functs[f][1]:
@@ -417,14 +421,10 @@ for f in fkeys:
else: # value range
pline += "((0x%x <= (c)) &&" % rg[0]
pline += " ((c) <= 0x%x))" % rg[1]
- pline += "))\n\n"
+ pline += "))\n"
header.write(pline)
- if len(Functs[f][1]) > 0:
- header.write("XMLPUBVAR const xmlChRangeGroup %sGroup;\n" % f)
-
-
#
# Next we do the unicode ranges
#
@@ -477,14 +477,12 @@ for f in fkeys:
output.write(
"""
/**
- * xmlCharInRange:
- * @val: character to be validated
- * @rptr: pointer to range to be used to validate
- *
* Does a binary search of the range table to determine if char
* is valid
*
- * Returns: true if character valid, false otherwise
+ * @param val character to be validated
+ * @param rptr pointer to range to be used to validate
+ * @returns true if character valid, false otherwise
*/
int
xmlCharInRange (unsigned int val, const xmlChRangeGroup *rptr) {
@@ -542,18 +540,16 @@ xmlCharInRange (unsigned int val, const xmlChRangeGroup *rptr) {
for f in fkeys:
output.write("""
/**
- * %s:
- * @ch: character to validate
- *
* This function is DEPRECATED.
-""" % f);
+""");
if max(Functs[f][0]) > 0:
- output.write(" * Use %s_ch or %sQ instead" % (f, f))
+ output.write(" * Use %s_ch() or %sQ() instead" % (f, f))
else:
- output.write(" * Use %sQ instead" % f)
+ output.write(" * Use %sQ() instead" % f)
output.write("""
*
- * Returns true if argument valid, false otherwise
+ * @param ch character to validate
+ * @returns true if argument valid, false otherwise
*/
""")
output.write("int\n%s(unsigned int ch) {\n return(%sQ(ch));\n}\n\n" % (f,f))
diff --git a/tools/genTranscode.py b/codegen/genCharset.py
similarity index 99%
rename from tools/genTranscode.py
rename to codegen/genCharset.py
index 7a5168ae..7f73c48f 100755
--- a/tools/genTranscode.py
+++ b/codegen/genCharset.py
@@ -73,7 +73,7 @@ def genTranscodeTable(out, name, chars):
printHexTable(out, 2, data)
out.write('};\n\n')
-out = open(f'iso8859x.inc', 'w')
+out = open(f'codegen/charset.inc', 'w')
out.write('''/*
* Lookup tables for transcoding of 8-bit character sets.
diff --git a/codegen/genEscape.py b/codegen/genEscape.py
new file mode 100755
index 00000000..03bf383d
--- /dev/null
+++ b/codegen/genEscape.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+
+entities = [
+ [ '', '&#xFFFD;' ],
+ [ '\t', '&#9;' ],
+ [ '\n', '&#10;' ],
+ [ '\r', '&#13;' ],
+ [ '"', '&quot;' ],
+ [ '&', '&amp;' ],
+ [ '<', '&lt;' ],
+ [ '>', '&gt;' ],
+]
+
+offset = [ None ] * 128
+
+def gen_content(out):
+ pos = 0
+ r = ''
+
+ for rec in entities:
+ char, repl = rec
+
+ if char:
+ offset[ord(char)] = pos
+
+ if pos % 12 == 0: r += '\n '
+ else: r += ' '
+ r += '%3d,' % len(repl)
+ pos += 1
+
+ for c in repl:
+ if pos % 12 == 0: r += '\n '
+ else: r += ' '
+ r += "'%s'," % c
+ pos += 1
+
+ out.write('static const char xmlEscapeContent[] = {%s\n};\n\n' % r)
+
+def gen_tab(out, name, escape, is_xml):
+ r = ''
+
+ for i in range(0x80):
+
+ if chr(i) in escape:
+ v = offset[i]
+ elif i == 0:
+ v = 0
+ elif is_xml and i < 32 and i != 9 and i != 10:
+ v = 0
+ else:
+ v = -1
+
+ if i % 16 == 0: r += '\n '
+ else: r += ' '
+ r += '%2d,' % v
+
+ out.write('static const signed char %s[128] = {%s\n};\n\n' % (name, r))
+
+with open('codegen/escape.inc', 'w') as out:
+ gen_content(out)
+
+ gen_tab(out, 'xmlEscapeTab', '\r&<>', True)
+ gen_tab(out, 'xmlEscapeTabQuot', '\r"&<>', True)
+ gen_tab(out, 'xmlEscapeTabAttr', '\t\n\r"&<>', True)
+
+ out.write('#ifdef LIBXML_HTML_ENABLED\n\n')
+ gen_tab(out, 'htmlEscapeTab', '&<>', False)
+ gen_tab(out, 'htmlEscapeTabAttr', '"&', False)
+ out.write('#endif /* LIBXML_HTML_ENABLED */\n')
diff --git a/tools/genHtmlEnt.py b/codegen/genHtml5Ent.py
similarity index 93%
rename from tools/genHtmlEnt.py
rename to codegen/genHtml5Ent.py
index f87a570f..e3be9f1d 100755
--- a/tools/genHtmlEnt.py
+++ b/codegen/genHtml5Ent.py
@@ -162,8 +162,9 @@ def gen_table(ctype, cname, values, fmt, elems_per_line):
else: r += ' '
r += fmt % values[i]
- return f'static const {ctype} {cname}[{count}] = {{{r}\n}};\n'
+ return f'static const {ctype} {cname}[{count}] = {{{r}\n}};\n\n'
-print(gen_table('unsigned char', 'htmlEntAlpha', alpha, '%3d', 15))
-print(gen_table('unsigned short', 'htmlEntValues', values, '%5d', 10))
-print(gen_table('unsigned char', 'htmlEntStrings', strings, '%3s', 15))
+with open('codegen/html5ent.inc', 'w') as out:
+ out.write(gen_table('unsigned char', 'htmlEntAlpha', alpha, '%3d', 15))
+ out.write(gen_table('unsigned short', 'htmlEntValues', values, '%5d', 10))
+ out.write(gen_table('unsigned char', 'htmlEntStrings', strings, '%3s', 15))
diff --git a/tools/genHtml5LibTests.py b/codegen/genHtml5LibTests.py
old mode 100644
new mode 100755
similarity index 100%
rename from tools/genHtml5LibTests.py
rename to codegen/genHtml5LibTests.py
diff --git a/tools/gentest.py b/codegen/genTestApi.py
old mode 100644
new mode 100755
similarity index 81%
rename from tools/gentest.py
rename to codegen/genTestApi.py
index 8efa03a6..ba9f688c
--- a/tools/gentest.py
+++ b/codegen/genTestApi.py
@@ -13,40 +13,40 @@ import xmlmod
# Globals
dtors = {
- 'htmlDocPtr': 'xmlFreeDoc',
- 'htmlParserCtxtPtr': 'htmlFreeParserCtxt',
- 'xmlAutomataPtr': 'xmlFreeAutomata',
- 'xmlBufferPtr': 'xmlBufferFree',
- 'xmlCatalogPtr': 'xmlFreeCatalog',
+ 'htmlDoc *': 'xmlFreeDoc',
+ 'htmlParserCtxt *': 'htmlFreeParserCtxt',
+ 'xmlAutomata *': 'xmlFreeAutomata',
+ 'xmlBuffer *': 'xmlBufferFree',
+ 'xmlCatalog *': 'xmlFreeCatalog',
'xmlChar *': 'xmlFree',
- 'xmlDOMWrapCtxtPtr': 'xmlDOMWrapFreeCtxt',
- 'xmlDictPtr': 'xmlDictFree',
- 'xmlDocPtr': 'xmlFreeDoc',
- 'xmlDtdPtr': 'xmlFreeDtd',
- 'xmlEntitiesTablePtr': 'xmlFreeEntitiesTable',
- 'xmlEnumerationPtr': 'xmlFreeEnumeration',
- 'xmlListPtr': 'xmlListDelete',
- 'xmlModulePtr': 'xmlModuleFree',
- 'xmlMutexPtr': 'xmlFreeMutex',
- 'xmlNodePtr': 'xmlFreeNode',
- 'xmlNodeSetPtr': 'xmlXPathFreeNodeSet',
- 'xmlNsPtr': 'xmlFreeNs',
- 'xmlOutputBufferPtr': 'xmlOutputBufferClose',
- 'xmlParserCtxtPtr': 'xmlFreeParserCtxt',
- 'xmlParserInputBufferPtr': 'xmlFreeParserInputBuffer',
- 'xmlParserInputPtr': 'xmlFreeInputStream',
- 'xmlRMutexPtr': 'xmlFreeRMutex',
- 'xmlRelaxNGValidCtxtPtr': 'xmlRelaxNGFreeValidCtxt',
- 'xmlSaveCtxtPtr': 'xmlSaveClose',
- 'xmlSchemaFacetPtr': 'xmlSchemaFreeFacet',
- 'xmlSchemaValPtr': 'xmlSchemaFreeValue',
- 'xmlSchemaValidCtxtPtr': 'xmlSchemaFreeValidCtxt',
- 'xmlTextWriterPtr': 'xmlFreeTextWriter',
- 'xmlURIPtr': 'xmlFreeURI',
- 'xmlValidCtxtPtr': 'xmlFreeValidCtxt',
- 'xmlXPathContextPtr': 'xmlXPathFreeContext',
- 'xmlXPathParserContextPtr': 'xmlXPathFreeParserContext',
- 'xmlXPathObjectPtr': 'xmlXPathFreeObject',
+ 'xmlDOMWrapCtxt *': 'xmlDOMWrapFreeCtxt',
+ 'xmlDict *': 'xmlDictFree',
+ 'xmlDoc *': 'xmlFreeDoc',
+ 'xmlDtd *': 'xmlFreeDtd',
+ 'xmlEntitiesTable *': 'xmlFreeEntitiesTable',
+ 'xmlEnumeration *': 'xmlFreeEnumeration',
+ 'xmlList *': 'xmlListDelete',
+ 'xmlModule *': 'xmlModuleFree',
+ 'xmlMutex *': 'xmlFreeMutex',
+ 'xmlNode *': 'xmlFreeNode',
+ 'xmlNodeSet *': 'xmlXPathFreeNodeSet',
+ 'xmlNs *': 'xmlFreeNs',
+ 'xmlOutputBuffer *': 'xmlOutputBufferClose',
+ 'xmlParserCtxt *': 'xmlFreeParserCtxt',
+ 'xmlParserInputBuffer *': 'xmlFreeParserInputBuffer',
+ 'xmlParserInput *': 'xmlFreeInputStream',
+ 'xmlRMutex *': 'xmlFreeRMutex',
+ 'xmlRelaxNGValidCtxt *': 'xmlRelaxNGFreeValidCtxt',
+ 'xmlSaveCtxt *': 'xmlSaveClose',
+ 'xmlSchemaFacet *': 'xmlSchemaFreeFacet',
+ 'xmlSchemaVal *': 'xmlSchemaFreeValue',
+ 'xmlSchemaValidCtxt *': 'xmlSchemaFreeValidCtxt',
+ 'xmlTextWriter *': 'xmlFreeTextWriter',
+ 'xmlURI *': 'xmlFreeURI',
+ 'xmlValidCtxt *': 'xmlFreeValidCtxt',
+ 'xmlXPathContext *': 'xmlXPathFreeContext',
+ 'xmlXPathParserContext *': 'xmlXPathFreeParserContext',
+ 'xmlXPathObject *': 'xmlXPathFreeObject',
}
blockList = {
@@ -194,7 +194,7 @@ for file in os.listdir(xmlDocDir):
dtor = dtors.get(rtype)
if dtor is not None:
code = f'{dtor}({code})'
- elif rtype == 'xmlHashTablePtr':
+ elif rtype == 'xmlHashTable *':
code = f'xmlHashFree({code}, NULL)'
mmfunc[name] = f' {code};\n'
diff --git a/tools/genUnicode.py b/codegen/genUnicode.py
similarity index 90%
rename from tools/genUnicode.py
rename to codegen/genUnicode.py
index 67fef622..de881f43 100755
--- a/tools/genUnicode.py
+++ b/codegen/genUnicode.py
@@ -11,10 +11,8 @@
#
import sys
import string
-import time
webpage = "http://www.unicode.org/Public/4.0-Update1/UCD-4.0.1.html"
-sources = "Blocks-4.0.1.txt UnicodeData-4.0.1.txt"
#
# blockAliases is a small hack - it is used for mapping block names which
@@ -31,7 +29,8 @@ blockAliases.append("PrivateUse:PrivateUseArea,SupplementaryPrivateUseArea-A," +
# number, inline comparisons are generated
minTableSize = 8
-(blockfile, catfile) = sources.split()
+blockfile = "Blocks-4.0.1.txt"
+catfile = "UnicodeData-4.0.1.txt"
#
@@ -197,8 +196,6 @@ except:
print("Failed to open xmlunicode.c")
sys.exit(1)
-date = time.asctime(time.localtime(time.time()))
-
output.write(
"""/*
* xmlunicode.c: this module implements the Unicode character APIs
@@ -207,9 +204,6 @@ output.write(
* UCS description files of the Unicode Character Database
* %s
* using the genUnicode.py Python script.
- *
- * Generation date: %s
- * Sources: %s
*/
#define IN_LIBXML
@@ -238,7 +232,7 @@ typedef struct {
static xmlIntFunc *xmlUnicodeLookup(const xmlUnicodeNameTable *tptr, const char *tname);
-""" % (webpage, date, sources));
+""" % webpage);
#
# For any categories with more than minTableSize ranges we generate
@@ -281,13 +275,11 @@ for name in ckeys:
output.write(
"""/**
- * xmlUnicodeLookup:
- * @tptr: pointer to the name table
- * @tname: name to be found
- *
* binary table lookup for user-supplied name
*
- * Returns pointer to range function if found, otherwise NULL
+ * @param tptr pointer to the name table
+ * @param tname name to be found
+ * @returns pointer to range function if found, otherwise NULL
*/
static xmlIntFunc
*xmlUnicodeLookup(const xmlUnicodeNameTable *tptr, const char *tname) {
@@ -316,10 +308,10 @@ static xmlIntFunc
for block in bkeys:
name = block.replace('-', '')
- output.write("/**\n * xmlUCSIs%s:\n * @code: UCS code point\n" % (name))
- output.write(" *\n * Check whether the character is part of %s UCS Block\n"%
+ output.write("/**\n * Check whether the character is part of %s UCS Block\n"%
(block))
- output.write(" *\n * Returns 1 if true 0 otherwise\n */\n");
+ output.write(" *\n * @param code UCS code point\n")
+ output.write(" * @returns 1 if true 0 otherwise\n */\n");
output.write("static int\nxmlUCSIs%s(int code) {\n return(" % name)
flag = 0
for (start, end) in BlockNames[block]:
@@ -332,10 +324,10 @@ for block in bkeys:
for name in ckeys:
ranges = Categories[name]
- output.write("/**\n * xmlUCSIsCat%s:\n * @code: UCS code point\n" % (name))
- output.write(" *\n * Check whether the character is part of %s UCS Category\n"%
+ output.write("/**\n * Check whether the character is part of %s UCS Category\n"%
(name))
- output.write(" *\n * Returns 1 if true 0 otherwise\n */\n");
+ output.write(" *\n * @param code UCS code point\n")
+ output.write(" * @returns 1 if true 0 otherwise\n */\n");
output.write("int\nxmlUCSIsCat%s(int code) {\n" % name)
if len(Categories[name]) > minTableSize:
output.write(" return(xmlCharInRange((unsigned int)code, &xml%sG)"
@@ -385,13 +377,11 @@ static const xmlUnicodeNameTable xmlUnicodeBlockTbl = {xmlUnicodeBlocks, %s};
static const xmlUnicodeNameTable xmlUnicodeCatTbl = {xmlUnicodeCats, %s};
/**
- * xmlUCSIsBlock:
- * @code: UCS code point
- * @block: UCS block name
- *
* Check whether the character is part of the UCS Block
*
- * Returns 1 if true, 0 if false and -1 on unknown block
+ * @param code UCS code point
+ * @param block UCS block name
+ * @returns 1 if true, 0 if false and -1 on unknown block
*/
int
xmlUCSIsBlock(int code, const char *block) {
@@ -404,13 +394,11 @@ xmlUCSIsBlock(int code, const char *block) {
}
/**
- * xmlUCSIsCat:
- * @code: UCS code point
- * @cat: UCS Category name
- *
* Check whether the character is part of the UCS Category
*
- * Returns 1 if true, 0 if false and -1 on unknown category
+ * @param code UCS code point
+ * @param cat UCS Category name
+ * @returns 1 if true, 0 if false and -1 on unknown category
*/
int
xmlUCSIsCat(int code, const char *cat) {
diff --git a/html5ent.inc b/codegen/html5ent.inc
similarity index 100%
rename from html5ent.inc
rename to codegen/html5ent.inc
diff --git a/tools/xmlmod.py b/codegen/xmlmod.py
similarity index 100%
rename from tools/xmlmod.py
rename to codegen/xmlmod.py
diff --git a/encoding.c b/encoding.c
index 659f93a3..4e4b6359 100644
--- a/encoding.c
+++ b/encoding.c
@@ -279,7 +279,7 @@ UTF8ToHtmlWrapper(void *vctxt, unsigned char *out, int *outlen,
#define UTF8ToHtmlWrapper NULL
#endif
-#include "iso8859x.inc"
+#include "codegen/charset.inc"
static xmlCharEncError
EightBitToUtf8(void *vctxt, unsigned char* out, int *outlen,
diff --git a/python/generator.py b/python/generator.py
index 63c48446..112367b6 100755
--- a/python/generator.py
+++ b/python/generator.py
@@ -350,7 +350,7 @@ skipped_types = {
import os
import xml.etree.ElementTree as etree
-sys.path.append(srcPref + '/../tools')
+sys.path.append(srcPref + '/../codegen')
import xmlmod
xmlDocDir = dstPref + '/../doc/xml'
diff --git a/tools/genEscape.py b/tools/genEscape.py
deleted file mode 100755
index fbd12c90..00000000
--- a/tools/genEscape.py
+++ /dev/null
@@ -1,66 +0,0 @@
-#!/usr/bin/env python3
-
-entities = [
- [ '', '&#xFFFD;' ],
- [ '\t', '&#9;' ],
- [ '\n', '&#10;' ],
- [ '\r', '&#13;' ],
- [ '"', '&quot;' ],
- [ '&', '&amp;' ],
- [ '<', '&lt;' ],
- [ '>', '&gt;' ],
-]
-
-### xmlEscapeContent
-
-offset = [ None ] * 128
-pos = 0
-r = ''
-
-for rec in entities:
- char, repl = rec
-
- if char:
- offset[ord(char)] = pos
-
- if pos % 12 == 0: r += '\n '
- else: r += ' '
- r += '%3d,' % len(repl)
- pos += 1
-
- for c in repl:
- if pos % 12 == 0: r += '\n '
- else: r += ' '
- r += "'%s'," % c
- pos += 1
-
-print('static const char xmlEscapeContent[] = {%s\n};\n' % r)
-
-def gen_tab(name, escape, is_xml):
- r = ''
-
- for i in range(0x80):
-
- if chr(i) in escape:
- v = offset[i]
- elif i == 0:
- v = 0
- elif is_xml and i < 32 and i != 9 and i != 10:
- v = 0
- else:
- v = -1
-
- if i % 16 == 0: r += '\n '
- else: r += ' '
- r += '%2d,' % v
-
- print('static const signed char %s[128] = {%s\n};\n' % (name, r))
-
-gen_tab('xmlEscapeTab', '\r&<>', True)
-gen_tab('xmlEscapeTabQuot', '\r"&<>', True)
-gen_tab('xmlEscapeTabAttr', '\t\n\r"&<>', True)
-
-print('#ifdef LIBXML_HTML_ENABLED\n')
-gen_tab('htmlEscapeTab', '&<>', False)
-gen_tab('htmlEscapeTabAttr', '"&', False)
-print('#endif /* LIBXML_HTML_ENABLED */')
diff --git a/xmlIO.c b/xmlIO.c
index 6c9449b2..7f548020 100644
--- a/xmlIO.c
+++ b/xmlIO.c
@@ -159,76 +159,7 @@ xmlSerializeHexCharRef(char *buf, int val) {
return(out - buf);
}
-/*
- * Tables generated with tools/genEscape.py
- */
-
-static const char xmlEscapeContent[] = {
- 8, '&', '#', 'x', 'F', 'F', 'F', 'D', ';', 4, '&', '#',
- '9', ';', 5, '&', '#', '1', '0', ';', 5, '&', '#', '1',
- '3', ';', 6, '&', 'q', 'u', 'o', 't', ';', 5, '&', 'a',
- 'm', 'p', ';', 4, '&', 'l', 't', ';', 4, '&', 'g', 't',
- ';',
-};
-
-static const signed char xmlEscapeTab[128] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 20, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- -1, -1, -1, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-};
-
-static const signed char xmlEscapeTabQuot[128] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 20, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- -1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-};
-
-static const signed char xmlEscapeTabAttr[128] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 14, 0, 0, 20, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- -1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-};
-
-#ifdef LIBXML_HTML_ENABLED
-
-static const signed char htmlEscapeTab[128] = {
- 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-};
-
-static const signed char htmlEscapeTabAttr[128] = {
- 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-};
-
-#endif /* LIBXML_HTML_ENABLED */
+#include "codegen/escape.inc"
/*
* @param text input text