From 258d8706291cc8289598e14efc3fdc0ce0f6f575 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Thu, 15 May 2025 17:49:49 +0200 Subject: [PATCH] codegen: Consolidate tools for code generation Move tools, source files and output tables into codegen directory. Rename some files. Adjust tools to match modified files. Remove generation date and source files from output. Distribute all tools and sources. --- HTMLparser.c | 2 +- Makefile.am | 17 +++- iso8859x.inc => codegen/charset.inc | 0 chvalid.def => codegen/chvalid.def | 0 codegen/escape.inc | 66 +++++++++++++ {tools => codegen}/genChRanges.py | 96 +++++++++---------- .../genTranscode.py => codegen/genCharset.py | 2 +- codegen/genEscape.py | 69 +++++++++++++ tools/genHtmlEnt.py => codegen/genHtml5Ent.py | 9 +- {tools => codegen}/genHtml5LibTests.py | 0 tools/gentest.py => codegen/genTestApi.py | 68 ++++++------- {tools => codegen}/genUnicode.py | 48 ++++------ html5ent.inc => codegen/html5ent.inc | 0 {tools => codegen}/xmlmod.py | 0 encoding.c | 2 +- python/generator.py | 2 +- tools/genEscape.py | 66 ------------- xmlIO.c | 71 +------------- 18 files changed, 256 insertions(+), 262 deletions(-) rename iso8859x.inc => codegen/charset.inc (100%) rename chvalid.def => codegen/chvalid.def (100%) create mode 100644 codegen/escape.inc rename {tools => codegen}/genChRanges.py (94%) rename tools/genTranscode.py => codegen/genCharset.py (99%) create mode 100755 codegen/genEscape.py rename tools/genHtmlEnt.py => codegen/genHtml5Ent.py (93%) rename {tools => codegen}/genHtml5LibTests.py (100%) mode change 100644 => 100755 rename tools/gentest.py => codegen/genTestApi.py (81%) mode change 100644 => 100755 rename {tools => codegen}/genUnicode.py (90%) rename html5ent.inc => codegen/html5ent.inc (100%) rename {tools => codegen}/xmlmod.py (100%) delete mode 100755 tools/genEscape.py diff --git a/HTMLparser.c b/HTMLparser.c index 35e162a4..9a8e2c05 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -2432,7 +2432,7 @@ 
htmlCodePointToUtf8(int c, xmlChar *out, int *osize) { return(out); } -#include "html5ent.inc" +#include "codegen/html5ent.inc" #define ENT_F_SEMICOLON 0x80u #define ENT_F_SUBTABLE 0x40u diff --git a/Makefile.am b/Makefile.am index ae877617..c07147ea 100644 --- a/Makefile.am +++ b/Makefile.am @@ -154,7 +154,7 @@ testdso_la_LDFLAGS = $(AM_LDFLAGS) \ -module -no-undefined -avoid-version -rpath $(libdir) rebuild_testapi: - cd $(srcdir) && python3 tools/gentest.py $(abs_builddir) + cd $(srcdir) && python3 codegen/genTestApi.py $(abs_builddir) testapi_SOURCES=testapi.c testapi_DEPENDENCIES = $(DEPS) @@ -201,9 +201,18 @@ CLEANFILES = missing.lst runsuite.log runxmlconf.log test.out \ EXTRA_DIST = Copyright libxml2-config.cmake.in autogen.sh \ libxml.h \ - html5ent.inc iso8859x.inc \ - tools/gentest.py tools/xmlmod.py \ - tools/genChRanges.py tools/genEscape.py tools/genUnicode.py \ + codegen/charset.inc \ + codegen/chvalid.def \ + codegen/escape.inc \ + codegen/genCharset.py \ + codegen/genChRanges.py \ + codegen/genEscape.py \ + codegen/genHtml5Ent.py \ + codegen/genHtml5LibTests.py \ + codegen/genTestApi.py \ + codegen/genUnicode.py \ + codegen/html5ent.inc \ + codegen/xmlmod.py \ timsort.h \ README.zOS README.md \ CMakeLists.txt config.h.cmake.in libxml2-config.cmake.cmake.in \ diff --git a/iso8859x.inc b/codegen/charset.inc similarity index 100% rename from iso8859x.inc rename to codegen/charset.inc diff --git a/chvalid.def b/codegen/chvalid.def similarity index 100% rename from chvalid.def rename to codegen/chvalid.def diff --git a/codegen/escape.inc b/codegen/escape.inc new file mode 100644 index 00000000..a51660af --- /dev/null +++ b/codegen/escape.inc @@ -0,0 +1,66 @@ +static const char xmlEscapeContent[] = { + 8, '&', '#', 'x', 'F', 'F', 'F', 'D', ';', 4, '&', '#', + '9', ';', 5, '&', '#', '1', '0', ';', 5, '&', '#', '1', + '3', ';', 6, '&', 'q', 'u', 'o', 't', ';', 5, '&', 'a', + 'm', 'p', ';', 4, '&', 'l', 't', ';', 4, '&', 'g', 't', + ';', +}; + +static const 
signed char xmlEscapeTab[128] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 20, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -1, -1, -1, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; + +static const signed char xmlEscapeTabQuot[128] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 20, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; + +static const signed char xmlEscapeTabAttr[128] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 14, 0, 0, 20, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; + +#ifdef LIBXML_HTML_ENABLED + +static const signed char htmlEscapeTab[128] = { + 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1, + -1, 
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; + +static const signed char htmlEscapeTabAttr[128] = { + 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; + +#endif /* LIBXML_HTML_ENABLED */ diff --git a/tools/genChRanges.py b/codegen/genChRanges.py similarity index 94% rename from tools/genChRanges.py rename to codegen/genChRanges.py index 567b31b4..3e58b70d 100755 --- a/tools/genChRanges.py +++ b/codegen/genChRanges.py @@ -16,7 +16,6 @@ # import sys -import time # # A routine to take a list of yes/no (1, 0) values and turn it @@ -41,8 +40,6 @@ def makeRange(lst): pos = e + 1 # ready to check for next range return ret -sources = "chvalid.def" # input filename - # minTableSize gives the minimum number of ranges which must be present # before a 256-byte lookup table is produced. 
If there are less than this # number, a macro with inline comparisons is generated @@ -54,9 +51,9 @@ Functs = {} state = 0 try: - defines = open("chvalid.def", "r") + defines = open("codegen/chvalid.def", "r") except: - print("Missing chvalid.def, aborting ...") + print("Missing codegen/chvalid.def, aborting ...") sys.exit(1) # @@ -202,19 +199,19 @@ except: print("Failed to open chvalid.c") sys.exit(1) -date = time.asctime(time.localtime(time.time())) +fkeys = sorted(Functs.keys()) header.write( -"""/* - * Summary: Unicode character range checking - * Description: this module exports interfaces for the character +"""/** + * @file + * + * @brief Unicode character range checking + * + * this module exports interfaces for the character * range validation APIs * * This file is automatically generated from the cvs source * definition files using the genChRanges.py Python script - * - * Generation date: %s - * Sources: %s */ #ifndef __XML_CHVALID_H__ @@ -227,6 +224,8 @@ header.write( extern "C" { #endif +/** @cond ignore */ + /* * Define our typedefs and structures * @@ -254,13 +253,27 @@ struct _xmlChRangeGroup { const xmlChLRange\t*longRange; }; +"""); + +for f in fkeys: + if len(Functs[f][1]) > 0: + header.write("XMLPUBVAR const xmlChRangeGroup %sGroup;\n" % f) + if max(Functs[f][0]) > 0: # only check if at least one entry + rangeTable = makeRange(Functs[f][0]) + numRanges = len(rangeTable) + if numRanges >= minTableSize: # table is worthwhile + header.write("XMLPUBVAR const unsigned char %s_tab[256];\n" % f) + +header.write(""" /** * Range checking routine */ XMLPUBFUN int \t\txmlCharInRange(unsigned int val, const xmlChRangeGroup *group); -""" % (date, sources)); +/** @endcond */ +"""); + output.write( """/* * chvalid.c:\tthis module implements the character range @@ -268,9 +281,6 @@ output.write( * * This file is automatically generated from the cvs source * definition files using the genChRanges.py Python script - * - * Generation date: %s - * Sources: %s */ 
#define IN_LIBXML @@ -287,7 +297,7 @@ output.write( * allowed. * */ -""" % (date, sources)); +"""); # # Now output the generated data. @@ -298,8 +308,6 @@ output.write( # compares, otherwise we output a 256-byte table and a macro to use it. # -fkeys = sorted(Functs.keys()) - for f in fkeys: # First we convert the specified single-byte values into a group of ranges. @@ -310,15 +318,13 @@ for f in fkeys: rangeTable = makeRange(Functs[f][0]) numRanges = len(rangeTable) if numRanges >= minTableSize: # table is worthwhile - header.write("XMLPUBVAR const unsigned char %s_tab[256];\n" % f) header.write(""" /** - * %s_ch: - * @c: char to validate - * * Automatically generated by genChRanges.py + * + * @param c char to validate */ -""" % f) +""") header.write("#define %s_ch(c)\t(%s_tab[(c)])\n" % (f, f)) # write the constant data to the code file @@ -343,12 +349,11 @@ for f in fkeys: header.write(""" /** - * %s_ch: - * @c: char to validate - * * Automatically generated by genChRanges.py + * + * @param c char to validate */ -""" % f) +""") # okay, I'm tired of the messy lineup - let's automate it! pline = "#define %s_ch(c)" % f # 'ntab' is number of tabs needed to position to col. 
33 from name end @@ -378,12 +383,11 @@ for f in fkeys: header.write(""" /** - * %sQ: - * @c: char to validate - * * Automatically generated by genChRanges.py + * + * @param c char to validate */ -""" % f) +""") pline = "#define %sQ(c)" % f ntab = 4 - (len(pline)) // 8 if ntab < 0: @@ -403,7 +407,7 @@ for f in fkeys: header.write(" 0)\n\n") else: if numRanges >= minTableSize: - header.write(" \\\n\t\t\t\t xmlCharInRange((c), &%sGroup))\n\n" % f) + header.write(" \\\n\t\t\t\t xmlCharInRange((c), &%sGroup))\n" % f) else: # if < minTableSize, generate inline code firstFlag = 1 for rg in Functs[f][1]: @@ -417,14 +421,10 @@ for f in fkeys: else: # value range pline += "((0x%x <= (c)) &&" % rg[0] pline += " ((c) <= 0x%x))" % rg[1] - pline += "))\n\n" + pline += "))\n" header.write(pline) - if len(Functs[f][1]) > 0: - header.write("XMLPUBVAR const xmlChRangeGroup %sGroup;\n" % f) - - # # Next we do the unicode ranges # @@ -477,14 +477,12 @@ for f in fkeys: output.write( """ /** - * xmlCharInRange: - * @val: character to be validated - * @rptr: pointer to range to be used to validate - * * Does a binary search of the range table to determine if char * is valid * - * Returns: true if character valid, false otherwise + * @param val character to be validated + * @param rptr pointer to range to be used to validate + * @returns true if character valid, false otherwise */ int xmlCharInRange (unsigned int val, const xmlChRangeGroup *rptr) { @@ -542,18 +540,16 @@ xmlCharInRange (unsigned int val, const xmlChRangeGroup *rptr) { for f in fkeys: output.write(""" /** - * %s: - * @ch: character to validate - * * This function is DEPRECATED. 
-""" % f); + """); if max(Functs[f][0]) > 0: - output.write(" * Use %s_ch or %sQ instead" % (f, f)) + output.write(" * Use %s_ch() or %sQ() instead" % (f, f)) else: - output.write(" * Use %sQ instead" % f) + output.write(" * Use %sQ() instead" % f) output.write(""" * - * Returns true if argument valid, false otherwise + * @param ch character to validate + * @returns true if argument valid, false otherwise */ """) output.write("int\n%s(unsigned int ch) {\n return(%sQ(ch));\n}\n\n" % (f,f)) diff --git a/tools/genTranscode.py b/codegen/genCharset.py similarity index 99% rename from tools/genTranscode.py rename to codegen/genCharset.py index 7a5168ae..7f73c48f 100755 --- a/tools/genTranscode.py +++ b/codegen/genCharset.py @@ -73,7 +73,7 @@ def genTranscodeTable(out, name, chars): printHexTable(out, 2, data) out.write('};\n\n') -out = open(f'iso8859x.inc', 'w') +out = open(f'codegen/charset.inc', 'w') out.write('''/* * Lookup tables for transcoding of 8-bit character sets. diff --git a/codegen/genEscape.py b/codegen/genEscape.py new file mode 100755 index 00000000..03bf383d --- /dev/null +++ b/codegen/genEscape.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 + +entities = [ + [ '', '&#xFFFD;' ], + [ '\t', '&#9;' ], + [ '\n', '&#10;' ], + [ '\r', '&#13;' ], + [ '"', '&quot;' ], + [ '&', '&amp;' ], + [ '<', '&lt;' ], + [ '>', '&gt;' ], +] + +offset = [ None ] * 128 + +def gen_content(out): + pos = 0 + r = '' + + for rec in entities: + char, repl = rec + + if char: + offset[ord(char)] = pos + + if pos % 12 == 0: r += '\n ' + else: r += ' ' + r += '%3d,' % len(repl) + pos += 1 + + for c in repl: + if pos % 12 == 0: r += '\n ' + else: r += ' ' + r += "'%s'," % c + pos += 1 + + out.write('static const char xmlEscapeContent[] = {%s\n};\n\n' % r) + +def gen_tab(out, name, escape, is_xml): + r = '' + + for i in range(0x80): + + if chr(i) in escape: + v = offset[i] + elif i == 0: + v = 0 + elif is_xml and i < 32 and i != 9 and i != 10: + v = 0 + else: + v = -1 + + if i % 16 == 0: r += '\n ' + else: r += ' ' + r += 
'%2d,' % v + + out.write('static const signed char %s[128] = {%s\n};\n\n' % (name, r)) + +with open('codegen/escape.inc', 'w') as out: + gen_content(out) + + gen_tab(out, 'xmlEscapeTab', '\r&<>', True) + gen_tab(out, 'xmlEscapeTabQuot', '\r"&<>', True) + gen_tab(out, 'xmlEscapeTabAttr', '\t\n\r"&<>', True) + + out.write('#ifdef LIBXML_HTML_ENABLED\n\n') + gen_tab(out, 'htmlEscapeTab', '&<>', False) + gen_tab(out, 'htmlEscapeTabAttr', '"&', False) + out.write('#endif /* LIBXML_HTML_ENABLED */\n') diff --git a/tools/genHtmlEnt.py b/codegen/genHtml5Ent.py similarity index 93% rename from tools/genHtmlEnt.py rename to codegen/genHtml5Ent.py index f87a570f..e3be9f1d 100755 --- a/tools/genHtmlEnt.py +++ b/codegen/genHtml5Ent.py @@ -162,8 +162,9 @@ def gen_table(ctype, cname, values, fmt, elems_per_line): else: r += ' ' r += fmt % values[i] - return f'static const {ctype} {cname}[{count}] = {{{r}\n}};\n' + return f'static const {ctype} {cname}[{count}] = {{{r}\n}};\n\n' -print(gen_table('unsigned char', 'htmlEntAlpha', alpha, '%3d', 15)) -print(gen_table('unsigned short', 'htmlEntValues', values, '%5d', 10)) -print(gen_table('unsigned char', 'htmlEntStrings', strings, '%3s', 15)) +with open('codegen/html5ent.inc', 'w') as out: + out.write(gen_table('unsigned char', 'htmlEntAlpha', alpha, '%3d', 15)) + out.write(gen_table('unsigned short', 'htmlEntValues', values, '%5d', 10)) + out.write(gen_table('unsigned char', 'htmlEntStrings', strings, '%3s', 15)) diff --git a/tools/genHtml5LibTests.py b/codegen/genHtml5LibTests.py old mode 100644 new mode 100755 similarity index 100% rename from tools/genHtml5LibTests.py rename to codegen/genHtml5LibTests.py diff --git a/tools/gentest.py b/codegen/genTestApi.py old mode 100644 new mode 100755 similarity index 81% rename from tools/gentest.py rename to codegen/genTestApi.py index 8efa03a6..ba9f688c --- a/tools/gentest.py +++ b/codegen/genTestApi.py @@ -13,40 +13,40 @@ import xmlmod # Globals dtors = { - 'htmlDocPtr': 'xmlFreeDoc', - 
'htmlParserCtxtPtr': 'htmlFreeParserCtxt', - 'xmlAutomataPtr': 'xmlFreeAutomata', - 'xmlBufferPtr': 'xmlBufferFree', - 'xmlCatalogPtr': 'xmlFreeCatalog', + 'htmlDoc *': 'xmlFreeDoc', + 'htmlParserCtxt *': 'htmlFreeParserCtxt', + 'xmlAutomata *': 'xmlFreeAutomata', + 'xmlBuffer *': 'xmlBufferFree', + 'xmlCatalog *': 'xmlFreeCatalog', 'xmlChar *': 'xmlFree', - 'xmlDOMWrapCtxtPtr': 'xmlDOMWrapFreeCtxt', - 'xmlDictPtr': 'xmlDictFree', - 'xmlDocPtr': 'xmlFreeDoc', - 'xmlDtdPtr': 'xmlFreeDtd', - 'xmlEntitiesTablePtr': 'xmlFreeEntitiesTable', - 'xmlEnumerationPtr': 'xmlFreeEnumeration', - 'xmlListPtr': 'xmlListDelete', - 'xmlModulePtr': 'xmlModuleFree', - 'xmlMutexPtr': 'xmlFreeMutex', - 'xmlNodePtr': 'xmlFreeNode', - 'xmlNodeSetPtr': 'xmlXPathFreeNodeSet', - 'xmlNsPtr': 'xmlFreeNs', - 'xmlOutputBufferPtr': 'xmlOutputBufferClose', - 'xmlParserCtxtPtr': 'xmlFreeParserCtxt', - 'xmlParserInputBufferPtr': 'xmlFreeParserInputBuffer', - 'xmlParserInputPtr': 'xmlFreeInputStream', - 'xmlRMutexPtr': 'xmlFreeRMutex', - 'xmlRelaxNGValidCtxtPtr': 'xmlRelaxNGFreeValidCtxt', - 'xmlSaveCtxtPtr': 'xmlSaveClose', - 'xmlSchemaFacetPtr': 'xmlSchemaFreeFacet', - 'xmlSchemaValPtr': 'xmlSchemaFreeValue', - 'xmlSchemaValidCtxtPtr': 'xmlSchemaFreeValidCtxt', - 'xmlTextWriterPtr': 'xmlFreeTextWriter', - 'xmlURIPtr': 'xmlFreeURI', - 'xmlValidCtxtPtr': 'xmlFreeValidCtxt', - 'xmlXPathContextPtr': 'xmlXPathFreeContext', - 'xmlXPathParserContextPtr': 'xmlXPathFreeParserContext', - 'xmlXPathObjectPtr': 'xmlXPathFreeObject', + 'xmlDOMWrapCtxt *': 'xmlDOMWrapFreeCtxt', + 'xmlDict *': 'xmlDictFree', + 'xmlDoc *': 'xmlFreeDoc', + 'xmlDtd *': 'xmlFreeDtd', + 'xmlEntitiesTable *': 'xmlFreeEntitiesTable', + 'xmlEnumeration *': 'xmlFreeEnumeration', + 'xmlList *': 'xmlListDelete', + 'xmlModule *': 'xmlModuleFree', + 'xmlMutex *': 'xmlFreeMutex', + 'xmlNode *': 'xmlFreeNode', + 'xmlNodeSet *': 'xmlXPathFreeNodeSet', + 'xmlNs *': 'xmlFreeNs', + 'xmlOutputBuffer *': 'xmlOutputBufferClose', + 'xmlParserCtxt *': 
'xmlFreeParserCtxt', + 'xmlParserInputBuffer *': 'xmlFreeParserInputBuffer', + 'xmlParserInput *': 'xmlFreeInputStream', + 'xmlRMutex *': 'xmlFreeRMutex', + 'xmlRelaxNGValidCtxt *': 'xmlRelaxNGFreeValidCtxt', + 'xmlSaveCtxt *': 'xmlSaveClose', + 'xmlSchemaFacet *': 'xmlSchemaFreeFacet', + 'xmlSchemaVal *': 'xmlSchemaFreeValue', + 'xmlSchemaValidCtxt *': 'xmlSchemaFreeValidCtxt', + 'xmlTextWriter *': 'xmlFreeTextWriter', + 'xmlURI *': 'xmlFreeURI', + 'xmlValidCtxt *': 'xmlFreeValidCtxt', + 'xmlXPathContext *': 'xmlXPathFreeContext', + 'xmlXPathParserContext *': 'xmlXPathFreeParserContext', + 'xmlXPathObject *': 'xmlXPathFreeObject', } blockList = { @@ -194,7 +194,7 @@ for file in os.listdir(xmlDocDir): dtor = dtors.get(rtype) if dtor is not None: code = f'{dtor}({code})' - elif rtype == 'xmlHashTablePtr': + elif rtype == 'xmlHashTable *': code = f'xmlHashFree({code}, NULL)' mmfunc[name] = f' {code};\n' diff --git a/tools/genUnicode.py b/codegen/genUnicode.py similarity index 90% rename from tools/genUnicode.py rename to codegen/genUnicode.py index 67fef622..de881f43 100755 --- a/tools/genUnicode.py +++ b/codegen/genUnicode.py @@ -11,10 +11,8 @@ # import sys import string -import time webpage = "http://www.unicode.org/Public/4.0-Update1/UCD-4.0.1.html" -sources = "Blocks-4.0.1.txt UnicodeData-4.0.1.txt" # # blockAliases is a small hack - it is used for mapping block names which @@ -31,7 +29,8 @@ blockAliases.append("PrivateUse:PrivateUseArea,SupplementaryPrivateUseArea-A," + # number, inline comparisons are generated minTableSize = 8 -(blockfile, catfile) = sources.split() +blockfile = "Blocks-4.0.1.txt" +catfile = "UnicodeData-4.0.1.txt" # @@ -197,8 +196,6 @@ except: print("Failed to open xmlunicode.c") sys.exit(1) -date = time.asctime(time.localtime(time.time())) - output.write( """/* * xmlunicode.c: this module implements the Unicode character APIs @@ -207,9 +204,6 @@ output.write( * UCS description files of the Unicode Character Database * %s * using the 
genUnicode.py Python script. - * - * Generation date: %s - * Sources: %s */ #define IN_LIBXML @@ -238,7 +232,7 @@ typedef struct { static xmlIntFunc *xmlUnicodeLookup(const xmlUnicodeNameTable *tptr, const char *tname); -""" % (webpage, date, sources)); +""" % webpage); # # For any categories with more than minTableSize ranges we generate @@ -281,13 +275,11 @@ for name in ckeys: output.write( """/** - * xmlUnicodeLookup: - * @tptr: pointer to the name table - * @tname: name to be found - * * binary table lookup for user-supplied name * - * Returns pointer to range function if found, otherwise NULL + * @param tptr pointer to the name table + * @param tname name to be found + * @returns pointer to range function if found, otherwise NULL */ static xmlIntFunc *xmlUnicodeLookup(const xmlUnicodeNameTable *tptr, const char *tname) { @@ -316,10 +308,10 @@ static xmlIntFunc for block in bkeys: name = block.replace('-', '') - output.write("/**\n * xmlUCSIs%s:\n * @code: UCS code point\n" % (name)) - output.write(" *\n * Check whether the character is part of %s UCS Block\n"% + output.write("/**\n * Check whether the character is part of %s UCS Block\n"% (block)) - output.write(" *\n * Returns 1 if true 0 otherwise\n */\n"); + output.write(" *\n * @param code UCS code point\n") + output.write(" * @returns 1 if true 0 otherwise\n */\n"); output.write("static int\nxmlUCSIs%s(int code) {\n return(" % name) flag = 0 for (start, end) in BlockNames[block]: @@ -332,10 +324,10 @@ for block in bkeys: for name in ckeys: ranges = Categories[name] - output.write("/**\n * xmlUCSIsCat%s:\n * @code: UCS code point\n" % (name)) - output.write(" *\n * Check whether the character is part of %s UCS Category\n"% + output.write("/**\n * Check whether the character is part of %s UCS Category\n"% (name)) - output.write(" *\n * Returns 1 if true 0 otherwise\n */\n"); + output.write(" *\n * @param code UCS code point\n") + output.write(" * @returns 1 if true 0 otherwise\n */\n"); 
output.write("int\nxmlUCSIsCat%s(int code) {\n" % name) if len(Categories[name]) > minTableSize: output.write(" return(xmlCharInRange((unsigned int)code, &xml%sG)" @@ -385,13 +377,11 @@ static const xmlUnicodeNameTable xmlUnicodeBlockTbl = {xmlUnicodeBlocks, %s}; static const xmlUnicodeNameTable xmlUnicodeCatTbl = {xmlUnicodeCats, %s}; /** - * xmlUCSIsBlock: - * @code: UCS code point - * @block: UCS block name - * * Check whether the character is part of the UCS Block * - * Returns 1 if true, 0 if false and -1 on unknown block + * @param code UCS code point + * @param block UCS block name + * @returns 1 if true, 0 if false and -1 on unknown block */ int xmlUCSIsBlock(int code, const char *block) { @@ -404,13 +394,11 @@ xmlUCSIsBlock(int code, const char *block) { } /** - * xmlUCSIsCat: - * @code: UCS code point - * @cat: UCS Category name - * * Check whether the character is part of the UCS Category * - * Returns 1 if true, 0 if false and -1 on unknown category + * @param code UCS code point + * @param cat UCS Category name + * @returns 1 if true, 0 if false and -1 on unknown category */ int xmlUCSIsCat(int code, const char *cat) { diff --git a/html5ent.inc b/codegen/html5ent.inc similarity index 100% rename from html5ent.inc rename to codegen/html5ent.inc diff --git a/tools/xmlmod.py b/codegen/xmlmod.py similarity index 100% rename from tools/xmlmod.py rename to codegen/xmlmod.py diff --git a/encoding.c b/encoding.c index 659f93a3..4e4b6359 100644 --- a/encoding.c +++ b/encoding.c @@ -279,7 +279,7 @@ UTF8ToHtmlWrapper(void *vctxt, unsigned char *out, int *outlen, #define UTF8ToHtmlWrapper NULL #endif -#include "iso8859x.inc" +#include "codegen/charset.inc" static xmlCharEncError EightBitToUtf8(void *vctxt, unsigned char* out, int *outlen, diff --git a/python/generator.py b/python/generator.py index 63c48446..112367b6 100755 --- a/python/generator.py +++ b/python/generator.py @@ -350,7 +350,7 @@ skipped_types = { import os import xml.etree.ElementTree as etree 
-sys.path.append(srcPref + '/../tools') +sys.path.append(srcPref + '/../codegen') import xmlmod xmlDocDir = dstPref + '/../doc/xml' diff --git a/tools/genEscape.py b/tools/genEscape.py deleted file mode 100755 index fbd12c90..00000000 --- a/tools/genEscape.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python3 - -entities = [ - [ '', '&#xFFFD;' ], - [ '\t', '&#9;' ], - [ '\n', '&#10;' ], - [ '\r', '&#13;' ], - [ '"', '&quot;' ], - [ '&', '&amp;' ], - [ '<', '&lt;' ], - [ '>', '&gt;' ], -] - -### xmlEscapeContent - -offset = [ None ] * 128 -pos = 0 -r = '' - -for rec in entities: - char, repl = rec - - if char: - offset[ord(char)] = pos - - if pos % 12 == 0: r += '\n ' - else: r += ' ' - r += '%3d,' % len(repl) - pos += 1 - - for c in repl: - if pos % 12 == 0: r += '\n ' - else: r += ' ' - r += "'%s'," % c - pos += 1 - -print('static const char xmlEscapeContent[] = {%s\n};\n' % r) - -def gen_tab(name, escape, is_xml): - r = '' - - for i in range(0x80): - - if chr(i) in escape: - v = offset[i] - elif i == 0: - v = 0 - elif is_xml and i < 32 and i != 9 and i != 10: - v = 0 - else: - v = -1 - - if i % 16 == 0: r += '\n ' - else: r += ' ' - r += '%2d,' % v - - print('static const signed char %s[128] = {%s\n};\n' % (name, r)) - -gen_tab('xmlEscapeTab', '\r&<>', True) -gen_tab('xmlEscapeTabQuot', '\r"&<>', True) -gen_tab('xmlEscapeTabAttr', '\t\n\r"&<>', True) - -print('#ifdef LIBXML_HTML_ENABLED\n') -gen_tab('htmlEscapeTab', '&<>', False) -gen_tab('htmlEscapeTabAttr', '"&', False) -print('#endif /* LIBXML_HTML_ENABLED */') diff --git a/xmlIO.c b/xmlIO.c index 6c9449b2..7f548020 100644 --- a/xmlIO.c +++ b/xmlIO.c @@ -159,76 +159,7 @@ xmlSerializeHexCharRef(char *buf, int val) { return(out - buf); } -/* - * Tables generated with tools/genEscape.py - */ - -static const char xmlEscapeContent[] = { - 8, '&', '#', 'x', 'F', 'F', 'F', 'D', ';', 4, '&', '#', - '9', ';', 5, '&', '#', '1', '0', ';', 5, '&', '#', '1', - '3', ';', 6, '&', 'q', 'u', 'o', 't', ';', 5, '&', 'a', - 'm', 'p', ';', 4, '&', 'l', 't', 
';', 4, '&', 'g', 't', - ';', -}; - -static const signed char xmlEscapeTab[128] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 20, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -1, -1, -1, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -}; - -static const signed char xmlEscapeTabQuot[128] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 20, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -}; - -static const signed char xmlEscapeTabAttr[128] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 14, 0, 0, 20, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -}; - -#ifdef LIBXML_HTML_ENABLED - -static const signed char htmlEscapeTab[128] = { - 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -}; - -static const signed char htmlEscapeTabAttr[128] = { - 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -}; - -#endif /* LIBXML_HTML_ENABLED */ +#include "codegen/escape.inc" /* * @param text input text