diff --git a/HTMLparser.c b/HTMLparser.c index 35e162a4..9a8e2c05 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -2432,7 +2432,7 @@ htmlCodePointToUtf8(int c, xmlChar *out, int *osize) { return(out); } -#include "html5ent.inc" +#include "codegen/html5ent.inc" #define ENT_F_SEMICOLON 0x80u #define ENT_F_SUBTABLE 0x40u diff --git a/Makefile.am b/Makefile.am index ae877617..c07147ea 100644 --- a/Makefile.am +++ b/Makefile.am @@ -154,7 +154,7 @@ testdso_la_LDFLAGS = $(AM_LDFLAGS) \ -module -no-undefined -avoid-version -rpath $(libdir) rebuild_testapi: - cd $(srcdir) && python3 tools/gentest.py $(abs_builddir) + cd $(srcdir) && python3 codegen/genTestApi.py $(abs_builddir) testapi_SOURCES=testapi.c testapi_DEPENDENCIES = $(DEPS) @@ -201,9 +201,18 @@ CLEANFILES = missing.lst runsuite.log runxmlconf.log test.out \ EXTRA_DIST = Copyright libxml2-config.cmake.in autogen.sh \ libxml.h \ - html5ent.inc iso8859x.inc \ - tools/gentest.py tools/xmlmod.py \ - tools/genChRanges.py tools/genEscape.py tools/genUnicode.py \ + codegen/charset.inc \ + codegen/chvalid.def \ + codegen/escape.inc \ + codegen/genCharset.py \ + codegen/genChRanges.py \ + codegen/genEscape.py \ + codegen/genHtml5Ent.py \ + codegen/genHtml5LibTests.py \ + codegen/genTestApi.py \ + codegen/genUnicode.py \ + codegen/html5ent.inc \ + codegen/xmlmod.py \ timsort.h \ README.zOS README.md \ CMakeLists.txt config.h.cmake.in libxml2-config.cmake.cmake.in \ diff --git a/iso8859x.inc b/codegen/charset.inc similarity index 100% rename from iso8859x.inc rename to codegen/charset.inc diff --git a/chvalid.def b/codegen/chvalid.def similarity index 100% rename from chvalid.def rename to codegen/chvalid.def diff --git a/codegen/escape.inc b/codegen/escape.inc new file mode 100644 index 00000000..a51660af --- /dev/null +++ b/codegen/escape.inc @@ -0,0 +1,66 @@ +static const char xmlEscapeContent[] = { + 8, '&', '#', 'x', 'F', 'F', 'F', 'D', ';', 4, '&', '#', + '9', ';', 5, '&', '#', '1', '0', ';', 5, '&', '#', '1', + '3', 
';', 6, '&', 'q', 'u', 'o', 't', ';', 5, '&', 'a', + 'm', 'p', ';', 4, '&', 'l', 't', ';', 4, '&', 'g', 't', + ';', +}; + +static const signed char xmlEscapeTab[128] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 20, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -1, -1, -1, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; + +static const signed char xmlEscapeTabQuot[128] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 20, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; + +static const signed char xmlEscapeTabAttr[128] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 14, 0, 0, 20, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; + +#ifdef LIBXML_HTML_ENABLED + +static const signed char htmlEscapeTab[128] = { + 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 
-1, -1, -1, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; + +static const signed char htmlEscapeTabAttr[128] = { + 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; + +#endif /* LIBXML_HTML_ENABLED */ diff --git a/tools/genChRanges.py b/codegen/genChRanges.py similarity index 94% rename from tools/genChRanges.py rename to codegen/genChRanges.py index 567b31b4..3e58b70d 100755 --- a/tools/genChRanges.py +++ b/codegen/genChRanges.py @@ -16,7 +16,6 @@ # import sys -import time # # A routine to take a list of yes/no (1, 0) values and turn it @@ -41,8 +40,6 @@ def makeRange(lst): pos = e + 1 # ready to check for next range return ret -sources = "chvalid.def" # input filename - # minTableSize gives the minimum number of ranges which must be present # before a 256-byte lookup table is produced. 
If there are less than this # number, a macro with inline comparisons is generated @@ -54,9 +51,9 @@ Functs = {} state = 0 try: - defines = open("chvalid.def", "r") + defines = open("codegen/chvalid.def", "r") except: - print("Missing chvalid.def, aborting ...") + print("Missing codegen/chvalid.def, aborting ...") sys.exit(1) # @@ -202,19 +199,19 @@ except: print("Failed to open chvalid.c") sys.exit(1) -date = time.asctime(time.localtime(time.time())) +fkeys = sorted(Functs.keys()) header.write( -"""/* - * Summary: Unicode character range checking - * Description: this module exports interfaces for the character +"""/** + * @file + * + * @brief Unicode character range checking + * + * this module exports interfaces for the character * range validation APIs * * This file is automatically generated from the cvs source * definition files using the genChRanges.py Python script - * - * Generation date: %s - * Sources: %s */ #ifndef __XML_CHVALID_H__ @@ -227,6 +224,8 @@ header.write( extern "C" { #endif +/** @cond ignore */ + /* * Define our typedefs and structures * @@ -254,13 +253,27 @@ struct _xmlChRangeGroup { const xmlChLRange\t*longRange; }; +"""); + +for f in fkeys: + if len(Functs[f][1]) > 0: + header.write("XMLPUBVAR const xmlChRangeGroup %sGroup;\n" % f) + if max(Functs[f][0]) > 0: # only check if at least one entry + rangeTable = makeRange(Functs[f][0]) + numRanges = len(rangeTable) + if numRanges >= minTableSize: # table is worthwhile + header.write("XMLPUBVAR const unsigned char %s_tab[256];\n" % f) + +header.write(""" /** * Range checking routine */ XMLPUBFUN int \t\txmlCharInRange(unsigned int val, const xmlChRangeGroup *group); -""" % (date, sources)); +/** @endcond */ +"""); + output.write( """/* * chvalid.c:\tthis module implements the character range @@ -268,9 +281,6 @@ output.write( * * This file is automatically generated from the cvs source * definition files using the genChRanges.py Python script - * - * Generation date: %s - * Sources: %s */ 
#define IN_LIBXML @@ -287,7 +297,7 @@ output.write( * allowed. * */ -""" % (date, sources)); +"""); # # Now output the generated data. @@ -298,8 +308,6 @@ output.write( # compares, otherwise we output a 256-byte table and a macro to use it. # -fkeys = sorted(Functs.keys()) - for f in fkeys: # First we convert the specified single-byte values into a group of ranges. @@ -310,15 +318,13 @@ for f in fkeys: rangeTable = makeRange(Functs[f][0]) numRanges = len(rangeTable) if numRanges >= minTableSize: # table is worthwhile - header.write("XMLPUBVAR const unsigned char %s_tab[256];\n" % f) header.write(""" /** - * %s_ch: - * @c: char to validate - * * Automatically generated by genChRanges.py + * + * @param c char to validate */ -""" % f) +""") header.write("#define %s_ch(c)\t(%s_tab[(c)])\n" % (f, f)) # write the constant data to the code file @@ -343,12 +349,11 @@ for f in fkeys: header.write(""" /** - * %s_ch: - * @c: char to validate - * * Automatically generated by genChRanges.py + * + * @param c char to validate */ -""" % f) +""") # okay, I'm tired of the messy lineup - let's automate it! pline = "#define %s_ch(c)" % f # 'ntab' is number of tabs needed to position to col. 
33 from name end @@ -378,12 +383,11 @@ for f in fkeys: header.write(""" /** - * %sQ: - * @c: char to validate - * * Automatically generated by genChRanges.py + * + * @param c char to validate */ -""" % f) +""") pline = "#define %sQ(c)" % f ntab = 4 - (len(pline)) // 8 if ntab < 0: @@ -403,7 +407,7 @@ for f in fkeys: header.write(" 0)\n\n") else: if numRanges >= minTableSize: - header.write(" \\\n\t\t\t\t xmlCharInRange((c), &%sGroup))\n\n" % f) + header.write(" \\\n\t\t\t\t xmlCharInRange((c), &%sGroup))\n" % f) else: # if < minTableSize, generate inline code firstFlag = 1 for rg in Functs[f][1]: @@ -417,14 +421,10 @@ for f in fkeys: else: # value range pline += "((0x%x <= (c)) &&" % rg[0] pline += " ((c) <= 0x%x))" % rg[1] - pline += "))\n\n" + pline += "))\n" header.write(pline) - if len(Functs[f][1]) > 0: - header.write("XMLPUBVAR const xmlChRangeGroup %sGroup;\n" % f) - - # # Next we do the unicode ranges # @@ -477,14 +477,12 @@ for f in fkeys: output.write( """ /** - * xmlCharInRange: - * @val: character to be validated - * @rptr: pointer to range to be used to validate - * * Does a binary search of the range table to determine if char * is valid * - * Returns: true if character valid, false otherwise + * @param val character to be validated + * @param rptr pointer to range to be used to validate + * @returns true if character valid, false otherwise */ int xmlCharInRange (unsigned int val, const xmlChRangeGroup *rptr) { @@ -542,18 +540,16 @@ xmlCharInRange (unsigned int val, const xmlChRangeGroup *rptr) { for f in fkeys: output.write(""" /** - * %s: - * @ch: character to validate - * * This function is DEPRECATED. 
-""" % f); +"""); if max(Functs[f][0]) > 0: - output.write(" * Use %s_ch or %sQ instead" % (f, f)) + output.write(" * Use %s_ch() or %sQ() instead" % (f, f)) else: - output.write(" * Use %sQ instead" % f) + output.write(" * Use %sQ() instead" % f) output.write(""" * - * Returns true if argument valid, false otherwise + * @param ch character to validate + * @returns true if argument valid, false otherwise */ """) output.write("int\n%s(unsigned int ch) {\n return(%sQ(ch));\n}\n\n" % (f,f)) diff --git a/tools/genTranscode.py b/codegen/genCharset.py similarity index 99% rename from tools/genTranscode.py rename to codegen/genCharset.py index 7a5168ae..7f73c48f 100755 --- a/tools/genTranscode.py +++ b/codegen/genCharset.py @@ -73,7 +73,7 @@ def genTranscodeTable(out, name, chars): printHexTable(out, 2, data) out.write('};\n\n') -out = open(f'iso8859x.inc', 'w') +out = open(f'codegen/charset.inc', 'w') out.write('''/* * Lookup tables for transcoding of 8-bit character sets. diff --git a/codegen/genEscape.py b/codegen/genEscape.py new file mode 100755 index 00000000..03bf383d --- /dev/null +++ b/codegen/genEscape.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 + +entities = [ + [ '', '&#xFFFD;' ], + [ '\t', '&#9;' ], + [ '\n', '&#10;' ], + [ '\r', '&#13;' ], + [ '"', '&quot;' ], + [ '&', '&amp;' ], + [ '<', '&lt;' ], + [ '>', '&gt;' ], +] + +offset = [ None ] * 128 + +def gen_content(out): + pos = 0 + r = '' + + for rec in entities: + char, repl = rec + + if char: + offset[ord(char)] = pos + + if pos % 12 == 0: r += '\n ' + else: r += ' ' + r += '%3d,' % len(repl) + pos += 1 + + for c in repl: + if pos % 12 == 0: r += '\n ' + else: r += ' ' + r += "'%s'," % c + pos += 1 + + out.write('static const char xmlEscapeContent[] = {%s\n};\n\n' % r) + +def gen_tab(out, name, escape, is_xml): + r = '' + + for i in range(0x80): + + if chr(i) in escape: + v = offset[i] + elif i == 0: + v = 0 + elif is_xml and i < 32 and i != 9 and i != 10: + v = 0 + else: + v = -1 + + if i % 16 == 0: r += '\n ' + else: r += ' ' + r += 
'%2d,' % v + + out.write('static const signed char %s[128] = {%s\n};\n\n' % (name, r)) + +with open('codegen/escape.inc', 'w') as out: + gen_content(out) + + gen_tab(out, 'xmlEscapeTab', '\r&<>', True) + gen_tab(out, 'xmlEscapeTabQuot', '\r"&<>', True) + gen_tab(out, 'xmlEscapeTabAttr', '\t\n\r"&<>', True) + + out.write('#ifdef LIBXML_HTML_ENABLED\n\n') + gen_tab(out, 'htmlEscapeTab', '&<>', False) + gen_tab(out, 'htmlEscapeTabAttr', '"&', False) + out.write('#endif /* LIBXML_HTML_ENABLED */\n') diff --git a/tools/genHtmlEnt.py b/codegen/genHtml5Ent.py similarity index 93% rename from tools/genHtmlEnt.py rename to codegen/genHtml5Ent.py index f87a570f..e3be9f1d 100755 --- a/tools/genHtmlEnt.py +++ b/codegen/genHtml5Ent.py @@ -162,8 +162,9 @@ def gen_table(ctype, cname, values, fmt, elems_per_line): else: r += ' ' r += fmt % values[i] - return f'static const {ctype} {cname}[{count}] = {{{r}\n}};\n' + return f'static const {ctype} {cname}[{count}] = {{{r}\n}};\n\n' -print(gen_table('unsigned char', 'htmlEntAlpha', alpha, '%3d', 15)) -print(gen_table('unsigned short', 'htmlEntValues', values, '%5d', 10)) -print(gen_table('unsigned char', 'htmlEntStrings', strings, '%3s', 15)) +with open('codegen/html5ent.inc', 'w') as out: + out.write(gen_table('unsigned char', 'htmlEntAlpha', alpha, '%3d', 15)) + out.write(gen_table('unsigned short', 'htmlEntValues', values, '%5d', 10)) + out.write(gen_table('unsigned char', 'htmlEntStrings', strings, '%3s', 15)) diff --git a/tools/genHtml5LibTests.py b/codegen/genHtml5LibTests.py old mode 100644 new mode 100755 similarity index 100% rename from tools/genHtml5LibTests.py rename to codegen/genHtml5LibTests.py diff --git a/tools/gentest.py b/codegen/genTestApi.py old mode 100644 new mode 100755 similarity index 81% rename from tools/gentest.py rename to codegen/genTestApi.py index 8efa03a6..ba9f688c --- a/tools/gentest.py +++ b/codegen/genTestApi.py @@ -13,40 +13,40 @@ import xmlmod # Globals dtors = { - 'htmlDocPtr': 'xmlFreeDoc', - 
'htmlParserCtxtPtr': 'htmlFreeParserCtxt', - 'xmlAutomataPtr': 'xmlFreeAutomata', - 'xmlBufferPtr': 'xmlBufferFree', - 'xmlCatalogPtr': 'xmlFreeCatalog', + 'htmlDoc *': 'xmlFreeDoc', + 'htmlParserCtxt *': 'htmlFreeParserCtxt', + 'xmlAutomata *': 'xmlFreeAutomata', + 'xmlBuffer *': 'xmlBufferFree', + 'xmlCatalog *': 'xmlFreeCatalog', 'xmlChar *': 'xmlFree', - 'xmlDOMWrapCtxtPtr': 'xmlDOMWrapFreeCtxt', - 'xmlDictPtr': 'xmlDictFree', - 'xmlDocPtr': 'xmlFreeDoc', - 'xmlDtdPtr': 'xmlFreeDtd', - 'xmlEntitiesTablePtr': 'xmlFreeEntitiesTable', - 'xmlEnumerationPtr': 'xmlFreeEnumeration', - 'xmlListPtr': 'xmlListDelete', - 'xmlModulePtr': 'xmlModuleFree', - 'xmlMutexPtr': 'xmlFreeMutex', - 'xmlNodePtr': 'xmlFreeNode', - 'xmlNodeSetPtr': 'xmlXPathFreeNodeSet', - 'xmlNsPtr': 'xmlFreeNs', - 'xmlOutputBufferPtr': 'xmlOutputBufferClose', - 'xmlParserCtxtPtr': 'xmlFreeParserCtxt', - 'xmlParserInputBufferPtr': 'xmlFreeParserInputBuffer', - 'xmlParserInputPtr': 'xmlFreeInputStream', - 'xmlRMutexPtr': 'xmlFreeRMutex', - 'xmlRelaxNGValidCtxtPtr': 'xmlRelaxNGFreeValidCtxt', - 'xmlSaveCtxtPtr': 'xmlSaveClose', - 'xmlSchemaFacetPtr': 'xmlSchemaFreeFacet', - 'xmlSchemaValPtr': 'xmlSchemaFreeValue', - 'xmlSchemaValidCtxtPtr': 'xmlSchemaFreeValidCtxt', - 'xmlTextWriterPtr': 'xmlFreeTextWriter', - 'xmlURIPtr': 'xmlFreeURI', - 'xmlValidCtxtPtr': 'xmlFreeValidCtxt', - 'xmlXPathContextPtr': 'xmlXPathFreeContext', - 'xmlXPathParserContextPtr': 'xmlXPathFreeParserContext', - 'xmlXPathObjectPtr': 'xmlXPathFreeObject', + 'xmlDOMWrapCtxt *': 'xmlDOMWrapFreeCtxt', + 'xmlDict *': 'xmlDictFree', + 'xmlDoc *': 'xmlFreeDoc', + 'xmlDtd *': 'xmlFreeDtd', + 'xmlEntitiesTable *': 'xmlFreeEntitiesTable', + 'xmlEnumeration *': 'xmlFreeEnumeration', + 'xmlList *': 'xmlListDelete', + 'xmlModule *': 'xmlModuleFree', + 'xmlMutex *': 'xmlFreeMutex', + 'xmlNode *': 'xmlFreeNode', + 'xmlNodeSet *': 'xmlXPathFreeNodeSet', + 'xmlNs *': 'xmlFreeNs', + 'xmlOutputBuffer *': 'xmlOutputBufferClose', + 'xmlParserCtxt *': 
'xmlFreeParserCtxt', + 'xmlParserInputBuffer *': 'xmlFreeParserInputBuffer', + 'xmlParserInput *': 'xmlFreeInputStream', + 'xmlRMutex *': 'xmlFreeRMutex', + 'xmlRelaxNGValidCtxt *': 'xmlRelaxNGFreeValidCtxt', + 'xmlSaveCtxt *': 'xmlSaveClose', + 'xmlSchemaFacet *': 'xmlSchemaFreeFacet', + 'xmlSchemaVal *': 'xmlSchemaFreeValue', + 'xmlSchemaValidCtxt *': 'xmlSchemaFreeValidCtxt', + 'xmlTextWriter *': 'xmlFreeTextWriter', + 'xmlURI *': 'xmlFreeURI', + 'xmlValidCtxt *': 'xmlFreeValidCtxt', + 'xmlXPathContext *': 'xmlXPathFreeContext', + 'xmlXPathParserContext *': 'xmlXPathFreeParserContext', + 'xmlXPathObject *': 'xmlXPathFreeObject', } blockList = { @@ -194,7 +194,7 @@ for file in os.listdir(xmlDocDir): dtor = dtors.get(rtype) if dtor is not None: code = f'{dtor}({code})' - elif rtype == 'xmlHashTablePtr': + elif rtype == 'xmlHashTable *': code = f'xmlHashFree({code}, NULL)' mmfunc[name] = f' {code};\n' diff --git a/tools/genUnicode.py b/codegen/genUnicode.py similarity index 90% rename from tools/genUnicode.py rename to codegen/genUnicode.py index 67fef622..de881f43 100755 --- a/tools/genUnicode.py +++ b/codegen/genUnicode.py @@ -11,10 +11,8 @@ # import sys import string -import time webpage = "http://www.unicode.org/Public/4.0-Update1/UCD-4.0.1.html" -sources = "Blocks-4.0.1.txt UnicodeData-4.0.1.txt" # # blockAliases is a small hack - it is used for mapping block names which @@ -31,7 +29,8 @@ blockAliases.append("PrivateUse:PrivateUseArea,SupplementaryPrivateUseArea-A," + # number, inline comparisons are generated minTableSize = 8 -(blockfile, catfile) = sources.split() +blockfile = "Blocks-4.0.1.txt" +catfile = "UnicodeData-4.0.1.txt" # @@ -197,8 +196,6 @@ except: print("Failed to open xmlunicode.c") sys.exit(1) -date = time.asctime(time.localtime(time.time())) - output.write( """/* * xmlunicode.c: this module implements the Unicode character APIs @@ -207,9 +204,6 @@ output.write( * UCS description files of the Unicode Character Database * %s * using the 
genUnicode.py Python script. - * - * Generation date: %s - * Sources: %s */ #define IN_LIBXML @@ -238,7 +232,7 @@ typedef struct { static xmlIntFunc *xmlUnicodeLookup(const xmlUnicodeNameTable *tptr, const char *tname); -""" % (webpage, date, sources)); +""" % webpage); # # For any categories with more than minTableSize ranges we generate @@ -281,13 +275,11 @@ for name in ckeys: output.write( """/** - * xmlUnicodeLookup: - * @tptr: pointer to the name table - * @tname: name to be found - * * binary table lookup for user-supplied name * - * Returns pointer to range function if found, otherwise NULL + * @param tptr pointer to the name table + * @param tname name to be found + * @returns pointer to range function if found, otherwise NULL */ static xmlIntFunc *xmlUnicodeLookup(const xmlUnicodeNameTable *tptr, const char *tname) { @@ -316,10 +308,10 @@ static xmlIntFunc for block in bkeys: name = block.replace('-', '') - output.write("/**\n * xmlUCSIs%s:\n * @code: UCS code point\n" % (name)) - output.write(" *\n * Check whether the character is part of %s UCS Block\n"% + output.write("/**\n * Check whether the character is part of %s UCS Block\n"% (block)) - output.write(" *\n * Returns 1 if true 0 otherwise\n */\n"); + output.write(" *\n * @param code UCS code point\n") + output.write(" * @returns 1 if true 0 otherwise\n */\n"); output.write("static int\nxmlUCSIs%s(int code) {\n return(" % name) flag = 0 for (start, end) in BlockNames[block]: @@ -332,10 +324,10 @@ for block in bkeys: for name in ckeys: ranges = Categories[name] - output.write("/**\n * xmlUCSIsCat%s:\n * @code: UCS code point\n" % (name)) - output.write(" *\n * Check whether the character is part of %s UCS Category\n"% + output.write("/**\n * Check whether the character is part of %s UCS Category\n"% (name)) - output.write(" *\n * Returns 1 if true 0 otherwise\n */\n"); + output.write(" *\n * @param code UCS code point\n") + output.write(" * @returns 1 if true 0 otherwise\n */\n"); 
output.write("int\nxmlUCSIsCat%s(int code) {\n" % name) if len(Categories[name]) > minTableSize: output.write(" return(xmlCharInRange((unsigned int)code, &xml%sG)" @@ -385,13 +377,11 @@ static const xmlUnicodeNameTable xmlUnicodeBlockTbl = {xmlUnicodeBlocks, %s}; static const xmlUnicodeNameTable xmlUnicodeCatTbl = {xmlUnicodeCats, %s}; /** - * xmlUCSIsBlock: - * @code: UCS code point - * @block: UCS block name - * * Check whether the character is part of the UCS Block * - * Returns 1 if true, 0 if false and -1 on unknown block + * @param code UCS code point + * @param block UCS block name + * @returns 1 if true, 0 if false and -1 on unknown block */ int xmlUCSIsBlock(int code, const char *block) { @@ -404,13 +394,11 @@ xmlUCSIsBlock(int code, const char *block) { } /** - * xmlUCSIsCat: - * @code: UCS code point - * @cat: UCS Category name - * * Check whether the character is part of the UCS Category * - * Returns 1 if true, 0 if false and -1 on unknown category + * @param code UCS code point + * @param cat UCS Category name + * @returns 1 if true, 0 if false and -1 on unknown category */ int xmlUCSIsCat(int code, const char *cat) { diff --git a/html5ent.inc b/codegen/html5ent.inc similarity index 100% rename from html5ent.inc rename to codegen/html5ent.inc diff --git a/tools/xmlmod.py b/codegen/xmlmod.py similarity index 100% rename from tools/xmlmod.py rename to codegen/xmlmod.py diff --git a/encoding.c b/encoding.c index 659f93a3..4e4b6359 100644 --- a/encoding.c +++ b/encoding.c @@ -279,7 +279,7 @@ UTF8ToHtmlWrapper(void *vctxt, unsigned char *out, int *outlen, #define UTF8ToHtmlWrapper NULL #endif -#include "iso8859x.inc" +#include "codegen/charset.inc" static xmlCharEncError EightBitToUtf8(void *vctxt, unsigned char* out, int *outlen, diff --git a/python/generator.py b/python/generator.py index 63c48446..112367b6 100755 --- a/python/generator.py +++ b/python/generator.py @@ -350,7 +350,7 @@ skipped_types = { import os import xml.etree.ElementTree as etree 
-sys.path.append(srcPref + '/../tools') +sys.path.append(srcPref + '/../codegen') import xmlmod xmlDocDir = dstPref + '/../doc/xml' diff --git a/tools/genEscape.py b/tools/genEscape.py deleted file mode 100755 index fbd12c90..00000000 --- a/tools/genEscape.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python3 - -entities = [ - [ '', '&#xFFFD;' ], - [ '\t', '&#9;' ], - [ '\n', '&#10;' ], - [ '\r', '&#13;' ], - [ '"', '&quot;' ], - [ '&', '&amp;' ], - [ '<', '&lt;' ], - [ '>', '&gt;' ], -] - -### xmlEscapeContent - -offset = [ None ] * 128 -pos = 0 -r = '' - -for rec in entities: - char, repl = rec - - if char: - offset[ord(char)] = pos - - if pos % 12 == 0: r += '\n ' - else: r += ' ' - r += '%3d,' % len(repl) - pos += 1 - - for c in repl: - if pos % 12 == 0: r += '\n ' - else: r += ' ' - r += "'%s'," % c - pos += 1 - -print('static const char xmlEscapeContent[] = {%s\n};\n' % r) - -def gen_tab(name, escape, is_xml): - r = '' - - for i in range(0x80): - - if chr(i) in escape: - v = offset[i] - elif i == 0: - v = 0 - elif is_xml and i < 32 and i != 9 and i != 10: - v = 0 - else: - v = -1 - - if i % 16 == 0: r += '\n ' - else: r += ' ' - r += '%2d,' % v - - print('static const signed char %s[128] = {%s\n};\n' % (name, r)) - -gen_tab('xmlEscapeTab', '\r&<>', True) -gen_tab('xmlEscapeTabQuot', '\r"&<>', True) -gen_tab('xmlEscapeTabAttr', '\t\n\r"&<>', True) - -print('#ifdef LIBXML_HTML_ENABLED\n') -gen_tab('htmlEscapeTab', '&<>', False) -gen_tab('htmlEscapeTabAttr', '"&', False) -print('#endif /* LIBXML_HTML_ENABLED */') diff --git a/xmlIO.c b/xmlIO.c index 6c9449b2..7f548020 100644 --- a/xmlIO.c +++ b/xmlIO.c @@ -159,76 +159,7 @@ xmlSerializeHexCharRef(char *buf, int val) { return(out - buf); } -/* - * Tables generated with tools/genEscape.py - */ - -static const char xmlEscapeContent[] = { - 8, '&', '#', 'x', 'F', 'F', 'F', 'D', ';', 4, '&', '#', - '9', ';', 5, '&', '#', '1', '0', ';', 5, '&', '#', '1', - '3', ';', 6, '&', 'q', 'u', 'o', 't', ';', 5, '&', 'a', - 'm', 'p', ';', 4, '&', 'l', 't', 
';', 4, '&', 'g', 't', - ';', -}; - -static const signed char xmlEscapeTab[128] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 20, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -1, -1, -1, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -}; - -static const signed char xmlEscapeTabQuot[128] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, 20, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -}; - -static const signed char xmlEscapeTabAttr[128] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 14, 0, 0, 20, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -}; - -#ifdef LIBXML_HTML_ENABLED - -static const signed char htmlEscapeTab[128] = { - 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1, -1, -1, 39, -1, 44, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -}; - -static const signed char htmlEscapeTabAttr[128] = { - 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -}; - -#endif /* LIBXML_HTML_ENABLED */ +#include "codegen/escape.inc" /* * @param text input text