mirror of
				https://gitlab.gnome.org/GNOME/libxml2.git
				synced 2025-10-24 13:33:01 +03:00 
			
		
		
		
	xmlUnicodeBlocks is logically const but was not marked as such. This fixes that, thus moving it to the read-only data segment.
		
			
				
	
	
		
			479 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			479 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/python -u
 | |
| #
 | |
| # Original script modified in November 2003 to take advantage of
 | |
| # the character-validation range routines, and updated to the
 | |
| # current Unicode information (Version 4.0.1)
 | |
| #
 | |
| # NOTE: there is an 'alias' facility for blocks which are not present in
 | |
| #	the current release, but are needed for ABI compatibility.  This
 | |
| #	must be accomplished MANUALLY!  Please see the comments below under
 | |
| #     'blockAliases'
 | |
| #
 | |
| import sys
 | |
| import string
 | |
| import time
 | |
| 
 | |
# Release page and data file names of the Unicode Character Database the
# tables are generated from.  Both strings are echoed into the headers of
# the generated files.
webpage = "http://www.unicode.org/Public/4.0-Update1/UCD-4.0.1.html"
sources = "Blocks-4.0.1.txt UnicodeData-4.0.1.txt"

#
# blockAliases is a small hack - it is used for mapping block names which
# were used in the 3.1 release, but are missing or changed in the current
# release.  The format is "OldBlockName:NewBlockName1[,NewBlockName2[,...]]"
blockAliases = [
    "CombiningMarksforSymbols:CombiningDiacriticalMarksforSymbols",
    "Greek:GreekandCoptic",
    "PrivateUse:PrivateUseArea,SupplementaryPrivateUseArea-A,"
    "SupplementaryPrivateUseArea-B",
]

# minTableSize is the threshold for producing a range table: a category
# gets a table only when it has MORE than this number of ranges, otherwise
# inline comparisons are generated.
minTableSize = 8

# str.split() rather than string.split(): the module-level function was
# removed in Python 3, the method works on both major versions.
(blockfile, catfile) = sources.split()
 | |
| 
 | |
| 
 | |
| #
 | |
| # Now process the "blocks" file, reducing it to a dictionary
 | |
| # indexed by blockname, containing a tuple with the applicable
 | |
| # block range
 | |
| #
 | |
# BlockNames maps a block name (spaces removed) to the list of
# ("0x<start>", "0x<end>") string pairs read from the blocks file.
BlockNames = {}
try:
    blocks = open(blockfile, "r")
except IOError:
    print("Missing %s, aborting ..." % blockfile)
    sys.exit(1)

for line in blocks:
    line = line.strip()
    # Skip blank lines and comments.
    if line == '' or line.startswith('#'):
        continue
    try:
        # Expected shape: "<start>..<end>; <Block Name>"
        fields = line.split(';')
        (start, end) = fields[0].strip().split("..")
        name = fields[1].strip().replace(' ', '')
    except (IndexError, ValueError):
        # Missing ';' field, or a range that is not exactly "a..b".
        print("Failed to process line: %s" % (line))
        continue
    # The bounds are kept as text; they are pasted verbatim into the
    # generated C comparisons.
    BlockNames.setdefault(name, []).append(("0x" + start, "0x" + end))
blocks.close()
print("Parsed %d blocks descriptions" % (len(BlockNames)))
 | |
| 
 | |
# Give every legacy block name the union of the ranges of the current
# blocks it stands for.  Entries look like "Old:New1[,New2[,...]]".
for block in blockAliases:
    alias = block.split(':')
    old_name = alias[0]
    for comp in alias[1].split(','):
        # 'in' instead of dict.has_key(), which Python 3 removed.
        if comp in BlockNames:
            BlockNames.setdefault(old_name, []).extend(BlockNames[comp])
        else:
            print("Alias %s: %s not in Blocks" % (old_name, comp))
 | |
| 
 | |
| #
 | |
| # Next process the Categories file. This is more complex, since
 | |
| # the file is in code sequence, and we need to invert it.  We use
 | |
| # a dictionary with index category-name, with each entry containing
 | |
| # all the ranges (codepoints) of that category.  Note that category
 | |
| # names comprise two parts - the general category, and the "subclass"
 | |
| # within that category.  Therefore, both "general category" (which is
 | |
| # the first character of the 2-character category-name) and the full
 | |
| # (2-character) name are entered into this dictionary.
 | |
| #
 | |
try:
    data = open(catfile, "r")
except IOError:
    print("Missing %s, aborting ..." % catfile)
    sys.exit(1)

# nbchar counts the successfully parsed lines; Categories maps both the
# full category name and its first letter to the list of code points seen.
# NOTE(review): every line is recorded as one single code point.  If the
# data file describes some characters as first/last pairs of a range, only
# the two end points are recorded - confirm against the file format.
nbchar = 0
Categories = {}
for line in data:
    line = line.strip()
    # Skip blank lines and comments.
    if line == '' or line.startswith('#'):
        continue
    try:
        fields = line.split(';')
        # Field 0 is the code point in hexadecimal; int() rejects stray
        # characters instead of silently computing a wrong value.
        value = int(fields[0].strip(), 16)
        name = fields[2]
        if not name:
            raise ValueError("empty category name")
    except (IndexError, ValueError):
        print("Failed to process line: %s" % (line))
        continue

    nbchar = nbchar + 1
    # update entry for "full name"
    Categories.setdefault(name, []).append(value)
    # update "general category" name
    Categories.setdefault(name[0], []).append(value)

# Close the categories file (the blocks file was already closed after it
# had been parsed).
data.close()
print("Parsed %d char generating %d categories" % (nbchar, len(Categories)))
 | |
| 
 | |
| #
 | |
| # The data is now all read.  Time to process it into a more useful form.
 | |
| #
 | |
| # reduce the number list into ranges
 | |
# Replace each category's list of code points by a list of inclusive
# (first, last) tuples, merging runs of consecutive values.
for cat in Categories.keys():
    points = Categories[cat]
    merged = []
    first = -1      # -1 means "no run opened yet"
    last = -1
    for cp in points:
        if first == -1:
            # Very first value: open the initial run.
            first = last = cp
        elif cp == last + 1:
            # Consecutive value: extend the current run.
            last = cp
        else:
            # Gap: flush the current run and open a new one.
            merged.append((first, last))
            first = last = cp
    # Flush the run that is still open when the list ends.
    merged.append((first, last))
    Categories[cat] = merged
 | |
| 
 | |
| #
 | |
| # Assure all data is in alphabetic order, since we will be doing binary
 | |
| # searches on the tables.
 | |
| #
 | |
# sorted() returns a new sorted list on both Python 2 and Python 3, whereas
# dict.keys() has no sort() method on Python 3.
bkeys = sorted(BlockNames)

ckeys = sorted(Categories)
 | |
| 
 | |
| #
 | |
| # Generate the resulting files
 | |
| #
 | |
# Open both generated files up front so that nothing is written when one
# of them cannot be created.
try:
    header = open("include/libxml/xmlunicode.h", "w")
except IOError:
    print("Failed to open include/libxml/xmlunicode.h")
    sys.exit(1)

try:
    output = open("xmlunicode.c", "w")
except IOError:
    print("Failed to open xmlunicode.c")
    header.close()
    sys.exit(1)

# Human-readable local timestamp recorded in both generated files.
date = time.asctime(time.localtime(time.time()))
 | |
| 
 | |
# Opening part of the generated header: banner comment, include guard,
# feature guard and the opening of the C++ linkage block.
header_preamble = """/*
 * Summary: Unicode character APIs
 * Description: API for the Unicode character APIs
 *
 * This file is automatically generated from the
 * UCS description files of the Unicode Character Database
 * %s
 * using the genUnicode.py Python script.
 *
 * Generation date: %s
 * Sources: %s
 * Author: Daniel Veillard
 */

#ifndef __XML_UNICODE_H__
#define __XML_UNICODE_H__

#include <libxml/xmlversion.h>

#ifdef LIBXML_UNICODE_ENABLED

#ifdef __cplusplus
extern "C" {
#endif

""" % (webpage, date, sources)
header.write(header_preamble)
 | |
| 
 | |
# Opening part of the generated C file: banner comment, includes, the
# lookup-table types, the lookup prototype and the start of the block
# name table (its entries are written by the code that follows).
source_preamble = """/*
 * xmlunicode.c: this module implements the Unicode character APIs
 *
 * This file is automatically generated from the
 * UCS description files of the Unicode Character Database
 * %s
 * using the genUnicode.py Python script.
 *
 * Generation date: %s
 * Sources: %s
 * Daniel Veillard <veillard@redhat.com>
 */

#define IN_LIBXML
#include "libxml.h"

#ifdef LIBXML_UNICODE_ENABLED

#include <string.h>
#include <libxml/xmlversion.h>
#include <libxml/xmlunicode.h>
#include <libxml/chvalid.h>

typedef int (xmlIntFunc)(int);	/* just to keep one's mind untwisted */

typedef struct {
    const char *rangename;
    xmlIntFunc *func;
} xmlUnicodeRange;

typedef struct {
    const xmlUnicodeRange *table;
    int		    numentries;
} xmlUnicodeNameTable;


static xmlIntFunc *xmlUnicodeLookup(xmlUnicodeNameTable *tptr, const char *tname);

static const xmlUnicodeRange xmlUnicodeBlocks[] = {
""" % (webpage, date, sources)
output.write(source_preamble)
 | |
| 
 | |
# Entries of xmlUnicodeBlocks (the array was opened by the preamble).  The
# C function name is the block name with '-' removed, since '-' is not
# valid in a C identifier.
block_entries = ['  {"%s", xmlUCSIs%s}' % (block, block.replace('-', ''))
                 for block in bkeys]
output.write(',\n'.join(block_entries))
output.write('};\n\n')

# The category table is only ever read through a pointer to const
# (xmlUnicodeNameTable.table), so it is emitted const like
# xmlUnicodeBlocks.
output.write('static const xmlUnicodeRange xmlUnicodeCats[] = {\n')
cat_entries = ['  {"%s", xmlUCSIsCat%s}' % (name, name) for name in ckeys]
output.write(',\n'.join(cat_entries))
output.write('};\n\n')
 | |
| 
 | |
| #
 | |
| # For any categories with more than minTableSize ranges we generate
 | |
| # a range table suitable for xmlCharInRange
 | |
| #
 | |
# Emit, for every category above the size threshold, an xmlChSRange array
# (ranges whose upper bound is below 0x10000), an xmlChLRange array (all
# other ranges) and the xmlChRangeGroup tying the two together.
for cat in ckeys:
    cat_ranges = Categories[cat]
    if len(cat_ranges) <= minTableSize:
        continue
    short_count = 0
    long_count = 0
    short_ref = "NULL"      # C expression for the short array, if any
    long_ref = "NULL"       # C expression for the long array, if any
    for (low, high) in cat_ranges:
        if high < 0x10000:
            if short_count:
                chunk += ", "
            else:
                chunk = "static const xmlChSRange xml%sS[] = {" % cat
                short_ref = "xml%sS" % cat
            short_count += 1
        else:
            if long_count:
                chunk += ", "
            else:
                # First long range: terminate the short array if one
                # has been started, then open the long array.
                if short_count > 0:
                    output.write(chunk + " };\n")
                chunk = "static const xmlChLRange xml%sL[] = {" % cat
                long_ref = "xml%sL" % cat
            long_count += 1
        # Wrap the generated line once it has grown past 60 characters.
        if len(chunk) > 60:
            output.write(chunk + "\n")
            chunk = "    "
        chunk += "{%s, %s}" % (hex(low), hex(high))
    output.write(chunk + " };\nstatic xmlChRangeGroup xml%sG = {%s,%s,%s,%s};\n\n"
                 % (cat, short_count, long_count, short_ref, long_ref))
 | |
| 
 | |
| 
 | |
# The two name tables plus the binary-search helper shared by
# xmlUCSIsBlock and xmlUCSIsCat.  'sptr' is declared pointer-to-const
# because xmlUnicodeNameTable.table is a pointer to const; a non-const
# local would discard the qualifier in the generated C.
lookup_code = """static xmlUnicodeNameTable xmlUnicodeBlockTbl = {xmlUnicodeBlocks, %s};
static xmlUnicodeNameTable xmlUnicodeCatTbl = {xmlUnicodeCats, %s};

/**
 * xmlUnicodeLookup:
 * @tptr: pointer to the name table
 * @tname: name to be found
 *
 * binary table lookup for user-supplied name
 *
 * Returns pointer to range function if found, otherwise NULL
 */
static xmlIntFunc
*xmlUnicodeLookup(xmlUnicodeNameTable *tptr, const char *tname) {
    int low, high, mid, cmp;
    const xmlUnicodeRange *sptr;

    if ((tptr == NULL) || (tname == NULL)) return(NULL);

    low = 0;
    high = tptr->numentries - 1;
    sptr = tptr->table;
    while (low <= high) {
	mid = (low + high) / 2;
	if ((cmp=strcmp(tname, sptr[mid].rangename)) == 0)
	    return (sptr[mid].func);
	if (cmp < 0)
	    high = mid - 1;
	else
	    low = mid + 1;
    }
    return (NULL);
}

""" % (len(BlockNames), len(Categories))
output.write(lookup_code)
 | |
| 
 | |
# One public predicate per block: a prototype in the header and a
# definition in the C file that tests the code point against every range
# recorded for that block.
for block in bkeys:
    # str.replace() instead of string.replace(), which Python 3 removed.
    name = block.replace('-', '')
    header.write("XMLPUBFUN int XMLCALL xmlUCSIs%s\t(int code);\n" % name)
    output.write("/**\n * xmlUCSIs%s:\n * @code: UCS code point\n" % (name))
    output.write(" *\n * Check whether the character is part of %s UCS Block\n"%
                 (block))
    output.write(" *\n * Returns 1 if true 0 otherwise\n */\n")
    output.write("int\nxmlUCSIs%s(int code) {\n    return(" % name)
    tests = ["((code >= %s) && (code <= %s))" % (start, end)
             for (start, end) in BlockNames[block]]
    output.write(" ||\n           ".join(tests))
    output.write(");\n}\n\n")
 | |
| 
 | |
# Generic by-name entry point for blocks: prototype, then a definition
# that looks the name up in the block table and calls the predicate.
block_prototype = "\nXMLPUBFUN int XMLCALL xmlUCSIsBlock\t(int code, const char *block);\n\n"
block_lookup_code = """/**
 * xmlUCSIsBlock:
 * @code: UCS code point
 * @block: UCS block name
 *
 * Check whether the character is part of the UCS Block
 *
 * Returns 1 if true, 0 if false and -1 on unknown block
 */
int
xmlUCSIsBlock(int code, const char *block) {
    xmlIntFunc *func;

    func = xmlUnicodeLookup(&xmlUnicodeBlockTbl, block);
    if (func == NULL)
	return (-1);
    return (func(code));
}

"""
header.write(block_prototype)
output.write(block_lookup_code)
 | |
| 
 | |
# One public predicate per category.  Categories above the size threshold
# delegate to xmlCharInRange with their generated range group; the others
# get an inline chain of comparisons.
for cat in ckeys:
    cat_ranges = Categories[cat]
    header.write("XMLPUBFUN int XMLCALL xmlUCSIsCat%s\t(int code);\n" % cat)
    output.write("/**\n * xmlUCSIsCat%s:\n * @code: UCS code point\n" % (cat))
    output.write(" *\n * Check whether the character is part of %s UCS Category\n"%
                 (cat))
    output.write(" *\n * Returns 1 if true 0 otherwise\n */\n")
    output.write("int\nxmlUCSIsCat%s(int code) {\n" % cat)
    if len(cat_ranges) > minTableSize:
        output.write("    return(xmlCharInRange((unsigned int)code, &xml%sG)"
                     % cat)
    else:
        tests = []
        for (begin, end) in cat_ranges:
            if begin == end:
                # Single code point: an equality test is enough.
                tests.append("(code == %s)" % (hex(begin)))
            else:
                tests.append("((code >= %s) && (code <= %s))" % (
                             hex(begin), hex(end)))
        if tests:
            output.write("    return(" + " ||\n           ".join(tests))
    output.write(");\n}\n\n")
 | |
| 
 | |
# Generic by-name entry point for categories, followed by the closing
# lines of the generated C file.
cat_prototype = "\nXMLPUBFUN int XMLCALL xmlUCSIsCat\t(int code, const char *cat);\n"
cat_lookup_code = """/**
 * xmlUCSIsCat:
 * @code: UCS code point
 * @cat: UCS Category name
 *
 * Check whether the character is part of the UCS Category
 *
 * Returns 1 if true, 0 if false and -1 on unknown category
 */
int
xmlUCSIsCat(int code, const char *cat) {
    xmlIntFunc *func;

    func = xmlUnicodeLookup(&xmlUnicodeCatTbl, cat);
    if (func == NULL)
	return (-1);
    return (func(code));
}

#define bottom_xmlunicode
#include "elfgcchack.h"
#endif /* LIBXML_UNICODE_ENABLED */
"""
header.write(cat_prototype)
output.write(cat_lookup_code)
 | |
| 
 | |
# Close the C++ linkage block and both preprocessor guards of the header,
# then release the two output files.
header_footer = """
#ifdef __cplusplus
}
#endif

#endif /* LIBXML_UNICODE_ENABLED */

#endif /* __XML_UNICODE_H__ */
"""
header.write(header_footer)

for stream in (header, output):
    stream.close()
 |