mirror of
				https://gitlab.gnome.org/GNOME/libxml2.git
				synced 2025-10-26 00:37:43 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			223 lines
		
	
	
		
			8.0 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			223 lines
		
	
	
		
			8.0 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/env python3
 | |
| #
 | |
| # Portions of this script have been (shamelessly) stolen from the
 | |
| # prior work of Daniel Veillard (genUnicode.py)
 | |
| #
 | |
| # I, however, take full credit for any bugs, errors or difficulties :-)
 | |
| #
 | |
| # William Brack
 | |
| # October 2003
 | |
| #
 | |
| # 18 October 2003
 | |
| # Modified to maintain binary compatibility with previous library versions
 | |
| # by adding a suffix 'Q' ('quick') to the macro generated for the original
 | |
| # function, and adding generation of a function (with the original name) which
 | |
| # instantiates the macro.
 | |
| #
 | |
| 
 | |
| import sys
 | |
| import rangetab
 | |
| 
 | |
| #
 | |
| # A routine to take a list of yes/no (1, 0) values and turn it
 | |
| # into a list of ranges.  This will later be used to determine whether
 | |
| # to generate single-byte lookup tables, or inline comparisons
 | |
| #
 | |
def makeRange(lst):
    """Collapse a list of yes/no flags into a list of inclusive ranges.

    lst is a list of 1/0 values indexed by character code.  Every maximal
    run of consecutive 1 entries becomes one (first, last) tuple, where
    both ends are indexes into lst and are inclusive.

    Returns the tuples in ascending order; the result is empty when lst
    is empty or contains no 1 entry.
    """
    ret = []
    size = len(lst)
    pos = 0
    while pos < size:
        # list.index raises ValueError when the value is not present;
        # searching from 'pos' avoids copying the tail of the list.
        try:
            pos = lst.index(1, pos)     # start of the next range
        except ValueError:
            break                       # no more ranges, finished
        try:
            e = lst.index(0, pos)       # first entry past the range
        except ValueError:
            e = size                    # range runs to the end of the list
        ret.append((pos, e - 1))        # append range tuple to list
        # lst[e] is known to be 0 (or e is past the end), so skip it
        pos = e + 1
    return ret
 | |
| 
 | |
# minTableSize gives the minimum number of ranges which must be present
# before a 256-byte lookup table is produced.  If there are fewer than this
# number, a macro with inline comparisons is generated
minTableSize = 6

# dictionary of functions, key=name, element contains char-map and range-list
Functs = {}

# parser state: 0 = outside any function block, 1 = inside a 'name' block
state = 0


def _parse_value(text):
    """Convert one value field of a 'ur' line into an integer.

    Three spellings are accepted: hexadecimal with a '0x' prefix, a quoted
    character (only the character right after the opening quote is used),
    or plain decimal.  Malformed text raises ValueError or IndexError.
    """
    if text[0:2] == '0x':
        return int(text[2:], 16)
    if text[0] == "'":
        return ord(text[1])
    return int(text)


try:
    defines = open("codegen/ranges.def", "r")
except OSError:
    print("Missing codegen/ranges.def, aborting ...")
    sys.exit(1)

#
# The lines in the .def file have three types:-
#   name:   Defines a new function block
#   ur:     Defines individual or ranges of unicode values
#   end:    Indicates the end of the function block
#
# These lines are processed below.  Lines of any other type are ignored.
#
with defines:                   # make sure the input file gets closed
    for line in defines:
        # ignore blank lines, or lines beginning with '#'
        if line[0] == '#':
            continue
        line = line.strip()
        if line == '':
            continue
        # split line into space-separated fields, then split on type
        try:
            fields = line.split(' ')
            #
            # name line:
            #   validate any previous function block already ended
            #   validate this function not already defined
            #   initialize an entry in the function dictionary
            #       including a mask table with no values yet defined
            #
            if fields[0] == 'name':
                if state != 0:
                    # The line is rejected, so the current function name
                    # must stay untouched for the 'ur' lines that follow.
                    print("'name' %s found before previous name "
                          "completed" % (fields[1]))
                    continue
                name = fields[1]
                state = 1
                if name in Functs:
                    print("name '%s' already present - may give"
                          " wrong results" % (name))
                else:
                    # dict entry with two list elements (chdata, rangedata)
                    Functs[name] = [[0] * 256, []]
            #
            # end line:
            #   validate there was a preceding function name line
            #   set state to show no current function active
            #
            elif fields[0] == 'end':
                if state == 0:
                    print("'end' found outside of function block")
                    continue
                state = 0

            #
            # ur line:
            #   validate function has been defined
            #   process remaining fields on the line, which may be either
            #       individual unicode values or ranges of values
            #
            elif fields[0] == 'ur':
                if state != 1:
                    raise Exception("'ur' found outside of 'name' block")
                for el in fields[1:]:
                    pos = el.find('..')
                    # pos <= 0 means not a range, so must be individual value
                    if pos <= 0:
                        value = _parse_value(el)
                        if value < 0 or value > 0x1fffff:
                            raise Exception('Illegal value (%s) in ch for'
                                            ' name %s' % (el, name))
                        # for ur we have only ranges (makes things simpler),
                        # so convert val to range
                        currange = (value, value)
                    # pos > 0 means this is a range, so isolate/validate
                    # the interval
                    else:
                        # split the range into its first-val, last-val
                        (first, last) = el.split("..")
                        # convert values from text into binary
                        start = _parse_value(first)
                        end = _parse_value(last)
                        if start < 0 or end > 0x1fffff or start > end:
                            raise Exception("Invalid range '%s'" % el)
                        currange = (start, end)
                    # common path - 'currange' has the range, now take care
                    # of it.  We split on single-byte values vs. multibyte;
                    # a range whose upper end is 0x100 or more is kept whole
                    # in the multi-byte list.
                    if currange[1] < 0x100:     # single-byte
                        for ch in range(currange[0], currange[1] + 1):
                            # validate that value not previously defined
                            if Functs[name][0][ch]:
                                msg = "Duplicate ch value '%s' for name '%s'" % (el, name)
                                raise Exception(msg)
                            Functs[name][0][ch] = 1
                    else:                       # multi-byte
                        if currange in Functs[name][1]:
                            raise Exception("range already defined in"
                                            " function")
                        else:
                            Functs[name][1].append(currange)

        except Exception:
            # report the offending line, then let the error propagate
            print("Failed to process line: %s" % (line))
            raise
 | |
| 
 | |
try:
    output = open("codegen/ranges.inc", "w")
except OSError:
    print("Failed to open codegen/ranges.inc")
    sys.exit(1)

#
# Now output the generated data.
#

fkeys = sorted(Functs.keys())

# The 'with' block closes the output file even if generation fails part way.
with output:
    for f in fkeys:

        # First we convert the specified single-byte values into a group
        # of ranges.
        if max(Functs[f][0]) > 0:   # only check if at least one entry
            rangeTable = makeRange(Functs[f][0])
            numRanges = len(rangeTable)
            if numRanges >= minTableSize:   # table is worthwhile
                # write the constant data to the code file
                output.write("const unsigned char %s_tab[256] = {\n" % f)
                pline = "   "
                # the first 255 entries are comma-terminated and wrapped
                # once a line grows past 72 characters
                for n in range(255):
                    pline += " 0x%02x," % Functs[f][0][n]
                    if len(pline) > 72:
                        output.write(pline + "\n")
                        pline = "   "
                # the last entry closes the initializer instead
                output.write(pline + " 0x%02x };\n\n" % Functs[f][0][255])

    #
    # Next we do the unicode ranges
    #

    for f in fkeys:
        if len(Functs[f][1]) > 0:   # only generate if unicode ranges present
            rangeTable = Functs[f][1]
            rangeTable.sort()       # ascending tuple sequence
            # NOTE(review): gen_range_tables is defined in the rangetab
            # module; presumably it writes the '_srng'/'_lrng' tables to
            # 'output'.  Its return value is used below as the initializer
            # text of the group structure.
            group = rangetab.gen_range_tables(output, f, '_srng', '_lrng',
                                              rangeTable)

            output.write("const xmlChRangeGroup %sGroup =\n\t%s;\n\n" %
                         (f, group))
 | |
| 
 |