#!/usr/bin/python -u
#
# tries to parse the output of gtk-doc declaration files and make
# an XML reusable description from them
#
# TODO: try to extract comments from the DocBook output of gtk-doc
import sys
import string
# Symbol tables filled by the parse*() functions below.
# macro name -> comment text ('' until insertComment() fills it in)
macros = {}
# variable name -> comment text (parseMacro() entries without a "#define")
variables = {}
# struct name -> comment text
structs = {}
# typedef name -> comment text
typedefs = {}
# enum name -> [list of constant names, comment text]
enums = {}
# function name -> [return type, [(arg type, arg name), ...], comment text]
functions = {}
# function-type name -> same layout as functions (see parseStaticFunction())
user_functions = {}
# normalized return type -> list of function names returning it
ret_types = {}
# normalized argument type -> list of function names using it
types = {}
# section names, in the order parseSection() encountered them
sections = []
# section name -> list of identifier lines listed in that section
files = {}
# identifier -> name of the section that lists it
identifiers_file = {}
# identifier -> kind string ("macro", "variable", "struct", "typedef",
# "enum", "const", "function" or "functype")
identifiers_type = {}
##################################################################
#
# Parsing: libxml-decl.txt
#
##################################################################
def mormalizeTypeSpaces(raw, function):
    """Normalize the whitespace of an argument type and index it.

    raw is split on whitespace and re-joined with single spaces, so
    "  const   char * " becomes "const char *".  The function name is then
    appended to the global ``types`` table under the normalized type.

    Returns the normalized type string ('' when raw holds no token).
    """
    global types

    # split() without argument drops leading/trailing blanks and treats any
    # run of whitespace as one separator.
    normalized = ' '.join(raw.split())
    types.setdefault(normalized, []).append(function)
    return normalized
def removeComments(raw):
    """Return raw with every C comment ("/* ... */") removed.

    A comment that is never closed is dropped through the end of the
    string.  Text outside comments is returned unchanged.
    """
    start = raw.find('/*')
    # ">= 0": a comment starting at the very first character must be
    # removed too (find() returns 0 there, -1 only when nothing is found).
    while start >= 0:
        # Look for the terminator after the opener so that the '*' of
        # "/*" cannot be taken as the start of "*/" (as in "/*/").
        end = raw.find('*/', start + 2)
        if end < 0:
            return raw[:start]
        raw = raw[:start] + raw[end + 2:]
        start = raw.find('/*')
    return raw
def extractArgs(raw, function):
    """Split a C argument list into (type, name) pairs.

    raw is the comma separated argument list of ``function``; comments are
    removed first.  For every non-empty argument the trailing identifier
    is taken as the argument name and whatever precedes it as its type,
    which is normalized and indexed through mormalizeTypeSpaces().

    Returns the list of (type, name) tuples.  An argument made of a single
    word (e.g. "void") yields an empty type and that word as name.
    """
    # Characters allowed in a C identifier; '_' has to be part of it or
    # "int my_arg" would be split as type "int my_" / name "arg".
    ident_chars = string.ascii_letters + string.digits + '_'

    ret = []
    for arg in removeComments(raw).split(","):
        # Surrounding blanks (e.g. left behind by a removed comment) must
        # not end up being mistaken for an empty argument name.
        arg = arg.strip()
        if not arg:
            continue
        # Walk back from the end over the identifier forming the name.
        i = len(arg) - 1
        while i >= 0 and arg[i] in ident_chars:
            i = i - 1
        name = arg[i + 1:]
        arg_type = mormalizeTypeSpaces(arg[:i + 1], function)
        ret.append((arg_type, name))
    return ret
def extractTypes(raw, function):
    """Normalize the whitespace of a return type and index it.

    raw is split on whitespace and re-joined with single spaces; the
    function name is then appended to the global ``ret_types`` table under
    the normalized type.

    Returns the normalized type string ('' when raw holds no token).
    """
    global ret_types

    normalized = ' '.join(raw.split())
    ret_types.setdefault(normalized, []).append(function)
    return normalized
def parseMacro():
    """Parse one macro block from the global ``input`` file.

    Reads lines up to the closing tag (or end of file).  The block name is
    taken from its <NAME>...</NAME> line.  When a "#define" is seen the
    name is recorded in ``macros``, otherwise in ``variables``; in both
    cases ``identifiers_type`` is updated.  A block without a name line is
    ignored.
    """
    global input
    global macros
    global variables

    # NOTE(review): the tag literals were empty strings in the reviewed
    # copy, which made the name test unsatisfiable.  They are restored from
    # the slice widths (6 and 7 characters) and the gtk-doc declaration
    # file format -- confirm against a real libxml-decl.txt.
    name = None
    var = 1
    while 1:
        line = input.readline()
        if not line:
            break               # end of file: never loop forever
        line = line.rstrip('\r\n')
        if line == "</MACRO>":
            break
        if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
            name = line[6:-7]
        elif '#define' in line:
            var = 0
    if name is None:
        return
    if var == 1:
        variables[name] = ''
        identifiers_type[name] = "variable"
    else:
        macros[name] = ''
        identifiers_type[name] = "macro"
def parseStruct():
    """Parse one struct block from the global ``input`` file.

    Reads lines up to the closing tag (or end of file), records the name
    found on the <NAME>...</NAME> line in ``structs`` with an empty
    comment and marks it as "struct" in ``identifiers_type``.  A block
    without a name line is ignored.
    """
    global input
    global structs

    # NOTE(review): tag literals restored (they were empty strings, so the
    # name was never bound); confirm against a real libxml-decl.txt.
    name = None
    while 1:
        line = input.readline()
        if not line:
            break               # end of file
        line = line.rstrip('\r\n')
        if line == "</STRUCT>":
            break
        if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
            name = line[6:-7]
    if name is None:
        return
    structs[name] = ''
    identifiers_type[name] = "struct"
def parseTypedef():
    """Parse one typedef block from the global ``input`` file.

    Reads lines up to the closing tag (or end of file), records the name
    found on the <NAME>...</NAME> line in ``typedefs`` with an empty
    comment and marks it as "typedef" in ``identifiers_type``.  A block
    without a name line is ignored.
    """
    global input
    global typedefs

    # NOTE(review): tag literals restored (they were empty strings, so the
    # name was never bound); confirm against a real libxml-decl.txt.
    name = None
    while 1:
        line = input.readline()
        if not line:
            break               # end of file
        line = line.rstrip('\r\n')
        if line == "</TYPEDEF>":
            break
        if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
            name = line[6:-7]
    if name is None:
        return
    typedefs[name] = ''
    identifiers_type[name] = "typedef"
def parseEnum():
    """Parse one enum block from the global ``input`` file.

    Reads lines up to the closing tag (or end of file).  The enum name
    comes from the <NAME>...</NAME> line.  Lines containing "enum", "{",
    "}" or ";" are treated as syntax and skipped; every other line is cut
    at the first "/*", split on commas, and the first word of each piece
    (before any "=") is taken as a constant when it starts with a letter.

    Each constant is marked "const" in ``identifiers_type``; the enum is
    stored in ``enums`` as [constants, ''] and marked "enum".  A block
    without a name line records its constants' kinds only.
    """
    global input
    global enums

    # NOTE(review): tag literals restored (they were empty strings, so the
    # name was never bound); confirm against a real libxml-decl.txt.
    name = None
    consts = []
    while 1:
        line = input.readline()
        if not line:
            break               # end of file
        line = line.rstrip('\r\n')
        if line == "</ENUM>":
            break
        if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
            name = line[6:-7]
            continue
        if 'enum' in line or '{' in line or '}' in line or ';' in line:
            continue
        comment = line.find('/*')
        if comment >= 0:
            line = line[0:comment]
        for decl in line.split(","):
            tokens = decl.split("=")[0].split()
            if len(tokens) >= 1:
                token = tokens[0]
                if token[0].isalpha():
                    consts.append(token)
                    identifiers_type[token] = "const"
    if name is None:
        return
    enums[name] = [consts, '']
    identifiers_type[name] = "enum"
def parseStaticFunction():
    """Parse one function-type block from the global ``input`` file.

    Reads lines up to the closing tag (or end of file).  The name comes
    from the <NAME>...</NAME> line, the return type from the
    <RETURNS>...</RETURNS> line (through extractTypes()), and the last
    other non-empty line is taken as the argument list (through
    extractArgs()).

    The result is stored in ``user_functions`` as [return type, args, '']
    and the name is marked "functype" in ``identifiers_type``.  A block
    without a name line is ignored.
    """
    global input
    global user_functions

    # NOTE(review): tag literals restored from the slice widths (6/7 and
    # 9/10 characters) and the gtk-doc declaration file format; they were
    # empty strings, which left the name unbound.  Confirm against a real
    # libxml-decl.txt.
    name = None
    ret_type = None
    signature = None
    while 1:
        line = input.readline()
        if not line:
            break               # end of file
        line = line.rstrip('\r\n')
        if line == "</USER_FUNCTION>":
            break
        if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
            name = line[6:-7]
        elif line[0:9] == "<RETURNS>" and line[-10:] == "</RETURNS>":
            ret_type = extractTypes(line[9:-10], name)
        elif line:
            # blank lines must not clobber a signature already seen
            signature = line
    if name is None:
        return
    if signature is None:
        args = []               # no argument line at all
    else:
        args = extractArgs(signature, name)
    user_functions[name] = [ret_type, args, '']
    identifiers_type[name] = "functype"
def parseFunction():
    """Parse one function block from the global ``input`` file.

    Reads lines up to the closing tag (or end of file).  The name comes
    from the <NAME>...</NAME> line, the return type from the
    <RETURNS>...</RETURNS> line (through extractTypes()), and the last
    other non-empty line is taken as the argument list (through
    extractArgs()).

    The result is stored in ``functions`` as [return type, args, ''] and
    the name is marked "function" in ``identifiers_type``.  A block
    without a name line is ignored.
    """
    global input
    global functions

    # NOTE(review): tag literals restored from the slice widths (6/7 and
    # 9/10 characters) and the gtk-doc declaration file format; they were
    # empty strings, which left the name unbound.  Confirm against a real
    # libxml-decl.txt.
    name = None
    ret_type = None
    signature = None
    while 1:
        line = input.readline()
        if not line:
            break               # end of file
        line = line.rstrip('\r\n')
        if line == "</FUNCTION>":
            break
        if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
            name = line[6:-7]
        elif line[0:9] == "<RETURNS>" and line[-10:] == "</RETURNS>":
            ret_type = extractTypes(line[9:-10], name)
        elif line:
            # blank lines must not clobber a signature already seen
            signature = line
    if name is None:
        return
    if signature is None:
        args = []               # no argument line at all
    else:
        args = extractArgs(signature, name)
    functions[name] = [ret_type, args, '']
    identifiers_type[name] = "function"
# Read libxml-decl.txt and hand every block over to its parse*() function.
print("Parsing: libxml-decl.txt")
input = open('libxml-decl.txt')
# Opening tag -> parser of the block it starts.
# NOTE(review): the six tag literals were all empty strings in the reviewed
# copy, which made every branch but the first unreachable.  They are
# restored from the gtk-doc declaration file format -- confirm against a
# real libxml-decl.txt.
_decl_handlers = {
    "<MACRO>": parseMacro,
    "<ENUM>": parseEnum,
    "<FUNCTION>": parseFunction,
    "<STRUCT>": parseStruct,
    "<TYPEDEF>": parseTypedef,
    "<USER_FUNCTION>": parseStaticFunction,
}
while 1:
    line = input.readline()
    if not line:
        break
    line = line.rstrip('\r\n')
    handler = _decl_handlers.get(line)
    if handler is not None:
        handler()
    elif len(line) >= 1 and line[0] == "<":
        print("unhandled %s" % (line))
input.close()

print("Parsed: %d macros. %d structs, %d typedefs, %d enums" % (
    len(macros), len(structs), len(typedefs), len(enums)))
# total number of enum constants, all enums together
c = 0
for enum in enums.keys():
    consts = enums[enum][0]
    c = c + len(consts)
print(" %d variables, %d constants, %d functions and %d functypes" % (
    len(variables), c, len(functions), len(user_functions)))
print("The functions manipulates %d different types" % (len(types)))
print("The functions returns %d different types" % (len(ret_types)))
##################################################################
#
# Parsing: libxml-decl-list.txt
#
##################################################################
def parseSection():
    """Parse one section block from the global ``input`` file.

    Reads lines up to the closing tag (or end of file).  The section name
    comes from the <FILE>...</FILE> line; every other non-empty line is
    kept as an identifier of that section.

    The name is appended to ``sections``, the identifiers are stored in
    ``files`` and each of them is mapped to the section in
    ``identifiers_file``; the constants of an identifier known as an enum
    are mapped to the same section.  A block without a name line is
    ignored.
    """
    global input
    global sections
    global files
    global identifiers_file

    # NOTE(review): tag literals restored from the slice widths (6 and 7
    # characters) and the gtk-doc sections file format; they were empty
    # strings, which left the name unbound.  Confirm against a real
    # libxml-decl-list.txt.
    name = None
    tokens = []
    while 1:
        line = input.readline()
        if not line:
            break               # end of file
        line = line.rstrip('\r\n')
        if line == "</SECTION>":
            break
        if line[0:6] == "<FILE>" and line[-7:] == "</FILE>":
            name = line[6:-7]
        elif len(line) > 0:
            tokens.append(line)
    if name is None:
        return
    sections.append(name)
    files[name] = tokens
    for token in tokens:
        identifiers_file[token] = name
        #
        # Small transitivity for enum values
        #
        if token in enums:
            for const in enums[token][0]:
                identifiers_file[const] = name
# Read libxml-decl-list.txt: one parseSection() call per section block.
print("Parsing: libxml-decl-list.txt")
input = open('libxml-decl-list.txt')
while 1:
    line = input.readline()
    if not line:
        break
    line = line.rstrip('\r\n')
    # NOTE(review): the tag literal was an empty string in the reviewed
    # copy; restored from the gtk-doc sections file format -- confirm.
    if line == "<SECTION>":
        parseSection()
    elif len(line) >= 1 and line[0] == "<":
        print("unhandled %s" % (line))
input.close()
print("Parsed: %d files %d identifiers" % (len(files), len(identifiers_file)))
##################################################################
#
# Parsing: xml/*.xml
# To enrich the existing info with extracted comments
#
##################################################################
# number of comments successfully attached to a known symbol
nbcomments = 0

def insertComment(name, title, value):
    """Attach the comment ``value`` to the symbol called ``name``.

    The symbol tables are searched in this order: functions, typedefs,
    macros, variables, structs, enums, user_functions.  For functions and
    enums the comment slot of the entry is filled; for the other tables
    the entry itself becomes the comment (for user_functions this replaces
    the [type, args, ''] list stored by parseStaticFunction()).

    ``title`` is accepted but not used.  When the name is unknown a
    "lost comment" line is printed; otherwise the global ``nbcomments``
    counter is incremented.
    """
    global nbcomments

    if name in functions:
        functions[name][2] = value
    elif name in typedefs:
        typedefs[name] = value
    elif name in macros:
        macros[name] = value
    elif name in variables:
        variables[name] = value
    elif name in structs:
        structs[name] = value
    elif name in enums:
        enums[name][1] = value
    elif name in user_functions:
        user_functions[name] = value
    else:
        print("lost comment %s: %s" % (name, value))
        return
    nbcomments = nbcomments + 1
import os
import xmllib
try:
import sgmlop
except ImportError:
sgmlop = None # accelerator not available
# When true, docParser prints every callback it receives.
debug = 0
# FastParser is only defined when the optional sgmlop accelerator could be
# imported; otherwise the name is bound to None so getparser() can test it.
if sgmlop:
    class FastParser:
        """sgmlop based XML parser. this is typically 15x faster
        than SlowParser...

        Start tags, end tags and character data are forwarded to the
        start(), end() and data() methods of the target object.
        """
        def __init__(self, target):
            # setup callbacks
            self.finish_starttag = target.start
            self.finish_endtag = target.end
            self.handle_data = target.data
            # activate parser
            self.parser = sgmlop.XMLParser()
            self.parser.register(self)
            self.feed = self.parser.feed
            # replacement text of the entity references handled by
            # handle_entityref()
            self.entity = {
                "amp": "&", "gt": ">", "lt": "<",
                "apos": "'", "quot": '"'
            }
        def close(self):
            """Close the underlying parser and drop the references to it."""
            try:
                self.parser.close()
            finally:
                self.parser = self.feed = None # nuke circular reference
        def handle_entityref(self, entity):
            """Forward an entity reference to the target as character data.

            Entities listed in self.entity are replaced by their text, any
            other one is passed through as "&name;".
            """
            # entity
            try:
                self.handle_data(self.entity[entity])
            except KeyError:
                self.handle_data("&%s;" % entity)
else:
    FastParser = None
class SlowParser(xmllib.XMLParser):
    """slow but safe standard parser, based on the XML parser in
    Python's standard library.

    Start tags, end tags and character data are forwarded to the
    start(), end() and data() methods of the target object.
    """
    def __init__(self, target):
        # Bind the target callbacks to the handler names used here, then
        # run the base class initialisation.
        self.unknown_starttag = target.start
        self.handle_data = target.data
        self.unknown_endtag = target.end
        xmllib.XMLParser.__init__(self)
def getparser(target = None):
    """Return a (parser, target) pair.

    The parser is a FastParser when that class is available, a SlowParser
    otherwise; it feeds its events to ``target``.  When no target is
    given a new docParser is created.
    """
    # identity test: None is a singleton, and "==" could be intercepted by
    # a target defining its own comparison
    if target is None:
        target = docParser()
    if FastParser:
        parser = FastParser(target)
    else:
        parser = SlowParser(target)
    return parser, target
class docParser:
    """Parser target collecting one comment per 'refsect2' element.

    Inside a refsect2 the first 'anchor' id gives the symbol name (with
    "-CAPS" removed and "-" turned into "_"), the last 'title' gives the
    title and the first 'para' gives the comment text.  At the end of the
    refsect2 the three values are passed to insertComment().
    """
    def __init__(self):
        self._methodname = None
        self._data = []         # character data chunks of the current element
        self.id = None          # symbol name taken from the anchor
        self.title = None       # text of the last title seen
        self.descr = None
        self.string = None      # text of the first paragraph seen

    def close(self):
        if debug:
            print("close")

    def getmethodname(self):
        return self._methodname

    def data(self, text):
        """Accumulate a chunk of character data."""
        if debug:
            print("data %s" % text)
        self._data.append(text)

    def start(self, tag, attrs):
        """Handle a start tag; ``attrs`` maps attribute names to values."""
        if debug:
            print("start %s, %s" % (tag, attrs))
        if tag == 'refsect2':
            # new entry: forget whatever the previous one collected
            self.id = None
            self.title = None
            self.descr = None
            self.string = None
        elif tag == 'para' or tag == 'title':
            self._data = []
        elif tag == 'anchor' and self.id is None:
            # only the first anchor of the entry names the symbol
            if 'id' in attrs:
                ident = attrs['id'].replace('-CAPS', '')
                self.id = ident.replace('-', '_')

    def end(self, tag):
        """Handle an end tag."""
        if debug:
            print("end %s" % tag)
        if tag == 'refsect2':
            insertComment(self.id, self.title, self.string)
        elif tag == 'para':
            # only the first paragraph of the entry is kept
            if self.string is None:
                self.string = self._collected_text()
            self._data = []
        elif tag == 'title':
            self.title = self._collected_text()

    def _collected_text(self):
        """Return the accumulated character data as one line of text."""
        text = ''.join(self._data)
        text = text.replace('\n', ' ').replace('\r', ' ')
        # NOTE(review): the reviewed copy replaced ' ' by ' ' three times,
        # which does nothing; collapsing runs of spaces is the presumed
        # intent -- confirm.
        while '  ' in text:
            text = text.replace('  ', ' ')
        # leading spaces are dropped, trailing ones are kept
        return text.lstrip(' ')
# Feed every file of the xml directory to a fresh parser; the comments
# found are attached to the symbols by insertComment().
xmlfiles = 0
filenames = os.listdir("xml")
for filename in filenames:
    try:
        f = open("xml/" + filename, 'r')
    except IOError:
        # report the name of the entry that could not be opened
        # (sys.exc_info() keeps this valid for Python 2 and 3 alike)
        print("%s : %s" % (filename, sys.exc_info()[1]))
        continue
    try:
        data = f.read()
    finally:
        f.close()
    (parser, target) = getparser()
    parser.feed(data)
    parser.close()
    xmlfiles = xmlfiles + 1
print("Parsed: %d XML files collexting %d comments" % (xmlfiles, nbcomments))
##################################################################
#
# Saving: libxml2-api.xml
#
##################################################################
def escape(raw):
    """Return raw with the XML special characters replaced by entities.

    "&", "<" and ">" are replaced so the text can be used as element
    content; both quote characters are replaced too because the result is
    also written inside quoted attribute values.
    """
    # "&" has to go first, otherwise the "&" of the entities produced
    # below would be escaped a second time.
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
# Write the collected description: first the identifiers of every section,
# then one element per symbol, sorted by name.
# NOTE(review): the markup inside the output.write() templates was missing
# from the reviewed copy (several templates had no placeholder left for
# their arguments).  Element and attribute names below are reconstructed
# after the libxml2-api.xml layout -- confirm before relying on them.
print("Saving XML description libxml2-api.xml")
output = open("libxml2-api.xml", "w")
output.write("<api name='libxml2'>\n")
output.write("  <files>\n")
for fname in files.keys():
    output.write("    <file name='%s'>\n" % fname)
    for symbol in files[fname]:
        output.write("     <exports symbol='%s'/>\n" % (symbol))
    output.write("    </file>\n")
output.write("  </files>\n")

output.write("  <symbols>\n")
symbols = list(macros.keys())
symbols.extend(structs.keys())
symbols.extend(variables.keys())
symbols.extend(typedefs.keys())
for i in enums.keys():
    symbols.append(i)
    symbols.extend(enums[i][0])
symbols.extend(functions.keys())
symbols.extend(user_functions.keys())
symbols.sort()

# kinds whose table entry is directly the comment text
_info_tables = {
    'macro': macros,
    'struct': structs,
    'variable': variables,
    'typedef': typedefs,
}

prev = None
for i in symbols:
    # the list is sorted, so a redefined symbol immediately follows itself
    if i == prev:
        continue
    prev = i
    if i not in identifiers_type:
        print("Symbol %s not found in identifiers list" % (i))
        continue
    kind = identifiers_type[i]
    fname = identifiers_file.get(i)

    output.write("    <%s name='%s'" % (kind, i))
    if fname is not None:
        output.write(" file='%s'" % (fname))

    if kind == "function":
        output.write(">\n")
        (ret, args, doc) = functions[i]
        if doc is not None and doc != '':
            output.write("      <info>%s</info>\n" % (escape(doc)))
        output.write("      <return type='%s'/>\n" % (ret))
        for arg in args:
            output.write("      <arg name='%s' type='%s'/>\n" % (
                         arg[1], arg[0]))
        output.write("    </%s>\n" % (kind))
        continue

    if kind in _info_tables:
        info = _info_tables[kind].get(i)
    elif kind == 'functype':
        info = user_functions[i]
        # Still the [type, args, comment] list when insertComment() never
        # replaced the entry by a comment string.
        if isinstance(info, list):
            info = info[2]
    else:
        info = None             # enums and constants carry no info
    if info is not None and info != '':
        output.write(" info='%s'/>\n" % (escape(info)))
    else:
        output.write("/>\n")
output.write("  </symbols>\n")
output.write("</api>\n")
output.close()
print("generated XML for %d symbols" % (len(symbols)))