diff --git a/ChangeLog b/ChangeLog index acef54c3..e0974f2d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +Tue Mar 5 16:33:42 CET 2002 Daniel Veillard + + * parser.c: make sure SAX endDocument is always called as + this could result in a Python memory leak otherwise (it's + used to decrement ref-counting) + * python/generator.py python/libxml.c python/libxml.py + python/libxml2-python-api.xml python/libxml2class.txt + python/tests/error.py python/tests/xpath.py: implemented + the suggestions made by Gary Benson and extended the tests + to match it. + Tue Mar 5 10:35:24 CET 2002 Daniel Veillard * python/generator.py: applied patch fixing #73450 diff --git a/parser.c b/parser.c index 3144c3e9..b8a67f19 100644 --- a/parser.c +++ b/parser.c @@ -7640,8 +7640,7 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) { /* * SAX: end of the document processing. */ - if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && - (!ctxt->disableSAX)) + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) ctxt->sax->endDocument(ctxt->userData); if (! ctxt->wellFormed) { @@ -7757,8 +7756,7 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { /* * SAX: end of the document processing. */ - if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && - (!ctxt->disableSAX)) + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) ctxt->sax->endDocument(ctxt->userData); if (! 
ctxt->wellFormed) return(-1); @@ -8245,8 +8243,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { xmlGenericError(xmlGenericErrorContext, "PP: entering EOF\n"); #endif - if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && - (!ctxt->disableSAX)) + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) ctxt->sax->endDocument(ctxt->userData); goto done; } @@ -8269,8 +8266,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { xmlGenericError(xmlGenericErrorContext, "PP: entering EOF\n"); #endif - if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && - (!ctxt->disableSAX)) + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) ctxt->sax->endDocument(ctxt->userData); goto done; } @@ -8289,8 +8285,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { xmlGenericError(xmlGenericErrorContext, "PP: entering EOF\n"); #endif - if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && - (!ctxt->disableSAX)) + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) ctxt->sax->endDocument(ctxt->userData); goto done; } @@ -8767,8 +8762,7 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, ctxt->disableSAX = 1; } if (ctxt->instate != XML_PARSER_EOF) { - if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) && - (!ctxt->disableSAX)) + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) ctxt->sax->endDocument(ctxt->userData); } ctxt->instate = XML_PARSER_EOF; diff --git a/python/generator.py b/python/generator.py index 92403fba..dbfc5f7c 100755 --- a/python/generator.py +++ b/python/generator.py @@ -583,6 +583,9 @@ def nameFixup(name, classe, type, file): elif name[0:11] == "xmlXPathGet" and file == "python_accessor": func = name[11:] func = string.lower(func[0:1]) + func[1:] + elif name[0:11] == "xmlXPathSet" and file == "python_accessor": + func = name[8:] + func = string.lower(func[0:1]) + func[1:] elif name[0:11] == "xmlACatalog": func = name[11:] func = string.lower(func[0:1]) + func[1:] @@ -612,6 +615,8 @@ def nameFixup(name, 
classe, type, file): func = "URI" + func[3:] elif func[0:4] == "uTF8": func = "UTF8" + func[4:] + elif func[0:3] == 'sAX': + func = "SAX" + func[3:] return func @@ -712,7 +717,8 @@ def buildWrappers(): func = nameFixup(name, classe, type, file) info = (0, func, name, ret, args, file) function_classes[classe].append(info) - elif name[0:3] == "xml" and len(args) >= 2 and args[1][1] == type: + elif name[0:3] == "xml" and len(args) >= 2 and args[1][1] == type \ + and file != "python_accessor": found = 1 func = nameFixup(name, classe, type, file) info = (1, func, name, ret, args, file) @@ -722,7 +728,8 @@ def buildWrappers(): func = nameFixup(name, classe, type, file) info = (0, func, name, ret, args, file) function_classes[classe].append(info) - elif name[0:4] == "html" and len(args) >= 2 and args[1][1] == type: + elif name[0:4] == "html" and len(args) >= 2 and args[1][1] == type \ + and file != "python_accessor": found = 1 func = nameFixup(name, classe, type, file) info = (1, func, name, ret, args, file) @@ -789,7 +796,25 @@ def buildWrappers(): classes.write(")\n"); if ret[0] != "void": if classes_type.has_key(ret[0]): - classes.write(" if ret == None: return None\n"); + # + # Raise an exception + # + if string.find(name, "URI") >= 0: + classes.write( + " if ret == None:raise uriError('%s() failed')\n" + % (name)) + elif string.find(name, "XPath") >= 0: + classes.write( + " if ret == None:raise xpathError('%s() failed')\n" + % (name)) + elif string.find(name, "Parse") >= 0: + classes.write( + " if ret == None:raise parserError('%s() failed')\n" + % (name)) + else: + classes.write( + " if ret == None:raise treeError('%s() failed')\n" + % (name)) classes.write(" return "); classes.write(classes_type[ret[0]][1] % ("ret")); classes.write("\n"); @@ -884,12 +909,48 @@ def buildWrappers(): classes.write(")\n"); if ret[0] != "void": if classes_type.has_key(ret[0]): - classes.write(" if ret == None: return None\n"); + # + # Raise an exception + # + if string.find(name, "URI") 
>= 0: + classes.write( + " if ret == None:raise uriError('%s() failed')\n" + % (name)) + elif string.find(name, "XPath") >= 0: + classes.write( + " if ret == None:raise xpathError('%s() failed')\n" + % (name)) + elif string.find(name, "Parse") >= 0: + classes.write( + " if ret == None:raise parserError('%s() failed')\n" + % (name)) + else: + classes.write( + " if ret == None:raise treeError('%s() failed')\n" + % (name)) classes.write(" return "); classes.write(classes_type[ret[0]][1] % ("ret")); classes.write("\n"); elif converter_type.has_key(ret[0]): - classes.write(" if ret == None: return None\n"); + # + # Raise an exception + # + if string.find(name, "URI") >= 0: + classes.write( + " if ret == None:raise uriError('%s() failed')\n" + % (name)) + elif string.find(name, "XPath") >= 0: + classes.write( + " if ret == None:raise xpathError('%s() failed')\n" + % (name)) + elif string.find(name, "Parse") >= 0: + classes.write( + " if ret == None:raise parserError('%s() failed')\n" + % (name)) + else: + classes.write( + " if ret == None:raise treeError('%s() failed')\n" + % (name)) classes.write(" return "); classes.write(converter_type[ret[0]] % ("ret")); classes.write("\n"); diff --git a/python/libxml.c b/python/libxml.c index f79af7d6..8c7d295e 100644 --- a/python/libxml.c +++ b/python/libxml.c @@ -324,7 +324,7 @@ pythonProcessingInstruction(void *user_data, if (PyObject_HasAttrString(handler, "processingInstruction")) { result = PyObject_CallMethod(handler, - "ignorableWhitespace", "ss", target, data); + "processingInstruction", "ss", target, data); Py_XDECREF(result); } } @@ -663,7 +663,7 @@ libxml_xmlCreatePushParser(PyObject *self, PyObject *args) { &chunk, &size, &URI)) return(NULL); -#ifdef DEBUG_ERROR +#ifdef DEBUG printf("libxml_xmlCreatePushParser(%p, %s, %d, %s) called\n", pyobj_SAX, chunk, size, URI); #endif @@ -691,7 +691,7 @@ libxml_htmlCreatePushParser(PyObject *self, PyObject *args) { &chunk, &size, &URI)) return(NULL); -#ifdef DEBUG_ERROR +#ifdef 
DEBUG printf("libxml_htmlCreatePushParser(%p, %s, %d, %s) called\n", pyobj_SAX, chunk, size, URI); #endif @@ -706,6 +706,60 @@ libxml_htmlCreatePushParser(PyObject *self, PyObject *args) { return(pyret); } +PyObject * +libxml_xmlSAXParseFile(PyObject *self, PyObject *args) { + int recover; + xmlChar *URI; + PyObject *pyobj_SAX = NULL; + xmlSAXHandlerPtr SAX = NULL; + + if (!PyArg_ParseTuple(args, "Osi:xmlSAXParseFile", &pyobj_SAX, + &URI, &recover)) + return(NULL); + +#ifdef DEBUG + printf("libxml_xmlSAXParseFile(%p, %s, %d) called\n", + pyobj_SAX, URI, recover); +#endif + if (pyobj_SAX == Py_None) { + Py_INCREF(Py_None); + return(Py_None); + } + SAX = &pythonSaxHandler; + Py_INCREF(pyobj_SAX); + /* The reference is released in pythonEndDocument() */ + xmlSAXParseFileWithData(SAX, URI, recover, pyobj_SAX); + Py_INCREF(Py_None); + return(Py_None); +} + +PyObject * +libxml_htmlSAXParseFile(PyObject *self, PyObject *args) { + xmlChar *URI; + xmlChar *encoding; + PyObject *pyobj_SAX = NULL; + xmlSAXHandlerPtr SAX = NULL; + + if (!PyArg_ParseTuple(args, "Osz:htmlSAXParseFile", &pyobj_SAX, + &URI, &encoding)) + return(NULL); + +#ifdef DEBUG + printf("libxml_htmlSAXParseFile(%p, %s, %s) called\n", + pyobj_SAX, URI, encoding); +#endif + if (pyobj_SAX == Py_None) { + Py_INCREF(Py_None); + return(Py_None); + } + SAX = &pythonSaxHandler; + Py_INCREF(pyobj_SAX); + /* The reference is released in pythonEndDocument() */ + htmlSAXParseFile(URI, encoding, SAX, pyobj_SAX); + Py_INCREF(Py_None); + return(Py_None); +} + /************************************************************************ * * * Error message callback * diff --git a/python/libxml.py b/python/libxml.py index da736bef..18f68409 100644 --- a/python/libxml.py +++ b/python/libxml.py @@ -1,5 +1,133 @@ import libxml2mod +# +# Errors raised by the wrappers when some tree handling failed. 
+# +class treeError: + def __init__(self, msg): + self.msg = msg + def __str__(self): + return self.msg + +class parserError: + def __init__(self, msg): + self.msg = msg + def __str__(self): + return self.msg + +class uriError: + def __init__(self, msg): + self.msg = msg + def __str__(self): + return self.msg + +class xpathError: + def __init__(self, msg): + self.msg = msg + def __str__(self): + return self.msg + +# +# Example of a class to handle SAX events +# +class SAXCallback: + """Base class for SAX handlers""" + def startDocument(self): + """called at the start of the document""" + pass + + def endDocument(self): + """called at the end of the document""" + pass + + def startElement(self, tag, attrs): + """called at the start of every element, tag is the name of + the element, attrs is a dictionary of the element's attributes""" + pass + + def endElement(self, tag): + """called at the end of every element, tag is the name of + the element""" + pass + + def characters(self, data): + """called when character data have been read, data is the string + containing the data, multiple consecutive characters() callbacks + are possible.""" + pass + + def cdataBlock(self, data): + """called when a CDATA section has been read, data is the string + containing the data, multiple consecutive cdataBlock() callbacks + are possible.""" + pass + + def reference(self, name): + """called when an entity reference has been found""" + pass + + def ignorableWhitespace(self, data): + """called when potentially ignorable white spaces have been found""" + pass + + def processingInstruction(self, target, data): + """called when a PI has been found, target contains the PI name and + data is the associated data in the PI""" + pass + + def comment(self, content): + """called when a comment has been found, content contains the comment""" + pass + + def externalSubset(self, name, externalID, systemID): + """called when a DOCTYPE declaration has been found, name is the + DTD name and externalID,
systemID are the DTD public and system + identifier for that DTD if available""" + pass + + def internalSubset(self, name, externalID, systemID): + """called when a DOCTYPE declaration has been found, name is the + DTD name and externalID, systemID are the DTD public and system + identifier for that DTD if available""" + pass + + def entityDecl(self, name, type, externalID, systemID, content): + """called when an ENTITY declaration has been found, name is the + entity name and externalID, systemID are the entity public and + system identifier for that entity if available, type indicates + the entity type, and content reports its string content""" + pass + + def notationDecl(self, name, externalID, systemID): + """called when a NOTATION declaration has been found, name is the + notation name and externalID, systemID are the notation public and + system identifier for that notation if available""" + pass + + def attributeDecl(self, elem, name, type, defi, defaultValue, nameList): + """called when an ATTRIBUTE definition has been found""" + pass + + def elementDecl(self, name, type, content): + """called when an ELEMENT definition has been found""" + pass + + def entityDecl(self, name, publicId, systemID, notationName): + """called when an unparsed ENTITY declaration has been found, + name is the entity name and publicId, systemID are the entity + public and system identifier for that entity if available, + and notationName indicates the associated NOTATION""" + pass + + def warning(self, msg): + print msg + + def error(self, msg): + raise parserError(msg) + + def fatalError(self, msg): + raise parserError(msg) + # # This class is the ancestor of all the Node classes.
It provides # the basic functionalities shared by all nodes (and handle diff --git a/python/libxml2-python-api.xml b/python/libxml2-python-api.xml index 5026ee14..938d968b 100644 --- a/python/libxml2-python-api.xml +++ b/python/libxml2-python-api.xml @@ -36,6 +36,20 @@ + + Interface to parse an XML file or resource pointed by an URI to build an event flow to the SAX object + + + + + + + Interface to parse an HTML file or resource pointed by an URI to build an event flow to the SAX object + + + + + Get the document tree from a parser context. @@ -102,7 +116,7 @@ - + Get the xpathContext from an xpathParserContext @@ -118,6 +132,18 @@ + + Set the doc of an xpathContext + + + + + + Set the current node of an xpathContext + + + + Get the current node from an xpathContext diff --git a/python/libxml2class.txt b/python/libxml2class.txt index 16cab8e2..4b08b79c 100644 --- a/python/libxml2class.txt +++ b/python/libxml2class.txt @@ -117,10 +117,12 @@ nodePop() nodePush() # functions from module python +SAXParseFile() createPushParser() debugMemory() dumpMemory() htmlCreatePushParser() +htmlSAXParseFile() newNode() registerErrorHandler() @@ -542,6 +544,8 @@ Class xpathContext() contextSize() function() functionURI() + setContextDoc() + setContextNode() # functions from module python registerXPathFunction() diff --git a/python/tests/error.py b/python/tests/error.py index 93379450..cc771de1 100755 --- a/python/tests/error.py +++ b/python/tests/error.py @@ -16,8 +16,17 @@ def callback(ctx, str): err = err + "%s %s" % (ctx, str) +got_exc = 0 libxml2.registerErrorHandler(callback, "-->") -doc = libxml2.parseFile("missing.xml") +try: + doc = libxml2.parseFile("missing.xml") +except libxml2.parserError: + got_exc = 1 + +if got_exc == 0: + print "Failed to get a parser exception" + sys.exit(1) + if err != expect: print "error" print "received %s" %(err) @@ -26,7 +35,10 @@ if err != expect: i = 10000 while i > 0: - doc = libxml2.parseFile("missing.xml") + try: + doc = 
libxml2.parseFile("missing.xml") + except libxml2.parserError: + got_exc = 1 err = "" i = i - 1 diff --git a/python/tests/xpath.py b/python/tests/xpath.py index 73ab735e..2e036e1f 100755 --- a/python/tests/xpath.py +++ b/python/tests/xpath.py @@ -22,6 +22,14 @@ if len(res) != 2: if res[0].name != "doc" or res[1].name != "foo": print "xpath query: wrong node set value" sys.exit(1) +ctxt.setContextNode(res[0]) +res = ctxt.xpathEval("foo") +if len(res) != 1: + print "xpath query: wrong node set size" + sys.exit(1) +if res[0].name != "foo": + print "xpath query: wrong node set value" + sys.exit(1) doc.freeDoc() ctxt.xpathFreeContext() i = 1000