From c6cae7b690c1ded6f03a378eb59025522cc40105 Mon Sep 17 00:00:00 2001 From: Daniel Veillard Date: Fri, 11 Apr 2003 09:02:11 +0000 Subject: [PATCH] Added the Expand() and Next() operation to work on subtrees within the * xmlreader.c include/libxml/xmlreader.h: Added the Expand() and Next() operation to work on subtrees within the reader framework. * doc/libxml2-api.xml python/libxml2class.txt: resulting updates * python/tests/reader5.py: added an example for those new functions of the reader. Daniel --- ChangeLog | 9 ++++ doc/libxml2-api.xml | 34 +++++++++++-- include/libxml/xmlreader.h | 2 + python/libxml2class.txt | 4 ++ python/tests/reader5.py | 48 +++++++++++++++++ xmlreader.c | 102 +++++++++++++++++++++++++++++++++++++ 6 files changed, 196 insertions(+), 3 deletions(-) create mode 100755 python/tests/reader5.py diff --git a/ChangeLog b/ChangeLog index a9f69d5b..882daed8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +Fri Apr 11 10:59:24 CEST 2003 Daniel Veillard + + * xmlreader.c include/libxml/xmlreader.h: Added the Expand() + and Next() operation to work on subtrees within the reader + framework. + * doc/libxml2-api.xml python/libxml2class.txt: resulting updates + * python/tests/reader5.py: added an example for those new + functions of the reader. + Thu Apr 10 23:38:13 CEST 2003 Daniel Veillard * HTMLtree.c: patch from Vasily Tchekalkin to fix #109865 diff --git a/doc/libxml2-api.xml b/doc/libxml2-api.xml index 64cfa0b5..6ee16a60 100644 --- a/doc/libxml2-api.xml +++ b/doc/libxml2-api.xml @@ -883,7 +883,6 @@ - @@ -929,6 +928,7 @@ + @@ -1127,6 +1127,7 @@ + @@ -1150,6 +1151,7 @@ + @@ -1552,6 +1554,7 @@ + @@ -1701,6 +1704,7 @@ + @@ -3633,6 +3637,12 @@ actually an xmlCharEncoding'/> + + Create a parser context for an HTML in-memory document. + + + + Create a parser context for using the HTML parser in push mode The value of @filename is used for fetching external entities and error/warning reports. 
@@ -4560,6 +4570,14 @@ actually an xmlCharEncoding'/> + + Builds the QName @prefix:@ncname in @memory if there is enough space and prefix is not NULL nor empty, otherwise allocate a new string. If prefix is NULL or empty it returns ncname. + + + + + + Computes he final URI of the reference done by checking that the given URI is valid, and building the final URI using the base URI. This is processed according to section 5.2 of the RFC 2396 5.2. Resolving Relative References to Absolute Form @@ -8076,10 +8094,10 @@ actually an xmlCharEncoding'/> - + parse an XML qualified name string [NS 5] QName ::= (Prefix ':')? LocalPart [NS 6] Prefix ::= NCName [NS 7] LocalPart ::= NCName - + @@ -8283,6 +8301,11 @@ actually an xmlCharEncoding'/> + + Reads the contents of the current node and the full subtree. It then makes the subtree available until the next xmlTextReaderRead() call + + + Provides the value of the attribute with the specified qualified name. @@ -8405,6 +8428,11 @@ actually an xmlCharEncoding'/> + + Skip to the node following the current one in document order while avoiding the subtree if any. 
+ + + Get the node type of the current node Reference: http://dotgnu.org/pnetlib-doc/System/Xml/XmlNodeType.html diff --git a/include/libxml/xmlreader.h b/include/libxml/xmlreader.h index e9475a77..f035258f 100644 --- a/include/libxml/xmlreader.h +++ b/include/libxml/xmlreader.h @@ -106,6 +106,8 @@ int xmlTextReaderGetParserProp (xmlTextReaderPtr reader, int prop); xmlNodePtr xmlTextReaderCurrentNode (xmlTextReaderPtr reader); xmlDocPtr xmlTextReaderCurrentDoc (xmlTextReaderPtr reader); +xmlNodePtr xmlTextReaderExpand (xmlTextReaderPtr reader); +int xmlTextReaderNext (xmlTextReaderPtr reader); /* * Error handling extensions diff --git a/python/libxml2class.txt b/python/libxml2class.txt index 92c6601b..950397d9 100644 --- a/python/libxml2class.txt +++ b/python/libxml2class.txt @@ -6,6 +6,7 @@ # functions from module HTMLparser +htmlCreateMemoryParserCtxt() htmlHandleOmittedElem() htmlIsScriptAttribute() htmlParseDoc() @@ -132,6 +133,7 @@ relaxNGNewMemParserCtxt() relaxNGNewParserCtxt() # functions from module tree +buildQName() compressMode() isXHTML() newComment() @@ -594,6 +596,7 @@ Class xmlTextReader(xmlTextReaderCore) CurrentDoc() CurrentNode() Depth() + Expand() GetAttribute() GetAttributeNo() GetAttributeNs() @@ -613,6 +616,7 @@ Class xmlTextReader(xmlTextReaderCore) MoveToNextAttribute() Name() NamespaceUri() + Next() NodeType() Normalization() Prefix() diff --git a/python/tests/reader5.py b/python/tests/reader5.py new file mode 100755 index 00000000..fbfe4a63 --- /dev/null +++ b/python/tests/reader5.py @@ -0,0 +1,48 @@ +#!/usr/bin/python -u +# +# this tests the Expand() API of the xmlTextReader interface +# this extract the Dragon bibliography entries from the XML specification +# +import libxml2 +import StringIO +import sys + +# Memory debug specific +libxml2.debugMemory(1) + +expect="""Aho, Alfred V., +Ravi Sethi, and Jeffrey D. Ullman. +Compilers: Principles, Techniques, and Tools. +Reading: Addison-Wesley, 1986, rpt. corr. 
1988.""" + +f = open('../../test/valid/REC-xml-19980210.xml') +input = libxml2.inputBuffer(f) +reader = input.newTextReader("REC") +res="" +while reader.Read(): + while reader.Name() == 'bibl': + node = reader.Expand() # expand the subtree + if node.xpathEval("@id = 'Aho'"): # use XPath on it + res = res + node.serialize() + if reader.Next() != 1: # skip the subtree + break; + +if res != expect: + print "Error: didn't get the expected output" + print "got '%s'" % (res) + print "expected '%s'" % (expect) + + +# +# cleanup +# +del input +del reader + +# Memory debug specific +libxml2.cleanupParser() +if libxml2.debugMemory(1) == 0: + print "OK" +else: + print "Memory leak %d bytes" % (libxml2.debugMemory(1)) + libxml2.dumpMemory() diff --git a/xmlreader.c b/xmlreader.c index 64da1bbe..529ef56f 100644 --- a/xmlreader.c +++ b/xmlreader.c @@ -554,6 +554,56 @@ xmlTextReaderValidateEntity(xmlTextReaderPtr reader) { } +/** + * xmlTextReaderGetSuccessor: + * @cur: the current node + * + * Get the successor of a node if available. + * + * Returns the successor node or NULL + */ +static xmlNodePtr +xmlTextReaderGetSuccessor(xmlNodePtr cur) { + if (cur == NULL) return(NULL) ; /* ERROR */ + if (cur->next != NULL) return(cur->next) ; + do { + cur = cur->parent; + if (cur == NULL) return(NULL); + if (cur->next != NULL) return(cur->next); + } while (cur != NULL); + return(cur); +} + +/** + * xmlTextReaderDoExpand: + * @reader: the xmlTextReaderPtr used + * + * Makes sure that the current node is fully read as well as all its + * descendant. It means the full DOM subtree must be available at the + * end of the call. 
+ * + * Returns 1 if the node was expanded successfully, 0 if there is no more + * nodes to read, or -1 in case of error + */ +static int +xmlTextReaderDoExpand(xmlTextReaderPtr reader) { + int val; + + if ((reader == NULL) || (reader->node == NULL) || (reader->ctxt == NULL)) + return(-1); + + do { + if (xmlTextReaderGetSuccessor(reader->node) != NULL) + return(1); + if (reader->mode == XML_TEXTREADER_MODE_EOF) + return(1); + val = xmlTextReaderPushData(reader); + if (val < 0) + return(-1); + } while(reader->mode != XML_TEXTREADER_MODE_EOF); + return(1); +} + /** * xmlTextReaderRead: * @reader: the xmlTextReaderPtr used @@ -804,6 +854,7 @@ node_found: #endif /* LIBXML_REGEXP_ENABLED */ return(1); node_end: + reader->mode = XML_TEXTREADER_DONE; return(0); } @@ -822,6 +873,57 @@ xmlTextReaderReadState(xmlTextReaderPtr reader) { return(reader->mode); } +/** + * xmlTextReaderExpand: + * @reader: the xmlTextReaderPtr used + * + * Reads the contents of the current node and the full subtree. It then makes + * the subtree available until the next xmlTextReaderRead() call + * + * Returns a node pointer valid until the next xmlTextReaderRead() call + * or NULL in case of error. + */ +xmlNodePtr +xmlTextReaderExpand(xmlTextReaderPtr reader) { + if ((reader == NULL) || (reader->node == NULL) || (reader->ctxt == NULL)) + return(NULL); + if (xmlTextReaderDoExpand(reader) < 0) + return(NULL); + return(reader->node); +} + +/** + * xmlTextReaderNext: + * @reader: the xmlTextReaderPtr used + * + * Skip to the node following the current one in document order while + * avoiding the subtree if any. 
+ * + * Returns 1 if the node was read successfully, 0 if there is no more + * nodes to read, or -1 in case of error + */ +int +xmlTextReaderNext(xmlTextReaderPtr reader) { + int ret; + xmlNodePtr cur; + + if (reader == NULL) + return(-1); + cur = reader->node; + if ((cur == NULL) || (cur->type != XML_ELEMENT_NODE)) + return(xmlTextReaderRead(reader)); + if (reader->state == XML_TEXTREADER_END) + return(xmlTextReaderRead(reader)); + if (cur->_private == (void *)xmlTextReaderIsEmpty) + return(xmlTextReaderRead(reader)); + do { + ret = xmlTextReaderRead(reader); + if (ret != 1) + return(ret); + } while (reader->node != cur); + return(xmlTextReaderRead(reader)); +} + /** * xmlTextReaderReadInnerXml: * @reader: the xmlTextReaderPtr used