From c6cae7b690c1ded6f03a378eb59025522cc40105 Mon Sep 17 00:00:00 2001
From: Daniel Veillard <veillard@src.gnome.org>
Date: Fri, 11 Apr 2003 09:02:11 +0000
Subject: [PATCH] Added the Expand() and Next() operations to work on subtrees
 within the reader framework

* xmlreader.c include/libxml/xmlreader.h: Added the Expand()
  and Next() operations to work on subtrees within the reader
  framework.
* doc/libxml2-api.xml python/libxml2class.txt: resulting updates
* python/tests/reader5.py: added an example for those new
  functions of the reader.
Daniel
---
 ChangeLog                  |   9 ++++
 doc/libxml2-api.xml        |  34 +++++++++++--
 include/libxml/xmlreader.h |   2 +
 python/libxml2class.txt    |   4 ++
 python/tests/reader5.py    |  48 +++++++++++++++++
 xmlreader.c                | 102 +++++++++++++++++++++++++++++++++++++
 6 files changed, 196 insertions(+), 3 deletions(-)
 create mode 100755 python/tests/reader5.py
diff --git a/ChangeLog b/ChangeLog
index a9f69d5b..882daed8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+Fri Apr 11 10:59:24 CEST 2003 Daniel Veillard <daniel@veillard.com>
+
+	* xmlreader.c include/libxml/xmlreader.h: Added the Expand()
+	  and Next() operations to work on subtrees within the reader
+	  framework.
+	* doc/libxml2-api.xml python/libxml2class.txt: resulting updates
+	* python/tests/reader5.py: added an example for those new
+	  functions of the reader.
+
 Thu Apr 10 23:38:13 CEST 2003 Daniel Veillard <daniel@veillard.com>
 
 	* HTMLtree.c: patch from Vasily Tchekalkin to fix #109865
diff --git a/doc/libxml2-api.xml b/doc/libxml2-api.xml
index 64cfa0b5..6ee16a60 100644
--- a/doc/libxml2-api.xml
+++ b/doc/libxml2-api.xml
@@ -883,7 +883,6 @@
      <exports symbol='xmlRemoveID'/>
      <exports symbol='xmlRemoveRef'/>
      <exports symbol='xmlSnprintfElementContent'/>
-     <exports symbol='xmlSplitQName2'/>
      <exports symbol='xmlSprintfElementContent'/>
      <exports symbol='xmlValidBuildContentModel'/>
      <exports symbol='xmlValidCtxt'/>
@@ -929,6 +928,7 @@
      <exports symbol='_htmlEntityDesc'/>
      <exports symbol='htmlAttrAllowed'/>
      <exports symbol='htmlAutoCloseTag'/>
+     <exports symbol='htmlCreateMemoryParserCtxt'/>
      <exports symbol='htmlCreatePushParserCtxt'/>
      <exports symbol='htmlDefaultSubelement'/>
      <exports symbol='htmlDocPtr'/>
@@ -1127,6 +1127,7 @@
      <exports symbol='xmlTextReaderCurrentNode'/>
      <exports symbol='xmlTextReaderDepth'/>
      <exports symbol='xmlTextReaderErrorFunc'/>
+     <exports symbol='xmlTextReaderExpand'/>
      <exports symbol='xmlTextReaderGetAttribute'/>
      <exports symbol='xmlTextReaderGetAttributeNo'/>
      <exports symbol='xmlTextReaderGetAttributeNs'/>
@@ -1150,6 +1151,7 @@
      <exports symbol='xmlTextReaderMoveToNextAttribute'/>
      <exports symbol='xmlTextReaderName'/>
      <exports symbol='xmlTextReaderNamespaceUri'/>
+     <exports symbol='xmlTextReaderNext'/>
      <exports symbol='xmlTextReaderNodeType'/>
      <exports symbol='xmlTextReaderNormalization'/>
      <exports symbol='xmlTextReaderPrefix'/>
@@ -1552,6 +1554,7 @@
      <exports symbol='xmlBufferWriteCHAR'/>
      <exports symbol='xmlBufferWriteChar'/>
      <exports symbol='xmlBufferWriteQuotedString'/>
+     <exports symbol='xmlBuildQName'/>
      <exports symbol='xmlChar'/>
      <exports symbol='xmlChildrenNode'/>
      <exports symbol='xmlCopyDoc'/>
@@ -1701,6 +1704,7 @@
      <exports symbol='xmlSetNsProp'/>
      <exports symbol='xmlSetProp'/>
      <exports symbol='xmlSetTreeDoc'/>
+     <exports symbol='xmlSplitQName2'/>
      <exports symbol='xmlStringGetNodeList'/>
      <exports symbol='xmlStringLenGetNodeList'/>
      <exports symbol='xmlTextConcat'/>
@@ -3633,6 +3637,12 @@ actually an xmlCharEncoding'/>
       <arg name='filename' type='const char *' info='the filename'/>
       <arg name='encoding' type='const char *' info='a free form C string describing the HTML document encoding, or NULL'/>
     </function>
+    <function name='htmlCreateMemoryParserCtxt' file='HTMLparser'>
+      <info>Create a parser context for an HTML in-memory document.</info>
+      <return type='htmlParserCtxtPtr' info='the new parser context or NULL'/>
+      <arg name='buffer' type='const char *' info='a pointer to a char array'/>
+      <arg name='size' type='int' info='the size of the array'/>
+    </function>
     <function name='htmlCreatePushParserCtxt' file='HTMLparser'>
       <info>Create a parser context for using the HTML parser in push mode The value of @filename is used for fetching external entities and error/warning reports.</info>
       <return type='htmlParserCtxtPtr' info='the new parser context or NULL'/>
@@ -4560,6 +4570,14 @@ actually an xmlCharEncoding'/>
       <arg name='buf' type='xmlBufferPtr' info='the XML buffer output'/>
       <arg name='string' type='const xmlChar *' info='the string to add'/>
     </function>
+    <function name='xmlBuildQName' file='tree'>
+      <info>Builds the QName @prefix:@ncname in @memory if there is enough space and prefix is not NULL nor empty, otherwise allocate a new string. If prefix is NULL or empty it returns ncname.</info>
+      <return type='xmlChar *' info='the new string which must be freed by the caller if different from @memory and @ncname or NULL in case of error'/>
+      <arg name='ncname' type='const xmlChar *' info='the Name'/>
+      <arg name='prefix' type='const xmlChar *' info='the prefix'/>
+      <arg name='memory' type='xmlChar *' info='preallocated memory'/>
+      <arg name='len' type='int' info='preallocated memory length'/>
+    </function>
     <function name='xmlBuildURI' file='uri'>
       <info>Computes he final URI of the reference done by checking that the given URI is valid, and building the final URI using the base URI. This is processed according to section 5.2 of the RFC 2396  5.2. Resolving Relative References to Absolute Form</info>
       <return type='xmlChar *' info='a new URI string (to be freed by the caller) or NULL in case of error.'/>
@@ -8076,10 +8094,10 @@ actually an xmlCharEncoding'/>
       <arg name='name' type='const xmlChar *' info='an XML parser context'/>
       <arg name='prefix' type='xmlChar **' info='a xmlChar **'/>
     </function>
-    <function name='xmlSplitQName2' file='valid'>
+    <function name='xmlSplitQName2' file='tree'>
       <info>parse an XML qualified name string  [NS 5] QName ::= (Prefix &apos;:&apos;)? LocalPart  [NS 6] Prefix ::= NCName  [NS 7] LocalPart ::= NCName</info>
       <return type='xmlChar *' info='NULL if not a QName, otherwise the local part, and prefix is updated to get the Prefix if any.'/>
-      <arg name='name' type='const xmlChar *' info='an XML parser context'/>
+      <arg name='name' type='const xmlChar *' info='the full QName'/>
       <arg name='prefix' type='xmlChar **' info='a xmlChar **'/>
     </function>
     <function name='xmlSprintfElementContent' file='valid'>
@@ -8283,6 +8301,11 @@ actually an xmlCharEncoding'/>
       <arg name='severity' type='xmlParserSeverities' info=''/>
       <arg name='locator' type='xmlTextReaderLocatorPtr' info=''/>
     </functype>
+    <function name='xmlTextReaderExpand' file='xmlreader'>
+      <info>Reads the contents of the current node and the full subtree. It then makes the subtree available until the next xmlTextReaderRead() call</info>
+      <return type='xmlNodePtr' info='a node pointer valid until the next xmlTextReaderRead() call or NULL in case of error.'/>
+      <arg name='reader' type='xmlTextReaderPtr' info='the xmlTextReaderPtr used'/>
+    </function>
     <function name='xmlTextReaderGetAttribute' file='xmlreader'>
       <info>Provides the value of the attribute with the specified qualified name.</info>
       <return type='xmlChar *' info='a string containing the value of the specified attribute, or NULL in case of error. The string must be deallocated by the caller.'/>
@@ -8405,6 +8428,11 @@ actually an xmlCharEncoding'/>
       <return type='xmlChar *' info='the namespace URI or NULL if not available'/>
       <arg name='reader' type='xmlTextReaderPtr' info='the xmlTextReaderPtr used'/>
     </function>
+    <function name='xmlTextReaderNext' file='xmlreader'>
+      <info>Skip to the node following the current one in document order while avoiding the subtree if any.</info>
+      <return type='int' info='1 if the node was read successfully, 0 if there are no more nodes to read, or -1 in case of error'/>
+      <arg name='reader' type='xmlTextReaderPtr' info='the xmlTextReaderPtr used'/>
+    </function>
     <function name='xmlTextReaderNodeType' file='xmlreader'>
       <info>Get the node type of the current node Reference: http://dotgnu.org/pnetlib-doc/System/Xml/XmlNodeType.html</info>
       <return type='int' info='the xmlNodeType of the current node or -1 in case of error'/>
diff --git a/include/libxml/xmlreader.h b/include/libxml/xmlreader.h
index e9475a77..f035258f 100644
--- a/include/libxml/xmlreader.h
+++ b/include/libxml/xmlreader.h
@@ -106,6 +106,8 @@ int		xmlTextReaderGetParserProp	(xmlTextReaderPtr reader,
 						 int prop);
 xmlNodePtr	xmlTextReaderCurrentNode	(xmlTextReaderPtr reader);
 xmlDocPtr	xmlTextReaderCurrentDoc		(xmlTextReaderPtr reader);
+xmlNodePtr	xmlTextReaderExpand		(xmlTextReaderPtr reader);
+int		xmlTextReaderNext		(xmlTextReaderPtr reader);
 
 /*
  * Error handling extensions
diff --git a/python/libxml2class.txt b/python/libxml2class.txt
index 92c6601b..950397d9 100644
--- a/python/libxml2class.txt
+++ b/python/libxml2class.txt
@@ -6,6 +6,7 @@
 
 
 # functions from module HTMLparser
+htmlCreateMemoryParserCtxt()
 htmlHandleOmittedElem()
 htmlIsScriptAttribute()
 htmlParseDoc()
@@ -132,6 +133,7 @@ relaxNGNewMemParserCtxt()
 relaxNGNewParserCtxt()
 
 # functions from module tree
+buildQName()
 compressMode()
 isXHTML()
 newComment()
@@ -594,6 +596,7 @@ Class xmlTextReader(xmlTextReaderCore)
     CurrentDoc()
     CurrentNode()
     Depth()
+    Expand()
     GetAttribute()
     GetAttributeNo()
     GetAttributeNs()
@@ -613,6 +616,7 @@ Class xmlTextReader(xmlTextReaderCore)
     MoveToNextAttribute()
     Name()
     NamespaceUri()
+    Next()
     NodeType()
     Normalization()
     Prefix()
diff --git a/python/tests/reader5.py b/python/tests/reader5.py
new file mode 100755
index 00000000..fbfe4a63
--- /dev/null
+++ b/python/tests/reader5.py
@@ -0,0 +1,48 @@
+#!/usr/bin/python -u
+#
+# this tests the Expand() API of the xmlTextReader interface
+# this extracts the Dragon book bibliography entry (id 'Aho') from the XML specification
+#
+import libxml2
+import StringIO
+import sys
+
+# Memory debug specific
+libxml2.debugMemory(1)
+
+expect="""<bibl id="Aho" key="Aho/Ullman">Aho, Alfred V., 
+Ravi Sethi, and Jeffrey D. Ullman.
+<emph>Compilers:  Principles, Techniques, and Tools</emph>.
+Reading:  Addison-Wesley, 1986, rpt. corr. 1988.</bibl>"""
+
+f = open('../../test/valid/REC-xml-19980210.xml')
+input = libxml2.inputBuffer(f)
+reader = input.newTextReader("REC")
+res=""
+while reader.Read():
+    while reader.Name() == 'bibl':
+        node = reader.Expand()            # expand the subtree
+	if node.xpathEval("@id = 'Aho'"): # use XPath on it
+	    res = res + node.serialize()
+	if reader.Next() != 1:            # skip the subtree
+	    break;
+
+if res != expect:
+    print "Error: didn't get the expected output"
+    print "got '%s'" % (res)
+    print "expected '%s'" % (expect)
+    
+
+#
+# cleanup
+#
+del input
+del reader
+
+# Memory debug specific
+libxml2.cleanupParser()
+if libxml2.debugMemory(1) == 0:
+    print "OK"
+else:
+    print "Memory leak %d bytes" % (libxml2.debugMemory(1))
+    libxml2.dumpMemory()
diff --git a/xmlreader.c b/xmlreader.c
index 64da1bbe..529ef56f 100644
--- a/xmlreader.c
+++ b/xmlreader.c
@@ -554,6 +554,56 @@ xmlTextReaderValidateEntity(xmlTextReaderPtr reader) {
 }
 
 
+/**
+ * xmlTextReaderGetSuccessor:
+ * @cur:  the current node (may be NULL)
+ *
+ * Get the next sibling of @cur or, failing that, of its closest ancestor.
+ *
+ * Returns the successor node, or NULL if @cur is NULL or none exists
+ */
+static xmlNodePtr
+xmlTextReaderGetSuccessor(xmlNodePtr cur) {
+    if (cur == NULL) return(NULL) ; /* ERROR */
+    if (cur->next != NULL) return(cur->next) ; /* direct sibling */
+    do {
+        cur = cur->parent; /* climb until an ancestor has a sibling */
+        if (cur == NULL) return(NULL);
+        if (cur->next != NULL) return(cur->next);
+    } while (cur != NULL); /* always true: NULL was returned above */
+    return(cur); /* not reached */
+}
+
+/**
+ * xmlTextReaderDoExpand:
+ * @reader:  the xmlTextReaderPtr used
+ *
+ * Makes sure that the current node is fully read as well as all its
+ * descendants. It means the full DOM subtree must be available at the
+ * end of the call.
+ *
+ * Returns 1 if the node was expanded successfully or the reader reached
+ *          EOF mode, or -1 in case of error
+ */
+static int
+xmlTextReaderDoExpand(xmlTextReaderPtr reader) {
+    int val;
+
+    if ((reader == NULL) || (reader->node == NULL) || (reader->ctxt == NULL))
+        return(-1);
+
+    do { /* call xmlTextReaderPushData() until the node has a successor */
+        if (xmlTextReaderGetSuccessor(reader->node) != NULL)
+	    return(1); /* presumably the subtree is then complete */
+	if (reader->mode == XML_TEXTREADER_MODE_EOF)
+	    return(1);
+	val = xmlTextReaderPushData(reader);
+	if (val < 0)
+	    return(-1);
+    } while(reader->mode != XML_TEXTREADER_MODE_EOF);
+    return(1); /* loop ended because the reader is in EOF mode */
+}
+
 /**
  * xmlTextReaderRead:
  * @reader:  the xmlTextReaderPtr used
@@ -804,6 +854,7 @@ node_found:
 #endif /* LIBXML_REGEXP_ENABLED */
     return(1);
 node_end:
+    reader->mode = XML_TEXTREADER_DONE;
     return(0);
 }
 
@@ -822,6 +873,57 @@ xmlTextReaderReadState(xmlTextReaderPtr reader) {
     return(reader->mode);
 }
 
+/**
+ * xmlTextReaderExpand:
+ * @reader:  the xmlTextReaderPtr used
+ *
+ * Reads the contents of the current node and the full subtree. It then makes
+ * the subtree available until the next xmlTextReaderRead() call
+ *
+ * Returns a node pointer valid until the next xmlTextReaderRead() call
+ *         or NULL in case of error.
+ */
+xmlNodePtr
+xmlTextReaderExpand(xmlTextReaderPtr reader) {
+    if ((reader == NULL) || (reader->node == NULL) || (reader->ctxt == NULL))
+        return(NULL); /* no reader, no current node or no parser context */
+    if (xmlTextReaderDoExpand(reader) < 0)
+        return(NULL); /* expansion failed */
+    return(reader->node);
+}
+
+/**
+ * xmlTextReaderNext:
+ * @reader:  the xmlTextReaderPtr used
+ *
+ * Skip to the node following the current one in document order while
+ * avoiding the subtree if any.
+ *
+ * Returns 1 if the node was read successfully, 0 if there are no more
+ *          nodes to read, or -1 in case of error
+ */
+int
+xmlTextReaderNext(xmlTextReaderPtr reader) {
+    int ret;
+    xmlNodePtr cur;
+
+    if (reader == NULL)
+	return(-1);
+    cur = reader->node;
+    if ((cur == NULL) || (cur->type != XML_ELEMENT_NODE))
+        return(xmlTextReaderRead(reader)); /* not on an element: plain read */
+    if (reader->state == XML_TEXTREADER_END)
+        return(xmlTextReaderRead(reader)); /* presumably already at its end */
+    if (cur->_private == (void *)xmlTextReaderIsEmpty)
+        return(xmlTextReaderRead(reader)); /* presumably an empty element */
+    do {
+        ret = xmlTextReaderRead(reader);
+	if (ret != 1)
+	    return(ret); /* end of input or error while skipping */
+    } while (reader->node != cur); /* read until back on the start node */
+    return(xmlTextReaderRead(reader)); /* then step past it */
+}
+
 /**
  * xmlTextReaderReadInnerXml:
  * @reader:  the xmlTextReaderPtr used