a bit of cleanup small fix more work on the XmlTextReader tutorial a few

* xmllint.c: a bit of cleanup * xmlreader.c: small fix * doc/xmlreader.html: more work on the XmlTextReader tutorial * python/libxml.py: a few fixes pointed out by Hannu Krosing Daniel
2025-08-08 17:42:14 +03:00 · 2003-01-04 16:35:29 +00:00
parent 623a9eb2df
commit e59494fa54
5 changed files with 218 additions and 39 deletions
--- a/7
+++ b/7
@@ -1,3 +1,10 @@
 Sat Jan  4 17:33:17 CET 2003 Daniel Veillard <daniel@veillard.com>
 	* xmllint.c: a bit of cleanup
 	* xmlreader.c: small fix
 	* doc/xmlreader.html: more work on the XmlTextReader tutorial
 	* python/libxml.py: a few fixes pointed out by Hannu Krosing
 Sat Jan  4 13:46:14 CET 2003 Daniel Veillard <daniel@veillard.com>
 	* python/setup.py.in: patch from Stéphane Bidoul to include
--- a/doc/xmlreader.html
+++ b/doc/xmlreader.html
@@ -23,7 +23,7 @@ A:link, A:visited, A:active { text-decoration: underline }-->
 <p></p>
 <p>This document describes the use of the XmlTextReader streaming API added
-to libxml2 in version 2.5.0 . This API is closely modelled on the <a
+to libxml2 in version 2.5.0 . This API is closely modeled after the <a
 href="http://dotgnu.org/pnetlib-doc/System/Xml/XmlTextReader.html">XmlTextReader</a>
 and <a
 href="http://dotgnu.org/pnetlib-doc/System/Xml/XmlReader.html">XmlReader</a>
@@ -38,6 +38,8 @@ examples using both C and the Python bindings:</p>
  <li><a href="#Walking">Walking a simple tree</a></li>
  <li><a href="#Extracting">Extracting informations for the current
  node</a></li>
  <li><a href="#Extracting1">Extracting informations for the
  attributes</a></li>
  <li><a href="#Validating">Validating a document</a></li>
  <li><a href="#Entities">Entities substitution</a></li>
 </ul>
@@ -132,26 +134,28 @@ int streamFile(char *filename) {
 def processNode(reader):
    pass
-try:
+def streamFile(filename):
-    reader = newTextReaderFilename(filename)
+    try:
-except:
+        reader = libxml2.newTextReaderFilename(filename)
-    print "unable to open %s" % (filename)
+    except:
        print "unable to open %s" % (filename)
        return
 ret = reader.Read()
 while ret == 1:
    processNode(reader)
    ret = reader.Read()
-if ret != 0:
+    while ret == 1:
-    print "%s : failed to parse" % (filename)
+        processNode(reader)
        ret = reader.Read()
    if ret != 0:
        print "%s : failed to parse" % (filename)
 </pre>
 <p>The only things worth adding are that the <a
 href="http://dotgnu.org/pnetlib-doc/System/Xml/XmlTextReader.html">xmlTextReader
 is abstracted as a class like in C#</a> with the same method names (but the
-properties are currently accessed with methods) and to note one doesn't need
+properties are currently accessed with methods) and that one doesn't need to
-to free the reader at the end of the processing, it will get garbage
+free the reader at the end of the processing, it will get garbage collected
-collected once all references have disapeared</p>
+once all references have disappeared</p>
 <h2><a name="Extracting">Extracting informations for the current node</a></h2>
@@ -206,10 +210,184 @@ XmlTextReader class</a> set of properties and methods:</p>
    current node.</li>
 </ul>
-<p></p>
+<p>Let's look first at a small example to get this in practice by redefining
 the processNode() function in the Python example:</p>
 <pre>def processNode(reader):
    print "%d %d %s %d" % (reader.Depth(), reader.NodeType(),
                           reader.Name(), reader.IsEmptyElement())</pre>
 <p>and look at the result of calling streamFile("tst.xml") for various
 content of the XML test file.</p>
 <p>For the minimal document "<code>&lt;doc/&gt;</code>" we get:</p>
 <pre>0 1 doc 1</pre>
 <p>Only one node is found, its depth is 0, type 1 indicates an element start,
 of name "doc" and it is empty. Trying now with
 "<code>&lt;doc&gt;&lt;/doc&gt;</code>" instead leads to:</p>
 <pre>0 1 doc 0
 0 15 doc 0</pre>
 <p>The document root node is not flagged as empty anymore and both a start
 and an end of element are detected. The following document shows how
 character data are reported:</p>
 <pre>&lt;doc&gt;&lt;a/&gt;&lt;b&gt;some text&lt;/b&gt;
 &lt;c/&gt;&lt;/doc&gt;</pre>
 <p>We modify the processNode() function to also report the node Value:</p>
 <pre>def processNode(reader):
    print "%d %d %s %d %s" % (reader.Depth(), reader.NodeType(),
                              reader.Name(), reader.IsEmptyElement(),
                              reader.Value())</pre>
 <p>The result of the test is:</p>
 <pre>0 1 doc 0 None
 1 1 a 1 None
 1 1 b 0 None
 2 3 #text 0 some text
 1 15 b 0 None
 1 3 #text 0
 1 1 c 1 None
 0 15 doc 0 None</pre>
 <p>There are a few things to note:</p>
 <ul>
  <li>the increase of the depth value (first row) as children nodes are
    explored</li>
  <li>the text node child of the b element, of type 3 and its content</li>
  <li>the text node containing the line return between elements b and c</li>
  <li>that elements have the Value None (or NULL in C)</li>
 </ul>
 <p>The equivalent routine for <code>processNode()</code> as used by
 <code>xmllint --stream --debug</code> is the following and can be found in
 the xmllint.c module in the source distribution:</p>
 <pre>static void processNode(xmlTextReaderPtr reader) {
    xmlChar *name, *value;
    name = xmlTextReaderName(reader);
    if (name == NULL)
        name = xmlStrdup(BAD_CAST "--");
    value = xmlTextReaderValue(reader);
    printf("%d %d %s %d",
            xmlTextReaderDepth(reader),
            xmlTextReaderNodeType(reader),
            name,
            xmlTextReaderIsEmptyElement(reader));
    xmlFree(name);
    if (value == NULL)
        printf("\n");
    else {
        printf(" %s\n", value);
        xmlFree(value);
    }
 }</pre>
 <h2><a name="Extracting1">Extracting informations for the attributes</a></h2>
 <p>The previous examples don't indicate how attributes are processed. The
 simple test "<code>&lt;doc a="b"/&gt;</code>" provides the following
 result:</p>
 <pre>0 1 doc 1 None</pre>
 <p>This proves that attribute nodes are not traversed by default. The
 <em>HasAttributes</em> property allows detecting their presence. To check
 their content the API has special instructions. Basically, 2 kinds of
 operations are possible:</p>
 <ol>
  <li>to move the reader to the attribute nodes of the current element, in
    that case the cursor is positioned on the attribute node</li>
  <li>to directly query the element node for the attribute value</li>
 </ol>
 <p>In both cases the attribute can be designated either by its position in the
 list of attributes (<em>MoveToAttributeNo</em> or <em>GetAttributeNo</em>) or
 by its name (and namespace):</p>
 <ul>
  <li><em>GetAttributeNo</em>(no): provides the value of the attribute with
    the specified index no relative to the containing element.</li>
  <li><em>GetAttribute</em>(name): provides the value of the attribute with
    the specified qualified name.</li>
  <li>GetAttributeNs(localName, namespaceURI): provides the value of the
    attribute with the specified local name and namespace URI.</li>
  <li><em>MoveToAttributeNo</em>(no): moves the position of the current
    instance to the attribute with the specified index relative to the
    containing element.</li>
  <li><em>MoveToAttribute</em>(name): moves the position of the current
    instance to the attribute with the specified qualified name.</li>
  <li><em>MoveToAttributeNs</em>(localName, namespaceURI): moves the position
    of the current instance to the attribute with the specified local name
    and namespace URI.</li>
  <li><em>MoveToFirstAttribute</em>: moves the position of the current
    instance to the first attribute associated with the current node.</li>
  <li><em>MoveToNextAttribute</em>: moves the position of the current
    instance to the next attribute associated with the current node.</li>
  <li><em>MoveToElement</em>: moves the position of the current instance to
    the node that contains the current Attribute  node.</li>
 </ul>
 <p>After modifying the processNode() function to show attributes:</p>
 <pre>def processNode(reader):
    print "%d %d %s %d %s" % (reader.Depth(), reader.NodeType(),
                              reader.Name(), reader.IsEmptyElement(),
                              reader.Value())
    if reader.NodeType() == 1: # Element
        while reader.MoveToNextAttribute():
            print "-- %d %d (%s) [%s]" % (reader.Depth(), reader.NodeType(),
                                          reader.Name(),reader.Value())</pre>
 <p>the output for the same input document reflects the attribute:</p>
 <pre>0 1 doc 1 None
 -- 1 2 (a) [b]</pre>
 <p>There are a couple of things to note on the attribute processing:</p>
 <ul>
  <li>their depth is the one of the carrying element plus one</li>
  <li>namespace declarations are seen as attributes like in DOM</li>
 </ul>
 <h2><a name="Validating">Validating a document</a></h2>
 <p>The libxml2 implementation adds some extra features on top of the
 XmlTextReader API; the main one is the ability to DTD validate the parsed
 document progressively. This is simply the activation of the associated
 feature of the parser used by the reader structure. There are a few options
 available, defined as the enum xmlParserProperties in the
 libxml/xmlreader.h header file:</p>
 <ul>
  <li>XML_PARSER_LOADDTD: force loading the DTD (without validating)</li>
  <li>XML_PARSER_DEFAULTATTRS: force attribute defaulting (this also implies
    loading the DTD)</li>
  <li>XML_PARSER_VALIDATE: activate DTD validation (this also implies loading
    the DTD)</li>
  <li>XML_PARSER_SUBST_ENTITIES: substitute entities on the fly, entity
    reference nodes are not generated and are replaced by their expanded
    content.</li>
  <li>more settings might be added, those were the ones available at the 2.5.0
    release...</li>
 </ul>
 <p>The GetParserProp() and SetParserProp() methods can then be used to get
 and set the values of those parser properties of the reader. For example</p>
 <pre>def parseAndValidate(file):
    reader = libxml2.newTextReaderFilename(file)
    reader.SetParserProp(libxml2.PARSER_VALIDATE, 1)
    ret = reader.Read()
    while ret == 1:
        ret = reader.Read()
    if ret != 0:
        print "Error parsing and validating %s" % (file)</pre>
 <p>This routine will parse and validate the file. Error messages can be
 captured by registering an error handler. See python/tests/reader2.py for
 more complete Python examples. At the C level the equivalent call to activate
 the validation feature is just:</p>
 <pre>ret = xmlTextReaderSetParserProp(reader, XML_PARSER_VALIDATE, 1)</pre>
 <p>and a return value of 0 indicates success.</p>
 <h2><a name="Entities">Entities substitution</a></h2>
 <p> </p>
--- a/python/libxml.py
+++ b/python/libxml.py
@@ -302,11 +302,6 @@ class xmlCore:
        return libxml2mod.name(self._o)
    def get_type(self):
        return libxml2mod.type(self._o)
    def get_doc(self):
        ret = libxml2mod.doc(self._o)
        if ret == None:
            return None
        return xmlDoc(_obj=ret)
    def free(self):
        libxml2mod.freeDoc(self._o)
@@ -356,7 +351,7 @@ def nodeWrap(o):
    if name == "entity_decl":
        return xmlEntity(_obj=o)
    if name == "dtd":
-        return xmlAttr(_obj=o)
+        return xmlDtd(_obj=o)
    return xmlNode(_obj=o)
 def xpathObjectRet(o):
--- a/xmllint.c
+++ b/xmllint.c
@@ -572,26 +572,24 @@ static int count = 0;
 static int elem, attrs;
 static void processNode(xmlTextReaderPtr reader) {
-    if (debug) {
+    xmlChar *name, *value;
 	xmlChar *name, *value;
-	name = xmlTextReaderName(reader);
+    name = xmlTextReaderName(reader);
-	if (name == NULL)
+    if (name == NULL)
-	    name = xmlStrdup(BAD_CAST "--");
+	name = xmlStrdup(BAD_CAST "--");
-	value = xmlTextReaderValue(reader);
+    value = xmlTextReaderValue(reader);
-	printf("%d %d %d %s", 
+    printf("%d %d %s %d", 
-		xmlTextReaderDepth(reader),
+	    xmlTextReaderDepth(reader),
-		xmlTextReaderNodeType(reader),
+	    xmlTextReaderNodeType(reader),
-		xmlTextReaderIsEmptyElement(reader),
+	    name,
-		name);
+	    xmlTextReaderIsEmptyElement(reader));
-	xmlFree(name);
+    xmlFree(name);
-	if (value == NULL)
+    if (value == NULL)
-	    printf("\n");
+	printf("\n");
-	else {
+    else {
-	    printf(" %s\n", value);
+	printf(" %s\n", value);
-	    xmlFree(value);
+	xmlFree(value);
 	}
    }
 }
--- a/xmlreader.c
+++ b/xmlreader.c
@@ -602,6 +602,7 @@ xmlTextReaderRead(xmlTextReaderPtr reader) {
 	    reader->state = XML_TEXTREADER_ELEMENT;
 	} else {
 	    reader->node = reader->ctxt->nodeTab[0];
 	    reader->state = XML_TEXTREADER_ELEMENT;
 	}
 	reader->depth = 0;
 	goto node_found;