mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-08-08 17:42:14 +03:00
a bit of cleanup small fix more work on the XmlTextReader tutorial a few
* xmllint.c: a bit of cleanup * xmlreader.c: small fix * doc/xmlreader.html: more work on the XmlTextReader tutorial * python/libxml.py: a few fixes pointed out by Hannu Krosing Daniel
This commit is contained in:
@@ -1,3 +1,10 @@
|
|||||||
|
Sat Jan 4 17:33:17 CET 2003 Daniel Veillard <daniel@veillard.com>
|
||||||
|
|
||||||
|
* xmllint.c: a bit of cleanup
|
||||||
|
* xmlreader.c: small fix
|
||||||
|
* doc/xmlreader.html: more work on the XmlTextReader tutorial
|
||||||
|
* python/libxml.py: a few fixes pointed out by Hannu Krosing
|
||||||
|
|
||||||
Sat Jan 4 13:46:14 CET 2003 Daniel Veillard <daniel@veillard.com>
|
Sat Jan 4 13:46:14 CET 2003 Daniel Veillard <daniel@veillard.com>
|
||||||
|
|
||||||
* python/setup.py.in: patch from Stéphane Bidoul to include
|
* python/setup.py.in: patch from Stéphane Bidoul to include
|
||||||
|
@@ -23,7 +23,7 @@ A:link, A:visited, A:active { text-decoration: underline }-->
|
|||||||
<p></p>
|
<p></p>
|
||||||
|
|
||||||
<p>This document describes the use of the XmlTextReader streaming API added
|
<p>This document describes the use of the XmlTextReader streaming API added
|
||||||
to libxml2 in version 2.5.0 . This API is closely modelled on the <a
|
to libxml2 in version 2.5.0 . This API is closely modeled after the <a
|
||||||
href="http://dotgnu.org/pnetlib-doc/System/Xml/XmlTextReader.html">XmlTextReader</a>
|
href="http://dotgnu.org/pnetlib-doc/System/Xml/XmlTextReader.html">XmlTextReader</a>
|
||||||
and <a
|
and <a
|
||||||
href="http://dotgnu.org/pnetlib-doc/System/Xml/XmlReader.html">XmlReader</a>
|
href="http://dotgnu.org/pnetlib-doc/System/Xml/XmlReader.html">XmlReader</a>
|
||||||
@@ -38,6 +38,8 @@ examples using both C and the Python bindings:</p>
|
|||||||
<li><a href="#Walking">Walking a simple tree</a></li>
|
<li><a href="#Walking">Walking a simple tree</a></li>
|
||||||
<li><a href="#Extracting">Extracting informations for the current
|
<li><a href="#Extracting">Extracting informations for the current
|
||||||
node</a></li>
|
node</a></li>
|
||||||
|
<li><a href="#Extracting1">Extracting informations for the
|
||||||
|
attributes</a></li>
|
||||||
<li><a href="#Validating">Validating a document</a></li>
|
<li><a href="#Validating">Validating a document</a></li>
|
||||||
<li><a href="#Entities">Entities substitution</a></li>
|
<li><a href="#Entities">Entities substitution</a></li>
|
||||||
</ul>
|
</ul>
|
||||||
@@ -132,26 +134,28 @@ int streamFile(char *filename) {
|
|||||||
def processNode(reader):
|
def processNode(reader):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
try:
|
def streamFile(filename):
|
||||||
reader = newTextReaderFilename(filename)
|
try:
|
||||||
except:
|
reader = libxml2.newTextReaderFilename(filename)
|
||||||
print "unable to open %s" % (filename)
|
except:
|
||||||
|
print "unable to open %s" % (filename)
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
ret = reader.Read()
|
|
||||||
while ret == 1:
|
|
||||||
processNode(reader)
|
|
||||||
ret = reader.Read()
|
ret = reader.Read()
|
||||||
if ret != 0:
|
while ret == 1:
|
||||||
print "%s : failed to parse" % (filename)
|
processNode(reader)
|
||||||
|
ret = reader.Read()
|
||||||
|
|
||||||
|
if ret != 0:
|
||||||
|
print "%s : failed to parse" % (filename)
|
||||||
</pre>
|
</pre>
|
||||||
|
|
||||||
<p>The only things worth adding are that the <a
|
<p>The only things worth adding are that the <a
|
||||||
href="http://dotgnu.org/pnetlib-doc/System/Xml/XmlTextReader.html">xmlTextReader
|
href="http://dotgnu.org/pnetlib-doc/System/Xml/XmlTextReader.html">xmlTextReader
|
||||||
is abstracted as a class like in C#</a> with the same method names (but the
|
is abstracted as a class like in C#</a> with the same method names (but the
|
||||||
properties are currently accessed with methods) and to note one doesn't need
|
properties are currently accessed with methods) and that one doesn't need to
|
||||||
to free the reader at the end of the processing, it will get garbage
|
free the reader at the end of the processing, it will get garbage collected
|
||||||
collected once all references have disappeared</p>
|
once all references have disappeared</p>
|
||||||
|
|
||||||
<h2><a name="Extracting">Extracting informations for the current node</a></h2>
|
<h2><a name="Extracting">Extracting informations for the current node</a></h2>
|
||||||
|
|
||||||
@@ -206,10 +210,184 @@ XmlTextReader class</a> set of properties and methods:</p>
|
|||||||
current node.</li>
|
current node.</li>
|
||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
<p></p>
|
<p>Let's look first at a small example to get this in practice by redefining
|
||||||
|
the processNode() function in the Python example:</p>
|
||||||
|
<pre>def processNode(reader):
|
||||||
|
print "%d %d %s %d" % (reader.Depth(), reader.NodeType(),
|
||||||
|
reader.Name(), reader.IsEmptyElement())</pre>
|
||||||
|
|
||||||
|
<p>and look at the result of calling streamFile("tst.xml") for various
|
||||||
|
content of the XML test file.</p>
|
||||||
|
|
||||||
|
<p>For the minimal document "<code><doc/></code>" we get:</p>
|
||||||
|
<pre>0 1 doc 1</pre>
|
||||||
|
|
||||||
|
<p>Only one node is found, its depth is 0, type 1 indicates an element start,
|
||||||
|
of name "doc" and it is empty. Trying now with
|
||||||
|
"<code><doc></doc></code>" instead leads to:</p>
|
||||||
|
<pre>0 1 doc 0
|
||||||
|
0 15 doc 0</pre>
|
||||||
|
|
||||||
|
<p>The document root node is not flagged as empty anymore and both a start
|
||||||
|
and an end of element are detected. The following document shows how
|
||||||
|
character data are reported:</p>
|
||||||
|
<pre><doc><a/><b>some text</b>
|
||||||
|
<c/></doc></pre>
|
||||||
|
|
||||||
|
<p>We modify the processNode() function to also report the node Value:</p>
|
||||||
|
<pre>def processNode(reader):
|
||||||
|
print "%d %d %s %d %s" % (reader.Depth(), reader.NodeType(),
|
||||||
|
reader.Name(), reader.IsEmptyElement(),
|
||||||
|
reader.Value())</pre>
|
||||||
|
|
||||||
|
<p>The result of the test is:</p>
|
||||||
|
<pre>0 1 doc 0 None
|
||||||
|
1 1 a 1 None
|
||||||
|
1 1 b 0 None
|
||||||
|
2 3 #text 0 some text
|
||||||
|
1 15 b 0 None
|
||||||
|
1 3 #text 0
|
||||||
|
|
||||||
|
1 1 c 1 None
|
||||||
|
0 15 doc 0 None</pre>
|
||||||
|
|
||||||
|
<p>There are a few things to note:</p>
|
||||||
|
<ul>
|
||||||
|
<li>the increase of the depth value (first row) as children nodes are
|
||||||
|
explored</li>
|
||||||
|
<li>the text node child of the b element, of type 3 and its content</li>
|
||||||
|
<li>the text node containing the line return between elements b and c</li>
|
||||||
|
<li>that elements have the Value None (or NULL in C)</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>The equivalent routine for <code>processNode()</code> as used by
|
||||||
|
<code>xmllint --stream --debug</code> is the following and can be found in
|
||||||
|
the xmllint.c module in the source distribution:</p>
|
||||||
|
<pre>static void processNode(xmlTextReaderPtr reader) {
|
||||||
|
xmlChar *name, *value;
|
||||||
|
|
||||||
|
name = xmlTextReaderName(reader);
|
||||||
|
if (name == NULL)
|
||||||
|
name = xmlStrdup(BAD_CAST "--");
|
||||||
|
value = xmlTextReaderValue(reader);
|
||||||
|
|
||||||
|
printf("%d %d %s %d",
|
||||||
|
xmlTextReaderDepth(reader),
|
||||||
|
xmlTextReaderNodeType(reader),
|
||||||
|
name,
|
||||||
|
xmlTextReaderIsEmptyElement(reader));
|
||||||
|
xmlFree(name);
|
||||||
|
if (value == NULL)
|
||||||
|
printf("\n");
|
||||||
|
else {
|
||||||
|
printf(" %s\n", value);
|
||||||
|
xmlFree(value);
|
||||||
|
}
|
||||||
|
}</pre>
|
||||||
|
|
||||||
|
<h2><a name="Extracting1">Extracting informations for the attributes</a></h2>
|
||||||
|
|
||||||
|
<p>The previous examples don't indicate how attributes are processed. The
|
||||||
|
simple test "<code><doc a="b"/></code>" provides the following
|
||||||
|
result:</p>
|
||||||
|
<pre>0 1 doc 1 None</pre>
|
||||||
|
|
||||||
|
<p>This proves that attribute nodes are not traversed by default. The
|
||||||
|
<em>HasAttributes</em> property allows detecting their presence. To check
|
||||||
|
their content the API has special instructions; basically 2 kinds of operations
|
||||||
|
are possible:</p>
|
||||||
|
<ol>
|
||||||
|
<li>to move the reader to the attribute nodes of the current element, in
|
||||||
|
that case the cursor is positioned on the attribute node</li>
|
||||||
|
<li>to directly query the element node for the attribute value</li>
|
||||||
|
</ol>
|
||||||
|
|
||||||
|
<p>In both cases the attribute can be designated either by its position in the
|
||||||
|
list of attributes (<em>MoveToAttributeNo</em> or <em>GetAttributeNo</em>) or
|
||||||
|
by their name (and namespace):</p>
|
||||||
|
<ul>
|
||||||
|
<li><em>GetAttributeNo</em>(no): provides the value of the attribute with
|
||||||
|
the specified index no relative to the containing element.</li>
|
||||||
|
<li><em>GetAttribute</em>(name): provides the value of the attribute with
|
||||||
|
the specified qualified name.</li>
|
||||||
|
<li>GetAttributeNs(localName, namespaceURI): provides the value of the
|
||||||
|
attribute with the specified local name and namespace URI.</li>
|
||||||
|
<li><em>MoveToAttributeNo</em>(no): moves the position of the current
|
||||||
|
instance to the attribute with the specified index relative to the
|
||||||
|
containing element.</li>
|
||||||
|
<li><em>MoveToAttribute</em>(name): moves the position of the current
|
||||||
|
instance to the attribute with the specified qualified name.</li>
|
||||||
|
<li><em>MoveToAttributeNs</em>(localName, namespaceURI): moves the position
|
||||||
|
of the current instance to the attribute with the specified local name
|
||||||
|
and namespace URI.</li>
|
||||||
|
<li><em>MoveToFirstAttribute</em>: moves the position of the current
|
||||||
|
instance to the first attribute associated with the current node.</li>
|
||||||
|
<li><em>MoveToNextAttribute</em>: moves the position of the current
|
||||||
|
instance to the next attribute associated with the current node.</li>
|
||||||
|
<li><em>MoveToElement</em>: moves the position of the current instance to
|
||||||
|
the node that contains the current Attribute node.</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>After modifying the processNode() function to show attributes:</p>
|
||||||
|
<pre>def processNode(reader):
|
||||||
|
print "%d %d %s %d %s" % (reader.Depth(), reader.NodeType(),
|
||||||
|
reader.Name(), reader.IsEmptyElement(),
|
||||||
|
reader.Value())
|
||||||
|
if reader.NodeType() == 1: # Element
|
||||||
|
while reader.MoveToNextAttribute():
|
||||||
|
print "-- %d %d (%s) [%s]" % (reader.Depth(), reader.NodeType(),
|
||||||
|
reader.Name(),reader.Value())</pre>
|
||||||
|
|
||||||
|
<p>the output for the same input document reflects the attribute:</p>
|
||||||
|
<pre>0 1 doc 1 None
|
||||||
|
-- 1 2 (a) [b]</pre>
|
||||||
|
|
||||||
|
<p>There is a couple of things to note on the attribute processing:</p>
|
||||||
|
<ul>
|
||||||
|
<li>their depth is the one of the carrying element plus one</li>
|
||||||
|
<li>namespace declarations are seen as attributes like in DOM</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
<h2><a name="Validating">Validating a document</a></h2>
|
<h2><a name="Validating">Validating a document</a></h2>
|
||||||
|
|
||||||
|
<p>The libxml2 implementation adds some extra features on top of the XmlTextReader
|
||||||
|
API, the main one is the ability to DTD validate the parsed document
|
||||||
|
progressively. This is simply the activation of the associated feature of the
|
||||||
|
parser used by the reader structure. There are a few options available
|
||||||
|
defined as the enum xmlParserProperties in the libxml/xmlreader.h header
|
||||||
|
file:</p>
|
||||||
|
<ul>
|
||||||
|
<li>XML_PARSER_LOADDTD: force loading the DTD (without validating)</li>
|
||||||
|
<li>XML_PARSER_DEFAULTATTRS: force attribute defaulting (this also implies
|
||||||
|
loading the DTD)</li>
|
||||||
|
<li>XML_PARSER_VALIDATE: activate DTD validation (this also implies loading
|
||||||
|
the DTD)</li>
|
||||||
|
<li>XML_PARSER_SUBST_ENTITIES: substitute entities on the fly, entity
|
||||||
|
reference nodes are not generated and are replaced by their expanded
|
||||||
|
content.</li>
|
||||||
|
<li>more settings might be added, those were the ones available at the 2.5.0
|
||||||
|
release...</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>The GetParserProp() and SetParserProp() methods can then be used to get
|
||||||
|
and set the values of those parser properties of the reader. For example</p>
|
||||||
|
<pre>def parseAndValidate(file):
|
||||||
|
reader = libxml2.newTextReaderFilename(file)
|
||||||
|
reader.SetParserProp(libxml2.PARSER_VALIDATE, 1)
|
||||||
|
ret = reader.Read()
|
||||||
|
while ret == 1:
|
||||||
|
ret = reader.Read()
|
||||||
|
if ret != 0:
|
||||||
|
print "Error parsing and validating %s" % (file)</pre>
|
||||||
|
|
||||||
|
<p>This routine will parse and validate the file. Error messages can be
|
||||||
|
captured by registering an error handler. See python/tests/reader2.py for
|
||||||
|
more complete Python examples. At the C level the equivalent call to activate
|
||||||
|
the validation feature is just:</p>
|
||||||
|
<pre>ret = xmlTextReaderSetParserProp(reader, XML_PARSER_VALIDATE, 1)</pre>
|
||||||
|
|
||||||
|
<p>and a return value of 0 indicates success.</p>
|
||||||
|
|
||||||
<h2><a name="Entities">Entities substitution</a></h2>
|
<h2><a name="Entities">Entities substitution</a></h2>
|
||||||
|
|
||||||
<p> </p>
|
<p> </p>
|
||||||
|
@@ -302,11 +302,6 @@ class xmlCore:
|
|||||||
return libxml2mod.name(self._o)
|
return libxml2mod.name(self._o)
|
||||||
def get_type(self):
|
def get_type(self):
|
||||||
return libxml2mod.type(self._o)
|
return libxml2mod.type(self._o)
|
||||||
def get_doc(self):
|
|
||||||
ret = libxml2mod.doc(self._o)
|
|
||||||
if ret == None:
|
|
||||||
return None
|
|
||||||
return xmlDoc(_obj=ret)
|
|
||||||
def free(self):
|
def free(self):
|
||||||
libxml2mod.freeDoc(self._o)
|
libxml2mod.freeDoc(self._o)
|
||||||
|
|
||||||
@@ -356,7 +351,7 @@ def nodeWrap(o):
|
|||||||
if name == "entity_decl":
|
if name == "entity_decl":
|
||||||
return xmlEntity(_obj=o)
|
return xmlEntity(_obj=o)
|
||||||
if name == "dtd":
|
if name == "dtd":
|
||||||
return xmlAttr(_obj=o)
|
return xmlDtd(_obj=o)
|
||||||
return xmlNode(_obj=o)
|
return xmlNode(_obj=o)
|
||||||
|
|
||||||
def xpathObjectRet(o):
|
def xpathObjectRet(o):
|
||||||
|
34
xmllint.c
34
xmllint.c
@@ -572,26 +572,24 @@ static int count = 0;
|
|||||||
static int elem, attrs;
|
static int elem, attrs;
|
||||||
|
|
||||||
static void processNode(xmlTextReaderPtr reader) {
|
static void processNode(xmlTextReaderPtr reader) {
|
||||||
if (debug) {
|
xmlChar *name, *value;
|
||||||
xmlChar *name, *value;
|
|
||||||
|
|
||||||
name = xmlTextReaderName(reader);
|
name = xmlTextReaderName(reader);
|
||||||
if (name == NULL)
|
if (name == NULL)
|
||||||
name = xmlStrdup(BAD_CAST "--");
|
name = xmlStrdup(BAD_CAST "--");
|
||||||
value = xmlTextReaderValue(reader);
|
value = xmlTextReaderValue(reader);
|
||||||
|
|
||||||
printf("%d %d %d %s",
|
printf("%d %d %s %d",
|
||||||
xmlTextReaderDepth(reader),
|
xmlTextReaderDepth(reader),
|
||||||
xmlTextReaderNodeType(reader),
|
xmlTextReaderNodeType(reader),
|
||||||
xmlTextReaderIsEmptyElement(reader),
|
name,
|
||||||
name);
|
xmlTextReaderIsEmptyElement(reader));
|
||||||
xmlFree(name);
|
xmlFree(name);
|
||||||
if (value == NULL)
|
if (value == NULL)
|
||||||
printf("\n");
|
printf("\n");
|
||||||
else {
|
else {
|
||||||
printf(" %s\n", value);
|
printf(" %s\n", value);
|
||||||
xmlFree(value);
|
xmlFree(value);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -602,6 +602,7 @@ xmlTextReaderRead(xmlTextReaderPtr reader) {
|
|||||||
reader->state = XML_TEXTREADER_ELEMENT;
|
reader->state = XML_TEXTREADER_ELEMENT;
|
||||||
} else {
|
} else {
|
||||||
reader->node = reader->ctxt->nodeTab[0];
|
reader->node = reader->ctxt->nodeTab[0];
|
||||||
|
reader->state = XML_TEXTREADER_ELEMENT;
|
||||||
}
|
}
|
||||||
reader->depth = 0;
|
reader->depth = 0;
|
||||||
goto node_found;
|
goto node_found;
|
||||||
|
Reference in New Issue
Block a user