1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-28 00:21:53 +03:00

seriously changed the way data are pushed to the underlying parser, go by

* xmlreader.c: seriously changed the way data are pushed to
  the underlying parser, go by blocks of 512 bytes instead of
  trying to detect tag boundaries at that level. Changed the
  way empty elements are detected and tagged.
* python/tests/reader.py python/tests/reader2.py
  python/tests/reader3.py: small changes mostly due to context
  reporting being different and DTD node being reported. Some
  errors previously undetected are now caught and fixed.
* doc/xmlreader.html: flagged last section as TODO
Daniel
This commit is contained in:
Daniel Veillard
2003-01-05 01:27:54 +00:00
parent 51a447a578
commit 067bae5ff8
6 changed files with 78 additions and 80 deletions

View File

@ -1,3 +1,15 @@
Sun Jan 5 02:23:20 CET 2003 Daniel Veillard <daniel@veillard.com>
* xmlreader.c: seriously changed the way data are pushed to
the underlying parser, go by blocks of 512 bytes instead of
trying to detect tag boundaries at that level. Changed the
way empty elements are detected and tagged.
* python/tests/reader.py python/tests/reader2.py
python/tests/reader3.py: small changes mostly due to context
reporting being different and DTD node being reported. Some
errors previously undetected are now caught and fixed.
* doc/xmlreader.html: flagged last section as TODO
Sat Jan 4 20:40:28 CET 2003 Daniel Veillard <daniel@veillard.com> Sat Jan 4 20:40:28 CET 2003 Daniel Veillard <daniel@veillard.com>
* python/libxml.py: integrated the Python 2.2 optimizations * python/libxml.py: integrated the Python 2.2 optimizations

View File

@ -390,6 +390,8 @@ the validation feature is just:</p>
<h2><a name="Entities">Entities substitution</a></h2> <h2><a name="Entities">Entities substitution</a></h2>
<p>@@TODO@@</p>
<p> </p> <p> </p>
<p><a href="mailto:veillard@redhat.com">Daniel Veillard</a></p> <p><a href="mailto:veillard@redhat.com">Daniel Veillard</a></p>

View File

@ -14,56 +14,56 @@ input = libxml2.inputBuffer(f)
reader = input.newTextReader("test1") reader = input.newTextReader("test1")
ret = reader.Read() ret = reader.Read()
if ret != 1: if ret != 1:
print "Error reading to first element" print "test1: Error reading to first element"
sys.exit(1) sys.exit(1)
if reader.Name() != "a" or reader.IsEmptyElement() != 0 or \ if reader.Name() != "a" or reader.IsEmptyElement() != 0 or \
reader.NodeType() != 1 or reader.HasAttributes() != 0: reader.NodeType() != 1 or reader.HasAttributes() != 0:
print "Error reading the first element" print "test1: Error reading the first element"
sys.exit(1) sys.exit(1)
ret = reader.Read() ret = reader.Read()
if ret != 1: if ret != 1:
print "Error reading to second element" print "test1: Error reading to second element"
sys.exit(1) sys.exit(1)
if reader.Name() != "b" or reader.IsEmptyElement() != 1 or \ if reader.Name() != "b" or reader.IsEmptyElement() != 1 or \
reader.NodeType() != 1 or reader.HasAttributes() != 1: reader.NodeType() != 1 or reader.HasAttributes() != 1:
print "Error reading the second element" print "test1: Error reading the second element"
sys.exit(1) sys.exit(1)
ret = reader.Read() ret = reader.Read()
if ret != 1: if ret != 1:
print "Error reading to third element" print "test1: Error reading to third element"
sys.exit(1) sys.exit(1)
if reader.Name() != "c" or reader.IsEmptyElement() != 0 or \ if reader.Name() != "c" or reader.IsEmptyElement() != 0 or \
reader.NodeType() != 1 or reader.HasAttributes() != 0: reader.NodeType() != 1 or reader.HasAttributes() != 0:
print "Error reading the third element" print "test1: Error reading the third element"
sys.exit(1) sys.exit(1)
ret = reader.Read() ret = reader.Read()
if ret != 1: if ret != 1:
print "Error reading to text node" print "test1: Error reading to text node"
sys.exit(1) sys.exit(1)
if reader.Name() != "#text" or reader.IsEmptyElement() != 0 or \ if reader.Name() != "#text" or reader.IsEmptyElement() != 0 or \
reader.NodeType() != 3 or reader.HasAttributes() != 0 or \ reader.NodeType() != 3 or reader.HasAttributes() != 0 or \
reader.Value() != "content of c": reader.Value() != "content of c":
print "Error reading the text node" print "test1: Error reading the text node"
sys.exit(1) sys.exit(1)
ret = reader.Read() ret = reader.Read()
if ret != 1: if ret != 1:
print "Error reading to end of third element" print "test1: Error reading to end of third element"
sys.exit(1) sys.exit(1)
if reader.Name() != "c" or reader.IsEmptyElement() != 0 or \ if reader.Name() != "c" or reader.IsEmptyElement() != 0 or \
reader.NodeType() != 15 or reader.HasAttributes() != 0: reader.NodeType() != 15 or reader.HasAttributes() != 0:
print "Error reading the end of third element" print "test1: Error reading the end of third element"
sys.exit(1) sys.exit(1)
ret = reader.Read() ret = reader.Read()
if ret != 1: if ret != 1:
print "Error reading to end of first element" print "test1: Error reading to end of first element"
sys.exit(1) sys.exit(1)
if reader.Name() != "a" or reader.IsEmptyElement() != 0 or \ if reader.Name() != "a" or reader.IsEmptyElement() != 0 or \
reader.NodeType() != 15 or reader.HasAttributes() != 0: reader.NodeType() != 15 or reader.HasAttributes() != 0:
print "Error reading the end of first element" print "test1: Error reading the end of first element"
sys.exit(1) sys.exit(1)
ret = reader.Read() ret = reader.Read()
if ret != 0: if ret != 0:
print "Error reading to end of document" print "test1: Error reading to end of document"
sys.exit(1) sys.exit(1)
# #
@ -239,7 +239,7 @@ if reader.MoveToNextAttribute() != 0:
# #
# a couple of tests for namespace nodes # a couple of tests for namespace nodes
# #
f = StringIO.StringIO("""<a xmlns="http://example.com/foo">""") f = StringIO.StringIO("""<a xmlns="http://example.com/foo"/>""")
input = libxml2.inputBuffer(f) input = libxml2.inputBuffer(f)
reader = input.newTextReader("test6") reader = input.newTextReader("test6")
ret = reader.Read() ret = reader.Read()
@ -256,7 +256,7 @@ if reader.NamespaceUri() != "http://www.w3.org/2000/xmlns/" or \
print "test6: failed to read the namespace node" print "test6: failed to read the namespace node"
sys.exit(1) sys.exit(1)
f = StringIO.StringIO("""<a xmlns:prefix="http://example.com/foo">""") f = StringIO.StringIO("""<a xmlns:prefix="http://example.com/foo"/>""")
input = libxml2.inputBuffer(f) input = libxml2.inputBuffer(f)
reader = input.newTextReader("test7") reader = input.newTextReader("test7")
ret = reader.Read() ret = reader.Read()

View File

@ -16,10 +16,10 @@ expect="""../../test/valid/rss.xml:172: validity error: Element rss does not car
</rss> </rss>
^ ^
../../test/valid/xlink.xml:450: validity error: ID dt-arc already defined ../../test/valid/xlink.xml:450: validity error: ID dt-arc already defined
<p><termdef id="dt-arc" term="Arc"> <p><termdef id="dt-arc" term="Arc">An <term>arc</term> is contained within an
^ ^
../../test/valid/xlink.xml:529: validity error: attribute def line 199 references an unknown ID "dt-xlg" ../../test/valid/xlink.xml:530: validity error: attribute def line 199 references an unknown ID "dt-xlg"
<?Pub *0000052575?>
^ ^
""" """
def callback(ctx, str): def callback(ctx, str):
@ -61,7 +61,8 @@ s = """
<b>bbb</b> <b>bbb</b>
</test> </test>
""" """
expect="""1,test expect="""10,test
1,test
3,#text 3,#text
1,x 1,x
1,c 1,c
@ -110,7 +111,8 @@ s = """<!DOCTYPE test [
</test> </test>
""" """
tst_ent = """<x>hello</x>""" tst_ent = """<x>hello</x>"""
expect="""1 test expect="""10 test
1 test
3 #text 3 #text
1 x 1 x
3 #text 3 #text
@ -161,7 +163,8 @@ s = """<!DOCTYPE test [
&x; &x;
&x; &x;
</test>""" </test>"""
expect="""1 test 0 expect="""10 test 0
1 test 0
3 #text 1 3 #text 1
1 x 1 1 x 1
1 y 2 1 y 2
@ -213,7 +216,8 @@ s = """<!DOCTYPE test [
&x; &x;
&x; &x;
</test>""" </test>"""
expect="""1 test 0 expect="""10 test 0
1 test 0
3 #text 1 3 #text 1
5 x 1 5 x 1
3 #text 1 3 #text 1

View File

@ -1,6 +1,6 @@
#!/usr/bin/python -u #!/usr/bin/python -u
# #
# this tests the validation with the XmlTextReader interface # this tests the entities substitutions with the XmlTextReader interface
# #
import sys import sys
import StringIO import StringIO
@ -22,6 +22,11 @@ f = StringIO.StringIO(docstr)
input = libxml2.inputBuffer(f) input = libxml2.inputBuffer(f)
reader = input.newTextReader("test_noent") reader = input.newTextReader("test_noent")
ret = reader.Read() ret = reader.Read()
if ret != 1:
print "Error reading to root"
sys.exit(1)
if reader.Name() == "doc" or reader.NodeType() == 10:
ret = reader.Read()
if ret != 1: if ret != 1:
print "Error reading to root" print "Error reading to root"
sys.exit(1) sys.exit(1)
@ -55,6 +60,11 @@ input = libxml2.inputBuffer(f)
reader = input.newTextReader("test_noent") reader = input.newTextReader("test_noent")
reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES, 1) reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES, 1)
ret = reader.Read() ret = reader.Read()
if ret != 1:
print "Error reading to root"
sys.exit(1)
if reader.Name() == "doc" or reader.NodeType() == 10:
ret = reader.Read()
if ret != 1: if ret != 1:
print "Error reading to root" print "Error reading to root"
sys.exit(1) sys.exit(1)

View File

@ -15,6 +15,8 @@
* - provide an API to expand part of the tree * - provide an API to expand part of the tree
* - provide an API to preserve part of the tree * - provide an API to preserve part of the tree
* - Streaming XInclude support * - Streaming XInclude support
* - validation against a provided DTD
* - XML Schemas validation
* - setting(s) for NoBlanks * - setting(s) for NoBlanks
* - performances and tuning ... * - performances and tuning ...
*/ */
@ -99,7 +101,6 @@ struct _xmlTextReader {
xmlNodePtr curnode;/* current attribute node */ xmlNodePtr curnode;/* current attribute node */
int depth; /* depth of the current node */ int depth; /* depth of the current node */
xmlNodePtr faketext;/* fake xmlNs chld */ xmlNodePtr faketext;/* fake xmlNs chld */
int wasempty;/* was the last node empty */
/* entity stack when traversing entities content */ /* entity stack when traversing entities content */
xmlNodePtr ent; /* Current Entity Ref Node */ xmlNodePtr ent; /* Current Entity Ref Node */
@ -108,6 +109,8 @@ struct _xmlTextReader {
xmlNodePtr *entTab; /* array of entities */ xmlNodePtr *entTab; /* array of entities */
}; };
static const char *xmlTextReaderIsEmpty = "This element is empty";
#ifdef DEBUG_READER #ifdef DEBUG_READER
static void static void
xmlTextReaderDebug(xmlTextReaderPtr reader) { xmlTextReaderDebug(xmlTextReaderPtr reader) {
@ -222,13 +225,10 @@ xmlTextReaderStartElement(void *ctx, const xmlChar *fullname,
*/ */
origctxt = reader->ctxt; origctxt = reader->ctxt;
reader->startElement(ctx, fullname, atts); reader->startElement(ctx, fullname, atts);
#if 0 if ((ctxt->node != NULL) && (ctxt->input != NULL) &&
123 (ctxt->input->cur != NULL) && (ctxt->input->cur[0] == '/') &&
if (origctxt->validate) { (ctxt->input->cur[1] == '>'))
ctxt->valid &= xmlValidatePushElement(&origctxt->vctxt, ctxt->node->_private = (void *) xmlTextReaderIsEmpty;
ctxt->myDoc, ctxt->node, fullname);
}
#endif
} }
if (reader != NULL) if (reader != NULL)
reader->state = XML_TEXTREADER_ELEMENT; reader->state = XML_TEXTREADER_ELEMENT;
@ -258,12 +258,6 @@ xmlTextReaderEndElement(void *ctx, const xmlChar *fullname) {
reader->endElement(ctx, fullname); reader->endElement(ctx, fullname);
} }
if (reader != NULL) {
if (reader->state == XML_TEXTREADER_ELEMENT)
reader->wasempty = 1;
else
reader->wasempty = 0;
}
} }
/** /**
@ -290,13 +284,6 @@ xmlTextReaderCharacters(void *ctx, const xmlChar *ch, int len)
* when processing an entity, the context may have been changed * when processing an entity, the context may have been changed
*/ */
origctxt = reader->ctxt; origctxt = reader->ctxt;
#if 0
123
if (origctxt->validate) {
ctxt->valid &= xmlValidatePushCData(&origctxt->vctxt, ch, len);
}
#endif
} }
} }
@ -319,10 +306,6 @@ xmlTextReaderCDataBlock(void *ctx, const xmlChar *ch, int len)
#endif #endif
if ((reader != NULL) && (reader->cdataBlock != NULL)) { if ((reader != NULL) && (reader->cdataBlock != NULL)) {
reader->cdataBlock(ctx, ch, len); reader->cdataBlock(ctx, ch, len);
if (ctxt->validate) {
ctxt->valid &= xmlValidatePushCData(&ctxt->vctxt, ch, len);
}
} }
} }
@ -365,7 +348,11 @@ xmlTextReaderPushData(xmlTextReaderPtr reader) {
} else } else
break; break;
} }
if (inbuf->content[cur] == '>') { /*
* parse by block of 512 bytes
*/
if ((cur >= reader->cur + 512) || (cur >= inbuf->use)) {
if (cur < inbuf->use)
cur = cur + 1; cur = cur + 1;
val = xmlParseChunk(reader->ctxt, val = xmlParseChunk(reader->ctxt,
(const char *) &inbuf->content[reader->cur], (const char *) &inbuf->content[reader->cur],
@ -412,7 +399,9 @@ xmlTextReaderPushData(xmlTextReaderPtr reader) {
if (reader->mode == XML_TEXTREADER_MODE_EOF) { if (reader->mode == XML_TEXTREADER_MODE_EOF) {
if (reader->mode != XML_TEXTREADER_DONE) { if (reader->mode != XML_TEXTREADER_DONE) {
val = xmlParseChunk(reader->ctxt, val = xmlParseChunk(reader->ctxt,
(const char *) &inbuf->content[reader->cur], 0, 1); (const char *) &inbuf->content[reader->cur],
cur - reader->cur, 1);
reader->cur = cur;
reader->mode = XML_TEXTREADER_DONE; reader->mode = XML_TEXTREADER_DONE;
} }
} }
@ -564,7 +553,7 @@ xmlTextReaderValidateEntity(xmlTextReaderPtr reader) {
*/ */
int int
xmlTextReaderRead(xmlTextReaderPtr reader) { xmlTextReaderRead(xmlTextReaderPtr reader) {
int val, olddepth = 0, wasempty = 0; int val, olddepth = 0;
xmlTextReaderState oldstate = 0; xmlTextReaderState oldstate = 0;
xmlNodePtr oldnode = NULL; xmlNodePtr oldnode = NULL;
@ -588,14 +577,11 @@ xmlTextReaderRead(xmlTextReaderPtr reader) {
if (val < 0) if (val < 0)
return(-1); return(-1);
} while ((reader->ctxt->node == NULL) && } while ((reader->ctxt->node == NULL) &&
(reader->mode != XML_TEXTREADER_MODE_EOF)); ((reader->mode != XML_TEXTREADER_MODE_EOF) &&
(reader->mode != XML_TEXTREADER_DONE)));
if (reader->ctxt->node == NULL) { if (reader->ctxt->node == NULL) {
if (reader->ctxt->myDoc != NULL) { if (reader->ctxt->myDoc != NULL) {
reader->node = reader->ctxt->myDoc->children; reader->node = reader->ctxt->myDoc->children;
if ((reader->ctxt->input != NULL) &&
(reader->ctxt->input->cur != NULL) &&
(reader->ctxt->input->cur[-2] != '/'))
reader->wasempty = -1;
} }
if (reader->node == NULL) if (reader->node == NULL)
return(-1); return(-1);
@ -610,16 +596,6 @@ xmlTextReaderRead(xmlTextReaderPtr reader) {
oldstate = reader->state; oldstate = reader->state;
olddepth = reader->ctxt->nodeNr; olddepth = reader->ctxt->nodeNr;
oldnode = reader->node; oldnode = reader->node;
/*
* the <p></p> vs. <p/> distinction at the API level royally sucks,
* Microsoft priviledge ...
*/
if (reader->wasempty == -1)
wasempty = 0;
else
wasempty = (((reader->wasempty == 1) && (reader->ctxt->node != NULL) &&
(reader->ctxt->node->last == reader->node)) ||
(reader->node != reader->ctxt->node));
get_next_node: get_next_node:
/* /*
@ -653,7 +629,8 @@ get_next_node:
if (reader->node->next != NULL) { if (reader->node->next != NULL) {
if ((oldstate == XML_TEXTREADER_ELEMENT) && if ((oldstate == XML_TEXTREADER_ELEMENT) &&
(reader->node->type == XML_ELEMENT_NODE) && (reader->node->type == XML_ELEMENT_NODE) &&
(wasempty == 0)) { (reader->node->children == NULL) &&
(reader->node->_private != (void *)xmlTextReaderIsEmpty)) {
reader->state = XML_TEXTREADER_END; reader->state = XML_TEXTREADER_END;
goto node_found; goto node_found;
} }
@ -675,7 +652,8 @@ get_next_node:
} }
if ((oldstate == XML_TEXTREADER_ELEMENT) && if ((oldstate == XML_TEXTREADER_ELEMENT) &&
(reader->node->type == XML_ELEMENT_NODE) && (reader->node->type == XML_ELEMENT_NODE) &&
(wasempty == 0)) { (reader->node->children == NULL) &&
(reader->node->_private != (void *)xmlTextReaderIsEmpty)) {
reader->state = XML_TEXTREADER_END; reader->state = XML_TEXTREADER_END;
goto node_found; goto node_found;
} }
@ -1733,15 +1711,7 @@ xmlTextReaderIsEmptyElement(xmlTextReaderPtr reader) {
return(0); return(0);
if (reader->state == XML_TEXTREADER_END) if (reader->state == XML_TEXTREADER_END)
return(0); return(0);
if (reader->wasempty == -1) return(reader->node->_private == (void *)xmlTextReaderIsEmpty);
return(0);
if (reader->node != reader->ctxt->node)
return(1);
if ((reader->ctxt->node != NULL) &&
(reader->node == reader->ctxt->node->last) &&
(reader->wasempty == 1))
return(1);
return(0);
} }
/** /**