1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-28 00:21:53 +03:00

a lot of performance work, especially the speed of streaming through the reader and push interface

* globals.c libxml.h parser.c parserInternals.c tree.c xmllint.c
  xmlreader.c include/libxml/parser.h: a lot of performance work
  especially the speed of streaming through the reader and push
  interface. Some thread related optimizations. Nearly doubled the
  speed of parsing through the reader.
Daniel
This commit is contained in:
Daniel Veillard
2003-04-21 21:36:41 +00:00
parent 83c8a5cd31
commit a880b12475
10 changed files with 477 additions and 405 deletions

View File

@ -1,3 +1,11 @@
Mon Apr 21 23:33:38 CEST 2003 Daniel Veillard <daniel@veillard.com>
* globals.c libxml.h parser.c parserInternals.c tree.c xmllint.c
xmlreader.c include/libxml/parser.h: a lot of performance work
especially the speed of streaming through the reader and push
interface. Some thread related optimizations. Nearly doubled the
speed of parsing through the reader.
Sun Apr 20 10:36:05 MDT 2003 John Fleck <jfleck@inkstain.net> Sun Apr 20 10:36:05 MDT 2003 John Fleck <jfleck@inkstain.net>
* doc/xmllint.xml * doc/xmllint.xml

View File

@ -488,6 +488,7 @@ xmlRegisterNodeDefault(xmlRegisterNodeFunc func)
{ {
xmlRegisterNodeFunc old = xmlRegisterNodeDefaultValue; xmlRegisterNodeFunc old = xmlRegisterNodeDefaultValue;
__xmlRegisterCallbacks = 1;
xmlRegisterNodeDefaultValue = func; xmlRegisterNodeDefaultValue = func;
return(old); return(old);
} }
@ -505,6 +506,7 @@ xmlDeregisterNodeDefault(xmlDeregisterNodeFunc func)
{ {
xmlDeregisterNodeFunc old = xmlDeregisterNodeDefaultValue; xmlDeregisterNodeFunc old = xmlDeregisterNodeDefaultValue;
__xmlRegisterCallbacks = 1;
xmlDeregisterNodeDefaultValue = func; xmlDeregisterNodeDefaultValue = func;
return(old); return(old);
} }

View File

@ -233,6 +233,7 @@ struct _xmlParserCtxt {
int linenumbers; /* set line number in element content */ int linenumbers; /* set line number in element content */
void *catalogs; /* document's own catalog */ void *catalogs; /* document's own catalog */
int recovery; /* run in recovery mode */ int recovery; /* run in recovery mode */
int progressive; /* is this a progressive parsing */
}; };
/** /**

View File

@ -40,4 +40,10 @@
#include "trio.h" #include "trio.h"
#endif #endif
/*
* Internal variable indicating if a callback has been registered for
* node creation/destruction. It avoids spending a lot of time in locking
* function while checking if the callback exists.
*/
extern int __xmlRegisterCallbacks;
#endif /* ! __XML_LIBXML_H__ */ #endif /* ! __XML_LIBXML_H__ */

698
parser.c
View File

@ -368,7 +368,8 @@ static int spacePop(xmlParserCtxtPtr ctxt) {
xmlPopInput(ctxt); \ xmlPopInput(ctxt); \
} while (0) } while (0)
#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \ #define SHRINK if ((ctxt->progressive == 0) && \
(ctxt->input->cur - ctxt->input->base > INPUT_CHUNK))\
xmlSHRINK (ctxt); xmlSHRINK (ctxt);
static void xmlSHRINK (xmlParserCtxtPtr ctxt) { static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
@ -378,7 +379,8 @@ static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
xmlPopInput(ctxt); xmlPopInput(ctxt);
} }
#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \ #define GROW if ((ctxt->progressive == 0) && \
(ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
xmlGROW (ctxt); xmlGROW (ctxt);
static void xmlGROW (xmlParserCtxtPtr ctxt) { static void xmlGROW (xmlParserCtxtPtr ctxt) {
@ -386,7 +388,7 @@ static void xmlGROW (xmlParserCtxtPtr ctxt) {
if ((*ctxt->input->cur == 0) && if ((*ctxt->input->cur == 0) &&
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
xmlPopInput(ctxt); xmlPopInput(ctxt);
} }
#define SKIP_BLANKS xmlSkipBlankChars(ctxt) #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
@ -8190,6 +8192,55 @@ xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
return(-1); return(-1);
} }
/**
 * xmlParseGetLasts:
 * @ctxt:  an XML parser context
 * @lastlt:  pointer to store the last '<' from the input
 * @lastgt:  pointer to store the last '>' from the input
 *
 * Lookup the last '<' and '>' in the current chunk.
 *
 * The scan walks backward from ctxt->input->end down to ctxt->input->base.
 * Whichever of the two markers is met first is recorded, then the scan
 * continues backward for the closest preceding occurrence of the other
 * marker. A marker which is not found is reported as NULL.
 *
 * Both results are set to NULL unless the context is in progressive mode
 * and exactly one input is on the input stack (ctxt->inputNr == 1).
 */
static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
                 const xmlChar **lastgt) {
    const xmlChar *base;
    const xmlChar *tmp;

    if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
        /* do not leave a usable output uninitialized for the caller */
        if (lastlt != NULL)
            *lastlt = NULL;
        if (lastgt != NULL)
            *lastgt = NULL;
        xmlGenericError(xmlGenericErrorContext,
                        "Internal error: xmlParseGetLasts\n");
        return;
    }

    *lastlt = NULL;
    *lastgt = NULL;

    /*
     * Any non-zero value means progressive parsing, matching the
     * "if (ctxt->progressive)" tests done by the callers.
     */
    if ((ctxt->progressive == 0) || (ctxt->inputNr != 1) ||
        (ctxt->input == NULL))
        return;

    base = ctxt->input->base;
    tmp = ctxt->input->end;

    /*
     * tmp always points one past the byte being examined, so it is never
     * moved before base (forming such a pointer is undefined in C).
     */
    while ((tmp > base) && (tmp[-1] != '<') && (tmp[-1] != '>'))
        tmp--;
    if (tmp == base)
        return;             /* neither marker present in the buffer */

    tmp--;                  /* tmp now points at the marker found */
    if (*tmp == '<') {
        *lastlt = tmp;
        /* look for the closest '>' located before that '<' */
        while ((tmp > base) && (tmp[-1] != '>'))
            tmp--;
        if (tmp > base)
            *lastgt = tmp - 1;
    } else {
        *lastgt = tmp;
        /* look for the closest '<' located before that '>' */
        while ((tmp > base) && (tmp[-1] != '<'))
            tmp--;
        if (tmp > base)
            *lastlt = tmp - 1;
    }
}
/** /**
* xmlParseTryOrFinish: * xmlParseTryOrFinish:
* @ctxt: an XML parser context * @ctxt: an XML parser context
@ -8204,6 +8255,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
int ret = 0; int ret = 0;
int avail; int avail;
xmlChar cur, next; xmlChar cur, next;
const xmlChar *lastlt, *lastgt;
#ifdef DEBUG_PUSH #ifdef DEBUG_PUSH
switch (ctxt->instate) { switch (ctxt->instate) {
@ -8258,9 +8310,13 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
} }
#endif #endif
while (1) { if (ctxt->input->cur - ctxt->input->base > 4096) {
SHRINK; xmlSHRINK(ctxt);
ctxt->checkIndex = 0;
}
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
while (1) {
/* /*
* Pop-up of finished entities. * Pop-up of finished entities.
*/ */
@ -8269,7 +8325,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
if (ctxt->input ==NULL) break; if (ctxt->input ==NULL) break;
if (ctxt->input->buf == NULL) if (ctxt->input->buf == NULL)
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); avail = ctxt->input->length -
(ctxt->input->cur - ctxt->input->base);
else { else {
/* /*
* If we are operating on converted input, try to flush * If we are operating on converted input, try to flush
@ -8412,12 +8469,317 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
#endif #endif
} }
break; break;
case XML_PARSER_START_TAG: {
xmlChar *name, *oldname;
if ((avail < 2) && (ctxt->inputNr == 1))
goto done;
cur = ctxt->input->cur[0];
if (cur != '<') {
ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
"Start tag expect, '<' not found\n");
ctxt->wellFormed = 0;
if (ctxt->recovery == 0) ctxt->disableSAX = 1;
ctxt->instate = XML_PARSER_EOF;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering EOF\n");
#endif
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
ctxt->sax->endDocument(ctxt->userData);
goto done;
}
if (!terminate) {
if (ctxt->progressive) {
if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
goto done;
} else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
goto done;
}
}
if (ctxt->spaceNr == 0)
spacePush(ctxt, -1);
else
spacePush(ctxt, *ctxt->space);
name = xmlParseStartTag(ctxt);
if (name == NULL) {
spacePop(ctxt);
ctxt->instate = XML_PARSER_EOF;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering EOF\n");
#endif
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
ctxt->sax->endDocument(ctxt->userData);
goto done;
}
namePush(ctxt, name);
/*
* [ VC: Root Element Type ]
* The Name in the document type declaration must match
* the element type of the root element.
*/
if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
ctxt->node && (ctxt->node == ctxt->myDoc->children))
ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
/*
* Check for an Empty Element.
*/
if ((RAW == '/') && (NXT(1) == '>')) {
SKIP(2);
if ((ctxt->sax != NULL) &&
(ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
ctxt->sax->endElement(ctxt->userData, name);
oldname = namePop(ctxt);
spacePop(ctxt);
if (oldname != NULL) {
#ifdef DEBUG_STACK
xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
#endif
xmlFree(oldname);
}
if (ctxt->name == NULL) {
ctxt->instate = XML_PARSER_EPILOG;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering EPILOG\n");
#endif
} else {
ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering CONTENT\n");
#endif
}
break;
}
if (RAW == '>') {
NEXT;
} else {
ctxt->errNo = XML_ERR_GT_REQUIRED;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
"Couldn't find end of Start Tag %s\n",
name);
ctxt->wellFormed = 0;
if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/*
* end of parsing of this node.
*/
nodePop(ctxt);
oldname = namePop(ctxt);
spacePop(ctxt);
if (oldname != NULL) {
#ifdef DEBUG_STACK
xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
#endif
xmlFree(oldname);
}
}
ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering CONTENT\n");
#endif
break;
}
case XML_PARSER_CONTENT: {
const xmlChar *test;
unsigned int cons;
if ((avail < 2) && (ctxt->inputNr == 1))
goto done;
cur = ctxt->input->cur[0];
next = ctxt->input->cur[1];
test = CUR_PTR;
cons = ctxt->input->consumed;
if ((cur == '<') && (next == '/')) {
ctxt->instate = XML_PARSER_END_TAG;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering END_TAG\n");
#endif
break;
} else if ((cur == '<') && (next == '?')) {
if ((!terminate) &&
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing PI\n");
#endif
xmlParsePI(ctxt);
} else if ((cur == '<') && (next != '!')) {
ctxt->instate = XML_PARSER_START_TAG;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering START_TAG\n");
#endif
break;
} else if ((cur == '<') && (next == '!') &&
(ctxt->input->cur[2] == '-') &&
(ctxt->input->cur[3] == '-')) {
if ((!terminate) &&
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing Comment\n");
#endif
xmlParseComment(ctxt);
ctxt->instate = XML_PARSER_CONTENT;
} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
(ctxt->input->cur[2] == '[') &&
(ctxt->input->cur[3] == 'C') &&
(ctxt->input->cur[4] == 'D') &&
(ctxt->input->cur[5] == 'A') &&
(ctxt->input->cur[6] == 'T') &&
(ctxt->input->cur[7] == 'A') &&
(ctxt->input->cur[8] == '[')) {
SKIP(9);
ctxt->instate = XML_PARSER_CDATA_SECTION;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering CDATA_SECTION\n");
#endif
break;
} else if ((cur == '<') && (next == '!') &&
(avail < 9)) {
goto done;
} else if (cur == '&') {
if ((!terminate) &&
(xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing Reference\n");
#endif
xmlParseReference(ctxt);
} else {
/* TODO Avoid the extra copy, handle directly !!! */
/*
* Goal of the following test is:
* - minimize calls to the SAX 'character' callback
* when they are mergeable
* - handle an problem for isBlank when we only parse
* a sequence of blank chars and the next one is
* not available to check against '<' presence.
* - tries to homogenize the differences in SAX
* callbacks between the push and pull versions
* of the parser.
*/
if ((ctxt->inputNr == 1) &&
(avail < XML_PARSER_BIG_BUFFER_SIZE)) {
if (!terminate) {
if (ctxt->progressive) {
if ((lastlt == NULL) ||
(ctxt->input->cur > lastlt))
goto done;
} else if (xmlParseLookupSequence(ctxt,
'<', 0, 0) < 0) {
goto done;
}
}
}
ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing char data\n");
#endif
xmlParseCharData(ctxt, 0);
}
/*
* Pop-up of finished entities.
*/
while ((RAW == 0) && (ctxt->inputNr > 1))
xmlPopInput(ctxt);
if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
ctxt->errNo = XML_ERR_INTERNAL_ERROR;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
"detected an error in element content\n");
ctxt->wellFormed = 0;
if (ctxt->recovery == 0) ctxt->disableSAX = 1;
ctxt->instate = XML_PARSER_EOF;
break;
}
break;
}
case XML_PARSER_END_TAG:
if (avail < 2)
goto done;
if (!terminate) {
if (ctxt->progressive) {
if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
goto done;
} else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
goto done;
}
}
xmlParseEndTag(ctxt);
if (ctxt->name == NULL) {
ctxt->instate = XML_PARSER_EPILOG;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering EPILOG\n");
#endif
} else {
ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering CONTENT\n");
#endif
}
break;
case XML_PARSER_CDATA_SECTION: {
/*
* The Push mode need to have the SAX callback for
* cdataBlock merge back contiguous callbacks.
*/
int base;
base = xmlParseLookupSequence(ctxt, ']', ']', '>');
if (base < 0) {
if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
if (ctxt->sax->cdataBlock != NULL)
ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
XML_PARSER_BIG_BUFFER_SIZE);
}
SKIP(XML_PARSER_BIG_BUFFER_SIZE);
ctxt->checkIndex = 0;
}
goto done;
} else {
if ((ctxt->sax != NULL) && (base > 0) &&
(!ctxt->disableSAX)) {
if (ctxt->sax->cdataBlock != NULL)
ctxt->sax->cdataBlock(ctxt->userData,
ctxt->input->cur, base);
}
SKIP(base + 3);
ctxt->checkIndex = 0;
ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering CONTENT\n");
#endif
}
break;
}
case XML_PARSER_MISC: case XML_PARSER_MISC:
SKIP_BLANKS; SKIP_BLANKS;
if (ctxt->input->buf == NULL) if (ctxt->input->buf == NULL)
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); avail = ctxt->input->length -
(ctxt->input->cur - ctxt->input->base);
else else
avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); avail = ctxt->input->buf->buffer->use -
(ctxt->input->cur - ctxt->input->base);
if (avail < 2) if (avail < 2)
goto done; goto done;
cur = ctxt->input->cur[0]; cur = ctxt->input->cur[0];
@ -8432,7 +8794,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
#endif #endif
xmlParsePI(ctxt); xmlParsePI(ctxt);
} else if ((cur == '<') && (next == '!') && } else if ((cur == '<') && (next == '!') &&
(ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { (ctxt->input->cur[2] == '-') &&
(ctxt->input->cur[3] == '-')) {
if ((!terminate) && if ((!terminate) &&
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
goto done; goto done;
@ -8443,9 +8806,12 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
xmlParseComment(ctxt); xmlParseComment(ctxt);
ctxt->instate = XML_PARSER_MISC; ctxt->instate = XML_PARSER_MISC;
} else if ((cur == '<') && (next == '!') && } else if ((cur == '<') && (next == '!') &&
(ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') && (ctxt->input->cur[2] == 'D') &&
(ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') && (ctxt->input->cur[3] == 'O') &&
(ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') && (ctxt->input->cur[4] == 'C') &&
(ctxt->input->cur[5] == 'T') &&
(ctxt->input->cur[6] == 'Y') &&
(ctxt->input->cur[7] == 'P') &&
(ctxt->input->cur[8] == 'E')) { (ctxt->input->cur[8] == 'E')) {
if ((!terminate) && if ((!terminate) &&
(xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
@ -8484,21 +8850,14 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
goto done; goto done;
} else { } else {
ctxt->instate = XML_PARSER_START_TAG; ctxt->instate = XML_PARSER_START_TAG;
ctxt->progressive = 1;
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
#ifdef DEBUG_PUSH #ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext, xmlGenericError(xmlGenericErrorContext,
"PP: entering START_TAG\n"); "PP: entering START_TAG\n");
#endif #endif
} }
break; break;
case XML_PARSER_IGNORE:
xmlGenericError(xmlGenericErrorContext,
"PP: internal error, state == IGNORE");
ctxt->instate = XML_PARSER_DTD;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering DTD\n");
#endif
break;
case XML_PARSER_PROLOG: case XML_PARSER_PROLOG:
SKIP_BLANKS; SKIP_BLANKS;
if (ctxt->input->buf == NULL) if (ctxt->input->buf == NULL)
@ -8534,6 +8893,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
goto done; goto done;
} else { } else {
ctxt->instate = XML_PARSER_START_TAG; ctxt->instate = XML_PARSER_START_TAG;
ctxt->progressive = 1;
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
#ifdef DEBUG_PUSH #ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext, xmlGenericError(xmlGenericErrorContext,
"PP: entering START_TAG\n"); "PP: entering START_TAG\n");
@ -8591,290 +8952,6 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
goto done; goto done;
} }
break; break;
case XML_PARSER_START_TAG: {
xmlChar *name, *oldname;
if ((avail < 2) && (ctxt->inputNr == 1))
goto done;
cur = ctxt->input->cur[0];
if (cur != '<') {
ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
"Start tag expect, '<' not found\n");
ctxt->wellFormed = 0;
if (ctxt->recovery == 0) ctxt->disableSAX = 1;
ctxt->instate = XML_PARSER_EOF;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering EOF\n");
#endif
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
ctxt->sax->endDocument(ctxt->userData);
goto done;
}
if ((!terminate) &&
(xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
goto done;
if (ctxt->spaceNr == 0)
spacePush(ctxt, -1);
else
spacePush(ctxt, *ctxt->space);
name = xmlParseStartTag(ctxt);
if (name == NULL) {
spacePop(ctxt);
ctxt->instate = XML_PARSER_EOF;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering EOF\n");
#endif
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
ctxt->sax->endDocument(ctxt->userData);
goto done;
}
namePush(ctxt, xmlStrdup(name));
/*
* [ VC: Root Element Type ]
* The Name in the document type declaration must match
* the element type of the root element.
*/
if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
ctxt->node && (ctxt->node == ctxt->myDoc->children))
ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
/*
* Check for an Empty Element.
*/
if ((RAW == '/') && (NXT(1) == '>')) {
SKIP(2);
if ((ctxt->sax != NULL) &&
(ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
ctxt->sax->endElement(ctxt->userData, name);
xmlFree(name);
oldname = namePop(ctxt);
spacePop(ctxt);
if (oldname != NULL) {
#ifdef DEBUG_STACK
xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
#endif
xmlFree(oldname);
}
if (ctxt->name == NULL) {
ctxt->instate = XML_PARSER_EPILOG;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering EPILOG\n");
#endif
} else {
ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering CONTENT\n");
#endif
}
break;
}
if (RAW == '>') {
NEXT;
} else {
ctxt->errNo = XML_ERR_GT_REQUIRED;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
"Couldn't find end of Start Tag %s\n",
name);
ctxt->wellFormed = 0;
if (ctxt->recovery == 0) ctxt->disableSAX = 1;
/*
* end of parsing of this node.
*/
nodePop(ctxt);
oldname = namePop(ctxt);
spacePop(ctxt);
if (oldname != NULL) {
#ifdef DEBUG_STACK
xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
#endif
xmlFree(oldname);
}
}
xmlFree(name);
ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering CONTENT\n");
#endif
break;
}
case XML_PARSER_CONTENT: {
const xmlChar *test;
unsigned int cons;
if ((avail < 2) && (ctxt->inputNr == 1))
goto done;
cur = ctxt->input->cur[0];
next = ctxt->input->cur[1];
test = CUR_PTR;
cons = ctxt->input->consumed;
if ((cur == '<') && (next == '?')) {
if ((!terminate) &&
(xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing PI\n");
#endif
xmlParsePI(ctxt);
} else if ((cur == '<') && (next == '!') &&
(ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
if ((!terminate) &&
(xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing Comment\n");
#endif
xmlParseComment(ctxt);
ctxt->instate = XML_PARSER_CONTENT;
} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
(ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
(ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
(ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
(ctxt->input->cur[8] == '[')) {
SKIP(9);
ctxt->instate = XML_PARSER_CDATA_SECTION;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering CDATA_SECTION\n");
#endif
break;
} else if ((cur == '<') && (next == '!') &&
(avail < 9)) {
goto done;
} else if ((cur == '<') && (next == '/')) {
ctxt->instate = XML_PARSER_END_TAG;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering END_TAG\n");
#endif
break;
} else if (cur == '<') {
ctxt->instate = XML_PARSER_START_TAG;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering START_TAG\n");
#endif
break;
} else if (cur == '&') {
if ((!terminate) &&
(xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing Reference\n");
#endif
xmlParseReference(ctxt);
} else {
/* TODO Avoid the extra copy, handle directly !!! */
/*
* Goal of the following test is:
* - minimize calls to the SAX 'character' callback
* when they are mergeable
* - handle an problem for isBlank when we only parse
* a sequence of blank chars and the next one is
* not available to check against '<' presence.
* - tries to homogenize the differences in SAX
* callbacks between the push and pull versions
* of the parser.
*/
if ((ctxt->inputNr == 1) &&
(avail < XML_PARSER_BIG_BUFFER_SIZE)) {
if ((!terminate) &&
(xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
goto done;
}
ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing char data\n");
#endif
xmlParseCharData(ctxt, 0);
}
/*
* Pop-up of finished entities.
*/
while ((RAW == 0) && (ctxt->inputNr > 1))
xmlPopInput(ctxt);
if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
ctxt->errNo = XML_ERR_INTERNAL_ERROR;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
"detected an error in element content\n");
ctxt->wellFormed = 0;
if (ctxt->recovery == 0) ctxt->disableSAX = 1;
ctxt->instate = XML_PARSER_EOF;
break;
}
break;
}
case XML_PARSER_CDATA_SECTION: {
/*
* The Push mode need to have the SAX callback for
* cdataBlock merge back contiguous callbacks.
*/
int base;
base = xmlParseLookupSequence(ctxt, ']', ']', '>');
if (base < 0) {
if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
if (ctxt->sax->cdataBlock != NULL)
ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
XML_PARSER_BIG_BUFFER_SIZE);
}
SKIP(XML_PARSER_BIG_BUFFER_SIZE);
ctxt->checkIndex = 0;
}
goto done;
} else {
if ((ctxt->sax != NULL) && (base > 0) &&
(!ctxt->disableSAX)) {
if (ctxt->sax->cdataBlock != NULL)
ctxt->sax->cdataBlock(ctxt->userData,
ctxt->input->cur, base);
}
SKIP(base + 3);
ctxt->checkIndex = 0;
ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering CONTENT\n");
#endif
}
break;
}
case XML_PARSER_END_TAG:
if (avail < 2)
goto done;
if ((!terminate) &&
(xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
goto done;
xmlParseEndTag(ctxt);
if (ctxt->name == NULL) {
ctxt->instate = XML_PARSER_EPILOG;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering EPILOG\n");
#endif
} else {
ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering CONTENT\n");
#endif
}
break;
case XML_PARSER_DTD: { case XML_PARSER_DTD: {
/* /*
* Sorry but progressive parsing of the internal subset * Sorry but progressive parsing of the internal subset
@ -8965,6 +9042,15 @@ found_end_int_subset:
"PP: entering CONTENT\n"); "PP: entering CONTENT\n");
#endif #endif
break; break;
case XML_PARSER_IGNORE:
xmlGenericError(xmlGenericErrorContext,
"PP: internal error, state == IGNORE");
ctxt->instate = XML_PARSER_DTD;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering DTD\n");
#endif
break;
case XML_PARSER_PI: case XML_PARSER_PI:
xmlGenericError(xmlGenericErrorContext, xmlGenericError(xmlGenericErrorContext,
"PP: internal error, state == PI\n"); "PP: internal error, state == PI\n");
@ -9056,8 +9142,10 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
#endif #endif
#if 0
if ((terminate) || (ctxt->input->buf->buffer->use > 80)) if ((terminate) || (ctxt->input->buf->buffer->use > 80))
xmlParseTryOrFinish(ctxt, terminate); xmlParseTryOrFinish(ctxt, terminate);
#endif
} else if (ctxt->instate != XML_PARSER_EOF) { } else if (ctxt->instate != XML_PARSER_EOF) {
if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
xmlParserInputBufferPtr in = ctxt->input->buf; xmlParserInputBufferPtr in = ctxt->input->buf;

View File

@ -1051,8 +1051,10 @@ xmlParserInputShrink(xmlParserInputPtr in) {
* Do not shrink on large buffers whose only a tiny fraction * Do not shrink on large buffers whose only a tiny fraction
* was consumed * was consumed
*/ */
#if 0
if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK) if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK)
return; return;
#endif
if (used > INPUT_CHUNK) { if (used > INPUT_CHUNK) {
ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN); ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
if (ret > 0) { if (ret > 0) {

View File

@ -16,7 +16,7 @@ expect="""../../test/valid/rss.xml:172: validity error: Element rss does not car
</rss> </rss>
^ ^
../../test/valid/xlink.xml:450: validity error: ID dt-arc already defined ../../test/valid/xlink.xml:450: validity error: ID dt-arc already defined
<p><termdef id="dt-arc" term="Arc">An <term>arc</term> is contained within an <p><termdef id="dt-arc" term="Arc">An <ter
^ ^
../../test/valid/xlink.xml:530: validity error: attribute def line 199 references an unknown ID "dt-xlg" ../../test/valid/xlink.xml:530: validity error: attribute def line 199 references an unknown ID "dt-xlg"

48
tree.c
View File

@ -38,6 +38,8 @@
#include <libxml/HTMLtree.h> #include <libxml/HTMLtree.h>
#endif #endif
int __xmlRegisterCallbacks = 0;
xmlNsPtr xmlNewReconciliedNs(xmlDocPtr doc, xmlNodePtr tree, xmlNsPtr ns); xmlNsPtr xmlNewReconciliedNs(xmlDocPtr doc, xmlNodePtr tree, xmlNsPtr ns);
/************************************************************************ /************************************************************************
@ -744,7 +746,7 @@ xmlNewDtd(xmlDocPtr doc, const xmlChar *name,
doc->extSubset = cur; doc->extSubset = cur;
cur->doc = doc; cur->doc = doc;
if (xmlRegisterNodeDefaultValue) if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue))
xmlRegisterNodeDefaultValue((xmlNodePtr)cur); xmlRegisterNodeDefaultValue((xmlNodePtr)cur);
return(cur); return(cur);
} }
@ -854,7 +856,7 @@ xmlCreateIntSubset(xmlDocPtr doc, const xmlChar *name,
} }
} }
if (xmlRegisterNodeDefaultValue) if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue))
xmlRegisterNodeDefaultValue((xmlNodePtr)cur); xmlRegisterNodeDefaultValue((xmlNodePtr)cur);
return(cur); return(cur);
} }
@ -875,7 +877,7 @@ xmlFreeDtd(xmlDtdPtr cur) {
return; return;
} }
if (xmlDeregisterNodeDefaultValue) if ((__xmlRegisterCallbacks) && (xmlDeregisterNodeDefaultValue))
xmlDeregisterNodeDefaultValue((xmlNodePtr)cur); xmlDeregisterNodeDefaultValue((xmlNodePtr)cur);
if (cur->children != NULL) { if (cur->children != NULL) {
@ -946,7 +948,7 @@ xmlNewDoc(const xmlChar *version) {
cur->doc = cur; cur->doc = cur;
cur->charset = XML_CHAR_ENCODING_UTF8; cur->charset = XML_CHAR_ENCODING_UTF8;
if (xmlRegisterNodeDefaultValue) if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue))
xmlRegisterNodeDefaultValue((xmlNodePtr)cur); xmlRegisterNodeDefaultValue((xmlNodePtr)cur);
return(cur); return(cur);
} }
@ -969,7 +971,7 @@ xmlFreeDoc(xmlDocPtr cur) {
return; return;
} }
if (xmlDeregisterNodeDefaultValue) if ((__xmlRegisterCallbacks) && (xmlDeregisterNodeDefaultValue))
xmlDeregisterNodeDefaultValue((xmlNodePtr)cur); xmlDeregisterNodeDefaultValue((xmlNodePtr)cur);
/* /*
@ -1552,7 +1554,7 @@ xmlNewProp(xmlNodePtr node, const xmlChar *name, const xmlChar *value) {
} }
} }
if (xmlRegisterNodeDefaultValue) if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue))
xmlRegisterNodeDefaultValue((xmlNodePtr)cur); xmlRegisterNodeDefaultValue((xmlNodePtr)cur);
return(cur); return(cur);
} }
@ -1632,7 +1634,7 @@ xmlNewNsProp(xmlNodePtr node, xmlNsPtr ns, const xmlChar *name,
} }
} }
if (xmlRegisterNodeDefaultValue) if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue))
xmlRegisterNodeDefaultValue((xmlNodePtr)cur); xmlRegisterNodeDefaultValue((xmlNodePtr)cur);
return(cur); return(cur);
} }
@ -1712,7 +1714,7 @@ xmlNewNsPropEatName(xmlNodePtr node, xmlNsPtr ns, xmlChar *name,
} }
} }
if (xmlRegisterNodeDefaultValue) if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue))
xmlRegisterNodeDefaultValue((xmlNodePtr)cur); xmlRegisterNodeDefaultValue((xmlNodePtr)cur);
return(cur); return(cur);
} }
@ -1767,7 +1769,7 @@ xmlNewDocProp(xmlDocPtr doc, const xmlChar *name, const xmlChar *value) {
} }
} }
if (xmlRegisterNodeDefaultValue) if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue))
xmlRegisterNodeDefaultValue((xmlNodePtr)cur); xmlRegisterNodeDefaultValue((xmlNodePtr)cur);
return(cur); return(cur);
} }
@ -1811,7 +1813,7 @@ xmlFreeProp(xmlAttrPtr cur) {
return; return;
} }
if (xmlDeregisterNodeDefaultValue) if ((__xmlRegisterCallbacks) && (xmlDeregisterNodeDefaultValue))
xmlDeregisterNodeDefaultValue((xmlNodePtr)cur); xmlDeregisterNodeDefaultValue((xmlNodePtr)cur);
/* Check for ID removal -> leading to invalid references ! */ /* Check for ID removal -> leading to invalid references ! */
@ -1912,7 +1914,7 @@ xmlNewPI(const xmlChar *name, const xmlChar *content) {
cur->content = xmlStrdup(content); cur->content = xmlStrdup(content);
} }
if (xmlRegisterNodeDefaultValue) if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue))
xmlRegisterNodeDefaultValue((xmlNodePtr)cur); xmlRegisterNodeDefaultValue((xmlNodePtr)cur);
return(cur); return(cur);
} }
@ -1953,7 +1955,7 @@ xmlNewNode(xmlNsPtr ns, const xmlChar *name) {
cur->name = xmlStrdup(name); cur->name = xmlStrdup(name);
cur->ns = ns; cur->ns = ns;
if (xmlRegisterNodeDefaultValue) if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue))
xmlRegisterNodeDefaultValue(cur); xmlRegisterNodeDefaultValue(cur);
return(cur); return(cur);
} }
@ -1994,7 +1996,7 @@ xmlNewNodeEatName(xmlNsPtr ns, xmlChar *name) {
cur->name = name; cur->name = name;
cur->ns = ns; cur->ns = ns;
if (xmlRegisterNodeDefaultValue) if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue))
xmlRegisterNodeDefaultValue((xmlNodePtr)cur); xmlRegisterNodeDefaultValue((xmlNodePtr)cur);
return(cur); return(cur);
} }
@ -2118,7 +2120,7 @@ xmlNewDocFragment(xmlDocPtr doc) {
cur->doc = doc; cur->doc = doc;
if (xmlRegisterNodeDefaultValue) if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue))
xmlRegisterNodeDefaultValue(cur); xmlRegisterNodeDefaultValue(cur);
return(cur); return(cur);
} }
@ -2151,7 +2153,7 @@ xmlNewText(const xmlChar *content) {
cur->content = xmlStrdup(content); cur->content = xmlStrdup(content);
} }
if (xmlRegisterNodeDefaultValue) if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue))
xmlRegisterNodeDefaultValue(cur); xmlRegisterNodeDefaultValue(cur);
return(cur); return(cur);
} }
@ -2254,7 +2256,7 @@ xmlNewCharRef(xmlDocPtr doc, const xmlChar *name) {
} else } else
cur->name = xmlStrdup(name); cur->name = xmlStrdup(name);
if (xmlRegisterNodeDefaultValue) if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue))
xmlRegisterNodeDefaultValue(cur); xmlRegisterNodeDefaultValue(cur);
return(cur); return(cur);
} }
@ -2308,7 +2310,7 @@ xmlNewReference(xmlDocPtr doc, const xmlChar *name) {
cur->last = (xmlNodePtr) ent; cur->last = (xmlNodePtr) ent;
} }
if (xmlRegisterNodeDefaultValue) if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue))
xmlRegisterNodeDefaultValue(cur); xmlRegisterNodeDefaultValue(cur);
return(cur); return(cur);
} }
@ -2359,7 +2361,7 @@ xmlNewTextLen(const xmlChar *content, int len) {
cur->content = xmlStrndup(content, len); cur->content = xmlStrndup(content, len);
} }
if (xmlRegisterNodeDefaultValue) if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue))
xmlRegisterNodeDefaultValue(cur); xmlRegisterNodeDefaultValue(cur);
return(cur); return(cur);
} }
@ -2411,7 +2413,7 @@ xmlNewComment(const xmlChar *content) {
cur->content = xmlStrdup(content); cur->content = xmlStrdup(content);
} }
if (xmlRegisterNodeDefaultValue) if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue))
xmlRegisterNodeDefaultValue(cur); xmlRegisterNodeDefaultValue(cur);
return(cur); return(cur);
} }
@ -2446,7 +2448,7 @@ xmlNewCDataBlock(xmlDocPtr doc, const xmlChar *content, int len) {
cur->content = xmlStrndup(content, len); cur->content = xmlStrndup(content, len);
} }
if (xmlRegisterNodeDefaultValue) if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue))
xmlRegisterNodeDefaultValue(cur); xmlRegisterNodeDefaultValue(cur);
return(cur); return(cur);
} }
@ -3076,7 +3078,7 @@ xmlFreeNodeList(xmlNodePtr cur) {
/* unroll to speed up freeing the document */ /* unroll to speed up freeing the document */
if (cur->type != XML_DTD_NODE) { if (cur->type != XML_DTD_NODE) {
if (xmlDeregisterNodeDefaultValue) if ((__xmlRegisterCallbacks) && (xmlDeregisterNodeDefaultValue))
xmlDeregisterNodeDefaultValue(cur); xmlDeregisterNodeDefaultValue(cur);
if ((cur->children != NULL) && if ((cur->children != NULL) &&
@ -3161,7 +3163,7 @@ xmlFreeNode(xmlNodePtr cur) {
return; return;
} }
if (xmlDeregisterNodeDefaultValue) if ((__xmlRegisterCallbacks) && (xmlDeregisterNodeDefaultValue))
xmlDeregisterNodeDefaultValue(cur); xmlDeregisterNodeDefaultValue(cur);
if ((cur->children != NULL) && if ((cur->children != NULL) &&
@ -3630,7 +3632,7 @@ xmlStaticCopyNode(const xmlNodePtr node, xmlDocPtr doc, xmlNodePtr parent,
* in case ret does get coalesced in xmlAddChild * in case ret does get coalesced in xmlAddChild
* the deregister-node callback is called; so we register ret now already * the deregister-node callback is called; so we register ret now already
*/ */
if (xmlRegisterNodeDefaultValue) if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue))
xmlRegisterNodeDefaultValue((xmlNodePtr)ret); xmlRegisterNodeDefaultValue((xmlNodePtr)ret);
tmp = xmlAddChild(parent, ret); tmp = xmlAddChild(parent, ret);

View File

@ -763,12 +763,11 @@ static void parseAndPrintFile(char *filename) {
} }
if (f != NULL) { if (f != NULL) {
int ret; int ret;
int res, size = 3; int res, size = 1024;
char chars[1024]; char chars[1024];
xmlParserCtxtPtr ctxt; xmlParserCtxtPtr ctxt;
if (repeat) /* if (repeat) size = 1024; */
size = 1024;
res = fread(chars, 1, 4, f); res = fread(chars, 1, 4, f);
if (res > 0) { if (res > 0) {
ctxt = xmlCreatePushParserCtxt(NULL, NULL, ctxt = xmlCreatePushParserCtxt(NULL, NULL,

View File

@ -56,6 +56,7 @@
#define DUMP_READER #define DUMP_READER
#endif #endif
#define CHUNK_SIZE 512
/************************************************************************ /************************************************************************
* * * *
* The parser: maps the Text Reader API on top of the existing * * The parser: maps the Text Reader API on top of the existing *
@ -340,9 +341,8 @@ xmlTextReaderCDataBlock(void *ctx, const xmlChar *ch, int len)
*/ */
static int static int
xmlTextReaderPushData(xmlTextReaderPtr reader) { xmlTextReaderPushData(xmlTextReaderPtr reader) {
unsigned int cur = reader->cur;
xmlBufferPtr inbuf; xmlBufferPtr inbuf;
int val; int val, s;
int oldstate; int oldstate;
if ((reader->input == NULL) || (reader->input->buffer == NULL)) if ((reader->input == NULL) || (reader->input->buffer == NULL))
@ -351,8 +351,9 @@ xmlTextReaderPushData(xmlTextReaderPtr reader) {
oldstate = reader->state; oldstate = reader->state;
reader->state = XML_TEXTREADER_NONE; reader->state = XML_TEXTREADER_NONE;
inbuf = reader->input->buffer; inbuf = reader->input->buffer;
while (reader->state == XML_TEXTREADER_NONE) { while (reader->state == XML_TEXTREADER_NONE) {
if (cur >= inbuf->use) { if (inbuf->use < reader->cur + CHUNK_SIZE) {
/* /*
* Refill the buffer unless we are at the end of the stream * Refill the buffer unless we are at the end of the stream
*/ */
@ -365,47 +366,39 @@ xmlTextReaderPushData(xmlTextReaderPtr reader) {
(reader->ctxt->myDoc != NULL)) (reader->ctxt->myDoc != NULL))
return(val); return(val);
} }
} else } else
break; break;
} }
/* /*
* parse by block of 512 bytes * parse by block of CHUNK_SIZE bytes, various tests show that
* it's the best tradeoff at least on a 1.2GHz Duron
*/ */
if ((cur >= reader->cur + 512) || (cur >= inbuf->use)) { if (inbuf->use >= reader->cur + CHUNK_SIZE) {
if (cur < inbuf->use)
cur = cur + 1;
val = xmlParseChunk(reader->ctxt, val = xmlParseChunk(reader->ctxt,
(const char *) &inbuf->content[reader->cur], (const char *) &inbuf->content[reader->cur],
cur - reader->cur, 0); CHUNK_SIZE, 0);
reader->cur += CHUNK_SIZE;
if (val != 0) if (val != 0)
return(-1); return(-1);
reader->cur = cur;
break;
} else { } else {
cur = cur + 1; s = inbuf->use - reader->cur;
val = xmlParseChunk(reader->ctxt,
/* (const char *) &inbuf->content[reader->cur],
* One may have to force a flush at some point when parsing really s, 0);
* large CDATA sections reader->cur += s;
*/ if (val != 0)
if ((cur - reader->cur > 4096) && (reader->base == 0) && return(-1);
(reader->mode == XML_TEXTREADER_MODE_INTERACTIVE)) { break;
cur = cur + 1;
val = xmlParseChunk(reader->ctxt,
(const char *) &inbuf->content[reader->cur],
cur - reader->cur, 0);
if (val != 0)
return(-1);
reader->cur = cur;
}
} }
} }
/* /*
* Discard the consumed input when needed and possible * Discard the consumed input when needed and possible
*/ */
if (reader->mode == XML_TEXTREADER_MODE_INTERACTIVE) { if (reader->mode == XML_TEXTREADER_MODE_INTERACTIVE) {
if ((reader->cur >= 4096) && (reader->base == 0)) { if (reader->cur >= 4096) {
val = xmlBufferShrink(inbuf, cur); val = xmlBufferShrink(inbuf, reader->cur);
if (val >= 0) { if (val >= 0) {
reader->cur -= val; reader->cur -= val;
} }
@ -416,12 +409,13 @@ xmlTextReaderPushData(xmlTextReaderPtr reader) {
* At the end of the stream signal that the work is done to the Push * At the end of the stream signal that the work is done to the Push
* parser. * parser.
*/ */
if (reader->mode == XML_TEXTREADER_MODE_EOF) { else if (reader->mode == XML_TEXTREADER_MODE_EOF) {
if (reader->mode != XML_TEXTREADER_DONE) { if (reader->mode != XML_TEXTREADER_DONE) {
s = inbuf->use - reader->cur;
val = xmlParseChunk(reader->ctxt, val = xmlParseChunk(reader->ctxt,
(const char *) &inbuf->content[reader->cur], (const char *) &inbuf->content[reader->cur],
cur - reader->cur, 1); s, 1);
reader->cur = cur; reader->cur = inbuf->use;
reader->mode = XML_TEXTREADER_DONE; reader->mode = XML_TEXTREADER_DONE;
} }
} }
@ -767,17 +761,17 @@ get_next_node:
* that the parser didn't finish or that we aren't at the end * that the parser didn't finish or that we aren't at the end
* of stream, continue processing. * of stream, continue processing.
*/ */
while (((oldstate == XML_TEXTREADER_BACKTRACK) || while ((reader->node->next == NULL) &&
(reader->ctxt->nodeNr == olddepth) &&
((oldstate == XML_TEXTREADER_BACKTRACK) ||
(reader->node->children == NULL) || (reader->node->children == NULL) ||
(reader->node->type == XML_ENTITY_REF_NODE) || (reader->node->type == XML_ENTITY_REF_NODE) ||
(reader->node->type == XML_DTD_NODE) || (reader->node->type == XML_DTD_NODE) ||
(reader->node->type == XML_DOCUMENT_NODE) || (reader->node->type == XML_DOCUMENT_NODE) ||
(reader->node->type == XML_HTML_DOCUMENT_NODE)) && (reader->node->type == XML_HTML_DOCUMENT_NODE)) &&
(reader->node->next == NULL) &&
((reader->ctxt->node == NULL) || ((reader->ctxt->node == NULL) ||
(reader->ctxt->node == reader->node) || (reader->ctxt->node == reader->node) ||
(reader->ctxt->node == reader->node->parent)) && (reader->ctxt->node == reader->node->parent)) &&
(reader->ctxt->nodeNr == olddepth) &&
(reader->ctxt->instate != XML_PARSER_EOF)) { (reader->ctxt->instate != XML_PARSER_EOF)) {
val = xmlTextReaderPushData(reader); val = xmlTextReaderPushData(reader);
if (val < 0) if (val < 0)
@ -785,45 +779,6 @@ get_next_node:
if (reader->node == NULL) if (reader->node == NULL)
goto node_end; goto node_end;
} }
/*
* If we are in the middle of a piece of CDATA make sure it's finished
* Maybe calling a function checking that a non-character() callback was
* received would be cleaner for the loop exit.
*/
if ((oldstate == XML_TEXTREADER_ELEMENT) &&
(reader->ctxt->instate == XML_PARSER_CDATA_SECTION)) {
while ((reader->ctxt->instate == XML_PARSER_CDATA_SECTION) &&
(((reader->node->content == NULL) &&
(reader->node->next != NULL) &&
(reader->node->next->type == XML_CDATA_SECTION_NODE) &&
(reader->node->next->next == NULL) &&
(reader->node->parent->next == NULL)) ||
((reader->node->children != NULL) &&
(reader->node->children->type == XML_CDATA_SECTION_NODE) &&
(reader->node->children->next == NULL) &&
(reader->node->children->next == NULL)))) {
val = xmlTextReaderPushData(reader);
if (val < 0)
return(-1);
}
}
if ((oldstate == XML_TEXTREADER_ELEMENT) &&
(reader->ctxt->instate == XML_PARSER_CONTENT)) {
while ((reader->ctxt->instate == XML_PARSER_CONTENT) &&
(((reader->node->content == NULL) &&
(reader->node->next != NULL) &&
(reader->node->next->type == XML_TEXT_NODE) &&
(reader->node->next->next == NULL) &&
(reader->node->parent->next == NULL)) ||
((reader->node->children != NULL) &&
(reader->node->children->type == XML_TEXT_NODE) &&
(reader->node->children->next == NULL) &&
(reader->node->children->next == NULL)))) {
val = xmlTextReaderPushData(reader);
if (val < 0)
return(-1);
}
}
if (oldstate != XML_TEXTREADER_BACKTRACK) { if (oldstate != XML_TEXTREADER_BACKTRACK) {
if ((reader->node->children != NULL) && if ((reader->node->children != NULL) &&
(reader->node->type != XML_ENTITY_REF_NODE) && (reader->node->type != XML_ENTITY_REF_NODE) &&
@ -899,6 +854,15 @@ get_next_node:
node_found: node_found:
DUMP_READER DUMP_READER
/*
* If we are in the middle of a piece of CDATA make sure it's finished
*/
if ((reader->node != NULL) &&
((reader->node->type == XML_TEXT_NODE) ||
(reader->node->type == XML_CDATA_SECTION_NODE))) {
xmlTextReaderExpand(reader);
}
/* /*
* Handle entities enter and exit when in entity replacement mode * Handle entities enter and exit when in entity replacement mode
*/ */