From a880b124750afcce94ea3d947125d0f8a034e766 Mon Sep 17 00:00:00 2001 From: Daniel Veillard Date: Mon, 21 Apr 2003 21:36:41 +0000 Subject: [PATCH] a lot of performance work especially the speed of streaming through the * globals.c libxml.h parser.c parserInternals.c tree.c xmllint.c xmlreader.c include/libxml/parser.h: a lot of performance work especially the speed of streaming through the reader and push interface. Some thread related optimizations. Nearly doubled the speed of parsing through the reader. Daniel --- ChangeLog | 8 + globals.c | 2 + include/libxml/parser.h | 1 + libxml.h | 6 + parser.c | 698 ++++++++++++++++++++++------------------ parserInternals.c | 2 + python/tests/reader2.py | 2 +- tree.c | 48 +-- xmllint.c | 5 +- xmlreader.c | 110 +++---- 10 files changed, 477 insertions(+), 405 deletions(-) diff --git a/ChangeLog b/ChangeLog index b7008b36..f9482f03 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +Mon Apr 21 23:33:38 CEST 2003 Daniel Veillard + + * globals.c libxml.h parser.c parserInternals.c tree.c xmllint.c + xmlreader.c include/libxml/parser.h: a lot of performance work + especially the speed of streaming through the reader and push + interface. Some thread related optimizations. Nearly doubled the + speed of parsing through the reader. + Sun Apr 20 10:36:05 MDT 2003 John Fleck * doc/xmllint.xml diff --git a/globals.c b/globals.c index 380c94e6..0fc54e27 100644 --- a/globals.c +++ b/globals.c @@ -488,6 +488,7 @@ xmlRegisterNodeDefault(xmlRegisterNodeFunc func) { xmlRegisterNodeFunc old = xmlRegisterNodeDefaultValue; + __xmlRegisterCallbacks = 1; xmlRegisterNodeDefaultValue = func; return(old); } @@ -505,6 +506,7 @@ xmlDeregisterNodeDefault(xmlDeregisterNodeFunc func) { xmlDeregisterNodeFunc old = xmlDeregisterNodeDefaultValue; + __xmlRegisterCallbacks = 1; xmlDeregisterNodeDefaultValue = func; return(old); } diff --git a/include/libxml/parser.h b/include/libxml/parser.h index d623e73f..4578ad91 100644 --- a/include/libxml/parser.h +++ b/include/libxml/parser.h @@ -233,6 +233,7 @@ struct _xmlParserCtxt { int linenumbers; /* set line number in element content */ void *catalogs; /* document's own catalog */ int recovery; /* run in recovery mode */ + int progressive; /* is this a progressive parsing */ }; /** diff --git a/libxml.h b/libxml.h index 8b1b9495..fcf0f9e6 100644 --- a/libxml.h +++ b/libxml.h @@ -40,4 +40,10 @@ #include "trio.h" #endif +/* + * Internal variable indicating if a callback has been registered for + * node creation/destruction. It avoids spending a lot of time in locking + * function while checking if the callback exists. + */ +extern int __xmlRegisterCallbacks; #endif /* ! __XML_LIBXML_H__ */ diff --git a/parser.c b/parser.c index 2e8021e5..728b5bd0 100644 --- a/parser.c +++ b/parser.c @@ -368,7 +368,8 @@ static int spacePop(xmlParserCtxtPtr ctxt) { xmlPopInput(ctxt); \ } while (0) -#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \ +#define SHRINK if ((ctxt->progressive == 0) && \ + (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK))\ xmlSHRINK (ctxt); static void xmlSHRINK (xmlParserCtxtPtr ctxt) { @@ -378,7 +379,8 @@ static void xmlSHRINK (xmlParserCtxtPtr ctxt) { xmlPopInput(ctxt); } -#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \ +#define GROW if ((ctxt->progressive == 0) && \ + (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ xmlGROW (ctxt); static void xmlGROW (xmlParserCtxtPtr ctxt) { @@ -386,7 +388,7 @@ static void xmlGROW (xmlParserCtxtPtr ctxt) { if ((*ctxt->input->cur == 0) && (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) xmlPopInput(ctxt); - } +} #define SKIP_BLANKS xmlSkipBlankChars(ctxt) @@ -8190,6 +8192,55 @@ xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, return(-1); } +/** + * xmlParseGetLasts: + * @ctxt: an XML parser context + * @lastlt: pointer to store the last '<' from the input + * @lastgt: pointer to store the last '>' from the input + * + * Lookup the last < and > in the current chunk + */ +static void +xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, + const xmlChar **lastgt) { + const xmlChar *tmp; + + if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) { + xmlGenericError(xmlGenericErrorContext, + "Internal error: xmlParseGetLasts\n"); + return; + } + if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) { + tmp = ctxt->input->end; + tmp--; + while ((tmp >= ctxt->input->base) && (*tmp != '<') && + (*tmp != '>')) tmp--; + if (tmp < ctxt->input->base) { + *lastlt = NULL; + *lastgt = NULL; + } else if (*tmp == '<') { + *lastlt = tmp; + tmp--; + while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; + if (tmp < ctxt->input->base) + *lastgt = NULL; + else + *lastgt = tmp; + } else { + *lastgt = tmp; + tmp--; + while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; + if (tmp < ctxt->input->base) + *lastlt = NULL; + else + *lastlt = tmp; + } + + } else { + *lastlt = NULL; + *lastgt = NULL; + } +} /** * xmlParseTryOrFinish: * @ctxt: an XML parser context @@ -8204,6 +8255,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { int ret = 0; int avail; xmlChar cur, next; + const xmlChar *lastlt, *lastgt; #ifdef DEBUG_PUSH switch (ctxt->instate) { @@ -8258,9 +8310,13 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { } #endif - while (1) { - SHRINK; + if (ctxt->input->cur - ctxt->input->base > 4096) { + xmlSHRINK(ctxt); + ctxt->checkIndex = 0; + } + xmlParseGetLasts(ctxt, &lastlt, &lastgt); + while (1) { /* * Pop-up of finished entities. */ @@ -8269,7 +8325,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { if (ctxt->input ==NULL) break; if (ctxt->input->buf == NULL) - avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); + avail = ctxt->input->length - + (ctxt->input->cur - ctxt->input->base); else { /* * If we are operating on converted input, try to flush @@ -8412,12 +8469,317 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { #endif } break; + case XML_PARSER_START_TAG: { + xmlChar *name, *oldname; + + if ((avail < 2) && (ctxt->inputNr == 1)) + goto done; + cur = ctxt->input->cur[0]; + if (cur != '<') { + ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Start tag expect, '<' not found\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + ctxt->instate = XML_PARSER_EOF; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering EOF\n"); +#endif + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) + ctxt->sax->endDocument(ctxt->userData); + goto done; + } + if (!terminate) { + if (ctxt->progressive) { + if ((lastgt == NULL) || (ctxt->input->cur > lastgt)) + goto done; + } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { + goto done; + } + } + if (ctxt->spaceNr == 0) + spacePush(ctxt, -1); + else + spacePush(ctxt, *ctxt->space); + name = xmlParseStartTag(ctxt); + if (name == NULL) { + spacePop(ctxt); + ctxt->instate = XML_PARSER_EOF; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering EOF\n"); +#endif + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) + ctxt->sax->endDocument(ctxt->userData); + goto done; + } + namePush(ctxt, name); + + /* + * [ VC: Root Element Type ] + * The Name in the document type declaration must match + * the element type of the root element. + */ + if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && + ctxt->node && (ctxt->node == ctxt->myDoc->children)) + ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); + + /* + * Check for an Empty Element. + */ + if ((RAW == '/') && (NXT(1) == '>')) { + SKIP(2); + if ((ctxt->sax != NULL) && + (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX)) + ctxt->sax->endElement(ctxt->userData, name); + oldname = namePop(ctxt); + spacePop(ctxt); + if (oldname != NULL) { +#ifdef DEBUG_STACK + xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); +#endif + xmlFree(oldname); + } + if (ctxt->name == NULL) { + ctxt->instate = XML_PARSER_EPILOG; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering EPILOG\n"); +#endif + } else { + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering CONTENT\n"); +#endif + } + break; + } + if (RAW == '>') { + NEXT; + } else { + ctxt->errNo = XML_ERR_GT_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Couldn't find end of Start Tag %s\n", + name); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + + /* + * end of parsing of this node. + */ + nodePop(ctxt); + oldname = namePop(ctxt); + spacePop(ctxt); + if (oldname != NULL) { +#ifdef DEBUG_STACK + xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); +#endif + xmlFree(oldname); + } + } + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering CONTENT\n"); +#endif + break; + } + case XML_PARSER_CONTENT: { + const xmlChar *test; + unsigned int cons; + if ((avail < 2) && (ctxt->inputNr == 1)) + goto done; + cur = ctxt->input->cur[0]; + next = ctxt->input->cur[1]; + + test = CUR_PTR; + cons = ctxt->input->consumed; + if ((cur == '<') && (next == '/')) { + ctxt->instate = XML_PARSER_END_TAG; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering END_TAG\n"); +#endif + break; + } else if ((cur == '<') && (next == '?')) { + if ((!terminate) && + (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: Parsing PI\n"); +#endif + xmlParsePI(ctxt); + } else if ((cur == '<') && (next != '!')) { + ctxt->instate = XML_PARSER_START_TAG; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering START_TAG\n"); +#endif + break; + } else if ((cur == '<') && (next == '!') && + (ctxt->input->cur[2] == '-') && + (ctxt->input->cur[3] == '-')) { + if ((!terminate) && + (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: Parsing Comment\n"); +#endif + xmlParseComment(ctxt); + ctxt->instate = XML_PARSER_CONTENT; + } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && + (ctxt->input->cur[2] == '[') && + (ctxt->input->cur[3] == 'C') && + (ctxt->input->cur[4] == 'D') && + (ctxt->input->cur[5] == 'A') && + (ctxt->input->cur[6] == 'T') && + (ctxt->input->cur[7] == 'A') && + (ctxt->input->cur[8] == '[')) { + SKIP(9); + ctxt->instate = XML_PARSER_CDATA_SECTION; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering CDATA_SECTION\n"); +#endif + break; + } else if ((cur == '<') && (next == '!') && + (avail < 9)) { + goto done; + } else if (cur == '&') { + if ((!terminate) && + (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: Parsing Reference\n"); +#endif + xmlParseReference(ctxt); + } else { + /* TODO Avoid the extra copy, handle directly !!! */ + /* + * Goal of the following test is: + * - minimize calls to the SAX 'character' callback + * when they are mergeable + * - handle an problem for isBlank when we only parse + * a sequence of blank chars and the next one is + * not available to check against '<' presence. + * - tries to homogenize the differences in SAX + * callbacks between the push and pull versions + * of the parser. + */ + if ((ctxt->inputNr == 1) && + (avail < XML_PARSER_BIG_BUFFER_SIZE)) { + if (!terminate) { + if (ctxt->progressive) { + if ((lastlt == NULL) || + (ctxt->input->cur > lastlt)) + goto done; + } else if (xmlParseLookupSequence(ctxt, + '<', 0, 0) < 0) { + goto done; + } + } + } + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: Parsing char data\n"); +#endif + xmlParseCharData(ctxt, 0); + } + /* + * Pop-up of finished entities. + */ + while ((RAW == 0) && (ctxt->inputNr > 1)) + xmlPopInput(ctxt); + if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { + ctxt->errNo = XML_ERR_INTERNAL_ERROR; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "detected an error in element content\n"); + ctxt->wellFormed = 0; + if (ctxt->recovery == 0) ctxt->disableSAX = 1; + ctxt->instate = XML_PARSER_EOF; + break; + } + break; + } + case XML_PARSER_END_TAG: + if (avail < 2) + goto done; + if (!terminate) { + if (ctxt->progressive) { + if ((lastgt == NULL) || (ctxt->input->cur > lastgt)) + goto done; + } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { + goto done; + } + } + xmlParseEndTag(ctxt); + if (ctxt->name == NULL) { + ctxt->instate = XML_PARSER_EPILOG; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering EPILOG\n"); +#endif + } else { + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering CONTENT\n"); +#endif + } + break; + case XML_PARSER_CDATA_SECTION: { + /* + * The Push mode need to have the SAX callback for + * cdataBlock merge back contiguous callbacks. + */ + int base; + + base = xmlParseLookupSequence(ctxt, ']', ']', '>'); + if (base < 0) { + if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { + if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { + if (ctxt->sax->cdataBlock != NULL) + ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur, + XML_PARSER_BIG_BUFFER_SIZE); + } + SKIP(XML_PARSER_BIG_BUFFER_SIZE); + ctxt->checkIndex = 0; + } + goto done; + } else { + if ((ctxt->sax != NULL) && (base > 0) && + (!ctxt->disableSAX)) { + if (ctxt->sax->cdataBlock != NULL) + ctxt->sax->cdataBlock(ctxt->userData, + ctxt->input->cur, base); + } + SKIP(base + 3); + ctxt->checkIndex = 0; + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering CONTENT\n"); +#endif + } + break; + } case XML_PARSER_MISC: SKIP_BLANKS; if (ctxt->input->buf == NULL) - avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); + avail = ctxt->input->length - + (ctxt->input->cur - ctxt->input->base); else - avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); + avail = ctxt->input->buf->buffer->use - + (ctxt->input->cur - ctxt->input->base); if (avail < 2) goto done; cur = ctxt->input->cur[0]; @@ -8432,7 +8794,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { #endif xmlParsePI(ctxt); } else if ((cur == '<') && (next == '!') && - (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { + (ctxt->input->cur[2] == '-') && + (ctxt->input->cur[3] == '-')) { if ((!terminate) && (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) goto done; @@ -8443,9 +8806,12 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { xmlParseComment(ctxt); ctxt->instate = XML_PARSER_MISC; } else if ((cur == '<') && (next == '!') && - (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') && - (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') && - (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') && + (ctxt->input->cur[2] == 'D') && + (ctxt->input->cur[3] == 'O') && + (ctxt->input->cur[4] == 'C') && + (ctxt->input->cur[5] == 'T') && + (ctxt->input->cur[6] == 'Y') && + (ctxt->input->cur[7] == 'P') && (ctxt->input->cur[8] == 'E')) { if ((!terminate) && (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) @@ -8484,21 +8850,14 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { goto done; } else { ctxt->instate = XML_PARSER_START_TAG; + ctxt->progressive = 1; + xmlParseGetLasts(ctxt, &lastlt, &lastgt); #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: entering START_TAG\n"); #endif } break; - case XML_PARSER_IGNORE: - xmlGenericError(xmlGenericErrorContext, - "PP: internal error, state == IGNORE"); - ctxt->instate = XML_PARSER_DTD; -#ifdef DEBUG_PUSH - xmlGenericError(xmlGenericErrorContext, - "PP: entering DTD\n"); -#endif - break; case XML_PARSER_PROLOG: SKIP_BLANKS; if (ctxt->input->buf == NULL) @@ -8534,6 +8893,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { goto done; } else { ctxt->instate = XML_PARSER_START_TAG; + ctxt->progressive = 1; + xmlParseGetLasts(ctxt, &lastlt, &lastgt); #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: entering START_TAG\n"); @@ -8591,290 +8952,6 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { goto done; } break; - case XML_PARSER_START_TAG: { - xmlChar *name, *oldname; - - if ((avail < 2) && (ctxt->inputNr == 1)) - goto done; - cur = ctxt->input->cur[0]; - if (cur != '<') { - ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "Start tag expect, '<' not found\n"); - ctxt->wellFormed = 0; - if (ctxt->recovery == 0) ctxt->disableSAX = 1; - ctxt->instate = XML_PARSER_EOF; -#ifdef DEBUG_PUSH - xmlGenericError(xmlGenericErrorContext, - "PP: entering EOF\n"); -#endif - if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) - ctxt->sax->endDocument(ctxt->userData); - goto done; - } - if ((!terminate) && - (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) - goto done; - if (ctxt->spaceNr == 0) - spacePush(ctxt, -1); - else - spacePush(ctxt, *ctxt->space); - name = xmlParseStartTag(ctxt); - if (name == NULL) { - spacePop(ctxt); - ctxt->instate = XML_PARSER_EOF; -#ifdef DEBUG_PUSH - xmlGenericError(xmlGenericErrorContext, - "PP: entering EOF\n"); -#endif - if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) - ctxt->sax->endDocument(ctxt->userData); - goto done; - } - namePush(ctxt, xmlStrdup(name)); - - /* - * [ VC: Root Element Type ] - * The Name in the document type declaration must match - * the element type of the root element. - */ - if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && - ctxt->node && (ctxt->node == ctxt->myDoc->children)) - ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); - - /* - * Check for an Empty Element. - */ - if ((RAW == '/') && (NXT(1) == '>')) { - SKIP(2); - if ((ctxt->sax != NULL) && - (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX)) - ctxt->sax->endElement(ctxt->userData, name); - xmlFree(name); - oldname = namePop(ctxt); - spacePop(ctxt); - if (oldname != NULL) { -#ifdef DEBUG_STACK - xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); -#endif - xmlFree(oldname); - } - if (ctxt->name == NULL) { - ctxt->instate = XML_PARSER_EPILOG; -#ifdef DEBUG_PUSH - xmlGenericError(xmlGenericErrorContext, - "PP: entering EPILOG\n"); -#endif - } else { - ctxt->instate = XML_PARSER_CONTENT; -#ifdef DEBUG_PUSH - xmlGenericError(xmlGenericErrorContext, - "PP: entering CONTENT\n"); -#endif - } - break; - } - if (RAW == '>') { - NEXT; - } else { - ctxt->errNo = XML_ERR_GT_REQUIRED; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "Couldn't find end of Start Tag %s\n", - name); - ctxt->wellFormed = 0; - if (ctxt->recovery == 0) ctxt->disableSAX = 1; - - /* - * end of parsing of this node. - */ - nodePop(ctxt); - oldname = namePop(ctxt); - spacePop(ctxt); - if (oldname != NULL) { -#ifdef DEBUG_STACK - xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname); -#endif - xmlFree(oldname); - } - } - xmlFree(name); - ctxt->instate = XML_PARSER_CONTENT; -#ifdef DEBUG_PUSH - xmlGenericError(xmlGenericErrorContext, - "PP: entering CONTENT\n"); -#endif - break; - } - case XML_PARSER_CONTENT: { - const xmlChar *test; - unsigned int cons; - if ((avail < 2) && (ctxt->inputNr == 1)) - goto done; - cur = ctxt->input->cur[0]; - next = ctxt->input->cur[1]; - - test = CUR_PTR; - cons = ctxt->input->consumed; - if ((cur == '<') && (next == '?')) { - if ((!terminate) && - (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) - goto done; -#ifdef DEBUG_PUSH - xmlGenericError(xmlGenericErrorContext, - "PP: Parsing PI\n"); -#endif - xmlParsePI(ctxt); - } else if ((cur == '<') && (next == '!') && - (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { - if ((!terminate) && - (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) - goto done; -#ifdef DEBUG_PUSH - xmlGenericError(xmlGenericErrorContext, - "PP: Parsing Comment\n"); -#endif - xmlParseComment(ctxt); - ctxt->instate = XML_PARSER_CONTENT; - } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && - (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') && - (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') && - (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') && - (ctxt->input->cur[8] == '[')) { - SKIP(9); - ctxt->instate = XML_PARSER_CDATA_SECTION; -#ifdef DEBUG_PUSH - xmlGenericError(xmlGenericErrorContext, - "PP: entering CDATA_SECTION\n"); -#endif - break; - } else if ((cur == '<') && (next == '!') && - (avail < 9)) { - goto done; - } else if ((cur == '<') && (next == '/')) { - ctxt->instate = XML_PARSER_END_TAG; -#ifdef DEBUG_PUSH - xmlGenericError(xmlGenericErrorContext, - "PP: entering END_TAG\n"); -#endif - break; - } else if (cur == '<') { - ctxt->instate = XML_PARSER_START_TAG; -#ifdef DEBUG_PUSH - xmlGenericError(xmlGenericErrorContext, - "PP: entering START_TAG\n"); -#endif - break; - } else if (cur == '&') { - if ((!terminate) && - (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) - goto done; -#ifdef DEBUG_PUSH - xmlGenericError(xmlGenericErrorContext, - "PP: Parsing Reference\n"); -#endif - xmlParseReference(ctxt); - } else { - /* TODO Avoid the extra copy, handle directly !!! */ - /* - * Goal of the following test is: - * - minimize calls to the SAX 'character' callback - * when they are mergeable - * - handle an problem for isBlank when we only parse - * a sequence of blank chars and the next one is - * not available to check against '<' presence. - * - tries to homogenize the differences in SAX - * callbacks between the push and pull versions - * of the parser. - */ - if ((ctxt->inputNr == 1) && - (avail < XML_PARSER_BIG_BUFFER_SIZE)) { - if ((!terminate) && - (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0)) - goto done; - } - ctxt->checkIndex = 0; -#ifdef DEBUG_PUSH - xmlGenericError(xmlGenericErrorContext, - "PP: Parsing char data\n"); -#endif - xmlParseCharData(ctxt, 0); - } - /* - * Pop-up of finished entities. - */ - while ((RAW == 0) && (ctxt->inputNr > 1)) - xmlPopInput(ctxt); - if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { - ctxt->errNo = XML_ERR_INTERNAL_ERROR; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "detected an error in element content\n"); - ctxt->wellFormed = 0; - if (ctxt->recovery == 0) ctxt->disableSAX = 1; - ctxt->instate = XML_PARSER_EOF; - break; - } - break; - } - case XML_PARSER_CDATA_SECTION: { - /* - * The Push mode need to have the SAX callback for - * cdataBlock merge back contiguous callbacks. - */ - int base; - - base = xmlParseLookupSequence(ctxt, ']', ']', '>'); - if (base < 0) { - if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { - if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { - if (ctxt->sax->cdataBlock != NULL) - ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur, - XML_PARSER_BIG_BUFFER_SIZE); - } - SKIP(XML_PARSER_BIG_BUFFER_SIZE); - ctxt->checkIndex = 0; - } - goto done; - } else { - if ((ctxt->sax != NULL) && (base > 0) && - (!ctxt->disableSAX)) { - if (ctxt->sax->cdataBlock != NULL) - ctxt->sax->cdataBlock(ctxt->userData, - ctxt->input->cur, base); - } - SKIP(base + 3); - ctxt->checkIndex = 0; - ctxt->instate = XML_PARSER_CONTENT; -#ifdef DEBUG_PUSH - xmlGenericError(xmlGenericErrorContext, - "PP: entering CONTENT\n"); -#endif - } - break; - } - case XML_PARSER_END_TAG: - if (avail < 2) - goto done; - if ((!terminate) && - (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) - goto done; - xmlParseEndTag(ctxt); - if (ctxt->name == NULL) { - ctxt->instate = XML_PARSER_EPILOG; -#ifdef DEBUG_PUSH - xmlGenericError(xmlGenericErrorContext, - "PP: entering EPILOG\n"); -#endif - } else { - ctxt->instate = XML_PARSER_CONTENT; -#ifdef DEBUG_PUSH - xmlGenericError(xmlGenericErrorContext, - "PP: entering CONTENT\n"); -#endif - } - break; case XML_PARSER_DTD: { /* * Sorry but progressive parsing of the internal subset @@ -8965,6 +9042,15 @@ found_end_int_subset: "PP: entering CONTENT\n"); #endif break; + case XML_PARSER_IGNORE: + xmlGenericError(xmlGenericErrorContext, + "PP: internal error, state == IGNORE"); + ctxt->instate = XML_PARSER_DTD; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "PP: entering DTD\n"); +#endif + break; case XML_PARSER_PI: xmlGenericError(xmlGenericErrorContext, "PP: internal error, state == PI\n"); @@ -9056,8 +9142,10 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); #endif +#if 0 if ((terminate) || (ctxt->input->buf->buffer->use > 80)) xmlParseTryOrFinish(ctxt, terminate); +#endif } else if (ctxt->instate != XML_PARSER_EOF) { if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { xmlParserInputBufferPtr in = ctxt->input->buf; diff --git a/parserInternals.c b/parserInternals.c index 741d7667..e3b4446d 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -1051,8 +1051,10 @@ xmlParserInputShrink(xmlParserInputPtr in) { * Do not shrink on large buffers whose only a tiny fraction * was consumed */ +#if 0 if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK) return; +#endif if (used > INPUT_CHUNK) { ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN); if (ret > 0) { diff --git a/python/tests/reader2.py b/python/tests/reader2.py index 7519a901..f8ca2541 100755 --- a/python/tests/reader2.py +++ b/python/tests/reader2.py @@ -16,7 +16,7 @@ expect="""../../test/valid/rss.xml:172: validity error: Element rss does not car ^ ../../test/valid/xlink.xml:450: validity error: ID dt-arc already defined -

An arc is contained within an +

An #endif +int __xmlRegisterCallbacks = 0; + xmlNsPtr xmlNewReconciliedNs(xmlDocPtr doc, xmlNodePtr tree, xmlNsPtr ns); /************************************************************************ @@ -744,7 +746,7 @@ xmlNewDtd(xmlDocPtr doc, const xmlChar *name, doc->extSubset = cur; cur->doc = doc; - if (xmlRegisterNodeDefaultValue) + if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue)) xmlRegisterNodeDefaultValue((xmlNodePtr)cur); return(cur); } @@ -854,7 +856,7 @@ xmlCreateIntSubset(xmlDocPtr doc, const xmlChar *name, } } - if (xmlRegisterNodeDefaultValue) + if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue)) xmlRegisterNodeDefaultValue((xmlNodePtr)cur); return(cur); } @@ -875,7 +877,7 @@ xmlFreeDtd(xmlDtdPtr cur) { return; } - if (xmlDeregisterNodeDefaultValue) + if ((__xmlRegisterCallbacks) && (xmlDeregisterNodeDefaultValue)) xmlDeregisterNodeDefaultValue((xmlNodePtr)cur); if (cur->children != NULL) { @@ -946,7 +948,7 @@ xmlNewDoc(const xmlChar *version) { cur->doc = cur; cur->charset = XML_CHAR_ENCODING_UTF8; - if (xmlRegisterNodeDefaultValue) + if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue)) xmlRegisterNodeDefaultValue((xmlNodePtr)cur); return(cur); } @@ -969,7 +971,7 @@ xmlFreeDoc(xmlDocPtr cur) { return; } - if (xmlDeregisterNodeDefaultValue) + if ((__xmlRegisterCallbacks) && (xmlDeregisterNodeDefaultValue)) xmlDeregisterNodeDefaultValue((xmlNodePtr)cur); /* @@ -1552,7 +1554,7 @@ xmlNewProp(xmlNodePtr node, const xmlChar *name, const xmlChar *value) { } } - if (xmlRegisterNodeDefaultValue) + if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue)) xmlRegisterNodeDefaultValue((xmlNodePtr)cur); return(cur); } @@ -1632,7 +1634,7 @@ xmlNewNsProp(xmlNodePtr node, xmlNsPtr ns, const xmlChar *name, } } - if (xmlRegisterNodeDefaultValue) + if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue)) xmlRegisterNodeDefaultValue((xmlNodePtr)cur); return(cur); } @@ -1712,7 +1714,7 @@ xmlNewNsPropEatName(xmlNodePtr node, xmlNsPtr ns, xmlChar *name, } } - if (xmlRegisterNodeDefaultValue) + if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue)) xmlRegisterNodeDefaultValue((xmlNodePtr)cur); return(cur); } @@ -1767,7 +1769,7 @@ xmlNewDocProp(xmlDocPtr doc, const xmlChar *name, const xmlChar *value) { } } - if (xmlRegisterNodeDefaultValue) + if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue)) xmlRegisterNodeDefaultValue((xmlNodePtr)cur); return(cur); } @@ -1811,7 +1813,7 @@ xmlFreeProp(xmlAttrPtr cur) { return; } - if (xmlDeregisterNodeDefaultValue) + if ((__xmlRegisterCallbacks) && (xmlDeregisterNodeDefaultValue)) xmlDeregisterNodeDefaultValue((xmlNodePtr)cur); /* Check for ID removal -> leading to invalid references ! */ @@ -1912,7 +1914,7 @@ xmlNewPI(const xmlChar *name, const xmlChar *content) { cur->content = xmlStrdup(content); } - if (xmlRegisterNodeDefaultValue) + if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue)) xmlRegisterNodeDefaultValue((xmlNodePtr)cur); return(cur); } @@ -1953,7 +1955,7 @@ xmlNewNode(xmlNsPtr ns, const xmlChar *name) { cur->name = xmlStrdup(name); cur->ns = ns; - if (xmlRegisterNodeDefaultValue) + if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue)) xmlRegisterNodeDefaultValue(cur); return(cur); } @@ -1994,7 +1996,7 @@ xmlNewNodeEatName(xmlNsPtr ns, xmlChar *name) { cur->name = name; cur->ns = ns; - if (xmlRegisterNodeDefaultValue) + if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue)) xmlRegisterNodeDefaultValue((xmlNodePtr)cur); return(cur); } @@ -2118,7 +2120,7 @@ xmlNewDocFragment(xmlDocPtr doc) { cur->doc = doc; - if (xmlRegisterNodeDefaultValue) + if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue)) xmlRegisterNodeDefaultValue(cur); return(cur); } @@ -2151,7 +2153,7 @@ xmlNewText(const xmlChar *content) { cur->content = xmlStrdup(content); } - if (xmlRegisterNodeDefaultValue) + if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue)) xmlRegisterNodeDefaultValue(cur); return(cur); } @@ -2254,7 +2256,7 @@ xmlNewCharRef(xmlDocPtr doc, const xmlChar *name) { } else cur->name = xmlStrdup(name); - if (xmlRegisterNodeDefaultValue) + if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue)) xmlRegisterNodeDefaultValue(cur); return(cur); } @@ -2308,7 +2310,7 @@ xmlNewReference(xmlDocPtr doc, const xmlChar *name) { cur->last = (xmlNodePtr) ent; } - if (xmlRegisterNodeDefaultValue) + if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue)) xmlRegisterNodeDefaultValue(cur); return(cur); } @@ -2359,7 +2361,7 @@ xmlNewTextLen(const xmlChar *content, int len) { cur->content = xmlStrndup(content, len); } - if (xmlRegisterNodeDefaultValue) + if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue)) xmlRegisterNodeDefaultValue(cur); return(cur); } @@ -2411,7 +2413,7 @@ xmlNewComment(const xmlChar *content) { cur->content = xmlStrdup(content); } - if (xmlRegisterNodeDefaultValue) + if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue)) xmlRegisterNodeDefaultValue(cur); return(cur); } @@ -2446,7 +2448,7 @@ xmlNewCDataBlock(xmlDocPtr doc, const xmlChar *content, int len) { cur->content = xmlStrndup(content, len); } - if (xmlRegisterNodeDefaultValue) + if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue)) xmlRegisterNodeDefaultValue(cur); return(cur); } @@ -3076,7 +3078,7 @@ xmlFreeNodeList(xmlNodePtr cur) { /* unroll to speed up freeing the document */ if (cur->type != XML_DTD_NODE) { - if (xmlDeregisterNodeDefaultValue) + if ((__xmlRegisterCallbacks) && (xmlDeregisterNodeDefaultValue)) xmlDeregisterNodeDefaultValue(cur); if ((cur->children != NULL) && @@ -3161,7 +3163,7 @@ xmlFreeNode(xmlNodePtr cur) { return; } - if (xmlDeregisterNodeDefaultValue) + if ((__xmlRegisterCallbacks) && (xmlDeregisterNodeDefaultValue)) xmlDeregisterNodeDefaultValue(cur); if ((cur->children != NULL) && @@ -3630,7 +3632,7 @@ xmlStaticCopyNode(const xmlNodePtr node, xmlDocPtr doc, xmlNodePtr parent, * in case ret does get coalesced in xmlAddChild * the deregister-node callback is called; so we register ret now already */ - if (xmlRegisterNodeDefaultValue) + if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue)) xmlRegisterNodeDefaultValue((xmlNodePtr)ret); tmp = xmlAddChild(parent, ret); diff --git a/xmllint.c b/xmllint.c index 60c175bb..698170c2 100644 --- a/xmllint.c +++ b/xmllint.c @@ -763,12 +763,11 @@ static void parseAndPrintFile(char *filename) { } if (f != NULL) { int ret; - int res, size = 3; + int res, size = 1024; char chars[1024]; xmlParserCtxtPtr ctxt; - if (repeat) - size = 1024; + /* if (repeat) size = 1024; */ res = fread(chars, 1, 4, f); if (res > 0) { ctxt = xmlCreatePushParserCtxt(NULL, NULL, diff --git a/xmlreader.c b/xmlreader.c index 6cdadc97..9a24110c 100644 --- a/xmlreader.c +++ b/xmlreader.c @@ -56,6 +56,7 @@ #define DUMP_READER #endif +#define CHUNK_SIZE 512 /************************************************************************ * * * The parser: maps the Text Reader API on top of the existing * @@ -340,9 +341,8 @@ xmlTextReaderCDataBlock(void *ctx, const xmlChar *ch, int len) */ static int xmlTextReaderPushData(xmlTextReaderPtr reader) { - unsigned int cur = reader->cur; xmlBufferPtr inbuf; - int val; + int val, s; int oldstate; if ((reader->input == NULL) || (reader->input->buffer == NULL)) @@ -351,8 +351,9 @@ xmlTextReaderPushData(xmlTextReaderPtr reader) { oldstate = reader->state; reader->state = XML_TEXTREADER_NONE; inbuf = reader->input->buffer; + while (reader->state == XML_TEXTREADER_NONE) { - if (cur >= inbuf->use) { + if (inbuf->use < reader->cur + CHUNK_SIZE) { /* * Refill the buffer unless we are at the end of the stream */ @@ -365,47 +366,39 @@ xmlTextReaderPushData(xmlTextReaderPtr reader) { (reader->ctxt->myDoc != NULL)) return(val); } + } else break; } /* - * parse by block of 512 bytes + * parse by block of CHUNK_SIZE bytes, various tests show that + * it's the best tradeoff at least on a 1.2GH Duron */ - if ((cur >= reader->cur + 512) || (cur >= inbuf->use)) { - if (cur < inbuf->use) - cur = cur + 1; + if (inbuf->use >= reader->cur + CHUNK_SIZE) { val = xmlParseChunk(reader->ctxt, (const char *) &inbuf->content[reader->cur], - cur - reader->cur, 0); + CHUNK_SIZE, 0); + reader->cur += CHUNK_SIZE; if (val != 0) return(-1); - reader->cur = cur; - break; } else { - cur = cur + 1; - - /* - * One may have to force a flush at some point when parsing really - * large CDATA sections - */ - if ((cur - reader->cur > 4096) && (reader->base == 0) && - (reader->mode == XML_TEXTREADER_MODE_INTERACTIVE)) { - cur = cur + 1; - val = xmlParseChunk(reader->ctxt, - (const char *) &inbuf->content[reader->cur], - cur - reader->cur, 0); - if (val != 0) - return(-1); - reader->cur = cur; - } + s = inbuf->use - reader->cur; + val = xmlParseChunk(reader->ctxt, + (const char *) &inbuf->content[reader->cur], + s, 0); + reader->cur += s; + if (val != 0) + return(-1); + break; } } + /* * Discard the consumed input when needed and possible */ if (reader->mode == XML_TEXTREADER_MODE_INTERACTIVE) { - if ((reader->cur >= 4096) && (reader->base == 0)) { - val = xmlBufferShrink(inbuf, cur); + if (reader->cur >= 4096) { + val = xmlBufferShrink(inbuf, reader->cur); if (val >= 0) { reader->cur -= val; } @@ -416,12 +409,13 @@ xmlTextReaderPushData(xmlTextReaderPtr reader) { * At the end of the stream signal that the work is done to the Push * parser. */ - if (reader->mode == XML_TEXTREADER_MODE_EOF) { + else if (reader->mode == XML_TEXTREADER_MODE_EOF) { if (reader->mode != XML_TEXTREADER_DONE) { + s = inbuf->use - reader->cur; val = xmlParseChunk(reader->ctxt, (const char *) &inbuf->content[reader->cur], - cur - reader->cur, 1); - reader->cur = cur; + s, 1); + reader->cur = inbuf->use; reader->mode = XML_TEXTREADER_DONE; } } @@ -767,17 +761,17 @@ get_next_node: * that the parser didn't finished or that we arent at the end * of stream, continue processing. */ - while (((oldstate == XML_TEXTREADER_BACKTRACK) || + while ((reader->node->next == NULL) && + (reader->ctxt->nodeNr == olddepth) && + ((oldstate == XML_TEXTREADER_BACKTRACK) || (reader->node->children == NULL) || (reader->node->type == XML_ENTITY_REF_NODE) || (reader->node->type == XML_DTD_NODE) || (reader->node->type == XML_DOCUMENT_NODE) || (reader->node->type == XML_HTML_DOCUMENT_NODE)) && - (reader->node->next == NULL) && ((reader->ctxt->node == NULL) || (reader->ctxt->node == reader->node) || (reader->ctxt->node == reader->node->parent)) && - (reader->ctxt->nodeNr == olddepth) && (reader->ctxt->instate != XML_PARSER_EOF)) { val = xmlTextReaderPushData(reader); if (val < 0) @@ -785,45 +779,6 @@ get_next_node: if (reader->node == NULL) goto node_end; } - /* - * If we are in the middle of a piece of CDATA make sure it's finished - * Maybe calling a function checking that a non-character() callback was - * received would be cleaner for the loop exit. - */ - if ((oldstate == XML_TEXTREADER_ELEMENT) && - (reader->ctxt->instate == XML_PARSER_CDATA_SECTION)) { - while ((reader->ctxt->instate == XML_PARSER_CDATA_SECTION) && - (((reader->node->content == NULL) && - (reader->node->next != NULL) && - (reader->node->next->type == XML_CDATA_SECTION_NODE) && - (reader->node->next->next == NULL) && - (reader->node->parent->next == NULL)) || - ((reader->node->children != NULL) && - (reader->node->children->type == XML_CDATA_SECTION_NODE) && - (reader->node->children->next == NULL) && - (reader->node->children->next == NULL)))) { - val = xmlTextReaderPushData(reader); - if (val < 0) - return(-1); - } - } - if ((oldstate == XML_TEXTREADER_ELEMENT) && - (reader->ctxt->instate == XML_PARSER_CONTENT)) { - while ((reader->ctxt->instate == XML_PARSER_CONTENT) && - (((reader->node->content == NULL) && - (reader->node->next != NULL) && - (reader->node->next->type == XML_TEXT_NODE) && - (reader->node->next->next == NULL) && - (reader->node->parent->next == NULL)) || - ((reader->node->children != NULL) && - (reader->node->children->type == XML_TEXT_NODE) && - (reader->node->children->next == NULL) && - (reader->node->children->next == NULL)))) { - val = xmlTextReaderPushData(reader); - if (val < 0) - return(-1); - } - } if (oldstate != XML_TEXTREADER_BACKTRACK) { if ((reader->node->children != NULL) && (reader->node->type != XML_ENTITY_REF_NODE) && @@ -899,6 +854,15 @@ get_next_node: node_found: DUMP_READER + /* + * If we are in the middle of a piece of CDATA make sure it's finished + */ + if ((reader->node != NULL) && + ((reader->node->type == XML_TEXT_NODE) || + (reader->node->type == XML_CDATA_SECTION_NODE))) { + xmlTextReaderExpand(reader); + } + /* * Handle entities enter and exit when in entity replacement mode */