diff --git a/HTMLparser.c b/HTMLparser.c
index 93b6661b..d33913b3 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -5334,30 +5334,17 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
int base, len;
htmlParserInputPtr in;
const xmlChar *buf;
- int invalue = 0;
- char valdellim = 0x0;
+ int quote;
in = ctxt->input;
if (in == NULL)
return (-1);
- base = in->cur - in->base;
- if (base < 0)
- return (-1);
+ base = ctxt->checkIndex;
+ quote = ctxt->endCheckState;
- if (ctxt->checkIndex > base) {
- base = ctxt->checkIndex;
- /* Abuse hasPErefs member to restore current state. */
- invalue = ctxt->hasPErefs & 1 ? 1 : 0;
- }
-
- if (in->buf == NULL) {
- buf = in->base;
- len = in->length;
- } else {
- buf = xmlBufContent(in->buf->buffer);
- len = xmlBufUse(in->buf->buffer);
- }
+ buf = in->cur;
+ len = in->end - in->cur;
/* take into account the sequence length */
if (third)
@@ -5366,18 +5353,13 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
len--;
for (; base < len; base++) {
if (ignoreattrval) {
+ if (quote) {
+ if (buf[base] == quote)
+ quote = 0;
+ continue;
+ }
if (buf[base] == '"' || buf[base] == '\'') {
- if (invalue) {
- if (buf[base] == valdellim) {
- invalue = 0;
- continue;
- }
- } else {
- valdellim = buf[base];
- invalue = 1;
- continue;
- }
- } else if (invalue) {
+ quote = buf[base];
continue;
}
}
@@ -5390,29 +5372,12 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
continue;
}
ctxt->checkIndex = 0;
-#ifdef DEBUG_PUSH
- if (next == 0)
- xmlGenericError(xmlGenericErrorContext,
- "HPP: lookup '%c' found at %d\n",
- first, base);
- else if (third == 0)
- xmlGenericError(xmlGenericErrorContext,
- "HPP: lookup '%c%c' found at %d\n",
- first, next, base);
- else
- xmlGenericError(xmlGenericErrorContext,
- "HPP: lookup '%c%c%c' found at %d\n",
- first, next, third, base);
-#endif
- return (base - (in->cur - in->base));
+ ctxt->endCheckState = 0;
+ return (base);
}
}
ctxt->checkIndex = base;
- /* Abuse hasPErefs member to track current state. */
- if (invalue)
- ctxt->hasPErefs |= 1;
- else
- ctxt->hasPErefs &= ~1;
+ ctxt->endCheckState = quote;
#ifdef DEBUG_PUSH
if (next == 0)
xmlGenericError(xmlGenericErrorContext,
@@ -5446,7 +5411,6 @@ static int
htmlParseLookupCommentEnd(htmlParserCtxtPtr ctxt)
{
int mark = 0;
- int cur = CUR_PTR - BASE_PTR;
while (mark >= 0) {
mark = htmlParseLookupSequence(ctxt, '-', '-', 0, 0);
@@ -5455,7 +5419,7 @@ htmlParseLookupCommentEnd(htmlParserCtxtPtr ctxt)
((NXT(mark+2) == '!') && (NXT(mark+3) == '>'))) {
return mark;
}
- ctxt->checkIndex = cur + mark + 1;
+ ctxt->checkIndex = mark + 1;
}
return mark;
}
@@ -6806,6 +6770,7 @@ htmlCtxtReset(htmlParserCtxtPtr ctxt)
ctxt->vctxt.warning = xmlParserValidityWarning;
ctxt->record_info = 0;
ctxt->checkIndex = 0;
+ ctxt->endCheckState = 0;
ctxt->inSubset = 0;
ctxt->errNo = XML_ERR_OK;
ctxt->depth = 0;
diff --git a/SAX2.c b/SAX2.c
index 44c6b8c3..f6321c68 100644
--- a/SAX2.c
+++ b/SAX2.c
@@ -387,6 +387,7 @@ xmlSAX2ExternalSubset(void *ctx, const xmlChar *name,
xmlCharEncoding enc;
int oldcharset;
const xmlChar *oldencoding;
+ int oldprogressive;
/*
* Ask the Entity resolver to load the damn thing
@@ -409,7 +410,9 @@ xmlSAX2ExternalSubset(void *ctx, const xmlChar *name,
oldinputTab = ctxt->inputTab;
oldcharset = ctxt->charset;
oldencoding = ctxt->encoding;
+ oldprogressive = ctxt->progressive;
ctxt->encoding = NULL;
+ ctxt->progressive = 0;
ctxt->inputTab = (xmlParserInputPtr *)
xmlMalloc(5 * sizeof(xmlParserInputPtr));
@@ -422,6 +425,7 @@ xmlSAX2ExternalSubset(void *ctx, const xmlChar *name,
ctxt->inputTab = oldinputTab;
ctxt->charset = oldcharset;
ctxt->encoding = oldencoding;
+ ctxt->progressive = oldprogressive;
return;
}
ctxt->inputNr = 0;
@@ -472,6 +476,7 @@ xmlSAX2ExternalSubset(void *ctx, const xmlChar *name,
(!xmlDictOwns(ctxt->dict, ctxt->encoding))))
xmlFree((xmlChar *) ctxt->encoding);
ctxt->encoding = oldencoding;
+ ctxt->progressive = oldprogressive;
/* ctxt->wellFormed = oldwellFormed; */
}
}
diff --git a/include/libxml/parser.h b/include/libxml/parser.h
index ca5c93c9..3c86d228 100644
--- a/include/libxml/parser.h
+++ b/include/libxml/parser.h
@@ -311,6 +311,8 @@ struct _xmlParserCtxt {
int input_id; /* we need to label inputs */
unsigned long sizeentcopy; /* volume of entity copy */
+
+ int endCheckState; /* quote state for push parser */
};
/**
diff --git a/parser.c b/parser.c
index 3aeb8ad9..903ccf08 100644
--- a/parser.c
+++ b/parser.c
@@ -11074,142 +11074,231 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
************************************************************************/
/**
- * xmlParseLookupSequence:
+ * xmlParseLookupChar:
* @ctxt: an XML parser context
- * @first: the first char to lookup
- * @next: the next char to lookup or zero
- * @third: the next char to lookup or zero
+ * @c: the character to look for
*
- * Try to find if a sequence (first, next, third) or just (first next) or
- * (first) is available in the input stream.
- * This function has a side effect of (possibly) incrementing ctxt->checkIndex
- * to avoid rescanning sequences of bytes, it DOES change the state of the
- * parser, do not use liberally.
- *
- * Returns the index to the current parsing point if the full sequence
- * is available, -1 otherwise.
+ * Check whether the input buffer contains a character.
+ *
+ * The search starts one byte past input->cur when ctxt->checkIndex is 0,
+ * otherwise at input->cur + ctxt->checkIndex, and runs up to input->end.
+ * On a miss ctxt->checkIndex is set to the offset of input->end relative
+ * to input->cur, so the next call resumes there instead of rescanning;
+ * on a hit it is reset to 0.
+ *
+ * Returns 1 if the character was found, 0 otherwise.
*/
static int
-xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
- xmlChar next, xmlChar third) {
- int base, len;
- xmlParserInputPtr in;
- const xmlChar *buf;
+xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
+ const xmlChar *cur;
- in = ctxt->input;
- if (in == NULL) return(-1);
- base = in->cur - in->base;
- if (base < 0) return(-1);
- if (ctxt->checkIndex > base)
- base = ctxt->checkIndex;
- if (in->buf == NULL) {
- buf = in->base;
- len = in->length;
+ if (ctxt->checkIndex == 0) { /* fresh search */
+ cur = ctxt->input->cur + 1; /* skip the character at input->cur */
} else {
- buf = xmlBufContent(in->buf->buffer);
- len = xmlBufUse(in->buf->buffer);
+ cur = ctxt->input->cur + ctxt->checkIndex; /* resume earlier search */
}
- /* take into account the sequence length */
- if (third) len -= 2;
- else if (next) len --;
- for (;base < len;base++) {
- if (buf[base] == first) {
- if (third != 0) {
- if ((buf[base + 1] != next) ||
- (buf[base + 2] != third)) continue;
- } else if (next != 0) {
- if (buf[base + 1] != next) continue;
- }
- ctxt->checkIndex = 0;
-#ifdef DEBUG_PUSH
- if (next == 0)
- xmlGenericError(xmlGenericErrorContext,
- "PP: lookup '%c' found at %d\n",
- first, base);
- else if (third == 0)
- xmlGenericError(xmlGenericErrorContext,
- "PP: lookup '%c%c' found at %d\n",
- first, next, base);
- else
- xmlGenericError(xmlGenericErrorContext,
- "PP: lookup '%c%c%c' found at %d\n",
- first, next, third, base);
-#endif
- return(base - (in->cur - in->base));
- }
+
+ if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
+ ctxt->checkIndex = ctxt->input->end - ctxt->input->cur; /* resume point */
+ return(0);
+ } else {
+ ctxt->checkIndex = 0; /* found: the next lookup starts fresh */
+ return(1);
}
- ctxt->checkIndex = base;
-#ifdef DEBUG_PUSH
- if (next == 0)
- xmlGenericError(xmlGenericErrorContext,
- "PP: lookup '%c' failed\n", first);
- else if (third == 0)
- xmlGenericError(xmlGenericErrorContext,
- "PP: lookup '%c%c' failed\n", first, next);
- else
- xmlGenericError(xmlGenericErrorContext,
- "PP: lookup '%c%c%c' failed\n", first, next, third);
-#endif
- return(-1);
}
/**
- * xmlParseGetLasts:
+ * xmlParseLookupString:
* @ctxt: an XML parser context
- * @lastlt: pointer to store the last '<' from the input
- * @lastgt: pointer to store the last '>' from the input
+ * @startDelta: offset from input->cur at which a fresh search starts
+ * @str: the string to look for
+ * @strLen: length of @str
*
- * Lookup the last < and > in the current chunk
+ * Check whether the input buffer contains a string.
+ *
+ * A fresh search (ctxt->checkIndex == 0) starts at input->cur + @startDelta,
+ * otherwise the search resumes at input->cur + ctxt->checkIndex. On a miss
+ * ctxt->checkIndex is set so that the last (@strLen - 1) bytes before
+ * input->end are scanned again by the next call, or is left at the start
+ * of this scan when fewer than @strLen bytes were available; on a hit it
+ * is reset to 0.
+ *
+ * NOTE(review): strstr() is not bounded by input->end, so this presumably
+ * relies on the input buffer being NUL-terminated - confirm.
+ *
+ * Returns a pointer to the first occurrence of @str, or NULL if not found.
*/
-static void
-xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
- const xmlChar **lastgt) {
- const xmlChar *tmp;
+static const xmlChar *
+xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
+ const char *str, size_t strLen) {
+ const xmlChar *cur, *term;
- if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
- xmlGenericError(xmlGenericErrorContext,
- "Internal error: xmlParseGetLasts\n");
- return;
- }
- if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
- tmp = ctxt->input->end;
- tmp--;
- while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
- if (tmp < ctxt->input->base) {
- *lastlt = NULL;
- *lastgt = NULL;
- } else {
- *lastlt = tmp;
- tmp++;
- while ((tmp < ctxt->input->end) && (*tmp != '>')) {
- if (*tmp == '\'') {
- tmp++;
- while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
- if (tmp < ctxt->input->end) tmp++;
- } else if (*tmp == '"') {
- tmp++;
- while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
- if (tmp < ctxt->input->end) tmp++;
- } else
- tmp++;
- }
- if (tmp < ctxt->input->end)
- *lastgt = tmp;
- else {
- tmp = *lastlt;
- tmp--;
- while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
- if (tmp >= ctxt->input->base)
- *lastgt = tmp;
- else
- *lastgt = NULL;
- }
- }
+ if (ctxt->checkIndex == 0) { /* fresh search */
+ cur = ctxt->input->cur + startDelta;
} else {
- *lastlt = NULL;
- *lastgt = NULL;
+ cur = ctxt->input->cur + ctxt->checkIndex; /* resume earlier search */
}
+
+ term = BAD_CAST strstr((const char *) cur, str);
+ if (term == NULL) {
+ const xmlChar *end = ctxt->input->end;
+
+ /* Rescan (strLen - 1) characters. */
+ if ((size_t) (end - cur) < strLen)
+ end = cur; /* too little data: keep the same start position */
+ else
+ end -= strLen - 1; /* a match may straddle the chunk boundary */
+ ctxt->checkIndex = end - ctxt->input->cur;
+ } else {
+ ctxt->checkIndex = 0; /* found: the next lookup starts fresh */
+ }
+
+ return(term);
}
+
+/**
+ * xmlParseLookupCharData:
+ * @ctxt: an XML parser context
+ *
+ * Check whether the input buffer contains terminated char data.
+ *
+ * Scans from input->cur + ctxt->checkIndex up to input->end for a '<' or
+ * a '&'. On a miss ctxt->checkIndex records how far the scan got, so the
+ * next call continues from there; on a hit it is reset to 0.
+ *
+ * Returns 1 if a '<' or '&' was found, 0 otherwise.
+ */
+static int
+xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
+ const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
+ const xmlChar *end = ctxt->input->end;
+
+ while (cur < end) {
+ if ((*cur == '<') || (*cur == '&')) { /* char data ends here */
+ ctxt->checkIndex = 0;
+ return(1);
+ }
+ cur++;
+ }
+
+ ctxt->checkIndex = cur - ctxt->input->cur; /* resume point for next call */
+ return(0);
+}
+
+/**
+ * xmlParseLookupGt:
+ * @ctxt: an XML parser context
+ *
+ * Check whether there's enough data in the input buffer to finish parsing
+ * a start tag. This has to take quotes into account.
+ *
+ * A '>' only counts when it is not between a pair of matching ' or "
+ * characters. The scan position and the currently open quote character
+ * (0 if none) are kept in ctxt->checkIndex and ctxt->endCheckState between
+ * calls; both are reset to 0 once a terminating '>' is seen.
+ *
+ * Returns 1 if an unquoted '>' was found, 0 otherwise.
+ */
+static int
+xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
+ const xmlChar *cur;
+ const xmlChar *end = ctxt->input->end;
+ int state = ctxt->endCheckState; /* open quote character, or 0 */
+
+ if (ctxt->checkIndex == 0)
+ cur = ctxt->input->cur + 1; /* fresh scan: skip the character at cur */
+ else
+ cur = ctxt->input->cur + ctxt->checkIndex; /* resume earlier scan */
+
+ while (cur < end) {
+ if (state) { /* inside a quoted value */
+ if (*cur == state)
+ state = 0; /* matching quote closes it */
+ } else if (*cur == '\'' || *cur == '"') {
+ state = *cur; /* remember which quote was opened */
+ } else if (*cur == '>') {
+ ctxt->checkIndex = 0;
+ ctxt->endCheckState = 0;
+ return(1);
+ }
+ cur++;
+ }
+
+ ctxt->checkIndex = cur - ctxt->input->cur; /* save position ... */
+ ctxt->endCheckState = state; /* ... and quote state for the next call */
+ return(0);
+}
+
+/**
+ * xmlParseLookupInternalSubset:
+ * @ctxt: an XML parser context
+ *
+ * Check whether there's enough data in the input buffer to finish parsing
+ * the internal subset.
+ *
+ * The scanner state is kept in ctxt->endCheckState between calls:
+ * 0 outside any construct, '-' inside a comment, a quote character while
+ * inside a quoted literal, ']' right after a closing bracket and ' ' in
+ * the blanks that follow it. The resume offset is kept in
+ * ctxt->checkIndex. Both are reset to 0 when the end is found.
+ *
+ * Returns 1 if the end of the internal subset was found, 0 otherwise.
+ */
+static int
+xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
+ /*
+ * Sorry, but progressive parsing of the internal subset is not
+ * supported. We first check that the full content of the internal
+ * subset is available and parsing is launched only at that point.
+ * Internal subset ends with "']' S? '>'" in an unescaped section and
+ * not in a ']]>' sequence which are conditional sections.
+ *
+ * NOTE(review): in the ']' state a further ']' leaves the state
+ * unchanged, so a '>' directly after "]]" is also accepted as the
+ * end of the subset - confirm this matches the intent above.
+ * NOTE(review): cur[1] to cur[3] may be read at or past input->end;
+ * presumably safe because the buffer is NUL-terminated - confirm.
+ */
+ const xmlChar *cur, *start;
+ const xmlChar *end = ctxt->input->end;
+ int state = ctxt->endCheckState;
+
+ if (ctxt->checkIndex == 0) {
+ cur = ctxt->input->cur + 1;
+ } else {
+ cur = ctxt->input->cur + ctxt->checkIndex;
+ }
+ start = cur; /* lower bound for the rewind done after the loop */
+
+ while (cur < end) {
+ if (state == '-') { /* inside a comment: wait for the closing sequence */
+ if ((*cur == '-') &&
+ (cur[1] == '-') &&
+ (cur[2] == '>')) {
+ state = 0;
+ cur += 3;
+ start = cur;
+ continue;
+ }
+ }
+ else if (state == ']') { /* previous character was ']' */
+ if (*cur == '>') {
+ ctxt->checkIndex = 0;
+ ctxt->endCheckState = 0;
+ return(1);
+ }
+ if (IS_BLANK_CH(*cur)) {
+ state = ' ';
+ } else if (*cur != ']') {
+ state = 0;
+ start = cur;
+ continue; /* re-examine this character in state 0 */
+ }
+ }
+ else if (state == ' ') { /* blanks after ']' */
+ if (*cur == '>') {
+ ctxt->checkIndex = 0;
+ ctxt->endCheckState = 0;
+ return(1);
+ }
+ if (!IS_BLANK_CH(*cur)) {
+ state = 0;
+ start = cur;
+ continue; /* re-examine this character in state 0 */
+ }
+ }
+ else if (state != 0) { /* inside a quoted literal */
+ if (*cur == state) {
+ state = 0;
+ start = cur + 1;
+ }
+ }
+ else if (*cur == '<') { /* only a comment opener changes the state */
+ if ((cur[1] == '!') &&
+ (cur[2] == '-') &&
+ (cur[3] == '-')) {
+ state = '-';
+ cur += 4;
+ /* Don't treat <!--> as comment */
+ start = cur;
+ continue;
+ }
+ }
+ else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
+ state = *cur;
+ }
+
+ cur++;
+ }
+
+ /*
+ * Rescan the three last characters to detect "<!--" and "-->"
+ * split across chunks.
+ */
+ if ((state == 0) || (state == '-')) {
+ if (cur - start < 3)
+ cur = start; /* never rewind into already consumed markup */
+ else
+ cur -= 3;
+ }
+ ctxt->checkIndex = cur - ctxt->input->cur;
+ ctxt->endCheckState = state;
+ return(0);
+}
+
/**
* xmlCheckCdataPush:
* @cur: pointer to the block of characters
@@ -11292,7 +11381,6 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
int ret = 0;
int avail, tlen;
xmlChar cur, next;
- const xmlChar *lastlt, *lastgt;
if (ctxt->input == NULL)
return(0);
@@ -11353,9 +11441,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
if ((ctxt->input != NULL) &&
(ctxt->input->cur - ctxt->input->base > 4096)) {
xmlParserInputShrink(ctxt->input);
- ctxt->checkIndex = 0;
}
- xmlParseGetLasts(ctxt, &lastlt, &lastgt);
while (ctxt->instate != XML_PARSER_EOF) {
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
@@ -11442,10 +11528,10 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
}
if ((cur == '<') && (next == '?')) {
/* PI or XML decl */
- if (avail < 5) return(ret);
+ if (avail < 5) goto done;
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
- return(ret);
+ (!xmlParseLookupString(ctxt, 2, "?>", 2)))
+ goto done;
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
ctxt->sax->setDocumentLocator(ctxt->userData,
&xmlDefaultSAXLocator);
@@ -11526,15 +11612,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
ctxt->sax->endDocument(ctxt->userData);
goto done;
}
- if (!terminate) {
- if (ctxt->progressive) {
- /* > can be found unescaped in attribute values */
- if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
- goto done;
- } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
- goto done;
- }
- }
+ if ((!terminate) && (!xmlParseLookupGt(ctxt)))
+ goto done;
if (ctxt->spaceNr == 0)
spacePush(ctxt, -1);
else if (*ctxt->space == -2)
@@ -11599,7 +11678,6 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
} else {
ctxt->instate = XML_PARSER_CONTENT;
}
- ctxt->progressive = 1;
break;
}
if (RAW == '>') {
@@ -11614,7 +11692,6 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
ctxt->instate = XML_PARSER_CONTENT;
- ctxt->progressive = 1;
break;
}
case XML_PARSER_CONTENT: {
@@ -11628,33 +11705,21 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
break;
} else if ((cur == '<') && (next == '?')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
- ctxt->progressive = XML_PARSER_PI;
+ (!xmlParseLookupString(ctxt, 2, "?>", 2)))
goto done;
- }
xmlParsePI(ctxt);
ctxt->instate = XML_PARSER_CONTENT;
- ctxt->progressive = 1;
} else if ((cur == '<') && (next != '!')) {
ctxt->instate = XML_PARSER_START_TAG;
break;
} else if ((cur == '<') && (next == '!') &&
(ctxt->input->cur[2] == '-') &&
(ctxt->input->cur[3] == '-')) {
- int term;
-
- if (avail < 4)
- goto done;
- ctxt->input->cur += 4;
- term = xmlParseLookupSequence(ctxt, '-', '-', '>');
- ctxt->input->cur -= 4;
- if ((!terminate) && (term < 0)) {
- ctxt->progressive = XML_PARSER_COMMENT;
+ if ((!terminate) &&
+ (!xmlParseLookupString(ctxt, 4, "-->", 3)))
goto done;
- }
xmlParseComment(ctxt);
ctxt->instate = XML_PARSER_CONTENT;
- ctxt->progressive = 1;
} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
(ctxt->input->cur[2] == '[') &&
(ctxt->input->cur[3] == 'C') &&
@@ -11674,8 +11739,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
"detected an error in element content\n");
SKIP(1);
} else if (cur == '&') {
- if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
+ if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
goto done;
xmlParseReference(ctxt);
} else {
@@ -11693,18 +11757,10 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
*/
if ((ctxt->inputNr == 1) &&
(avail < XML_PARSER_BIG_BUFFER_SIZE)) {
- if (!terminate) {
- if (ctxt->progressive) {
- if ((lastlt == NULL) ||
- (ctxt->input->cur > lastlt))
- goto done;
- } else if (xmlParseLookupSequence(ctxt,
- '<', 0, 0) < 0) {
- goto done;
- }
- }
+ if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
+ goto done;
}
- ctxt->checkIndex = 0;
+ ctxt->checkIndex = 0;
xmlParseCharData(ctxt, 0);
}
break;
@@ -11712,15 +11768,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
case XML_PARSER_END_TAG:
if (avail < 2)
goto done;
- if (!terminate) {
- if (ctxt->progressive) {
- /* > can be found unescaped in attribute values */
- if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
- goto done;
- } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
- goto done;
- }
- }
+ if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
+ goto done;
if (ctxt->sax2) {
xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
nameNsPop(ctxt);
@@ -11742,35 +11791,35 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
* The Push mode need to have the SAX callback for
* cdataBlock merge back contiguous callbacks.
*/
- int base;
+ const xmlChar *term;
- base = xmlParseLookupSequence(ctxt, ']', ']', '>');
- if (base < 0) {
- if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
- int tmp;
+ term = xmlParseLookupString(ctxt, 0, "]]>", 3);
+ if (term == NULL) {
+ int tmp;
- tmp = xmlCheckCdataPush(ctxt->input->cur,
- XML_PARSER_BIG_BUFFER_SIZE, 0);
- if (tmp < 0) {
- tmp = -tmp;
- ctxt->input->cur += tmp;
- goto encoding_error;
- }
- if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
- if (ctxt->sax->cdataBlock != NULL)
- ctxt->sax->cdataBlock(ctxt->userData,
- ctxt->input->cur, tmp);
- else if (ctxt->sax->characters != NULL)
- ctxt->sax->characters(ctxt->userData,
- ctxt->input->cur, tmp);
- }
- if (ctxt->instate == XML_PARSER_EOF)
- goto done;
- SKIPL(tmp);
- ctxt->checkIndex = 0;
- }
- goto done;
+ if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
+ goto done;
+ ctxt->checkIndex = 0;
+ tmp = xmlCheckCdataPush(ctxt->input->cur,
+ XML_PARSER_BIG_BUFFER_SIZE, 0);
+ if (tmp < 0) {
+ tmp = -tmp;
+ ctxt->input->cur += tmp;
+ goto encoding_error;
+ }
+ if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
+ if (ctxt->sax->cdataBlock != NULL)
+ ctxt->sax->cdataBlock(ctxt->userData,
+ ctxt->input->cur, tmp);
+ else if (ctxt->sax->characters != NULL)
+ ctxt->sax->characters(ctxt->userData,
+ ctxt->input->cur, tmp);
+ }
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto done;
+ SKIPL(tmp);
} else {
+ int base = term - CUR_PTR;
int tmp;
tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
@@ -11804,7 +11853,6 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
if (ctxt->instate == XML_PARSER_EOF)
goto done;
SKIPL(base + 3);
- ctxt->checkIndex = 0;
ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -11827,10 +11875,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
next = ctxt->input->cur[1];
if ((cur == '<') && (next == '?')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
- ctxt->progressive = XML_PARSER_PI;
+ (!xmlParseLookupString(ctxt, 2, "?>", 2)))
goto done;
- }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing PI\n");
@@ -11839,16 +11885,12 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
if (ctxt->instate == XML_PARSER_EOF)
goto done;
ctxt->instate = XML_PARSER_MISC;
- ctxt->progressive = 1;
- ctxt->checkIndex = 0;
} else if ((cur == '<') && (next == '!') &&
(ctxt->input->cur[2] == '-') &&
(ctxt->input->cur[3] == '-')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
- ctxt->progressive = XML_PARSER_COMMENT;
+ (!xmlParseLookupString(ctxt, 4, "-->", 3)))
goto done;
- }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing Comment\n");
@@ -11857,8 +11899,6 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
if (ctxt->instate == XML_PARSER_EOF)
goto done;
ctxt->instate = XML_PARSER_MISC;
- ctxt->progressive = 1;
- ctxt->checkIndex = 0;
} else if ((cur == '<') && (next == '!') &&
(ctxt->input->cur[2] == 'D') &&
(ctxt->input->cur[3] == 'O') &&
@@ -11867,18 +11907,13 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
(ctxt->input->cur[6] == 'Y') &&
(ctxt->input->cur[7] == 'P') &&
(ctxt->input->cur[8] == 'E')) {
- if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
- ctxt->progressive = XML_PARSER_DTD;
- goto done;
- }
+ if ((!terminate) && (!xmlParseLookupGt(ctxt)))
+ goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing internal subset\n");
#endif
ctxt->inSubset = 1;
- ctxt->progressive = 0;
- ctxt->checkIndex = 0;
xmlParseDocTypeDecl(ctxt);
if (ctxt->instate == XML_PARSER_EOF)
goto done;
@@ -11911,8 +11946,6 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
goto done;
} else {
ctxt->instate = XML_PARSER_START_TAG;
- ctxt->progressive = XML_PARSER_START_TAG;
- xmlParseGetLasts(ctxt, &lastlt, &lastgt);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering START_TAG\n");
@@ -11932,10 +11965,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
next = ctxt->input->cur[1];
if ((cur == '<') && (next == '?')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
- ctxt->progressive = XML_PARSER_PI;
+ (!xmlParseLookupString(ctxt, 2, "?>", 2)))
goto done;
- }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing PI\n");
@@ -11944,14 +11975,11 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
if (ctxt->instate == XML_PARSER_EOF)
goto done;
ctxt->instate = XML_PARSER_PROLOG;
- ctxt->progressive = 1;
} else if ((cur == '<') && (next == '!') &&
(ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
- ctxt->progressive = XML_PARSER_COMMENT;
+ (!xmlParseLookupString(ctxt, 4, "-->", 3)))
goto done;
- }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing Comment\n");
@@ -11960,15 +11988,11 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
if (ctxt->instate == XML_PARSER_EOF)
goto done;
ctxt->instate = XML_PARSER_PROLOG;
- ctxt->progressive = 1;
} else if ((cur == '<') && (next == '!') &&
(avail < 4)) {
goto done;
} else {
ctxt->instate = XML_PARSER_START_TAG;
- if (ctxt->progressive == 0)
- ctxt->progressive = XML_PARSER_START_TAG;
- xmlParseGetLasts(ctxt, &lastlt, &lastgt);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering START_TAG\n");
@@ -11988,10 +12012,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
next = ctxt->input->cur[1];
if ((cur == '<') && (next == '?')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
- ctxt->progressive = XML_PARSER_PI;
+ (!xmlParseLookupString(ctxt, 2, "?>", 2)))
goto done;
- }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing PI\n");
@@ -12000,14 +12022,11 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
if (ctxt->instate == XML_PARSER_EOF)
goto done;
ctxt->instate = XML_PARSER_EPILOG;
- ctxt->progressive = 1;
} else if ((cur == '<') && (next == '!') &&
(ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
- ctxt->progressive = XML_PARSER_COMMENT;
+ (!xmlParseLookupString(ctxt, 4, "-->", 3)))
goto done;
- }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing Comment\n");
@@ -12016,7 +12035,6 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
if (ctxt->instate == XML_PARSER_EOF)
goto done;
ctxt->instate = XML_PARSER_EPILOG;
- ctxt->progressive = 1;
} else if ((cur == '<') && (next == '!') &&
(avail < 4)) {
goto done;
@@ -12033,117 +12051,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
}
break;
case XML_PARSER_DTD: {
- /*
- * Sorry but progressive parsing of the internal subset
- * is not expected to be supported. We first check that
- * the full content of the internal subset is available and
- * the parsing is launched only at that point.
- * Internal subset ends up with "']' S? '>'" in an unescaped
- * section and not in a ']]>' sequence which are conditional
- * sections (whoever argued to keep that crap in XML deserve
- * a place in hell !).
- */
- int base, i;
- xmlChar *buf;
- xmlChar quote = 0;
- size_t use;
-
- base = ctxt->input->cur - ctxt->input->base;
- if (base < 0) return(0);
- if (ctxt->checkIndex > base)
- base = ctxt->checkIndex;
- buf = xmlBufContent(ctxt->input->buf->buffer);
- use = xmlBufUse(ctxt->input->buf->buffer);
- for (;(unsigned int) base < use; base++) {
- if (quote != 0) {
- if (buf[base] == quote)
- quote = 0;
- continue;
- }
- if ((quote == 0) && (buf[base] == '<')) {
- int found = 0;
- /* special handling of comments */
- if (((unsigned int) base + 4 < use) &&
- (buf[base + 1] == '!') &&
- (buf[base + 2] == '-') &&
- (buf[base + 3] == '-')) {
- for (;(unsigned int) base + 3 < use; base++) {
- if ((buf[base] == '-') &&
- (buf[base + 1] == '-') &&
- (buf[base + 2] == '>')) {
- found = 1;
- base += 2;
- break;
- }
- }
- if (!found) {
-#if 0
- fprintf(stderr, "unfinished comment\n");
-#endif
- break; /* for */
- }
- continue;
- }
- }
- if (buf[base] == '"') {
- quote = '"';
- continue;
- }
- if (buf[base] == '\'') {
- quote = '\'';
- continue;
- }
- if (buf[base] == ']') {
-#if 0
- fprintf(stderr, "%c%c%c%c: ", buf[base],
- buf[base + 1], buf[base + 2], buf[base + 3]);
-#endif
- if ((unsigned int) base +1 >= use)
- break;
- if (buf[base + 1] == ']') {
- /* conditional crap, skip both ']' ! */
- base++;
- continue;
- }
- for (i = 1; (unsigned int) base + i < use; i++) {
- if (buf[base + i] == '>') {
-#if 0
- fprintf(stderr, "found\n");
-#endif
- goto found_end_int_subset;
- }
- if (!IS_BLANK_CH(buf[base + i])) {
-#if 0
- fprintf(stderr, "not found\n");
-#endif
- goto not_end_of_int_subset;
- }
- }
-#if 0
- fprintf(stderr, "end of stream\n");
-#endif
- break;
-
- }
-not_end_of_int_subset:
- continue; /* for */
- }
- /*
- * We didn't found the end of the Internal subset
- */
- if (quote == 0)
- ctxt->checkIndex = base;
- else
- ctxt->checkIndex = 0;
-#ifdef DEBUG_PUSH
- if (next == 0)
- xmlGenericError(xmlGenericErrorContext,
- "PP: lookup of int subset end filed\n");
-#endif
- goto done;
-
-found_end_int_subset:
- ctxt->checkIndex = 0;
+ if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
+ goto done;
xmlParseInternalSubset(ctxt);
if (ctxt->instate == XML_PARSER_EOF)
goto done;
@@ -12157,7 +12066,6 @@ found_end_int_subset:
if (ctxt->instate == XML_PARSER_EOF)
goto done;
ctxt->instate = XML_PARSER_PROLOG;
- ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: entering PROLOG\n");
@@ -12257,55 +12165,6 @@ encoding_error:
return(0);
}
-/**
- * xmlParseCheckTransition:
- * @ctxt: an XML parser context
- * @chunk: a char array
- * @size: the size in byte of the chunk
- *
- * Check depending on the current parser state if the chunk given must be
- * processed immediately or one need more data to advance on parsing.
- *
- * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
- */
-static int
-xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
- if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
- return(-1);
- if (ctxt->instate == XML_PARSER_START_TAG) {
- if (memchr(chunk, '>', size) != NULL)
- return(1);
- return(0);
- }
- if (ctxt->progressive == XML_PARSER_COMMENT) {
- if (memchr(chunk, '>', size) != NULL)
- return(1);
- return(0);
- }
- if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
- if (memchr(chunk, '>', size) != NULL)
- return(1);
- return(0);
- }
- if (ctxt->progressive == XML_PARSER_PI) {
- if (memchr(chunk, '>', size) != NULL)
- return(1);
- return(0);
- }
- if (ctxt->instate == XML_PARSER_END_TAG) {
- if (memchr(chunk, '>', size) != NULL)
- return(1);
- return(0);
- }
- if ((ctxt->progressive == XML_PARSER_DTD) ||
- (ctxt->instate == XML_PARSER_DTD)) {
- if (memchr(chunk, '>', size) != NULL)
- return(1);
- return(0);
- }
- return(1);
-}
-
/**
* xmlParseChunk:
* @ctxt: an XML parser context
@@ -12322,8 +12181,6 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
int terminate) {
int end_in_lf = 0;
int remain = 0;
- size_t old_avail = 0;
- size_t avail = 0;
if (ctxt == NULL)
return(XML_ERR_INTERNAL_ERROR);
@@ -12331,6 +12188,10 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
return(ctxt->errNo);
if (ctxt->instate == XML_PARSER_EOF)
return(-1);
+ if (ctxt->input == NULL)
+ return(-1);
+
+ ctxt->progressive = 1;
if (ctxt->instate == XML_PARSER_START)
xmlDetectSAX2(ctxt);
if ((size > 0) && (chunk != NULL) && (!terminate) &&
@@ -12347,7 +12208,6 @@ xmldecl_done:
size_t cur = ctxt->input->cur - ctxt->input->base;
int res;
- old_avail = xmlBufUse(ctxt->input->buf->buffer);
/*
* Specific handling if we autodetected an encoding, we should not
* push more than the first line ... which depend on the encoding
@@ -12415,23 +12275,11 @@ xmldecl_done:
}
}
}
+
if (remain != 0) {
xmlParseTryOrFinish(ctxt, 0);
} else {
- if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
- avail = xmlBufUse(ctxt->input->buf->buffer);
- /*
- * Depending on the current state it may not be such
- * a good idea to try parsing if there is nothing in the chunk
- * which would be worth doing a parser state transition and we
- * need to wait for more data
- */
- if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
- (old_avail == 0) || (avail == 0) ||
- (xmlParseCheckTransition(ctxt,
- (const char *)&ctxt->input->base[old_avail],
- avail - old_avail)))
- xmlParseTryOrFinish(ctxt, terminate);
+ xmlParseTryOrFinish(ctxt, terminate);
}
if (ctxt->instate == XML_PARSER_EOF)
return(ctxt->errNo);
@@ -14895,6 +14743,7 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt)
#endif
ctxt->record_info = 0;
ctxt->checkIndex = 0;
+ ctxt->endCheckState = 0;
ctxt->inSubset = 0;
ctxt->errNo = XML_ERR_OK;
ctxt->depth = 0;
diff --git a/result/errors/754946.xml.str b/result/errors/754946.xml.str
index 49395b61..7aaf045b 100644
--- a/result/errors/754946.xml.str
+++ b/result/errors/754946.xml.str
@@ -1,4 +1,15 @@
-./test/errors/754946.xml:1: parser error : Extra content at the end of the document
-
+ ^
+Entity: line 1: parser error : xmlParseEntityDecl: no name
+%zz;
+ ^
+Entity: line 1:
+
+ ^
+Entity: line 1: parser error : ParsePI: PI xDOCTYPEm space expected
+%zz;
+ ^
+Entity: line 1:
+
+ ^
+./test/errors/759573-2.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
+
+%xx;ÿggKENSMYNT#MENTDŴzz;'>
+ ^
+Entity: line 2:
+
+ ^
+./test/errors/759573-2.xml:6: parser error : DOCTYPE improperly terminated
+%xx;ÿggKENSMYNT#MENTDŴzz;'>
+ ^
./test/errors/759573-2.xml : failed to parse
diff --git a/result/errors/759573.xml.str b/result/errors/759573.xml.str
index 1b6addb7..432f66b2 100644
--- a/result/errors/759573.xml.str
+++ b/result/errors/759573.xml.str
@@ -1,4 +1,30 @@
-./test/errors/759573.xml:1: parser error : Extra content at the end of the document
-%xx;
+ ^
+Entity: line 1:
+%%xx;
+ ^
+Entity: line 1:
+%%xx;
+ ^
+./test/errors/759573.xml:1: parser error : DOCTYPE improperly terminated
+T t (A)>%xx;
+ ^
./test/errors/759573.xml : failed to parse