From 9efe141422f062685eb13c9742d0010de1a31ba8 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Fri, 31 Jan 2025 13:07:35 +0100 Subject: [PATCH] parser: Fix detection of ']]>' when push-parsing Fixes #850. --- parser.c | 15 +++++++++---- testparser.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 69 insertions(+), 5 deletions(-) diff --git a/parser.c b/parser.c index 1b8933eb..ccfa9e53 100644 --- a/parser.c +++ b/parser.c @@ -4955,9 +4955,11 @@ get_more: ctxt->input->cur = in + 1; return; } - in++; - ctxt->input->col++; - goto get_more; + if ((!partial) || (ctxt->input->end - in >= 2)) { + in++; + ctxt->input->col++; + goto get_more; + } } nbchar = in - ctxt->input->cur; if (nbchar > 0) { @@ -5008,6 +5010,9 @@ get_more: if (*in == '&') { return; } + if ((partial) && (*in == ']') && (ctxt->input->end - in < 2)) { + return; + } SHRINK; GROW; in = ctxt->input->cur; @@ -5038,6 +5043,8 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) { cur = xmlCurrentCharRecover(ctxt, &l); while ((cur != '<') && /* checked */ (cur != '&') && + ((!partial) || (cur != ']') || + (ctxt->input->end - ctxt->input->cur >= 2)) && (IS_CHAR(cur))) { if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); @@ -5102,7 +5109,7 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) { "Incomplete UTF-8 sequence starting with %02X\n", CUR); NEXTL(1); } - } else if ((cur != '<') && (cur != '&')) { + } else if ((cur != '<') && (cur != '&') && (cur != ']')) { /* Generate the error and skip the offending character */ xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, "PCDATA invalid Char value %d\n", cur); diff --git a/testparser.c b/testparser.c index 561f644f..a59b3e53 100644 --- a/testparser.c +++ b/testparser.c @@ -372,7 +372,63 @@ testHugeEncodedChunk(void) { return err; } -#endif + +static int +testPushCDataEnd(void) { + int err = 0; + int k; + + for (k = 0; k < 2; k++) { + xmlBufferPtr buf; + 
xmlChar *chunk; + xmlParserCtxtPtr ctxt; + int i; + + ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL); + xmlCtxtSetOptions(ctxt, XML_PARSE_NOERROR); + + /* + * Push parse text data with ']]>' split across chunks. + */ + buf = xmlBufferCreate(); + xmlBufferCCat(buf, "<doc>"); + + /* + * Also test xmlParseCharDataComplex + */ + if (k == 0) + xmlBufferCCat(buf, "x"); + else + xmlBufferCCat(buf, "\xC3\xA4"); + + /* + * Create enough data to trigger a "characters" SAX callback. + * (XML_PARSER_BIG_BUFFER_SIZE = 300) + */ + for (i = 0; i < 2000; i++) + xmlBufferCCat(buf, "x"); + + xmlBufferCCat(buf, "]"); + chunk = xmlBufferDetach(buf); + xmlBufferFree(buf); + + xmlParseChunk(ctxt, (char *) chunk, xmlStrlen(chunk), 0); + xmlParseChunk(ctxt, "]>xxx</doc>", 11, 1); /* 11 == length of "]>xxx</doc>" */ + + if (ctxt->errNo != XML_ERR_MISPLACED_CDATA_END) { + fprintf(stderr, "xmlParseChunk failed to detect CData end: %d\n", + ctxt->errNo); + err = 1; + } + + xmlFree(chunk); + xmlFreeDoc(ctxt->myDoc); + xmlFreeParserCtxt(ctxt); + } + + return err; +} +#endif /* PUSH */ #ifdef LIBXML_HTML_ENABLED static int @@ -999,6 +1055,7 @@ main(void) { #ifdef LIBXML_PUSH_ENABLED err |= testHugePush(); err |= testHugeEncodedChunk(); + err |= testPushCDataEnd(); #endif #ifdef LIBXML_HTML_ENABLED err |= testHtmlIds();