From 446e126de5315702f1f9d04614af7aca42f36143 Mon Sep 17 00:00:00 2001 From: Jiri Netolicky Date: Fri, 7 Aug 2009 17:05:36 +0200 Subject: [PATCH] =?UTF-8?q?576368=20=E2=80=93=20htmlChunkParser=20with=20s?= =?UTF-8?q?pecial=20attributes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * HTMLparser.c: htmlChunkParsing failed when a chunk ends inside an element, after an attribute whose value contains a '>' character. --- HTMLparser.c | 53 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/HTMLparser.c b/HTMLparser.c index 5e44d7ad..da17efe5 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -4521,11 +4521,14 @@ htmlCreateDocParserCtxt(const xmlChar *cur, const char *encoding) { */ static int htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first, - xmlChar next, xmlChar third, int iscomment) { + xmlChar next, xmlChar third, int iscomment, + int ignoreattrval) { int base, len; htmlParserInputPtr in; const xmlChar *buf; int incomment = 0; + int invalue = 0; + char valdellim = 0x0; in = ctxt->input; if (in == NULL) return(-1); @@ -4552,6 +4555,22 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first, base += 2; } } + if (ignoreattrval) { + if (buf[base] == '"' || buf[base] == '\'') { + if (invalue) { + if (buf[base] == valdellim) { + invalue = 0; + continue; + } + } else { + valdellim = buf[base]; + invalue = 1; + continue; + } + } else if (invalue) { + continue; + } + } if (incomment) { if (base + 3 > len) return(-1); @@ -4731,7 +4750,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { (UPP(6) == 'Y') && (UPP(7) == 'P') && (UPP(8) == 'E')) { if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) + (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -4764,7 +4783,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { if ((cur == '<') 
&& (next == '!') && (in->cur[2] == '-') && (in->cur[3] == '-')) { if ((!terminate) && - (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0)) + (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -4774,7 +4793,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { ctxt->instate = XML_PARSER_MISC; } else if ((cur == '<') && (next == '?')) { if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) + (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -4788,7 +4807,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { (UPP(6) == 'Y') && (UPP(7) == 'P') && (UPP(8) == 'E')) { if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) + (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -4824,7 +4843,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { if ((cur == '<') && (next == '!') && (in->cur[2] == '-') && (in->cur[3] == '-')) { if ((!terminate) && - (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0)) + (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -4834,7 +4853,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { ctxt->instate = XML_PARSER_PROLOG; } else if ((cur == '<') && (next == '?')) { if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) + (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -4871,7 +4890,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { if ((cur == '<') && (next == '!') && (in->cur[2] == '-') && (in->cur[3] == '-')) { if ((!terminate) && - (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0)) + (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 
1) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -4881,7 +4900,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { ctxt->instate = XML_PARSER_EPILOG; } else if ((cur == '<') && (next == '?')) { if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) + (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -4931,7 +4950,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { break; } if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) + (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) goto done; failed = htmlParseStartTag(ctxt); @@ -5056,7 +5075,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { int idx; xmlChar val; - idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0); + idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0, 1); if (idx < 0) goto done; val = in->cur[idx + 2]; @@ -5083,7 +5102,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { (UPP(6) == 'Y') && (UPP(7) == 'P') && (UPP(8) == 'E')) { if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) + (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) goto done; htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, "Misplaced DOCTYPE declaration\n", @@ -5093,7 +5112,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { (in->cur[2] == '-') && (in->cur[3] == '-')) { if ((!terminate) && (htmlParseLookupSequence( - ctxt, '-', '-', '>', 1) < 0)) + ctxt, '-', '-', '>', 1, 1) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -5103,7 +5122,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { ctxt->instate = XML_PARSER_CONTENT; } else if ((cur == '<') && (next == '?')) { if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) + (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) goto done; #ifdef DEBUG_PUSH 
xmlGenericError(xmlGenericErrorContext, @@ -5131,7 +5150,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { break; } else if (cur == '&') { if ((!terminate) && - (htmlParseLookupSequence(ctxt, ';', 0, 0, 0) < 0)) + (htmlParseLookupSequence(ctxt, ';', 0, 0, 0, 1) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -5147,7 +5166,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { * data detection. */ if ((!terminate) && - (htmlParseLookupSequence(ctxt, '<', 0, 0, 0) < 0)) + (htmlParseLookupSequence(ctxt, '<', 0, 0, 0, 1) < 0)) goto done; ctxt->checkIndex = 0; #ifdef DEBUG_PUSH @@ -5173,7 +5192,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { if (avail < 2) goto done; if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) + (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) goto done; htmlParseEndTag(ctxt); if (ctxt->nameNr == 0) {