From 890fd9f9f3ed4354a6ec34d075537b732bea075a Mon Sep 17 00:00:00 2001 From: Daniel Veillard Date: Fri, 27 Oct 2006 12:53:28 +0000 Subject: [PATCH] applied a reworked version of Usamah Malik patch to avoid growing the * HTMLparser.c: applied a reworked version of Usamah Malik patch to avoid growing the parser stack in some autoclose cases, should fix #361221 Daniel --- ChangeLog | 6 ++++++ HTMLparser.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/ChangeLog b/ChangeLog index 5a714f3a..8f5fb77d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +Fri Oct 27 14:54:07 CEST 2006 Daniel Veillard + + * HTMLparser.c: applied a reworked version of Usamah Malik patch + to avoid growing the parser stack in some autoclose cases, should + fix #361221 + Thu Oct 26 10:54:40 CEST 2006 Daniel Veillard * xpath.c: William spotted an obvious bug diff --git a/HTMLparser.c b/HTMLparser.c index 5e23ad72..f2d1bd2d 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -2205,6 +2205,38 @@ htmlParseHTMLName(htmlParserCtxtPtr ctxt) { return(xmlDictLookup(ctxt->dict, loc, i)); } + +/** + * htmlParseHTMLName_nonInvasive: + * @ctxt: an HTML parser context + * + * Look ahead at the HTML tag or attribute name starting at NXT(1), lowercasing + * ASCII letters A-Z since HTML names are not case-sensitive. Nothing is consumed + * from the stream; the name is cut off after HTML_PARSER_BUFFER_SIZE characters. + * + * Returns the name as looked up in ctxt->dict, or NULL if NXT(1) cannot start one + */ + +static const xmlChar * +htmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) { + int i = 0; + xmlChar loc[HTML_PARSER_BUFFER_SIZE]; + + if (!IS_ASCII_LETTER(NXT(1)) && (NXT(1) != '_') && + (NXT(1) != ':')) return(NULL); /* NXT(1) is not a valid first name character */ + + while ((i < HTML_PARSER_BUFFER_SIZE) && + ((IS_ASCII_LETTER(NXT(1+i))) || (IS_ASCII_DIGIT(NXT(1+i))) || + (NXT(1+i) == ':') || (NXT(1+i) == '-') || (NXT(1+i) == '_'))) { + if ((NXT(1+i) >= 'A') && (NXT(1+i) <= 'Z')) loc[i] = NXT(1+i) + 0x20; /* ASCII upper -> lower */ + else loc[i] = NXT(1+i); + i++; + } + + return(xmlDictLookup(ctxt->dict, loc, i)); 
+} + + /** * htmlParseName: * @ctxt: an HTML parser context @@ -3775,6 +3807,7 @@ static void htmlParseContent(htmlParserCtxtPtr ctxt) { xmlChar *currentNode; int depth; + const xmlChar *name; currentNode = xmlStrdup(ctxt->name); depth = ctxt->nameNr; @@ -3795,6 +3828,31 @@ htmlParseContent(htmlParserCtxtPtr ctxt) { continue; /* while */ } + else if ((CUR == '<') && + ((IS_ASCII_LETTER(NXT(1))) || + (NXT(1) == '_') || (NXT(1) == ':'))) { + name = htmlParseHTMLName_nonInvasive(ctxt); + if (name == NULL) { + htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, + "htmlParseStartTag: invalid element name\n", + NULL, NULL); + /* Dump the bogus tag like browsers do */ + while ((IS_CHAR_CH(CUR)) && (CUR != '>')) + NEXT; + + if (currentNode != NULL) + xmlFree(currentNode); + return; + } + + if (ctxt->name != NULL) { + if (htmlCheckAutoClose(name, ctxt->name) == 1) { + htmlAutoClose(ctxt, name); + continue; + } + } + } + /* * Has this node been popped out during parsing of * the next element