diff --git a/HTMLparser.c b/HTMLparser.c index be6e14a2..9c9237e2 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -2441,33 +2441,36 @@ htmlSkipBogusComment(htmlParserCtxtPtr ctxt) { */ static const xmlChar * -htmlParseHTMLName(htmlParserCtxtPtr ctxt) { +htmlParseHTMLName(htmlParserCtxtPtr ctxt, int attr) { const xmlChar *ret; - int i = 0; - xmlChar loc[HTML_PARSER_BUFFER_SIZE]; + int nbchar = 0; + int c, l; + int stop = attr ? '=' : 0; + xmlChar buf[HTML_PARSER_BUFFER_SIZE]; - if (!IS_ASCII_LETTER(CUR) && (CUR != '_') && - (CUR != ':') && (CUR != '.')) return(NULL); + c = CUR_CHAR(l); + while ((c != 0) && (c != '/') && (c != '>') && + ((nbchar == 0) || (c != stop)) && + (!IS_BLANK_CH(c))) { + if (nbchar + l <= HTML_PARSER_BUFFER_SIZE) { + if ((c >= 'A') && (c <= 'Z')) { + buf[nbchar++] = c + 0x20; + } else { + COPY_BUF(buf, nbchar, c); + } + } - while ((i < HTML_PARSER_BUFFER_SIZE) && - ((IS_ASCII_LETTER(CUR)) || (IS_ASCII_DIGIT(CUR)) || - (CUR == ':') || (CUR == '-') || (CUR == '_') || - (CUR == '.'))) { - if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20; - else loc[i] = CUR; - i++; - - NEXT; + NEXTL(l); + c = CUR_CHAR(l); } - ret = xmlDictLookup(ctxt->dict, loc, i); + ret = xmlDictLookup(ctxt->dict, buf, nbchar); if (ret == NULL) htmlErrMemory(ctxt); return(ret); } - /** * htmlParseHTMLName_nonInvasive: * @ctxt: an HTML parser context @@ -2481,22 +2484,31 @@ htmlParseHTMLName(htmlParserCtxtPtr ctxt) { static const xmlChar * htmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) { + int nbchar = 0; int i = 0; - xmlChar loc[HTML_PARSER_BUFFER_SIZE]; + int c, l; + xmlChar buf[HTML_PARSER_BUFFER_SIZE]; const xmlChar *ret; + size_t avail = ctxt->input->end - ctxt->input->cur; - if (!IS_ASCII_LETTER(NXT(1)) && (NXT(1) != '_') && - (NXT(1) != ':')) return(NULL); + l = avail - i; + c = xmlGetUTF8Char(CUR_PTR + i, &l); + while ((c > 0) && (c != '/') && (c != '>') && + (!IS_BLANK_CH(c))) { + if (nbchar + l <= HTML_PARSER_BUFFER_SIZE) { + if ((c >= 'A') && (c <= 'Z')) { + buf[nbchar++] = c + 0x20; + } else { + COPY_BUF(buf, nbchar, c); + } + } - while ((i < HTML_PARSER_BUFFER_SIZE) && - ((IS_ASCII_LETTER(NXT(1+i))) || (IS_ASCII_DIGIT(NXT(1+i))) || - (NXT(1+i) == ':') || (NXT(1+i) == '-') || (NXT(1+i) == '_'))) { - if ((NXT(1+i) >= 'A') && (NXT(1+i) <= 'Z')) loc[i] = NXT(1+i) + 0x20; - else loc[i] = NXT(1+i); - i++; + i += l; + l = avail - i; + c = xmlGetUTF8Char(CUR_PTR + i, &l); } - ret = xmlDictLookup(ctxt->dict, loc, i); + ret = xmlDictLookup(ctxt->dict, buf, nbchar); if (ret == NULL) htmlErrMemory(ctxt); @@ -3636,7 +3648,7 @@ htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) { xmlChar *val = NULL; *value = NULL; - name = htmlParseHTMLName(ctxt); + name = htmlParseHTMLName(ctxt, 1); if (name == NULL) { htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, "error parsing attribute name\n", NULL, NULL); @@ -3777,7 +3789,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { maxatts = ctxt->maxatts; GROW; - name = htmlParseHTMLName(ctxt); + name = htmlParseHTMLName(ctxt, 0); if (name == NULL) { htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, "htmlParseStartTag: invalid element name\n", @@ -3970,7 +3982,7 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt) } SKIP(2); - name = htmlParseHTMLName(ctxt); + name = htmlParseHTMLName(ctxt, 0); if (name == NULL) return (0); /* diff --git a/result/HTML/doc3.htm b/result/HTML/doc3.htm index 8e45a5a4..0f3a8b79 100644 --- a/result/HTML/doc3.htm +++ b/result/HTML/doc3.htm @@ -97,7 +97,7 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0,

-

Taking a first look at the Abit Linux release called
Gentus +

Taking a first look at the Abit Linux release called
Gentus

diff --git a/result/HTML/doc3.htm.err b/result/HTML/doc3.htm.err index 5e2491da..bb9204eb 100644 --- a/result/HTML/doc3.htm.err +++ b/result/HTML/doc3.htm.err @@ -10,9 +10,6 @@ _top">

^ @@ -43,10 +40,10 @@ om/ad_static.asp?pid=2097&sid=1881&asid=7708">
./test/HTML/doc3.htm:795: HTML parser error : Unexpected end tag : iframe document.write("42DF8478957377>"); ^ -./test/HTML/doc3.htm:803: HTML parser error : End tag : expected '>' - document.write("DF8478957377>' + document.write("RIPT>"); + ^ +./test/HTML/doc3.htm:804: HTML parser error : Unexpected end tag : sc"); document.write("RIPT>"); ^ ./test/HTML/doc3.htm:811: HTML parser error : Unexpected end tag : a diff --git a/result/HTML/doc3.htm.sax b/result/HTML/doc3.htm.sax index 9237ad3c..f2009a95 100644 --- a/result/HTML/doc3.htm.sax +++ b/result/HTML/doc3.htm.sax @@ -319,8 +319,7 @@ SAX.characters( , 26) SAX.startElement(p, align='center') SAX.startElement(a, href='http://www.gentus.com/') -SAX.error: error parsing attribute name -SAX.startElement(img, align='bottom', alt='Taking a first look at the Abit Linux release called ', border='0', height='45', src='doc3_files/gentusbox.gif', width='70', gentus) +SAX.startElement(img, align='bottom', alt='Taking a first look at the Abit Linux release called ', border='0', height='45', src='doc3_files/gentusbox.gif', width='70', gentus?.?) SAX.endElement(img) SAX.endElement(a) SAX.startElement(br) @@ -2698,7 +2697,7 @@ SAX.error: Unexpected end tag : iframe SAX.cdata("); } else if ((parseI, 463) SAX.error: End tag : expected '>' -SAX.error: Unexpected end tag : sc +SAX.error: Unexpected end tag : sc"); SAX.cdata("); } else { d, 328) diff --git a/result/HTML/wired.html b/result/HTML/wired.html index 916630d8..fef7a694 100644 --- a/result/HTML/wired.html +++ b/result/HTML/wired.html @@ -462,13 +462,13 @@ or PointCast
Making the Grade
Reading, writing, and ROM.
Sponsored by U of Phoenix


-Infostructure
An IS/IT resource
Sponsored by Sprint


+Infostructure
An IS/IT resource
Sponsored by Sprint

Y2K Watch
Tick... Tick... Tick...

More Hoo-Ha
 
-
+ diff --git a/result/HTML/wired.html.err b/result/HTML/wired.html.err index 116bbd2f..6b8d8184 100644 --- a/result/HTML/wired.html.err +++ b/result/HTML/wired.html.err @@ -218,8 +218,14 @@ wired.com&BANNER=Sprint" style="text-decoration:none">Spri com&BANNER=Sprint" style="text-decoration:none">Sprint ^ ./test/HTML/wired.html:408: HTML parser error : End tag : expected '>' -=Sprint" style="text-decoration:none">SprintSprintSprint + ^ +./test/HTML/wired.html:414: HTML parser error : Opening and ending tag mismatch: td and font + + ^ ./test/HTML/wired.html:414: HTML parser error : Opening and ending tag mismatch: td and font ^ diff --git a/result/HTML/wired.html.sax b/result/HTML/wired.html.sax index bb787656..ce51ed60 100644 --- a/result/HTML/wired.html.sax +++ b/result/HTML/wired.html.sax @@ -1961,7 +1961,7 @@ SAX.endElement(font) SAX.endElement(a) SAX.endElement(i) SAX.error: End tag : expected '>' -SAX.endElement(font) +SAX.error: Unexpected end tag : font< SAX.startElement(br) SAX.endElement(br) SAX.startElement(br) @@ -2024,6 +2024,8 @@ SAX.error: Opening and ending tag mismatch: td and font SAX.endElement(font) SAX.error: Opening and ending tag mismatch: td and font SAX.endElement(font) +SAX.error: Opening and ending tag mismatch: td and font +SAX.endElement(font) SAX.endElement(td) SAX.characters( , 1)