mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-10-24 13:33:01 +03:00
html: Parse tag and attribute names according to HTML5
HTML5 allows basically all characters in tag and attribute names.
This commit is contained in:
74
HTMLparser.c
74
HTMLparser.c
@@ -2441,33 +2441,36 @@ htmlSkipBogusComment(htmlParserCtxtPtr ctxt) {
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
static const xmlChar *
|
static const xmlChar *
|
||||||
htmlParseHTMLName(htmlParserCtxtPtr ctxt) {
|
htmlParseHTMLName(htmlParserCtxtPtr ctxt, int attr) {
|
||||||
const xmlChar *ret;
|
const xmlChar *ret;
|
||||||
int i = 0;
|
int nbchar = 0;
|
||||||
xmlChar loc[HTML_PARSER_BUFFER_SIZE];
|
int c, l;
|
||||||
|
int stop = attr ? '=' : 0;
|
||||||
|
xmlChar buf[HTML_PARSER_BUFFER_SIZE];
|
||||||
|
|
||||||
if (!IS_ASCII_LETTER(CUR) && (CUR != '_') &&
|
c = CUR_CHAR(l);
|
||||||
(CUR != ':') && (CUR != '.')) return(NULL);
|
while ((c != 0) && (c != '/') && (c != '>') &&
|
||||||
|
((nbchar == 0) || (c != stop)) &&
|
||||||
while ((i < HTML_PARSER_BUFFER_SIZE) &&
|
(!IS_BLANK_CH(c))) {
|
||||||
((IS_ASCII_LETTER(CUR)) || (IS_ASCII_DIGIT(CUR)) ||
|
if (nbchar + l <= HTML_PARSER_BUFFER_SIZE) {
|
||||||
(CUR == ':') || (CUR == '-') || (CUR == '_') ||
|
if ((c >= 'A') && (c <= 'Z')) {
|
||||||
(CUR == '.'))) {
|
buf[nbchar++] = c + 0x20;
|
||||||
if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20;
|
} else {
|
||||||
else loc[i] = CUR;
|
COPY_BUF(buf, nbchar, c);
|
||||||
i++;
|
}
|
||||||
|
|
||||||
NEXT;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = xmlDictLookup(ctxt->dict, loc, i);
|
NEXTL(l);
|
||||||
|
c = CUR_CHAR(l);
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = xmlDictLookup(ctxt->dict, buf, nbchar);
|
||||||
if (ret == NULL)
|
if (ret == NULL)
|
||||||
htmlErrMemory(ctxt);
|
htmlErrMemory(ctxt);
|
||||||
|
|
||||||
return(ret);
|
return(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* htmlParseHTMLName_nonInvasive:
|
* htmlParseHTMLName_nonInvasive:
|
||||||
* @ctxt: an HTML parser context
|
* @ctxt: an HTML parser context
|
||||||
@@ -2481,22 +2484,31 @@ htmlParseHTMLName(htmlParserCtxtPtr ctxt) {
|
|||||||
|
|
||||||
static const xmlChar *
|
static const xmlChar *
|
||||||
htmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) {
|
htmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) {
|
||||||
|
int nbchar = 0;
|
||||||
int i = 0;
|
int i = 0;
|
||||||
xmlChar loc[HTML_PARSER_BUFFER_SIZE];
|
int c, l;
|
||||||
|
xmlChar buf[HTML_PARSER_BUFFER_SIZE];
|
||||||
const xmlChar *ret;
|
const xmlChar *ret;
|
||||||
|
size_t avail = ctxt->input->end - ctxt->input->cur;
|
||||||
|
|
||||||
if (!IS_ASCII_LETTER(NXT(1)) && (NXT(1) != '_') &&
|
l = avail - i;
|
||||||
(NXT(1) != ':')) return(NULL);
|
c = xmlGetUTF8Char(CUR_PTR + i, &l);
|
||||||
|
while ((c > 0) && (c != '/') && (c != '>') &&
|
||||||
while ((i < HTML_PARSER_BUFFER_SIZE) &&
|
(!IS_BLANK_CH(c))) {
|
||||||
((IS_ASCII_LETTER(NXT(1+i))) || (IS_ASCII_DIGIT(NXT(1+i))) ||
|
if (nbchar + l <= HTML_PARSER_BUFFER_SIZE) {
|
||||||
(NXT(1+i) == ':') || (NXT(1+i) == '-') || (NXT(1+i) == '_'))) {
|
if ((c >= 'A') && (c <= 'Z')) {
|
||||||
if ((NXT(1+i) >= 'A') && (NXT(1+i) <= 'Z')) loc[i] = NXT(1+i) + 0x20;
|
buf[nbchar++] = c + 0x20;
|
||||||
else loc[i] = NXT(1+i);
|
} else {
|
||||||
i++;
|
COPY_BUF(buf, nbchar, c);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = xmlDictLookup(ctxt->dict, loc, i);
|
i += l;
|
||||||
|
l = avail - i;
|
||||||
|
c = xmlGetUTF8Char(CUR_PTR + i, &l);
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = xmlDictLookup(ctxt->dict, buf, nbchar);
|
||||||
if (ret == NULL)
|
if (ret == NULL)
|
||||||
htmlErrMemory(ctxt);
|
htmlErrMemory(ctxt);
|
||||||
|
|
||||||
@@ -3636,7 +3648,7 @@ htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) {
|
|||||||
xmlChar *val = NULL;
|
xmlChar *val = NULL;
|
||||||
|
|
||||||
*value = NULL;
|
*value = NULL;
|
||||||
name = htmlParseHTMLName(ctxt);
|
name = htmlParseHTMLName(ctxt, 1);
|
||||||
if (name == NULL) {
|
if (name == NULL) {
|
||||||
htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
|
htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
|
||||||
"error parsing attribute name\n", NULL, NULL);
|
"error parsing attribute name\n", NULL, NULL);
|
||||||
@@ -3777,7 +3789,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
|
|||||||
maxatts = ctxt->maxatts;
|
maxatts = ctxt->maxatts;
|
||||||
|
|
||||||
GROW;
|
GROW;
|
||||||
name = htmlParseHTMLName(ctxt);
|
name = htmlParseHTMLName(ctxt, 0);
|
||||||
if (name == NULL) {
|
if (name == NULL) {
|
||||||
htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
|
htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
|
||||||
"htmlParseStartTag: invalid element name\n",
|
"htmlParseStartTag: invalid element name\n",
|
||||||
@@ -3970,7 +3982,7 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt)
|
|||||||
}
|
}
|
||||||
SKIP(2);
|
SKIP(2);
|
||||||
|
|
||||||
name = htmlParseHTMLName(ctxt);
|
name = htmlParseHTMLName(ctxt, 0);
|
||||||
if (name == NULL)
|
if (name == NULL)
|
||||||
return (0);
|
return (0);
|
||||||
/*
|
/*
|
||||||
|
|||||||
@@ -97,7 +97,7 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0,
|
|||||||
</p>
|
</p>
|
||||||
</td>
|
</td>
|
||||||
<td valign="top" width="15%">
|
<td valign="top" width="15%">
|
||||||
<p align="center"><a href="http://www.gentus.com/"><img align="bottom" alt="Taking a first look at the Abit Linux release called " border="0" height="45" src="doc3_files/gentusbox.gif" width="70" gentus></a><br><a href="http://www.gentus.com/"><font color="white" face="Verdana" size="1">Gentus</font></a>
|
<p align="center"><a href="http://www.gentus.com/"><img align="bottom" alt="Taking a first look at the Abit Linux release called " border="0" height="45" src="doc3_files/gentusbox.gif" width="70" gentus?.?></a><br><a href="http://www.gentus.com/"><font color="white" face="Verdana" size="1">Gentus</font></a>
|
||||||
</p>
|
</p>
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
|
|||||||
@@ -10,9 +10,6 @@ _top"><img src="http://ads.gamesquad.net/addclick.exe/adcycle.cgi?group=52&media
|
|||||||
./test/HTML/doc3.htm:52: HTML parser error : htmlParseEntityRef: expecting ';'
|
./test/HTML/doc3.htm:52: HTML parser error : htmlParseEntityRef: expecting ';'
|
||||||
><img src="http://ads.gamesquad.net/addclick.exe/adcycle.cgi?group=52&media=1&id
|
><img src="http://ads.gamesquad.net/addclick.exe/adcycle.cgi?group=52&media=1&id
|
||||||
^
|
^
|
||||||
./test/HTML/doc3.htm:145: HTML parser error : error parsing attribute name
|
|
||||||
width=70 Gentus?.?></A><BR><A
|
|
||||||
^
|
|
||||||
./test/HTML/doc3.htm:148: HTML parser error : Unexpected end tag : p
|
./test/HTML/doc3.htm:148: HTML parser error : Unexpected end tag : p
|
||||||
</P></TD></TR></TBODY></TABLE></CENTER></TD></TR></TBODY></TABLE></CENTER></P>
|
</P></TD></TR></TBODY></TABLE></CENTER></TD></TR></TBODY></TABLE></CENTER></P>
|
||||||
^
|
^
|
||||||
@@ -43,10 +40,10 @@ om/ad_static.asp?pid=2097&sid=1881&asid=7708"></a></IFRAME></CENTER></LI></FONT>
|
|||||||
./test/HTML/doc3.htm:795: HTML parser error : Unexpected end tag : iframe
|
./test/HTML/doc3.htm:795: HTML parser error : Unexpected end tag : iframe
|
||||||
document.write("42DF8478957377></IFRAME>");
|
document.write("42DF8478957377></IFRAME>");
|
||||||
^
|
^
|
||||||
./test/HTML/doc3.htm:803: HTML parser error : End tag : expected '>'
|
./test/HTML/doc3.htm:804: HTML parser error : End tag : expected '>'
|
||||||
document.write("DF8478957377></SC");
|
document.write("RIPT>");
|
||||||
^
|
^
|
||||||
./test/HTML/doc3.htm:804: HTML parser error : Unexpected end tag : sc
|
./test/HTML/doc3.htm:804: HTML parser error : Unexpected end tag : sc");
|
||||||
document.write("RIPT>");
|
document.write("RIPT>");
|
||||||
^
|
^
|
||||||
./test/HTML/doc3.htm:811: HTML parser error : Unexpected end tag : a
|
./test/HTML/doc3.htm:811: HTML parser error : Unexpected end tag : a
|
||||||
|
|||||||
@@ -319,8 +319,7 @@ SAX.characters(
|
|||||||
, 26)
|
, 26)
|
||||||
SAX.startElement(p, align='center')
|
SAX.startElement(p, align='center')
|
||||||
SAX.startElement(a, href='http://www.gentus.com/')
|
SAX.startElement(a, href='http://www.gentus.com/')
|
||||||
SAX.error: error parsing attribute name
|
SAX.startElement(img, align='bottom', alt='Taking a first look at the Abit Linux release called ', border='0', height='45', src='doc3_files/gentusbox.gif', width='70', gentus?.?)
|
||||||
SAX.startElement(img, align='bottom', alt='Taking a first look at the Abit Linux release called ', border='0', height='45', src='doc3_files/gentusbox.gif', width='70', gentus)
|
|
||||||
SAX.endElement(img)
|
SAX.endElement(img)
|
||||||
SAX.endElement(a)
|
SAX.endElement(a)
|
||||||
SAX.startElement(br)
|
SAX.startElement(br)
|
||||||
@@ -2698,7 +2697,7 @@ SAX.error: Unexpected end tag : iframe
|
|||||||
if ((, 532)
|
if ((, 532)
|
||||||
SAX.error: Unexpected end tag : iframe
|
SAX.error: Unexpected end tag : iframe
|
||||||
SAX.cdata(");
|
SAX.cdata(");
|
||||||
} else if ((parseI, 463)
|
} else if ((parseI, 463)
|
||||||
SAX.error: End tag : expected '>'
|
SAX.error: End tag : expected '>'
|
||||||
SAX.error: Unexpected end tag : sc");
|
SAX.error: Unexpected end tag : sc");
|
||||||
SAX.cdata(");
|
SAX.cdata(");
|
||||||
|
|||||||
@@ -462,13 +462,13 @@ or <a href="/news/pointcast/0,1366,,00.html">PointCast</a></font><br>
|
|||||||
|
|
||||||
<font size="2" face="Arial,Helvetica, sans-serif"><b><a href="/news/school/0,1383,,00.html">Making the Grade</a></b></font><br><font size="2" face="Arial, Helvetica, sans-serif"><font size="1" face="Arial, Geneva, sans-serif" color="#000000">Reading, writing, and ROM. <br><i>Sponsored by <a href="http://r.hotwired.com/r/wn_sch_r_nav_uop/http://ads25.focalink.com/SmartBanner/page?12630.53" style="text-decoration:none"><font color="#000000">U of Phoenix</font></a></i></font><br><br>
|
<font size="2" face="Arial,Helvetica, sans-serif"><b><a href="/news/school/0,1383,,00.html">Making the Grade</a></b></font><br><font size="2" face="Arial, Helvetica, sans-serif"><font size="1" face="Arial, Geneva, sans-serif" color="#000000">Reading, writing, and ROM. <br><i>Sponsored by <a href="http://r.hotwired.com/r/wn_sch_r_nav_uop/http://ads25.focalink.com/SmartBanner/page?12630.53" style="text-decoration:none"><font color="#000000">U of Phoenix</font></a></i></font><br><br>
|
||||||
|
|
||||||
<font size="2" face="Arial,Helvetica, sans-serif"><b><a href="/news/infostructure/0,1377,,00.html">Infostructure</a></b></font><br><font size="1" face="Arial, Helvetica, sans-serif" color="#000000">An IS/IT resource <br><i>Sponsored by <a href="http://r.wired.com/r/wn_is_r_ssec/http://ad.doubleclick.net/clk;653163;3599571;s?http://www.sprintbiz.com/s%0Aervlet/appservlet?from=/wired/sprint/&template=/security/security.html&SITE=%0Awired.com&BANNER=Sprint" style="text-decoration:none"><font color="#000000">Sprint</font></a></i></font><br><br>
|
<font size="2" face="Arial,Helvetica, sans-serif"><b><a href="/news/infostructure/0,1377,,00.html">Infostructure</a></b></font><br><font size="1" face="Arial, Helvetica, sans-serif" color="#000000">An IS/IT resource <br><i>Sponsored by <a href="http://r.wired.com/r/wn_is_r_ssec/http://ad.doubleclick.net/clk;653163;3599571;s?http://www.sprintbiz.com/s%0Aervlet/appservlet?from=/wired/sprint/&template=/security/security.html&SITE=%0Awired.com&BANNER=Sprint" style="text-decoration:none"><font color="#000000">Sprint</font></a></i><br><br>
|
||||||
|
|
||||||
<font size="2" face="Arial,Helvetica, sans-serif"><b><a href="/news/y2k/0,1360,,00.html">Y2K Watch</a></b></font><br><font size="2" face="Arial, Helvetica, sans-serif"><font size="1" face="Arial, Geneva, sans-serif" color="#000000">Tick... Tick... Tick...</font><br><br>
|
<font size="2" face="Arial,Helvetica, sans-serif"><b><a href="/news/y2k/0,1360,,00.html">Y2K Watch</a></b></font><br><font size="2" face="Arial, Helvetica, sans-serif"><font size="1" face="Arial, Geneva, sans-serif" color="#000000">Tick... Tick... Tick...</font><br><br>
|
||||||
|
|
||||||
<font face="Arial, Helvetica, sans-serif" size="2"><b><i><a href="/news/special_reports/1,1293,,00.html">More Hoo-Ha</a></i></b></font><br> <br>
|
<font face="Arial, Helvetica, sans-serif" size="2"><b><i><a href="/news/special_reports/1,1293,,00.html">More Hoo-Ha</a></i></b></font><br> <br>
|
||||||
|
|
||||||
</font></font></font></font></font></font></font></font></font>
|
</font></font></font></font></font></font></font></font></font></font>
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
<!-- start of Gen News -->
|
<!-- start of Gen News -->
|
||||||
|
|||||||
@@ -218,7 +218,13 @@ wired.com&BANNER=Sprint" style="text-decoration:none"><font color="#000000">Spri
|
|||||||
com&BANNER=Sprint" style="text-decoration:none"><font color="#000000">Sprint</a>
|
com&BANNER=Sprint" style="text-decoration:none"><font color="#000000">Sprint</a>
|
||||||
^
|
^
|
||||||
./test/HTML/wired.html:408: HTML parser error : End tag : expected '>'
|
./test/HTML/wired.html:408: HTML parser error : End tag : expected '>'
|
||||||
=Sprint" style="text-decoration:none"><font color="#000000">Sprint</a></i></font
|
Sprint" style="text-decoration:none"><font color="#000000">Sprint</a></i></font<
|
||||||
|
^
|
||||||
|
./test/HTML/wired.html:408: HTML parser error : Unexpected end tag : font<
|
||||||
|
" style="text-decoration:none"><font color="#000000">Sprint</a></i></font</font>
|
||||||
|
^
|
||||||
|
./test/HTML/wired.html:414: HTML parser error : Opening and ending tag mismatch: td and font
|
||||||
|
</td>
|
||||||
^
|
^
|
||||||
./test/HTML/wired.html:414: HTML parser error : Opening and ending tag mismatch: td and font
|
./test/HTML/wired.html:414: HTML parser error : Opening and ending tag mismatch: td and font
|
||||||
</td>
|
</td>
|
||||||
|
|||||||
@@ -1961,7 +1961,7 @@ SAX.endElement(font)
|
|||||||
SAX.endElement(a)
|
SAX.endElement(a)
|
||||||
SAX.endElement(i)
|
SAX.endElement(i)
|
||||||
SAX.error: End tag : expected '>'
|
SAX.error: End tag : expected '>'
|
||||||
SAX.endElement(font)
|
SAX.error: Unexpected end tag : font<
|
||||||
SAX.startElement(br)
|
SAX.startElement(br)
|
||||||
SAX.endElement(br)
|
SAX.endElement(br)
|
||||||
SAX.startElement(br)
|
SAX.startElement(br)
|
||||||
@@ -2024,6 +2024,8 @@ SAX.error: Opening and ending tag mismatch: td and font
|
|||||||
SAX.endElement(font)
|
SAX.endElement(font)
|
||||||
SAX.error: Opening and ending tag mismatch: td and font
|
SAX.error: Opening and ending tag mismatch: td and font
|
||||||
SAX.endElement(font)
|
SAX.endElement(font)
|
||||||
|
SAX.error: Opening and ending tag mismatch: td and font
|
||||||
|
SAX.endElement(font)
|
||||||
SAX.endElement(td)
|
SAX.endElement(td)
|
||||||
SAX.characters(
|
SAX.characters(
|
||||||
, 1)
|
, 1)
|
||||||
|
|||||||
Reference in New Issue
Block a user