mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-07-28 00:21:53 +03:00
patch from Arnold Hendriks improving parsing of html within html bogus data
* HTMLparser.c: patch from Arnold Hendriks improving parsing of html within html bogus data; still not a complete fix though. Daniel. svn path=/trunk/; revision=3704
This commit is contained in:
@ -1,3 +1,8 @@
|
|||||||
|
Wed Mar 12 18:56:22 CET 2008 Daniel Veillard <daniel@veillard.com>
|
||||||
|
|
||||||
|
* HTMLparser.c: patch from Arnold Hendriks improving parsing of
|
||||||
|
html within html bogus data, still not a complete fix though
|
||||||
|
|
||||||
Wed Mar 12 10:22:01 CET 2008 Daniel Veillard <daniel@veillard.com>
|
Wed Mar 12 10:22:01 CET 2008 Daniel Veillard <daniel@veillard.com>
|
||||||
|
|
||||||
* python/types.c: fix a memory error when using namespace nodes
|
* python/types.c: fix a memory error when using namespace nodes
|
||||||
|
31
HTMLparser.c
31
HTMLparser.c
@ -3423,7 +3423,7 @@ htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) {
|
|||||||
*
|
*
|
||||||
* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
|
* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
|
||||||
*
|
*
|
||||||
* Returns 0 in case of success and -1 in case of error.
|
* Returns 0 in case of success, -1 in case of error and 1 if discarded
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int
|
static int
|
||||||
@ -3436,6 +3436,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
|
|||||||
int maxatts;
|
int maxatts;
|
||||||
int meta = 0;
|
int meta = 0;
|
||||||
int i;
|
int i;
|
||||||
|
int discardtag = 0;
|
||||||
|
|
||||||
if ((ctxt == NULL) || (ctxt->input == NULL)) {
|
if ((ctxt == NULL) || (ctxt->input == NULL)) {
|
||||||
htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
|
htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
|
||||||
@ -3480,14 +3481,14 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
|
|||||||
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
||||||
"htmlParseStartTag: misplaced <html> tag\n",
|
"htmlParseStartTag: misplaced <html> tag\n",
|
||||||
name, NULL);
|
name, NULL);
|
||||||
return 0;
|
discardtag = 1;
|
||||||
}
|
}
|
||||||
if ((ctxt->nameNr != 1) &&
|
if ((ctxt->nameNr != 1) &&
|
||||||
(xmlStrEqual(name, BAD_CAST"head"))) {
|
(xmlStrEqual(name, BAD_CAST"head"))) {
|
||||||
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
||||||
"htmlParseStartTag: misplaced <head> tag\n",
|
"htmlParseStartTag: misplaced <head> tag\n",
|
||||||
name, NULL);
|
name, NULL);
|
||||||
return 0;
|
discardtag = 1;
|
||||||
}
|
}
|
||||||
if (xmlStrEqual(name, BAD_CAST"body")) {
|
if (xmlStrEqual(name, BAD_CAST"body")) {
|
||||||
int indx;
|
int indx;
|
||||||
@ -3496,9 +3497,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
|
|||||||
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
||||||
"htmlParseStartTag: misplaced <body> tag\n",
|
"htmlParseStartTag: misplaced <body> tag\n",
|
||||||
name, NULL);
|
name, NULL);
|
||||||
while ((IS_CHAR_CH(CUR)) && (CUR != '>'))
|
discardtag = 1;
|
||||||
NEXT;
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -3597,12 +3596,14 @@ failed:
|
|||||||
/*
|
/*
|
||||||
* SAX: Start of Element !
|
* SAX: Start of Element !
|
||||||
*/
|
*/
|
||||||
htmlnamePush(ctxt, name);
|
if (!discardtag) {
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) {
|
htmlnamePush(ctxt, name);
|
||||||
if (nbatts != 0)
|
if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) {
|
||||||
ctxt->sax->startElement(ctxt->userData, name, atts);
|
if (nbatts != 0)
|
||||||
else
|
ctxt->sax->startElement(ctxt->userData, name, atts);
|
||||||
ctxt->sax->startElement(ctxt->userData, name, NULL);
|
else
|
||||||
|
ctxt->sax->startElement(ctxt->userData, name, NULL);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (atts != NULL) {
|
if (atts != NULL) {
|
||||||
@ -3612,7 +3613,7 @@ failed:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return(discardtag);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -3991,7 +3992,7 @@ htmlParseElement(htmlParserCtxtPtr ctxt) {
|
|||||||
|
|
||||||
failed = htmlParseStartTag(ctxt);
|
failed = htmlParseStartTag(ctxt);
|
||||||
name = ctxt->name;
|
name = ctxt->name;
|
||||||
if (failed || (name == NULL)) {
|
if ((failed == -1) || (name == NULL)) {
|
||||||
if (CUR == '>')
|
if (CUR == '>')
|
||||||
NEXT;
|
NEXT;
|
||||||
return;
|
return;
|
||||||
@ -4893,7 +4894,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
|
|
||||||
failed = htmlParseStartTag(ctxt);
|
failed = htmlParseStartTag(ctxt);
|
||||||
name = ctxt->name;
|
name = ctxt->name;
|
||||||
if (failed ||
|
if ((failed == -1) ||
|
||||||
(name == NULL)) {
|
(name == NULL)) {
|
||||||
if (CUR == '>')
|
if (CUR == '>')
|
||||||
NEXT;
|
NEXT;
|
||||||
|
Reference in New Issue
Block a user