1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-28 00:21:53 +03:00

patch from Arnold Hendriks improving parsing of html within html bogus data

* HTMLparser.c: patch from Arnold Hendriks improving parsing of
  html within html bogus data, still not a complete fix though
Daniel

svn path=/trunk/; revision=3704
This commit is contained in:
Daniel Veillard
2008-03-12 21:43:39 +00:00
parent 5d279c95f8
commit 35fcbb84d2
2 changed files with 21 additions and 15 deletions

View File

@ -1,3 +1,8 @@
Wed Mar 12 18:56:22 CET 2008 Daniel Veillard <daniel@veillard.com>
* HTMLparser.c: patch from Arnold Hendriks improving parsing of
html within html bogus data, still not a complete fix though
Wed Mar 12 10:22:01 CET 2008 Daniel Veillard <daniel@veillard.com> Wed Mar 12 10:22:01 CET 2008 Daniel Veillard <daniel@veillard.com>
* python/types.c: fix a memory error when using namespace nodes * python/types.c: fix a memory error when using namespace nodes

View File

@ -3423,7 +3423,7 @@ htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) {
* *
* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
* *
* Returns 0 in case of success and -1 in case of error. * Returns 0 in case of success, -1 in case of error and 1 if discarded
*/ */
static int static int
@ -3436,6 +3436,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
int maxatts; int maxatts;
int meta = 0; int meta = 0;
int i; int i;
int discardtag = 0;
if ((ctxt == NULL) || (ctxt->input == NULL)) { if ((ctxt == NULL) || (ctxt->input == NULL)) {
htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
@ -3480,14 +3481,14 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
"htmlParseStartTag: misplaced <html> tag\n", "htmlParseStartTag: misplaced <html> tag\n",
name, NULL); name, NULL);
return 0; discardtag = 1;
} }
if ((ctxt->nameNr != 1) && if ((ctxt->nameNr != 1) &&
(xmlStrEqual(name, BAD_CAST"head"))) { (xmlStrEqual(name, BAD_CAST"head"))) {
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
"htmlParseStartTag: misplaced <head> tag\n", "htmlParseStartTag: misplaced <head> tag\n",
name, NULL); name, NULL);
return 0; discardtag = 1;
} }
if (xmlStrEqual(name, BAD_CAST"body")) { if (xmlStrEqual(name, BAD_CAST"body")) {
int indx; int indx;
@ -3496,9 +3497,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
"htmlParseStartTag: misplaced <body> tag\n", "htmlParseStartTag: misplaced <body> tag\n",
name, NULL); name, NULL);
while ((IS_CHAR_CH(CUR)) && (CUR != '>')) discardtag = 1;
NEXT;
return 0;
} }
} }
} }
@ -3597,12 +3596,14 @@ failed:
/* /*
* SAX: Start of Element ! * SAX: Start of Element !
*/ */
htmlnamePush(ctxt, name); if (!discardtag) {
if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) { htmlnamePush(ctxt, name);
if (nbatts != 0) if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) {
ctxt->sax->startElement(ctxt->userData, name, atts); if (nbatts != 0)
else ctxt->sax->startElement(ctxt->userData, name, atts);
ctxt->sax->startElement(ctxt->userData, name, NULL); else
ctxt->sax->startElement(ctxt->userData, name, NULL);
}
} }
if (atts != NULL) { if (atts != NULL) {
@ -3612,7 +3613,7 @@ failed:
} }
} }
return 0; return(discardtag);
} }
/** /**
@ -3991,7 +3992,7 @@ htmlParseElement(htmlParserCtxtPtr ctxt) {
failed = htmlParseStartTag(ctxt); failed = htmlParseStartTag(ctxt);
name = ctxt->name; name = ctxt->name;
if (failed || (name == NULL)) { if ((failed == -1) || (name == NULL)) {
if (CUR == '>') if (CUR == '>')
NEXT; NEXT;
return; return;
@ -4893,7 +4894,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
failed = htmlParseStartTag(ctxt); failed = htmlParseStartTag(ctxt);
name = ctxt->name; name = ctxt->name;
if (failed || if ((failed == -1) ||
(name == NULL)) { (name == NULL)) {
if (CUR == '>') if (CUR == '>')
NEXT; NEXT;