From e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7 Mon Sep 17 00:00:00 2001 From: Daniel Veillard Date: Fri, 29 Jan 2010 20:47:08 +0100 Subject: [PATCH] Fix xmlParseInNodeContext for HTML content xmlParseInNodeContext notices that the enclosing document is an HTML document, so invoke the HTML parser for that fragment, and the HTML parser finding a "<p>hello world!</p>" document automatically augment it with defaulted <html> and <body>. This defaulting should be turned off in the HTML parser for this to work, but there is no such HTML parser option. There is an htmlOmittedDefaultValue global variable that you could use, but really we should not rely on global variable for processing options anymore, best is to add an HTML_PARSE_NOIMPLIED. * include/libxml/HTMLparser.h: add the HTML_PARSE_NOIMPLIED parser flag * HTMLparser.c: do not add implied element if HTML_PARSE_NOIMPLIED is set * parser.c: add HTML_PARSE_NOIMPLIED to options for xmlParseInNodeContext on HTML documents --- HTMLparser.c | 2 ++ include/libxml/HTMLparser.h | 1 + parser.c | 8 +++++++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/HTMLparser.c b/HTMLparser.c index 9e275a29..3d4831ce 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -1394,6 +1394,8 @@ static void htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) { int i; + if (ctxt->options & HTML_PARSE_NOIMPLIED) + return; if (!htmlOmittedDefaultValue) return; if (xmlStrEqual(newtag, BAD_CAST"html")) diff --git a/include/libxml/HTMLparser.h b/include/libxml/HTMLparser.h index 05905e4b..cde0ac6d 100644 --- a/include/libxml/HTMLparser.h +++ b/include/libxml/HTMLparser.h @@ -182,6 +182,7 @@ typedef enum { HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */ HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */ HTML_PARSE_NONET = 1<<11,/* Forbid network access */ + HTML_PARSE_NOIMPLIED= 1<<13,/* Do not add implied html/body...
elements */ HTML_PARSE_COMPACT = 1<<16 /* compact small text nodes */ } htmlParserOption; diff --git a/parser.c b/parser.c index c779c1d0..a63c6682 100644 --- a/parser.c +++ b/parser.c @@ -12870,8 +12870,14 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, if (doc->type == XML_DOCUMENT_NODE) ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); #ifdef LIBXML_HTML_ENABLED - else if (doc->type == XML_HTML_DOCUMENT_NODE) + else if (doc->type == XML_HTML_DOCUMENT_NODE) { ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen); + /* + * When parsing in context, it makes no sense to add implied + * elements like html/body/etc... + */ + options |= HTML_PARSE_NOIMPLIED; + } #endif else return(XML_ERR_INTERNAL_ERROR);