mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-07-28 00:21:53 +03:00
Fix xmlParseInNodeContext for HTML content
xmlParseInNodeContext notices that the enclosing document is an HTML document, so it invokes the HTML parser for that fragment, and the HTML parser, finding a "<p>hello world!</p>" document, automatically augments it with defaulted <html> and <body> elements. This defaulting should be turned off in the HTML parser for this to work, but there is no such HTML parser option. There is an htmlOmittedDefaultValue global variable that you could use, but really we should not rely on global variables for processing options anymore; the best fix is to add an HTML_PARSE_NOIMPLIED option. * include/libxml/HTMLparser.h: add the HTML_PARSE_NOIMPLIED parser flag * HTMLparser.c: do not add implied elements if HTML_PARSE_NOIMPLIED is set * parser.c: add HTML_PARSE_NOIMPLIED to options for xmlParseInNodeContext on HTML documents
This commit is contained in:
@ -1394,6 +1394,8 @@ static void
|
|||||||
htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
|
htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
if (ctxt->options & HTML_PARSE_NOIMPLIED)
|
||||||
|
return;
|
||||||
if (!htmlOmittedDefaultValue)
|
if (!htmlOmittedDefaultValue)
|
||||||
return;
|
return;
|
||||||
if (xmlStrEqual(newtag, BAD_CAST"html"))
|
if (xmlStrEqual(newtag, BAD_CAST"html"))
|
||||||
|
@ -182,6 +182,7 @@ typedef enum {
|
|||||||
HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */
|
HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */
|
||||||
HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */
|
HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */
|
||||||
HTML_PARSE_NONET = 1<<11,/* Forbid network access */
|
HTML_PARSE_NONET = 1<<11,/* Forbid network access */
|
||||||
|
HTML_PARSE_NOIMPLIED= 1<<13,/* Do not add implied html/body... elements */
|
||||||
HTML_PARSE_COMPACT = 1<<16 /* compact small text nodes */
|
HTML_PARSE_COMPACT = 1<<16 /* compact small text nodes */
|
||||||
} htmlParserOption;
|
} htmlParserOption;
|
||||||
|
|
||||||
|
8
parser.c
8
parser.c
@ -12870,8 +12870,14 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
|
|||||||
if (doc->type == XML_DOCUMENT_NODE)
|
if (doc->type == XML_DOCUMENT_NODE)
|
||||||
ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
|
ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
|
||||||
#ifdef LIBXML_HTML_ENABLED
|
#ifdef LIBXML_HTML_ENABLED
|
||||||
else if (doc->type == XML_HTML_DOCUMENT_NODE)
|
else if (doc->type == XML_HTML_DOCUMENT_NODE) {
|
||||||
ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
|
ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
|
||||||
|
/*
|
||||||
|
* When parsing in context, it makes no sense to add implied
|
||||||
|
* elements like html/body/etc...
|
||||||
|
*/
|
||||||
|
options |= HTML_PARSE_NOIMPLIED;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
else
|
else
|
||||||
return(XML_ERR_INTERNAL_ERROR);
|
return(XML_ERR_INTERNAL_ERROR);
|
||||||
|
Reference in New Issue
Block a user