1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-28 00:21:53 +03:00

Fix xmlParseInNodeContext for HTML content

xmlParseInNodeContext notices that the enclosing document is
an HTML document, so it invokes the HTML parser for that fragment, and
the HTML parser, finding a "<p>hello world!</p>" document, automatically
augments it with defaulted <html> and <body>. This defaulting should
be turned off in the HTML parser for this to work, but there is no
such HTML parser option. There is an htmlOmittedDefaultValue global
variable that you could use, but really we should not rely on global
variables for processing options anymore; the best approach is to add an
HTML_PARSE_NOIMPLIED parser option.
* include/libxml/HTMLparser.h: add the HTML_PARSE_NOIMPLIED parser flag
* HTMLparser.c: do not add implied elements if HTML_PARSE_NOIMPLIED is set
* parser.c: add HTML_PARSE_NOIMPLIED to options for xmlParseInNodeContext
  on HTML documents
This commit is contained in:
Daniel Veillard
2010-01-29 20:47:08 +01:00
parent ddb01cbf61
commit e20fb5a72c
3 changed files with 10 additions and 1 deletions

View File

@ -1394,6 +1394,8 @@ static void
htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) { htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
int i; int i;
if (ctxt->options & HTML_PARSE_NOIMPLIED)
return;
if (!htmlOmittedDefaultValue) if (!htmlOmittedDefaultValue)
return; return;
if (xmlStrEqual(newtag, BAD_CAST"html")) if (xmlStrEqual(newtag, BAD_CAST"html"))

View File

@ -182,6 +182,7 @@ typedef enum {
HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */ HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */
HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */ HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */
HTML_PARSE_NONET = 1<<11,/* Forbid network access */ HTML_PARSE_NONET = 1<<11,/* Forbid network access */
HTML_PARSE_NOIMPLIED= 1<<13,/* Do not add implied html/body... elements */
HTML_PARSE_COMPACT = 1<<16 /* compact small text nodes */ HTML_PARSE_COMPACT = 1<<16 /* compact small text nodes */
} htmlParserOption; } htmlParserOption;

View File

@ -12870,8 +12870,14 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
if (doc->type == XML_DOCUMENT_NODE) if (doc->type == XML_DOCUMENT_NODE)
ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
#ifdef LIBXML_HTML_ENABLED #ifdef LIBXML_HTML_ENABLED
else if (doc->type == XML_HTML_DOCUMENT_NODE) else if (doc->type == XML_HTML_DOCUMENT_NODE) {
ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen); ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
/*
* When parsing in context, it makes no sense to add implied
* elements like html/body/etc...
*/
options |= HTML_PARSE_NOIMPLIED;
}
#endif #endif
else else
return(XML_ERR_INTERNAL_ERROR); return(XML_ERR_INTERNAL_ERROR);