1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-24 13:33:01 +03:00

Keep non-significant blanks node in HTML parser

For https://bugzilla.gnome.org/show_bug.cgi?id=681822

Regardless if the option HTML_PARSE_NOBLANKS is set or not, blank nodes
are removed from a HTML document, for example:

<html>
  <head>
    <title>This is a test.</title>
  </head>
  <body>
    <p>This is a test.</p>
  </body>
</html>

is read as:

<html><head><title>This is a test.</title></head><body>
    <p>This is a test.</p>
  </body></html>

This changes the default behaviour but the old behaviour is available
as expected when using the parser flag HTML_PARSE_NOBLANKS

Based on original patch from Igor Ignatyuk <igor_ignatiouk@hotmail.com>

* HTMLparser.c: change various places in the parser where ignorable_space
  SAX callback was called without checking for the parser flag preference
* xmllint.c: make sure we use the new flag even for HTML parsing
* result/HTML/*: this modifies the output of a number of tests
This commit is contained in:
Daniel Veillard
2012-09-07 19:32:12 +08:00
parent 878ec9db9d
commit f933c89813
29 changed files with 769 additions and 338 deletions

View File

@@ -2981,9 +2981,14 @@ htmlParseCharData(htmlParserCtxtPtr ctxt) {
*/
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
if (areBlanks(ctxt, buf, nbchar)) {
if (ctxt->sax->ignorableWhitespace != NULL)
ctxt->sax->ignorableWhitespace(ctxt->userData,
buf, nbchar);
if (ctxt->keepBlanks) {
if (ctxt->sax->characters != NULL)
ctxt->sax->characters(ctxt->userData, buf, nbchar);
} else {
if (ctxt->sax->ignorableWhitespace != NULL)
ctxt->sax->ignorableWhitespace(ctxt->userData,
buf, nbchar);
}
} else {
htmlCheckParagraph(ctxt);
if (ctxt->sax->characters != NULL)
@@ -3014,8 +3019,14 @@ htmlParseCharData(htmlParserCtxtPtr ctxt) {
*/
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
if (areBlanks(ctxt, buf, nbchar)) {
if (ctxt->sax->ignorableWhitespace != NULL)
ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
if (ctxt->keepBlanks) {
if (ctxt->sax->characters != NULL)
ctxt->sax->characters(ctxt->userData, buf, nbchar);
} else {
if (ctxt->sax->ignorableWhitespace != NULL)
ctxt->sax->ignorableWhitespace(ctxt->userData,
buf, nbchar);
}
} else {
htmlCheckParagraph(ctxt);
if (ctxt->sax->characters != NULL)
@@ -5687,9 +5698,15 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if ((cur != '<') && (cur != '&')) {
if (ctxt->sax != NULL) {
if (IS_BLANK_CH(cur)) {
if (ctxt->sax->ignorableWhitespace != NULL)
ctxt->sax->ignorableWhitespace(
ctxt->userData, &cur, 1);
if (ctxt->keepBlanks) {
if (ctxt->sax->characters != NULL)
ctxt->sax->characters(
ctxt->userData, &cur, 1);
} else {
if (ctxt->sax->ignorableWhitespace != NULL)
ctxt->sax->ignorableWhitespace(
ctxt->userData, &cur, 1);
}
} else {
htmlCheckParagraph(ctxt);
if (ctxt->sax->characters != NULL)