mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-10-24 13:33:01 +03:00
Keep non-significant blanks node in HTML parser
For https://bugzilla.gnome.org/show_bug.cgi?id=681822 Regardless if the option HTML_PARSE_NOBLANKS is set or not, blank nodes are removed from a HTML document, for example: <html> <head> <title>This is a test.</title> </head> <body> <p>This is a test.</p> </body> </html> is read as: <html><head><title>This is a test.</title></head><body> <p>This is a test.</p> </body></html> This changes the default behaviour but the old behaviour is available as expected when using the parser flag HTML_PARSE_NOBLANKS Based on original patch from Igor Ignatyuk <igor_ignatiouk@hotmail.com> * HTMLparser.c: change various places in the parser where ignorable_space SAX callback was called without checking for the parser flag preference * xmllint.c: make sure we use the new flag even for HTML parsing * result/HTML/*: this modifies the output of a number of tests
This commit is contained in:
33
HTMLparser.c
33
HTMLparser.c
@@ -2981,9 +2981,14 @@ htmlParseCharData(htmlParserCtxtPtr ctxt) {
|
||||
*/
|
||||
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
|
||||
if (areBlanks(ctxt, buf, nbchar)) {
|
||||
if (ctxt->sax->ignorableWhitespace != NULL)
|
||||
ctxt->sax->ignorableWhitespace(ctxt->userData,
|
||||
buf, nbchar);
|
||||
if (ctxt->keepBlanks) {
|
||||
if (ctxt->sax->characters != NULL)
|
||||
ctxt->sax->characters(ctxt->userData, buf, nbchar);
|
||||
} else {
|
||||
if (ctxt->sax->ignorableWhitespace != NULL)
|
||||
ctxt->sax->ignorableWhitespace(ctxt->userData,
|
||||
buf, nbchar);
|
||||
}
|
||||
} else {
|
||||
htmlCheckParagraph(ctxt);
|
||||
if (ctxt->sax->characters != NULL)
|
||||
@@ -3014,8 +3019,14 @@ htmlParseCharData(htmlParserCtxtPtr ctxt) {
|
||||
*/
|
||||
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
|
||||
if (areBlanks(ctxt, buf, nbchar)) {
|
||||
if (ctxt->sax->ignorableWhitespace != NULL)
|
||||
ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
|
||||
if (ctxt->keepBlanks) {
|
||||
if (ctxt->sax->characters != NULL)
|
||||
ctxt->sax->characters(ctxt->userData, buf, nbchar);
|
||||
} else {
|
||||
if (ctxt->sax->ignorableWhitespace != NULL)
|
||||
ctxt->sax->ignorableWhitespace(ctxt->userData,
|
||||
buf, nbchar);
|
||||
}
|
||||
} else {
|
||||
htmlCheckParagraph(ctxt);
|
||||
if (ctxt->sax->characters != NULL)
|
||||
@@ -5687,9 +5698,15 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
||||
if ((cur != '<') && (cur != '&')) {
|
||||
if (ctxt->sax != NULL) {
|
||||
if (IS_BLANK_CH(cur)) {
|
||||
if (ctxt->sax->ignorableWhitespace != NULL)
|
||||
ctxt->sax->ignorableWhitespace(
|
||||
ctxt->userData, &cur, 1);
|
||||
if (ctxt->keepBlanks) {
|
||||
if (ctxt->sax->characters != NULL)
|
||||
ctxt->sax->characters(
|
||||
ctxt->userData, &cur, 1);
|
||||
} else {
|
||||
if (ctxt->sax->ignorableWhitespace != NULL)
|
||||
ctxt->sax->ignorableWhitespace(
|
||||
ctxt->userData, &cur, 1);
|
||||
}
|
||||
} else {
|
||||
htmlCheckParagraph(ctxt);
|
||||
if (ctxt->sax->characters != NULL)
|
||||
|
||||
Reference in New Issue
Block a user