mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2026-01-26 21:41:34 +03:00
parser: Improve handling of NOBLANKS option
Don't change the SAX handler. Use a helper function to invoke the "characters" SAX callback. The old code didn't advance the input pointer consistently before invoking the callback. There was also some inconsistency with respect to ctxt->space handling. I don't understand the ctxt->space thing, but now we always behave like the non-complex case did before.
This commit is contained in:
2
SAX2.c
2
SAX2.c
@@ -2733,7 +2733,7 @@ xmlSAXVersion(xmlSAXHandler *hdlr, int version)
|
|||||||
hdlr->reference = xmlSAX2Reference;
|
hdlr->reference = xmlSAX2Reference;
|
||||||
hdlr->characters = xmlSAX2Characters;
|
hdlr->characters = xmlSAX2Characters;
|
||||||
hdlr->cdataBlock = xmlSAX2CDataBlock;
|
hdlr->cdataBlock = xmlSAX2CDataBlock;
|
||||||
hdlr->ignorableWhitespace = xmlSAX2Characters;
|
hdlr->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
|
||||||
hdlr->processingInstruction = xmlSAX2ProcessingInstruction;
|
hdlr->processingInstruction = xmlSAX2ProcessingInstruction;
|
||||||
hdlr->comment = xmlSAX2Comment;
|
hdlr->comment = xmlSAX2Comment;
|
||||||
hdlr->warning = xmlParserWarning;
|
hdlr->warning = xmlParserWarning;
|
||||||
|
|||||||
134
parser.c
134
parser.c
@@ -2978,13 +2978,6 @@ static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
|
|||||||
int i;
|
int i;
|
||||||
xmlNodePtr lastChild;
|
xmlNodePtr lastChild;
|
||||||
|
|
||||||
/*
|
|
||||||
* Don't spend time trying to differentiate them, the same callback is
|
|
||||||
* used !
|
|
||||||
*/
|
|
||||||
if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
|
|
||||||
return(0);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check for xml:space value.
|
* Check for xml:space value.
|
||||||
*/
|
*/
|
||||||
@@ -4865,6 +4858,34 @@ static const unsigned char test_char_data[256] = {
|
|||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static void
|
||||||
|
xmlCharacters(xmlParserCtxtPtr ctxt, const xmlChar *buf, int size) {
|
||||||
|
if ((ctxt->sax == NULL) || (ctxt->disableSAX))
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Calling areBlanks with only parts of a text node
|
||||||
|
* is fundamentally broken, making the NOBLANKS option
|
||||||
|
* essentially unusable.
|
||||||
|
*/
|
||||||
|
if ((!ctxt->keepBlanks) &&
|
||||||
|
(ctxt->sax->ignorableWhitespace != ctxt->sax->characters) &&
|
||||||
|
(areBlanks(ctxt, buf, size, 1))) {
|
||||||
|
if (ctxt->sax->ignorableWhitespace != NULL)
|
||||||
|
ctxt->sax->ignorableWhitespace(ctxt->userData, buf, size);
|
||||||
|
} else {
|
||||||
|
if (ctxt->sax->characters != NULL)
|
||||||
|
ctxt->sax->characters(ctxt->userData, buf, size);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The old code used to update this value for "complex" data
|
||||||
|
* even if keepBlanks was true. This was probably a bug.
|
||||||
|
*/
|
||||||
|
if ((!ctxt->keepBlanks) && (*ctxt->space == -1))
|
||||||
|
*ctxt->space = -2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* xmlParseCharDataInternal:
|
* xmlParseCharDataInternal:
|
||||||
* @ctxt: an XML parser context
|
* @ctxt: an XML parser context
|
||||||
@@ -4910,32 +4931,7 @@ get_more_space:
|
|||||||
const xmlChar *tmp = ctxt->input->cur;
|
const xmlChar *tmp = ctxt->input->cur;
|
||||||
ctxt->input->cur = in;
|
ctxt->input->cur = in;
|
||||||
|
|
||||||
if ((ctxt->sax != NULL) &&
|
xmlCharacters(ctxt, tmp, nbchar);
|
||||||
(ctxt->disableSAX == 0) &&
|
|
||||||
(ctxt->sax->ignorableWhitespace !=
|
|
||||||
ctxt->sax->characters)) {
|
|
||||||
/*
|
|
||||||
* Calling areBlanks with only parts of a text node
|
|
||||||
* is fundamentally broken, making the NOBLANKS option
|
|
||||||
* essentially unusable.
|
|
||||||
*/
|
|
||||||
if (areBlanks(ctxt, tmp, nbchar, 1)) {
|
|
||||||
if (ctxt->sax->ignorableWhitespace != NULL)
|
|
||||||
ctxt->sax->ignorableWhitespace(ctxt->userData,
|
|
||||||
tmp, nbchar);
|
|
||||||
} else {
|
|
||||||
if (ctxt->sax->characters != NULL)
|
|
||||||
ctxt->sax->characters(ctxt->userData,
|
|
||||||
tmp, nbchar);
|
|
||||||
if (*ctxt->space == -1)
|
|
||||||
*ctxt->space = -2;
|
|
||||||
}
|
|
||||||
} else if ((ctxt->sax != NULL) &&
|
|
||||||
(ctxt->disableSAX == 0) &&
|
|
||||||
(ctxt->sax->characters != NULL)) {
|
|
||||||
ctxt->sax->characters(ctxt->userData,
|
|
||||||
tmp, nbchar);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -4968,35 +4964,13 @@ get_more:
|
|||||||
}
|
}
|
||||||
nbchar = in - ctxt->input->cur;
|
nbchar = in - ctxt->input->cur;
|
||||||
if (nbchar > 0) {
|
if (nbchar > 0) {
|
||||||
if ((ctxt->sax != NULL) &&
|
const xmlChar *tmp = ctxt->input->cur;
|
||||||
(ctxt->disableSAX == 0) &&
|
ctxt->input->cur = in;
|
||||||
(ctxt->sax->ignorableWhitespace !=
|
|
||||||
ctxt->sax->characters) &&
|
|
||||||
(IS_BLANK_CH(*ctxt->input->cur))) {
|
|
||||||
const xmlChar *tmp = ctxt->input->cur;
|
|
||||||
ctxt->input->cur = in;
|
|
||||||
|
|
||||||
if (areBlanks(ctxt, tmp, nbchar, 0)) {
|
xmlCharacters(ctxt, tmp, nbchar);
|
||||||
if (ctxt->sax->ignorableWhitespace != NULL)
|
|
||||||
ctxt->sax->ignorableWhitespace(ctxt->userData,
|
line = ctxt->input->line;
|
||||||
tmp, nbchar);
|
col = ctxt->input->col;
|
||||||
} else {
|
|
||||||
if (ctxt->sax->characters != NULL)
|
|
||||||
ctxt->sax->characters(ctxt->userData,
|
|
||||||
tmp, nbchar);
|
|
||||||
if (*ctxt->space == -1)
|
|
||||||
*ctxt->space = -2;
|
|
||||||
}
|
|
||||||
line = ctxt->input->line;
|
|
||||||
col = ctxt->input->col;
|
|
||||||
} else if ((ctxt->sax != NULL) &&
|
|
||||||
(ctxt->disableSAX == 0)) {
|
|
||||||
if (ctxt->sax->characters != NULL)
|
|
||||||
ctxt->sax->characters(ctxt->userData,
|
|
||||||
ctxt->input->cur, nbchar);
|
|
||||||
line = ctxt->input->line;
|
|
||||||
col = ctxt->input->col;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
ctxt->input->cur = in;
|
ctxt->input->cur = in;
|
||||||
if (*in == 0xD) {
|
if (*in == 0xD) {
|
||||||
@@ -5060,23 +5034,7 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
|
|||||||
if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
|
if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
|
||||||
buf[nbchar] = 0;
|
buf[nbchar] = 0;
|
||||||
|
|
||||||
/*
|
xmlCharacters(ctxt, buf, nbchar);
|
||||||
* OK the segment is to be consumed as chars.
|
|
||||||
*/
|
|
||||||
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
|
|
||||||
if (areBlanks(ctxt, buf, nbchar, 0)) {
|
|
||||||
if (ctxt->sax->ignorableWhitespace != NULL)
|
|
||||||
ctxt->sax->ignorableWhitespace(ctxt->userData,
|
|
||||||
buf, nbchar);
|
|
||||||
} else {
|
|
||||||
if (ctxt->sax->characters != NULL)
|
|
||||||
ctxt->sax->characters(ctxt->userData, buf, nbchar);
|
|
||||||
if ((ctxt->sax->characters !=
|
|
||||||
ctxt->sax->ignorableWhitespace) &&
|
|
||||||
(*ctxt->space == -1))
|
|
||||||
*ctxt->space = -2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
nbchar = 0;
|
nbchar = 0;
|
||||||
SHRINK;
|
SHRINK;
|
||||||
}
|
}
|
||||||
@@ -5084,21 +5042,8 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
|
|||||||
}
|
}
|
||||||
if (nbchar != 0) {
|
if (nbchar != 0) {
|
||||||
buf[nbchar] = 0;
|
buf[nbchar] = 0;
|
||||||
/*
|
|
||||||
* OK the segment is to be consumed as chars.
|
xmlCharacters(ctxt, buf, nbchar);
|
||||||
*/
|
|
||||||
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
|
|
||||||
if (areBlanks(ctxt, buf, nbchar, 0)) {
|
|
||||||
if (ctxt->sax->ignorableWhitespace != NULL)
|
|
||||||
ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
|
|
||||||
} else {
|
|
||||||
if (ctxt->sax->characters != NULL)
|
|
||||||
ctxt->sax->characters(ctxt->userData, buf, nbchar);
|
|
||||||
if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
|
|
||||||
(*ctxt->space == -1))
|
|
||||||
*ctxt->space = -2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
* cur == 0 can mean
|
* cur == 0 can mean
|
||||||
@@ -13633,9 +13578,6 @@ xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
|
|||||||
/*
|
/*
|
||||||
* Changing SAX callbacks is a bad idea. This should be fixed.
|
* Changing SAX callbacks is a bad idea. This should be fixed.
|
||||||
*/
|
*/
|
||||||
if (options & XML_PARSE_NOBLANKS) {
|
|
||||||
ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
|
|
||||||
}
|
|
||||||
if (options & XML_PARSE_NOCDATA) {
|
if (options & XML_PARSE_NOCDATA) {
|
||||||
ctxt->sax->cdataBlock = NULL;
|
ctxt->sax->cdataBlock = NULL;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user