1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-24 13:33:01 +03:00

parser: Make xmlParseContent more useful

This is an internal function which isn't really usable without some
hacks. See WebKit/Chromium trying to recreate the effects of
xmlDetectSAX2 manually, for example.

Make xmlParseContent perform late initialization and check whether the
content was fully parsed.

Also rename xmlDetectSAX2 and document why it's needed.
This commit is contained in:
Nick Wellnhofer
2024-01-04 17:14:31 +01:00
parent 65c65b6524
commit 8c5848bdd5

View File

@@ -934,18 +934,25 @@ encoding_error:
************************************************************************/
/**
* xmlDetectSAX2:
* xmlCtxtInitializeLate:
* @ctxt: an XML parser context
*
* Do the SAX2 detection and specific initialization
* Final initialization of the parser context before starting to parse.
*
* This accounts for users modifying struct members of parser context
* directly.
*/
static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
xmlSAXHandlerPtr sax;
/* Avoid unused variable warning if features are disabled. */
(void) sax;
/*
* Changing the SAX struct directly is still widespread practice
* in internal and external code.
*/
if (ctxt == NULL) return;
sax = ctxt->sax;
#ifdef LIBXML_SAX1_ENABLED
@@ -964,6 +971,10 @@ xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
ctxt->sax2 = 1;
#endif /* LIBXML_SAX1_ENABLED */
/*
* Some users replace the dictionary directly in the context struct.
* We really need an API function to do that cleanly.
*/
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
@@ -7236,7 +7247,7 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
const xmlChar *SystemID) {
xmlDetectSAX2(ctxt);
xmlCtxtInitializeLate(ctxt);
xmlDetectEncoding(ctxt);
@@ -9695,14 +9706,21 @@ xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
* xmlParseContent:
* @ctxt: an XML parser context
*
* Parse a content sequence. Stops at EOF or '</'.
*
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
* Parse XML element content. This is useful if you're only interested
* in custom SAX callbacks. If you want a node list, use
* xmlParseInNodeContext.
*/
void
xmlParseContent(xmlParserCtxtPtr ctxt) {
if ((ctxt == NULL) || (ctxt->input == NULL))
return;
xmlCtxtInitializeLate(ctxt);
xmlParseContentInternal(ctxt);
if (ctxt->input->cur < ctxt->input->end)
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
}
/**
@@ -10432,7 +10450,7 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
/*
* SAX: detecting the level.
*/
xmlDetectSAX2(ctxt);
xmlCtxtInitializeLate(ctxt);
/*
* Document locator is unused. Only for backward compatibility.
@@ -10560,7 +10578,7 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
if ((ctxt == NULL) || (ctxt->input == NULL))
return(-1);
xmlDetectSAX2(ctxt);
xmlCtxtInitializeLate(ctxt);
/*
* Document locator is unused. Only for backward compatibility.
@@ -10600,13 +10618,10 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
ctxt->loadsubset = 0;
ctxt->depth = 0;
xmlParseContent(ctxt);
xmlParseContentInternal(ctxt);
if ((RAW == '<') && (NXT(1) == '/')) {
if (ctxt->input->cur < ctxt->input->end)
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
} else if (RAW != 0) {
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
}
/*
* SAX: end of the document processing.
@@ -11463,7 +11478,7 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
if (ctxt->instate == XML_PARSER_START)
xmlDetectSAX2(ctxt);
xmlCtxtInitializeLate(ctxt);
if ((size > 0) && (chunk != NULL) && (!terminate) &&
(chunk[size - 1] == '\r')) {
end_in_lf = 1;
@@ -11690,8 +11705,6 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
/* We are loading a DTD */
ctxt->options |= XML_PARSE_DTDLOAD;
xmlDetectSAX2(ctxt);
/*
* generate a parser input from the I/O handler
*/
@@ -11939,9 +11952,9 @@ xmlCtxtParseContent(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
}
}
xmlParseContent(ctxt);
xmlParseContentInternal(ctxt);
if ((RAW == '<') && (NXT(1) == '/'))
if (ctxt->input->cur < ctxt->input->end)
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
if ((ctxt->wellFormed) ||
@@ -12022,7 +12035,7 @@ xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
* This initiates a recursive call chain:
*
* - xmlCtxtParseContent
* - xmlParseContent
* - xmlParseContentInternal
* - xmlParseReference
* - xmlCtxtParseEntity
*
@@ -12105,6 +12118,8 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt, const xmlChar *URL,
if (input == NULL)
return(ctxt->errNo);
xmlCtxtInitializeLate(ctxt);
list = xmlCtxtParseContent(ctxt, input, /* hasTextDecl */ 1, 1);
if (*listOut != NULL)
*listOut = list;
@@ -12152,8 +12167,6 @@ xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
if (ctxt == NULL)
return(XML_ERR_NO_MEMORY);
xmlDetectSAX2(ctxt);
ctxt->depth = depth;
ctxt->myDoc = doc;
ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
@@ -12276,7 +12289,7 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
/*
* Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
* We need a dictionary for xmlDetectSAX2, so if there's no doc dict
* We need a dictionary for xmlCtxtInitializeLate, so if there's no doc dict
* we must wait until the last moment to free the original one.
*/
if (doc->dict != NULL) {
@@ -12290,11 +12303,15 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
xmlSwitchEncodingName(ctxt, (const char *) doc->encoding);
xmlCtxtUseOptions(ctxt, options);
xmlDetectSAX2(ctxt);
xmlCtxtInitializeLate(ctxt);
ctxt->myDoc = doc;
/* parsing in context, i.e. as within existing content */
ctxt->input_id = 2;
/*
* TODO: Use xmlCtxtParseContent
*/
fake = xmlNewDocComment(node->doc, NULL);
if (fake == NULL) {
xmlFreeParserCtxt(ctxt);
@@ -12335,18 +12352,12 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
__htmlParseContent(ctxt);
else
#endif
xmlParseContent(ctxt);
xmlParseContentInternal(ctxt);
if (ctxt->input->cur < ctxt->input->end)
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
xmlParserNsPop(ctxt, nsnr);
if ((RAW == '<') && (NXT(1) == '/')) {
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
} else if (RAW != 0) {
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
}
if ((ctxt->node != NULL) && (ctxt->node != node)) {
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
ctxt->wellFormed = 0;
}
if ((ctxt->wellFormed) ||
((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
@@ -12435,7 +12446,7 @@ xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
if (ctxt == NULL)
return(XML_ERR_NO_MEMORY);
xmlDetectSAX2(ctxt);
xmlCtxtInitializeLate(ctxt);
ctxt->depth = depth;
ctxt->myDoc = doc;
@@ -12677,7 +12688,6 @@ xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
}
}
xmlDetectSAX2(ctxt);
if (data!=NULL) {
ctxt->_private = data;
}
@@ -12829,7 +12839,6 @@ xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
}
ctxt->userData = user_data;
}
xmlDetectSAX2(ctxt);
xmlParseDocument(ctxt);
@@ -12927,7 +12936,6 @@ xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
}
}
xmlDetectSAX2(ctxt);
if (data!=NULL) {
ctxt->_private=data;
}
@@ -13026,7 +13034,6 @@ int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
}
ctxt->userData = user_data;
}
xmlDetectSAX2(ctxt);
xmlParseDocument(ctxt);
@@ -13108,7 +13115,6 @@ xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
ctxt->sax = sax;
ctxt->userData = NULL;
}
xmlDetectSAX2(ctxt);
xmlParseDocument(ctxt);
if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;