1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-23 01:52:48 +03:00

parser: Rework entity boundary check for element content

Only use depth of input stack. This makes the input ID unused
internally.
This commit is contained in:
Nick Wellnhofer
2025-05-21 20:21:32 +02:00
parent 74ea6b483c
commit 4dc44c83ab
2 changed files with 74 additions and 43 deletions

View File

@@ -125,7 +125,7 @@ struct _xmlParserInput {
const xmlChar *version XML_DEPRECATED_MEMBER; const xmlChar *version XML_DEPRECATED_MEMBER;
/* Flags */ /* Flags */
int flags XML_DEPRECATED_MEMBER; int flags XML_DEPRECATED_MEMBER;
/* a unique identifier for the entity */ /* a unique identifier for the entity, unused internally */
int id XML_DEPRECATED_MEMBER; int id XML_DEPRECATED_MEMBER;
/* unused */ /* unused */
unsigned long parentConsumed XML_DEPRECATED_MEMBER; unsigned long parentConsumed XML_DEPRECATED_MEMBER;

115
parser.c
View File

@@ -1943,9 +1943,9 @@ xmlCtxtPushInput(xmlParserCtxt *ctxt, xmlParserInput *value)
} }
/* /*
* Internally, the input ID is only used to detect parameter entity * The input ID is unused internally, but there are entity
* boundaries. But there are entity loaders in downstream code that * loaders in downstream code that detect the main document
* detect the main document by checking for "input_id == 1". * by checking for "input_id == 1".
*/ */
value->id = ctxt->input_id++; value->id = ctxt->input_id++;
@@ -6061,6 +6061,48 @@ xmlParseAttributeListDecl(xmlParserCtxt *ctxt) {
} }
} }
/**
 * Skip blanks while handling parameter entities inside a balanced
 * group of an element content declaration.
 *
 * Whenever the current input is exhausted and validation is enabled,
 * a validity error is raised if the input stack is not deeper than it
 * was at the opening '(' (i.e. the entity that opened the group is
 * about to be left).
 *
 * @param ctxt parser context
 * @param openInputNr input nr of the entity with opening '('
 */
static void
xmlSkipBlankCharsPEBalanced(xmlParserCtxt *ctxt, int openInputNr) {
    /* Only read below when LIBXML_VALID_ENABLED is defined. */
    (void) openInputNr;

    SKIP_BLANKS;
    GROW;

    /* Nothing more to do unless in an external entity or inside a PE. */
    if (!(PARSER_EXTERNAL(ctxt) || PARSER_IN_PE(ctxt)))
        return;

    for (;;) {
        if (ctxt->input->cur < ctxt->input->end) {
            /* Input still has data: only a PE reference keeps us going. */
            if (RAW != '%')
                return;
            xmlParsePERefInternal(ctxt, 0);
        } else {
            /* Current input is exhausted. */
#ifdef LIBXML_VALID_ENABLED
            if ((ctxt->validate) && (ctxt->inputNr <= openInputNr)) {
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
                                 "Element content declaration doesn't start "
                                 "and stop in the same entity\n",
                                 NULL, NULL);
            }
#endif
            if (!PARSER_IN_PE(ctxt))
                return;
            xmlPopPE(ctxt);
        }

        SKIP_BLANKS;
        GROW;
    }
}
/** /**
* parse the declaration for a Mixed Element content * parse the declaration for a Mixed Element content
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl() * The leading '(' and spaces have been skipped in xmlParseElementContentDecl()
@@ -6077,23 +6119,22 @@ xmlParseAttributeListDecl(xmlParserCtxt *ctxt) {
* mixed-content declaration. * mixed-content declaration.
* *
* @param ctxt an XML parser context * @param ctxt an XML parser context
* @param inputchk the input used for the current entity, needed for boundary checks * @param openInputNr input stack depth of the entity with the opening '(',
* needed for boundary checks
* @returns the list of the xmlElementContent describing the element choices * @returns the list of the xmlElementContent describing the element choices
*/ */
xmlElementContent * xmlElementContent *
xmlParseElementMixedContentDecl(xmlParserCtxt *ctxt, int inputchk) { xmlParseElementMixedContentDecl(xmlParserCtxt *ctxt, int openInputNr) {
xmlElementContentPtr ret = NULL, cur = NULL, n; xmlElementContentPtr ret = NULL, cur = NULL, n;
const xmlChar *elem = NULL; const xmlChar *elem = NULL;
(void) inputchk;
GROW; GROW;
if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
SKIP(7); SKIP(7);
SKIP_BLANKS_PE; xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
if (RAW == ')') { if (RAW == ')') {
#ifdef LIBXML_VALID_ENABLED #ifdef LIBXML_VALID_ENABLED
if ((ctxt->validate) && (inputchk != ctxt->input->id)) { if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element content declaration doesn't start " "Element content declaration doesn't start "
"and stop in the same entity\n", "and stop in the same entity\n",
@@ -6134,7 +6175,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxt *ctxt, int inputchk) {
n->c1->parent = n; n->c1->parent = n;
cur = n; cur = n;
} }
SKIP_BLANKS_PE; xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
elem = xmlParseName(ctxt); elem = xmlParseName(ctxt);
if (elem == NULL) { if (elem == NULL) {
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
@@ -6142,8 +6183,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxt *ctxt, int inputchk) {
xmlFreeDocElementContent(ctxt->myDoc, ret); xmlFreeDocElementContent(ctxt->myDoc, ret);
return(NULL); return(NULL);
} }
SKIP_BLANKS_PE; xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
GROW;
} }
if ((RAW == ')') && (NXT(1) == '*')) { if ((RAW == ')') && (NXT(1) == '*')) {
if (elem != NULL) { if (elem != NULL) {
@@ -6156,7 +6196,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxt *ctxt, int inputchk) {
if (ret != NULL) if (ret != NULL)
ret->ocur = XML_ELEMENT_CONTENT_MULT; ret->ocur = XML_ELEMENT_CONTENT_MULT;
#ifdef LIBXML_VALID_ENABLED #ifdef LIBXML_VALID_ENABLED
if ((ctxt->validate) && (inputchk != ctxt->input->id)) { if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element content declaration doesn't start " "Element content declaration doesn't start "
"and stop in the same entity\n", "and stop in the same entity\n",
@@ -6205,41 +6245,36 @@ mem_error:
* the replacement text should be a connector (| or ,). * the replacement text should be a connector (| or ,).
* *
* @param ctxt an XML parser context * @param ctxt an XML parser context
* @param inputchk the input used for the current entity, needed for boundary checks * @param openInputNr input stack depth of the entity with the opening '(',
* needed for boundary checks
* @param depth the level of recursion * @param depth the level of recursion
* @returns the tree of xmlElementContent describing the element * @returns the tree of xmlElementContent describing the element
* hierarchy. * hierarchy.
*/ */
static xmlElementContentPtr static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int openInputNr,
int depth) { int depth) {
int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256; int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
const xmlChar *elem; const xmlChar *elem;
xmlChar type = 0; xmlChar type = 0;
(void) inputchk;
if (depth > maxDepth) { if (depth > maxDepth) {
xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT, xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
"xmlParseElementChildrenContentDecl : depth %d too deep, " "xmlParseElementChildrenContentDecl : depth %d too deep, "
"use XML_PARSE_HUGE\n", depth); "use XML_PARSE_HUGE\n", depth);
return(NULL); return(NULL);
} }
SKIP_BLANKS_PE; xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
GROW;
if (RAW == '(') { if (RAW == '(') {
int inputid = ctxt->input->id; int newInputNr = ctxt->inputNr;
/* Recurse on first child */ /* Recurse on first child */
NEXT; NEXT;
SKIP_BLANKS_PE; cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
depth + 1); depth + 1);
if (cur == NULL) if (cur == NULL)
return(NULL); return(NULL);
SKIP_BLANKS_PE;
GROW;
} else { } else {
elem = xmlParseName(ctxt); elem = xmlParseName(ctxt);
if (elem == NULL) { if (elem == NULL) {
@@ -6266,8 +6301,10 @@ xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
} }
GROW; GROW;
} }
SKIP_BLANKS_PE; while (!PARSER_STOPPED(ctxt)) {
while ((RAW != ')') && (PARSER_STOPPED(ctxt) == 0)) { xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
if (RAW == ')')
break;
/* /*
* Each loop we parse one separator and one element. * Each loop we parse one separator and one element.
*/ */
@@ -6362,22 +6399,19 @@ xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
xmlFreeDocElementContent(ctxt->myDoc, ret); xmlFreeDocElementContent(ctxt->myDoc, ret);
return(NULL); return(NULL);
} }
GROW; xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
SKIP_BLANKS_PE; if (RAW == '(') {
GROW; int newInputNr = ctxt->inputNr;
if (RAW == '(') {
int inputid = ctxt->input->id;
/* Recurse on second child */ /* Recurse on second child */
NEXT; NEXT;
SKIP_BLANKS_PE; last = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
depth + 1); depth + 1);
if (last == NULL) { if (last == NULL) {
if (ret != NULL) if (ret != NULL)
xmlFreeDocElementContent(ctxt->myDoc, ret); xmlFreeDocElementContent(ctxt->myDoc, ret);
return(NULL); return(NULL);
} }
SKIP_BLANKS_PE;
} else { } else {
elem = xmlParseName(ctxt); elem = xmlParseName(ctxt);
if (elem == NULL) { if (elem == NULL) {
@@ -6406,8 +6440,6 @@ xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
last->ocur = XML_ELEMENT_CONTENT_ONCE; last->ocur = XML_ELEMENT_CONTENT_ONCE;
} }
} }
SKIP_BLANKS_PE;
GROW;
} }
if ((cur != NULL) && (last != NULL)) { if ((cur != NULL) && (last != NULL)) {
cur->c2 = last; cur->c2 = last;
@@ -6415,7 +6447,7 @@ xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
last->parent = cur; last->parent = cur;
} }
#ifdef LIBXML_VALID_ENABLED #ifdef LIBXML_VALID_ENABLED
if ((ctxt->validate) && (inputchk != ctxt->input->id)) { if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element content declaration doesn't start " "Element content declaration doesn't start "
"and stop in the same entity\n", "and stop in the same entity\n",
@@ -6545,7 +6577,7 @@ xmlParseElementContentDecl(xmlParserCtxt *ctxt, const xmlChar *name,
xmlElementContent **result) { xmlElementContent **result) {
xmlElementContentPtr tree = NULL; xmlElementContentPtr tree = NULL;
int inputid = ctxt->input->id; int openInputNr = ctxt->inputNr;
int res; int res;
*result = NULL; *result = NULL;
@@ -6556,13 +6588,12 @@ xmlParseElementContentDecl(xmlParserCtxt *ctxt, const xmlChar *name,
return(-1); return(-1);
} }
NEXT; NEXT;
GROW; xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
SKIP_BLANKS_PE;
if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
tree = xmlParseElementMixedContentDecl(ctxt, inputid); tree = xmlParseElementMixedContentDecl(ctxt, openInputNr);
res = XML_ELEMENT_TYPE_MIXED; res = XML_ELEMENT_TYPE_MIXED;
} else { } else {
tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1); tree = xmlParseElementChildrenContentDeclPriv(ctxt, openInputNr, 1);
res = XML_ELEMENT_TYPE_ELEMENT; res = XML_ELEMENT_TYPE_ELEMENT;
} }
SKIP_BLANKS_PE; SKIP_BLANKS_PE;