1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-21 14:53:44 +03:00

parser: Pop PEs that start markup declarations explicitly

We currently only handle "Validity constraint: Proper Declaration/PE
Nesting", but we must detect "Well-formedness constraint: PE Between
Declarations" separately:

> The replacement text of a parameter entity reference in a DeclSep must
> match the production extSubsetDecl.

PEs in DeclSeps are PEs that start with a full markup declaration (or
another PE). These are handled in xmlParse{Internal|External}Subset. We
set a flag on these PEs and don't close them implicitly in
xmlSkipBlankCharsPE. This will make unterminated declarations in such
PEs cause a parser error. The PEs are closed explicitly in
xmlParse{Internal|External}Subset, the only location where they are
allowed to end.
This commit is contained in:
Nick Wellnhofer
2025-05-20 19:40:06 +02:00
parent 2a60ca06c0
commit 2f3655c9c3
9 changed files with 148 additions and 73 deletions

View File

@@ -33,6 +33,7 @@
#define XML_INPUT_USES_ENC_DECL (1u << 4) #define XML_INPUT_USES_ENC_DECL (1u << 4)
#define XML_INPUT_ENCODING_ERROR (1u << 5) #define XML_INPUT_ENCODING_ERROR (1u << 5)
#define XML_INPUT_PROGRESSIVE (1u << 6) #define XML_INPUT_PROGRESSIVE (1u << 6)
#define XML_INPUT_MARKUP_DECL (1u << 7)
#define PARSER_STOPPED(ctxt) ((ctxt)->disableSAX > 1) #define PARSER_STOPPED(ctxt) ((ctxt)->disableSAX > 1)

152
parser.c
View File

@@ -204,6 +204,9 @@ xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
static int static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
static void
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl);
/************************************************************************ /************************************************************************
* * * *
* Some factorized error routines * * Some factorized error routines *
@@ -2440,7 +2443,7 @@ xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
* even consume the whole entity and pop it. We might * even consume the whole entity and pop it. We might
* even pop multiple PEs in this loop. * even pop multiple PEs in this loop.
*/ */
xmlParsePEReference(ctxt); xmlParsePERefInternal(ctxt, 0);
inParam = PARSER_IN_PE(ctxt); inParam = PARSER_IN_PE(ctxt);
expandParam = PARSER_EXTERNAL(ctxt); expandParam = PARSER_EXTERNAL(ctxt);
@@ -2448,6 +2451,14 @@ xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
if (inParam == 0) if (inParam == 0)
break; break;
/*
* Don't pop parameter entities that start a markup
* declaration to detect Well-formedness constraint:
* PE Between Declarations.
*/
if (ctxt->input->flags & XML_INPUT_MARKUP_DECL)
break;
xmlPopPE(ctxt); xmlPopPE(ctxt);
inParam = PARSER_IN_PE(ctxt); inParam = PARSER_IN_PE(ctxt);
@@ -2747,7 +2758,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
*/ */
void void
xmlParserHandlePEReference(xmlParserCtxt *ctxt) { xmlParserHandlePEReference(xmlParserCtxt *ctxt) {
xmlParsePEReference(ctxt); xmlParsePERefInternal(ctxt, 0);
} }
/** /**
@@ -6648,20 +6659,31 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
int *inputIds = NULL; int *inputIds = NULL;
size_t inputIdsSize = 0; size_t inputIdsSize = 0;
size_t depth = 0; size_t depth = 0;
int isFreshPE = 0;
int oldInputNr = ctxt->inputNr;
int declInputNr = ctxt->inputNr;
while (PARSER_STOPPED(ctxt) == 0) { while (!PARSER_STOPPED(ctxt)) {
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { if (ctxt->input->cur >= ctxt->input->end) {
if (ctxt->inputNr <= oldInputNr) {
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
goto error;
}
xmlPopPE(ctxt);
declInputNr = ctxt->inputNr;
} else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
int id = ctxt->input->id; int id = ctxt->input->id;
SKIP(3); SKIP(3);
SKIP_BLANKS_PE; SKIP_BLANKS_PE;
isFreshPE = 0;
if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
SKIP(7); SKIP(7);
SKIP_BLANKS_PE; SKIP_BLANKS_PE;
if (RAW != '[') { if (RAW != '[') {
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
xmlHaltParser(ctxt);
goto error; goto error;
} }
if (ctxt->input->id != id) { if (ctxt->input->id != id) {
@@ -6700,7 +6722,6 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
SKIP_BLANKS_PE; SKIP_BLANKS_PE;
if (RAW != '[') { if (RAW != '[') {
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
xmlHaltParser(ctxt);
goto error; goto error;
} }
if (ctxt->input->id != id) { if (ctxt->input->id != id) {
@@ -6741,11 +6762,17 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
} }
} else { } else {
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
xmlHaltParser(ctxt);
goto error; goto error;
} }
} else if ((depth > 0) && } else if ((depth > 0) &&
(RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
if (isFreshPE) {
xmlFatalErrMsg(ctxt, XML_ERR_CONDSEC_INVALID,
"Parameter entity must match "
"extSubsetDecl\n");
goto error;
}
depth--; depth--;
if (ctxt->input->id != inputIds[depth]) { if (ctxt->input->id != inputIds[depth]) {
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
@@ -6754,17 +6781,23 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
} }
SKIP(3); SKIP(3);
} else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) { } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
isFreshPE = 0;
xmlParseMarkupDecl(ctxt); xmlParseMarkupDecl(ctxt);
} else if (RAW == '%') {
xmlParsePERefInternal(ctxt, 1);
if (ctxt->inputNr > declInputNr) {
isFreshPE = 1;
declInputNr = ctxt->inputNr;
}
} else { } else {
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
xmlHaltParser(ctxt);
goto error; goto error;
} }
if (depth == 0) if (depth == 0)
break; break;
SKIP_BLANKS_PE; SKIP_BLANKS;
SHRINK; SHRINK;
GROW; GROW;
} }
@@ -6937,7 +6970,7 @@ xmlParseExternalSubset(xmlParserCtxt *ctxt, const xmlChar *publicId,
} }
ctxt->myDoc->properties = XML_DOC_INTERNAL; ctxt->myDoc->properties = XML_DOC_INTERNAL;
} }
if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL) && if ((ctxt->myDoc->intSubset == NULL) &&
(xmlCreateIntSubset(ctxt->myDoc, NULL, publicId, systemId) == NULL)) { (xmlCreateIntSubset(ctxt->myDoc, NULL, publicId, systemId) == NULL)) {
xmlErrMemory(ctxt); xmlErrMemory(ctxt);
} }
@@ -6945,27 +6978,32 @@ xmlParseExternalSubset(xmlParserCtxt *ctxt, const xmlChar *publicId,
ctxt->inSubset = 2; ctxt->inSubset = 2;
oldInputNr = ctxt->inputNr; oldInputNr = ctxt->inputNr;
SKIP_BLANKS_PE; SKIP_BLANKS;
while (((RAW != 0) || (ctxt->inputNr > oldInputNr)) && while (!PARSER_STOPPED(ctxt)) {
(!PARSER_STOPPED(ctxt))) { if (ctxt->input->cur >= ctxt->input->end) {
GROW; if (ctxt->inputNr <= oldInputNr) {
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED);
break;
}
xmlPopPE(ctxt);
} else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
xmlParseConditionalSections(ctxt); xmlParseConditionalSections(ctxt);
} else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) { } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
xmlParseMarkupDecl(ctxt); xmlParseMarkupDecl(ctxt);
} else if (RAW == '%') {
xmlParsePERefInternal(ctxt, 1);
} else { } else {
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
xmlHaltParser(ctxt);
return;
}
SKIP_BLANKS_PE;
SHRINK;
}
while (ctxt->inputNr > oldInputNr) while (ctxt->inputNr > oldInputNr)
xmlPopPE(ctxt); xmlPopPE(ctxt);
break;
xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED); }
SKIP_BLANKS;
SHRINK;
GROW;
}
} }
/** /**
@@ -7455,8 +7493,6 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
/** /**
* Parse a parameter entity reference. Always consumes '%'. * Parse a parameter entity reference. Always consumes '%'.
* *
* @deprecated Internal function, don't use.
*
* The entity content is handled directly by pushing it's content as * The entity content is handled directly by pushing it's content as
* a new input stream. * a new input stream.
* *
@@ -7482,10 +7518,10 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
* NOTE: misleading but this is handled. * NOTE: misleading but this is handled.
* *
* @param ctxt an XML parser context * @param ctxt an XML parser context
* @param markupDecl whether the PERef starts a markup declaration
*/ */
void static void
xmlParsePEReference(xmlParserCtxt *ctxt) xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl) {
{
const xmlChar *name; const xmlChar *name;
xmlEntityPtr entity = NULL; xmlEntityPtr entity = NULL;
xmlParserInputPtr input; xmlParserInputPtr input;
@@ -7548,6 +7584,9 @@ xmlParsePEReference(xmlParserCtxt *ctxt)
entity->flags |= XML_ENT_EXPANDING; entity->flags |= XML_ENT_EXPANDING;
if (markupDecl)
input->flags |= XML_INPUT_MARKUP_DECL;
GROW; GROW;
if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) { if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
@@ -7562,6 +7601,18 @@ xmlParsePEReference(xmlParserCtxt *ctxt)
} }
} }
/**
* Parse a parameter entity reference.
*
* Non-static wrapper that forwards to xmlParsePERefInternal() with
* markupDecl set to 0, so the input pushed for the entity is not
* flagged with XML_INPUT_MARKUP_DECL.
*
* @deprecated Internal function, don't use.
*
* @param ctxt an XML parser context
*/
void
xmlParsePEReference(xmlParserCtxt *ctxt) {
/* 0: this reference is not treated as starting a markup declaration. */
xmlParsePERefInternal(ctxt, 0);
}
/** /**
* Load the content of an entity. * Load the content of an entity.
* *
@@ -7880,42 +7931,47 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
* Subsequence (markupdecl | PEReference | S)* * Subsequence (markupdecl | PEReference | S)*
*/ */
SKIP_BLANKS; SKIP_BLANKS;
while (((RAW != ']') || (ctxt->inputNr > oldInputNr)) && while (1) {
(PARSER_STOPPED(ctxt) == 0)) { if (PARSER_STOPPED(ctxt)) {
return;
/* } else if (ctxt->input->cur >= ctxt->input->end) {
* Conditional sections are allowed from external entities included if (ctxt->inputNr <= oldInputNr) {
* by PE References in the internal subset. xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
*/ return;
if ((PARSER_EXTERNAL(ctxt)) && }
xmlPopPE(ctxt);
} else if ((RAW == ']') && (ctxt->inputNr <= oldInputNr)) {
NEXT;
SKIP_BLANKS;
break;
} else if ((PARSER_EXTERNAL(ctxt)) &&
(RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
/*
* Conditional sections are allowed in external entities
* included by PE References in the internal subset.
*/
xmlParseConditionalSections(ctxt); xmlParseConditionalSections(ctxt);
} else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) { } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
xmlParseMarkupDecl(ctxt); xmlParseMarkupDecl(ctxt);
} else if (RAW == '%') { } else if (RAW == '%') {
xmlParsePEReference(ctxt); xmlParsePERefInternal(ctxt, 1);
} else { } else {
xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL); xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
break;
}
SKIP_BLANKS_PE;
SHRINK;
GROW;
}
while (ctxt->inputNr > oldInputNr) while (ctxt->inputNr > oldInputNr)
xmlPopPE(ctxt); xmlPopPE(ctxt);
return;
if (RAW == ']') { }
NEXT;
SKIP_BLANKS; SKIP_BLANKS;
SHRINK;
GROW;
} }
} }
/* /*
* We should be at the end of the DOCTYPE declaration. * We should be at the end of the DOCTYPE declaration.
*/ */
if ((ctxt->wellFormed) && (RAW != '>')) { if (RAW != '>') {
xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
return; return;
} }

View File

@@ -47,14 +47,17 @@ value
^ ^
""".format(dir_prefix), """.format(dir_prefix),
'cond_sect2': 'cond_sect2':
"""{0}/dtds/cond_sect2.dtd:15: parser error : All markup of the conditional section is not in the same entity """{0}/dtds/cond_sect2.dtd:15: parser error : Parameter entity must match extSubsetDecl
%ent; %ent;
^ ^
Entity: line 1: Entity: line 1:
]]> ]]>
^ ^
{0}/dtds/cond_sect2.dtd:17: parser error : Content error in the external subset {0}/dtds/cond_sect2.dtd:15: parser error : Content error in the external subset
%ent;
^
Entity: line 1:
]]>
^ ^
""".format(dir_prefix), """.format(dir_prefix),
'rss': 'rss':

View File

@@ -19,12 +19,12 @@ Entity: line 1:
./test/errors/759573-2.xml:6: parser error : Content error in the internal subset ./test/errors/759573-2.xml:6: parser error : Content error in the internal subset
%xx; %xx;
^ ^
Entity: line 2: Entity: line 1:
<![INCLUDE[ <![INCLUDE[
^ ^
./test/errors/759573-2.xml:6: parser error : Content error in the internal subset ./test/errors/759573-2.xml:6: parser error : Content error in the internal subset
%xx; %xx;
^ ^
Entity: line 2: Entity: line 1:
<![INCLUDE[ <![INCLUDE[
^ ^

View File

@@ -19,12 +19,12 @@ Entity: line 1:
./test/errors/759573-2.xml:6: parser error : Content error in the internal subset ./test/errors/759573-2.xml:6: parser error : Content error in the internal subset
%xx; %xx;
^ ^
Entity: line 2: Entity: line 1:
<![INCLUDE[ <![INCLUDE[
^ ^
./test/errors/759573-2.xml:6: parser error : Content error in the internal subset ./test/errors/759573-2.xml:6: parser error : Content error in the internal subset
%xx; %xx;
^ ^
Entity: line 2: Entity: line 1:
<![INCLUDE[ <![INCLUDE[
^ ^

View File

@@ -19,13 +19,13 @@ Entity: line 1:
./test/errors/759573-2.xml:6: parser error : Content error in the internal subset ./test/errors/759573-2.xml:6: parser error : Content error in the internal subset
%xx; %xx;
^ ^
Entity: line 2: Entity: line 1:
<![INCLUDE[ <![INCLUDE[
^ ^
./test/errors/759573-2.xml:6: parser error : Content error in the internal subset ./test/errors/759573-2.xml:6: parser error : Content error in the internal subset
%xx; %xx;
^ ^
Entity: line 2: Entity: line 1:
<![INCLUDE[ <![INCLUDE[
^ ^
./test/errors/759573-2.xml : failed to parse ./test/errors/759573-2.xml : failed to parse

View File

@@ -1,6 +1,15 @@
./test/errors10/781361.xml:4: parser error : xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected ./test/errors10/781361.xml:3: parser error : Space required after the element name
%elem;
^ ^
Entity: line 1:
<!ELEMENT e0000000000
^
./test/errors10/781361.xml:3: parser error : xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected
%elem;
^
Entity: line 1:
<!ELEMENT e0000000000
^
./test/errors10/781361.xml:4: parser error : Content error in the internal subset ./test/errors10/781361.xml:4: parser error : Content error in the internal subset
^ ^

View File

@@ -1,9 +1,12 @@
test/valid/dtds/cond_sect2.dtd:15: parser error : All markup of the conditional section is not in the same entity test/valid/dtds/cond_sect2.dtd:15: parser error : Parameter entity must match extSubsetDecl
%ent; %ent;
^ ^
Entity: line 1: Entity: line 1:
]]> ]]>
^ ^
test/valid/dtds/cond_sect2.dtd:17: parser error : Content error in the external subset test/valid/dtds/cond_sect2.dtd:15: parser error : Content error in the external subset
%ent;
^
Entity: line 1:
]]>
^ ^

View File

@@ -1,10 +1,13 @@
test/valid/dtds/cond_sect2.dtd:15: parser error : All markup of the conditional section is not in the same entity test/valid/dtds/cond_sect2.dtd:15: parser error : Parameter entity must match extSubsetDecl
%ent; %ent;
^ ^
Entity: line 1: Entity: line 1:
]]> ]]>
^ ^
test/valid/dtds/cond_sect2.dtd:17: parser error : Content error in the external subset test/valid/dtds/cond_sect2.dtd:15: parser error : Content error in the external subset
%ent;
^
Entity: line 1:
]]>
^ ^
./test/valid/cond_sect2.xml : failed to parse ./test/valid/cond_sect2.xml : failed to parse