1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-21 14:53:44 +03:00

parser: Pop PEs that start markup declarations explicitly

We currently only handle "Validity constraint: Proper Declaration/PE
Nesting", but we must detect "Well-formedness constraint: PE Between
Declarations" separately:

> The replacement text of a parameter entity reference in a DeclSep must
> match the production extSubsetDecl.

PEs in DeclSeps are PEs that start with a full markup declaration (or
another PE). These are handled in xmParse{Internal|External}Subset. We
set a flag on these PEs and don't close them implicitly in
xmlSkipBlankCharsPE. This will make unterminated declarations in such
PEs cause a parser error. The PEs are closed explicitly in
xmParse{Internal|External}Subset, the only location where they are
allowed to end.
This commit is contained in:
Nick Wellnhofer
2025-05-20 19:40:06 +02:00
parent 2a60ca06c0
commit 2f3655c9c3
9 changed files with 148 additions and 73 deletions

View File

@@ -33,6 +33,7 @@
#define XML_INPUT_USES_ENC_DECL (1u << 4)
#define XML_INPUT_ENCODING_ERROR (1u << 5)
#define XML_INPUT_PROGRESSIVE (1u << 6)
#define XML_INPUT_MARKUP_DECL (1u << 7)
#define PARSER_STOPPED(ctxt) ((ctxt)->disableSAX > 1)

166
parser.c
View File

@@ -204,6 +204,9 @@ xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
static void
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl);
/************************************************************************
* *
* Some factorized error routines *
@@ -2440,7 +2443,7 @@ xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
* even consume the whole entity and pop it. We might
* even pop multiple PEs in this loop.
*/
xmlParsePEReference(ctxt);
xmlParsePERefInternal(ctxt, 0);
inParam = PARSER_IN_PE(ctxt);
expandParam = PARSER_EXTERNAL(ctxt);
@@ -2448,6 +2451,14 @@ xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
if (inParam == 0)
break;
/*
* Don't pop parameter entities that start a markup
* declaration to detect Well-formedness constraint:
* PE Between Declarations.
*/
if (ctxt->input->flags & XML_INPUT_MARKUP_DECL)
break;
xmlPopPE(ctxt);
inParam = PARSER_IN_PE(ctxt);
@@ -2747,7 +2758,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
*/
void
xmlParserHandlePEReference(xmlParserCtxt *ctxt) {
xmlParsePEReference(ctxt);
xmlParsePERefInternal(ctxt, 0);
}
/**
@@ -6648,20 +6659,31 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
int *inputIds = NULL;
size_t inputIdsSize = 0;
size_t depth = 0;
int isFreshPE = 0;
int oldInputNr = ctxt->inputNr;
int declInputNr = ctxt->inputNr;
while (PARSER_STOPPED(ctxt) == 0) {
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
while (!PARSER_STOPPED(ctxt)) {
if (ctxt->input->cur >= ctxt->input->end) {
if (ctxt->inputNr <= oldInputNr) {
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
goto error;
}
xmlPopPE(ctxt);
declInputNr = ctxt->inputNr;
} else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
int id = ctxt->input->id;
SKIP(3);
SKIP_BLANKS_PE;
isFreshPE = 0;
if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
SKIP(7);
SKIP_BLANKS_PE;
if (RAW != '[') {
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
xmlHaltParser(ctxt);
goto error;
}
if (ctxt->input->id != id) {
@@ -6700,7 +6722,6 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
SKIP_BLANKS_PE;
if (RAW != '[') {
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
xmlHaltParser(ctxt);
goto error;
}
if (ctxt->input->id != id) {
@@ -6741,11 +6762,17 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
}
} else {
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
xmlHaltParser(ctxt);
goto error;
}
} else if ((depth > 0) &&
(RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
if (isFreshPE) {
xmlFatalErrMsg(ctxt, XML_ERR_CONDSEC_INVALID,
"Parameter entity must match "
"extSubsetDecl\n");
goto error;
}
depth--;
if (ctxt->input->id != inputIds[depth]) {
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
@@ -6754,17 +6781,23 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
}
SKIP(3);
} else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
isFreshPE = 0;
xmlParseMarkupDecl(ctxt);
} else if (RAW == '%') {
xmlParsePERefInternal(ctxt, 1);
if (ctxt->inputNr > declInputNr) {
isFreshPE = 1;
declInputNr = ctxt->inputNr;
}
} else {
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
xmlHaltParser(ctxt);
goto error;
}
if (depth == 0)
break;
SKIP_BLANKS_PE;
SKIP_BLANKS;
SHRINK;
GROW;
}
@@ -6937,7 +6970,7 @@ xmlParseExternalSubset(xmlParserCtxt *ctxt, const xmlChar *publicId,
}
ctxt->myDoc->properties = XML_DOC_INTERNAL;
}
if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL) &&
if ((ctxt->myDoc->intSubset == NULL) &&
(xmlCreateIntSubset(ctxt->myDoc, NULL, publicId, systemId) == NULL)) {
xmlErrMemory(ctxt);
}
@@ -6945,27 +6978,32 @@ xmlParseExternalSubset(xmlParserCtxt *ctxt, const xmlChar *publicId,
ctxt->inSubset = 2;
oldInputNr = ctxt->inputNr;
SKIP_BLANKS_PE;
while (((RAW != 0) || (ctxt->inputNr > oldInputNr)) &&
(!PARSER_STOPPED(ctxt))) {
GROW;
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
SKIP_BLANKS;
while (!PARSER_STOPPED(ctxt)) {
if (ctxt->input->cur >= ctxt->input->end) {
if (ctxt->inputNr <= oldInputNr) {
xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED);
break;
}
xmlPopPE(ctxt);
} else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
xmlParseConditionalSections(ctxt);
} else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
xmlParseMarkupDecl(ctxt);
} else if (RAW == '%') {
xmlParsePERefInternal(ctxt, 1);
} else {
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
xmlHaltParser(ctxt);
return;
while (ctxt->inputNr > oldInputNr)
xmlPopPE(ctxt);
break;
}
SKIP_BLANKS_PE;
SKIP_BLANKS;
SHRINK;
GROW;
}
while (ctxt->inputNr > oldInputNr)
xmlPopPE(ctxt);
xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED);
}
/**
@@ -7455,8 +7493,6 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
/**
* Parse a parameter entity reference. Always consumes '%'.
*
* @deprecated Internal function, don't use.
*
* The entity content is handled directly by pushing it's content as
* a new input stream.
*
@@ -7482,10 +7518,10 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
* NOTE: misleading but this is handled.
*
* @param ctxt an XML parser context
* @param markupDecl whether the PERef starts a markup declaration
*/
void
xmlParsePEReference(xmlParserCtxt *ctxt)
{
static void
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl) {
const xmlChar *name;
xmlEntityPtr entity = NULL;
xmlParserInputPtr input;
@@ -7548,6 +7584,9 @@ xmlParsePEReference(xmlParserCtxt *ctxt)
entity->flags |= XML_ENT_EXPANDING;
if (markupDecl)
input->flags |= XML_INPUT_MARKUP_DECL;
GROW;
if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
@@ -7562,6 +7601,18 @@ xmlParsePEReference(xmlParserCtxt *ctxt)
}
}
/**
* Parse a parameter entity reference.
*
* @deprecated Internal function, don't use.
*
* @param ctxt an XML parser context
*/
void
xmlParsePEReference(xmlParserCtxt *ctxt) {
xmlParsePERefInternal(ctxt, 0);
}
/**
* Load the content of an entity.
*
@@ -7880,44 +7931,49 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
* Subsequence (markupdecl | PEReference | S)*
*/
SKIP_BLANKS;
while (((RAW != ']') || (ctxt->inputNr > oldInputNr)) &&
(PARSER_STOPPED(ctxt) == 0)) {
/*
* Conditional sections are allowed from external entities included
* by PE References in the internal subset.
*/
if ((PARSER_EXTERNAL(ctxt)) &&
(RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
while (1) {
if (PARSER_STOPPED(ctxt)) {
return;
} else if (ctxt->input->cur >= ctxt->input->end) {
if (ctxt->inputNr <= oldInputNr) {
xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
return;
}
xmlPopPE(ctxt);
} else if ((RAW == ']') && (ctxt->inputNr <= oldInputNr)) {
NEXT;
SKIP_BLANKS;
break;
} else if ((PARSER_EXTERNAL(ctxt)) &&
(RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
/*
* Conditional sections are allowed in external entities
* included by PE References in the internal subset.
*/
xmlParseConditionalSections(ctxt);
} else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
xmlParseMarkupDecl(ctxt);
xmlParseMarkupDecl(ctxt);
} else if (RAW == '%') {
xmlParsePEReference(ctxt);
xmlParsePERefInternal(ctxt, 1);
} else {
xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
break;
xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
while (ctxt->inputNr > oldInputNr)
xmlPopPE(ctxt);
return;
}
SKIP_BLANKS_PE;
SKIP_BLANKS;
SHRINK;
GROW;
}
while (ctxt->inputNr > oldInputNr)
xmlPopPE(ctxt);
if (RAW == ']') {
NEXT;
SKIP_BLANKS;
}
}
}
/*
* We should be at the end of the DOCTYPE declaration.
*/
if ((ctxt->wellFormed) && (RAW != '>')) {
xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
return;
if (RAW != '>') {
xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
return;
}
NEXT;
}

View File

@@ -47,14 +47,17 @@ value
^
""".format(dir_prefix),
'cond_sect2':
"""{0}/dtds/cond_sect2.dtd:15: parser error : All markup of the conditional section is not in the same entity
"""{0}/dtds/cond_sect2.dtd:15: parser error : Parameter entity must match extSubsetDecl
%ent;
^
Entity: line 1:
]]>
^
{0}/dtds/cond_sect2.dtd:17: parser error : Content error in the external subset
{0}/dtds/cond_sect2.dtd:15: parser error : Content error in the external subset
%ent;
^
Entity: line 1:
]]>
^
""".format(dir_prefix),
'rss':

View File

@@ -19,12 +19,12 @@ Entity: line 1:
./test/errors/759573-2.xml:6: parser error : Content error in the internal subset
%xx;
^
Entity: line 2:
Entity: line 1:
<![INCLUDE[
^
./test/errors/759573-2.xml:6: parser error : Content error in the internal subset
%xx;
^
Entity: line 2:
Entity: line 1:
<![INCLUDE[
^

View File

@@ -19,12 +19,12 @@ Entity: line 1:
./test/errors/759573-2.xml:6: parser error : Content error in the internal subset
%xx;
^
Entity: line 2:
Entity: line 1:
<![INCLUDE[
^
./test/errors/759573-2.xml:6: parser error : Content error in the internal subset
%xx;
^
Entity: line 2:
Entity: line 1:
<![INCLUDE[
^

View File

@@ -19,13 +19,13 @@ Entity: line 1:
./test/errors/759573-2.xml:6: parser error : Content error in the internal subset
%xx;
^
Entity: line 2:
Entity: line 1:
<![INCLUDE[
^
./test/errors/759573-2.xml:6: parser error : Content error in the internal subset
%xx;
^
Entity: line 2:
Entity: line 1:
<![INCLUDE[
^
./test/errors/759573-2.xml : failed to parse

View File

@@ -1,6 +1,15 @@
./test/errors10/781361.xml:4: parser error : xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected
^
./test/errors10/781361.xml:3: parser error : Space required after the element name
%elem;
^
Entity: line 1:
<!ELEMENT e0000000000
^
./test/errors10/781361.xml:3: parser error : xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected
%elem;
^
Entity: line 1:
<!ELEMENT e0000000000
^
./test/errors10/781361.xml:4: parser error : Content error in the internal subset
^

View File

@@ -1,9 +1,12 @@
test/valid/dtds/cond_sect2.dtd:15: parser error : All markup of the conditional section is not in the same entity
test/valid/dtds/cond_sect2.dtd:15: parser error : Parameter entity must match extSubsetDecl
%ent;
^
Entity: line 1:
]]>
^
test/valid/dtds/cond_sect2.dtd:17: parser error : Content error in the external subset
test/valid/dtds/cond_sect2.dtd:15: parser error : Content error in the external subset
%ent;
^
Entity: line 1:
]]>
^

View File

@@ -1,10 +1,13 @@
test/valid/dtds/cond_sect2.dtd:15: parser error : All markup of the conditional section is not in the same entity
test/valid/dtds/cond_sect2.dtd:15: parser error : Parameter entity must match extSubsetDecl
%ent;
^
Entity: line 1:
]]>
^
test/valid/dtds/cond_sect2.dtd:17: parser error : Content error in the external subset
test/valid/dtds/cond_sect2.dtd:15: parser error : Content error in the external subset
%ent;
^
Entity: line 1:
]]>
^
./test/valid/cond_sect2.xml : failed to parse