1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-26 00:37:43 +03:00

parser: Fix DTD parser progress checks

This is another attempt at fixing parser progress checks. Instead of
relying on in->consumed, which could overflow, change some DTD parser
functions to make guaranteed progress on certain byte sequences.
This commit is contained in:
Nick Wellnhofer
2022-11-13 21:47:03 +01:00
parent 249cee4b2a
commit f61b8a6233
7 changed files with 78 additions and 68 deletions

100
parser.c
View File

@@ -4900,7 +4900,8 @@ not_terminated:
* *
* DEPRECATED: Internal function, don't use. * DEPRECATED: Internal function, don't use.
* *
* Skip an XML (SGML) comment <!-- .... --> * Parse an XML (SGML) comment. Always consumes '<!'.
*
* The spec says that "For compatibility, the string "--" (double-hyphen) * The spec says that "For compatibility, the string "--" (double-hyphen)
* must not occur within comments. " * must not occur within comments. "
* *
@@ -4923,12 +4924,15 @@ xmlParseComment(xmlParserCtxtPtr ctxt) {
/* /*
* Check that there is a comment right here. * Check that there is a comment right here.
*/ */
if ((RAW != '<') || (NXT(1) != '!') || if ((RAW != '<') || (NXT(1) != '!'))
(NXT(2) != '-') || (NXT(3) != '-')) return; return;
SKIP(2);
if ((RAW != '-') || (NXT(1) != '-'))
return;
state = ctxt->instate; state = ctxt->instate;
ctxt->instate = XML_PARSER_COMMENT; ctxt->instate = XML_PARSER_COMMENT;
inputid = ctxt->input->id; inputid = ctxt->input->id;
SKIP(4); SKIP(2);
SHRINK; SHRINK;
GROW; GROW;
@@ -5342,7 +5346,7 @@ xmlParsePI(xmlParserCtxtPtr ctxt) {
* *
* DEPRECATED: Internal function, don't use. * DEPRECATED: Internal function, don't use.
* *
* parse a notation declaration * Parse a notation declaration. Always consumes '<!'.
* *
* [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
* *
@@ -5360,10 +5364,14 @@ xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
xmlChar *Pubid; xmlChar *Pubid;
xmlChar *Systemid; xmlChar *Systemid;
if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { if ((CUR != '<') || (NXT(1) != '!'))
return;
SKIP(2);
if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
int inputid = ctxt->input->id; int inputid = ctxt->input->id;
SHRINK; SHRINK;
SKIP(10); SKIP(8);
if (SKIP_BLANKS == 0) { if (SKIP_BLANKS == 0) {
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
"Space required after '<!NOTATION'\n"); "Space required after '<!NOTATION'\n");
@@ -5416,7 +5424,7 @@ xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
* *
* DEPRECATED: Internal function, don't use. * DEPRECATED: Internal function, don't use.
* *
* parse <!ENTITY declarations * Parse an entity declaration. Always consumes '<!'.
* *
* [70] EntityDecl ::= GEDecl | PEDecl * [70] EntityDecl ::= GEDecl | PEDecl
* *
@@ -5443,11 +5451,15 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
int isParameter = 0; int isParameter = 0;
xmlChar *orig = NULL; xmlChar *orig = NULL;
if ((CUR != '<') || (NXT(1) != '!'))
return;
SKIP(2);
/* GROW; done in the caller */ /* GROW; done in the caller */
if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
int inputid = ctxt->input->id; int inputid = ctxt->input->id;
SHRINK; SHRINK;
SKIP(8); SKIP(6);
if (SKIP_BLANKS == 0) { if (SKIP_BLANKS == 0) {
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
"Space required after '<!ENTITY'\n"); "Space required after '<!ENTITY'\n");
@@ -6006,7 +6018,7 @@ xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
* *
* DEPRECATED: Internal function, don't use. * DEPRECATED: Internal function, don't use.
* *
* : parse the Attribute list def for an element * Parse an attribute list declaration for an element. Always consumes '<!'.
* *
* [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
* *
@@ -6019,10 +6031,14 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
const xmlChar *attrName; const xmlChar *attrName;
xmlEnumerationPtr tree; xmlEnumerationPtr tree;
if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) { if ((CUR != '<') || (NXT(1) != '!'))
return;
SKIP(2);
if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
int inputid = ctxt->input->id; int inputid = ctxt->input->id;
SKIP(9); SKIP(7);
if (SKIP_BLANKS == 0) { if (SKIP_BLANKS == 0) {
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
"Space required after '<!ATTLIST'\n"); "Space required after '<!ATTLIST'\n");
@@ -6634,7 +6650,7 @@ xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
* *
* DEPRECATED: Internal function, don't use. * DEPRECATED: Internal function, don't use.
* *
* parse an Element declaration. * Parse an element declaration. Always consumes '<!'.
* *
* [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
* *
@@ -6649,11 +6665,15 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
int ret = -1; int ret = -1;
xmlElementContentPtr content = NULL; xmlElementContentPtr content = NULL;
if ((CUR != '<') || (NXT(1) != '!'))
return(ret);
SKIP(2);
/* GROW; done in the caller */ /* GROW; done in the caller */
if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) { if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
int inputid = ctxt->input->id; int inputid = ctxt->input->id;
SKIP(9); SKIP(7);
if (SKIP_BLANKS == 0) { if (SKIP_BLANKS == 0) {
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
"Space required after 'ELEMENT'\n"); "Space required after 'ELEMENT'\n");
@@ -6741,6 +6761,8 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
* xmlParseConditionalSections * xmlParseConditionalSections
* @ctxt: an XML parser context * @ctxt: an XML parser context
* *
* Parse a conditional section. Always consumes '<!['.
*
* [61] conditionalSect ::= includeSect | ignoreSect * [61] conditionalSect ::= includeSect | ignoreSect
* [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
* [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
@@ -6865,18 +6887,13 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
" in the same entity\n"); " in the same entity\n");
} }
SKIP(3); SKIP(3);
} else { } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
int id = ctxt->input->id;
unsigned long cons = CUR_CONSUMED;
xmlParseMarkupDecl(ctxt); xmlParseMarkupDecl(ctxt);
} else {
if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
xmlHaltParser(ctxt); xmlHaltParser(ctxt);
goto error; goto error;
} }
}
if (depth == 0) if (depth == 0)
break; break;
@@ -6895,7 +6912,7 @@ error:
* *
* DEPRECATED: Internal function, don't use. * DEPRECATED: Internal function, don't use.
* *
* parse Markup declarations * Parse markup declarations. Always consumes '<!' or '<?'.
* *
* [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
* NotationDecl | PI | Comment * NotationDecl | PI | Comment
@@ -6924,6 +6941,8 @@ xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
xmlParseElementDecl(ctxt); xmlParseElementDecl(ctxt);
else if (NXT(3) == 'N') else if (NXT(3) == 'N')
xmlParseEntityDecl(ctxt); xmlParseEntityDecl(ctxt);
else
SKIP(2);
break; break;
case 'A': case 'A':
xmlParseAttributeListDecl(ctxt); xmlParseAttributeListDecl(ctxt);
@@ -6936,6 +6955,7 @@ xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
break; break;
default: default:
/* there is an error but it will be detected later */ /* there is an error but it will be detected later */
SKIP(2);
break; break;
} }
} else if (NXT(1) == '?') { } else if (NXT(1) == '?') {
@@ -7099,20 +7119,16 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
while (((RAW == '<') && (NXT(1) == '?')) || while (((RAW == '<') && (NXT(1) == '?')) ||
((RAW == '<') && (NXT(1) == '!')) || ((RAW == '<') && (NXT(1) == '!')) ||
(RAW == '%')) { (RAW == '%')) {
int id = ctxt->input->id;
unsigned long cons = CUR_CONSUMED;
GROW; GROW;
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
xmlParseConditionalSections(ctxt); xmlParseConditionalSections(ctxt);
} else } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
xmlParseMarkupDecl(ctxt); xmlParseMarkupDecl(ctxt);
SKIP_BLANKS; } else {
if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
break; break;
} }
SKIP_BLANKS;
} }
if (RAW != 0) { if (RAW != 0) {
@@ -7947,7 +7963,8 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
* *
* DEPRECATED: Internal function, don't use. * DEPRECATED: Internal function, don't use.
* *
* parse PEReference declarations * Parse a parameter entity reference. Always consumes '%'.
*
* The entity content is handled directly by pushing it's content as * The entity content is handled directly by pushing it's content as
* a new input stream. * a new input stream.
* *
@@ -8430,14 +8447,9 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
* PEReferences. * PEReferences.
* Subsequence (markupdecl | PEReference | S)* * Subsequence (markupdecl | PEReference | S)*
*/ */
SKIP_BLANKS;
while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) && while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
(ctxt->instate != XML_PARSER_EOF)) { (ctxt->instate != XML_PARSER_EOF)) {
int id = ctxt->input->id;
unsigned long cons = CUR_CONSUMED;
SKIP_BLANKS;
xmlParseMarkupDecl(ctxt);
xmlParsePEReference(ctxt);
/* /*
* Conditional sections are allowed from external entities included * Conditional sections are allowed from external entities included
@@ -8446,16 +8458,20 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) && if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
(RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
xmlParseConditionalSections(ctxt); xmlParseConditionalSections(ctxt);
} } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
xmlParseMarkupDecl(ctxt);
if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) { } else if (RAW == '%') {
xmlParsePEReference(ctxt);
} else {
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
"xmlParseInternalSubset: error detected in Markup declaration\n"); "xmlParseInternalSubset: error detected in"
" Markup declaration\n");
if (ctxt->inputNr > baseInputNr) if (ctxt->inputNr > baseInputNr)
xmlPopInput(ctxt); xmlPopInput(ctxt);
else else
break; break;
} }
SKIP_BLANKS;
} }
if (RAW == ']') { if (RAW == ']') {
NEXT; NEXT;

View File

@@ -12,9 +12,6 @@ A<lbbbbbbbbbbbbbbbbbbb_
./test/errors/754946.xml:4: parser error : DOCTYPE improperly terminated ./test/errors/754946.xml:4: parser error : DOCTYPE improperly terminated
<![ <![
^ ^
./test/errors/754946.xml:4: parser error : StartTag: invalid element name ./test/errors/754946.xml:4: parser error : Start tag expected, '<' not found
<![
^
./test/errors/754946.xml:4: parser error : Extra content at the end of the document
<![ <![
^ ^

View File

@@ -12,9 +12,6 @@ A<lbbbbbbbbbbbbbbbbbbb_
./test/errors/754946.xml:4: parser error : DOCTYPE improperly terminated ./test/errors/754946.xml:4: parser error : DOCTYPE improperly terminated
<![ <![
^ ^
./test/errors/754946.xml:4: parser error : StartTag: invalid element name ./test/errors/754946.xml:4: parser error : Start tag expected, '<' not found
<![
^
./test/errors/754946.xml:4: parser error : Extra content at the end of the document
<![ <![
^ ^