From 59b33661784359c6d3a8309ddbd2129fb2688548 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Tue, 27 Dec 2022 14:15:51 +0100 Subject: [PATCH] error: Limit number of parser errors Reporting errors is expensive and some abusive test cases can generate an error for each invalid input byte. This causes the parser to spend most of the time with error handling. Limit the number of errors and warnings to 100. --- HTMLparser.c | 5 +++++ error.c | 27 +++++++++++++++++++++------ include/libxml/parser.h | 2 ++ parser.c | 12 ++++++++++++ testchar.c | 4 ++++ 5 files changed, 44 insertions(+), 6 deletions(-) diff --git a/HTMLparser.c b/HTMLparser.c index b716f63c..a53f7083 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -6790,6 +6790,11 @@ htmlCtxtReset(htmlParserCtxtPtr ctxt) xmlHashFree(ctxt->attsSpecial, NULL); ctxt->attsSpecial = NULL; } + + ctxt->nbErrors = 0; + ctxt->nbWarnings = 0; + if (ctxt->lastError.code != XML_ERR_OK) + xmlResetError(&ctxt->lastError); } /** diff --git a/error.c b/error.c index c67e4292..eb6c92b3 100644 --- a/error.c +++ b/error.c @@ -18,6 +18,8 @@ #include "private/error.h" +#define XML_MAX_ERRORS 100 + #define XML_GET_VAR_STR(msg, str) { \ int size, prev_size = -1; \ int chars; \ @@ -485,12 +487,25 @@ __xmlRaiseError(xmlStructuredErrorFunc schannel, (domain == XML_FROM_DTD) || (domain == XML_FROM_NAMESPACE) || (domain == XML_FROM_IO) || (domain == XML_FROM_VALID)) { ctxt = (xmlParserCtxtPtr) ctx; - if ((schannel == NULL) && (ctxt != NULL) && (ctxt->sax != NULL) && - (ctxt->sax->initialized == XML_SAX2_MAGIC) && - (ctxt->sax->serror != NULL)) { - schannel = ctxt->sax->serror; - data = ctxt->userData; - } + + if (ctxt != NULL) { + if (level == XML_ERR_WARNING) { + if (ctxt->nbWarnings >= XML_MAX_ERRORS) + return; + ctxt->nbWarnings += 1; + } else { + if (ctxt->nbErrors >= XML_MAX_ERRORS) + return; + ctxt->nbErrors += 1; + } + + if ((schannel == NULL) && (ctxt->sax != NULL) && + (ctxt->sax->initialized == XML_SAX2_MAGIC) && + (ctxt->sax->serror != NULL)) { + schannel = ctxt->sax->serror; + data = ctxt->userData; + } + } } /* * Check if structured error handler set diff --git a/include/libxml/parser.h b/include/libxml/parser.h index f1493dfd..6af43ffe 100644 --- a/include/libxml/parser.h +++ b/include/libxml/parser.h @@ -310,6 +310,8 @@ struct _xmlParserCtxt { unsigned long sizeentcopy; /* volume of entity copy */ int endCheckState; /* quote state for push parser */ + unsigned short nbErrors; /* number of errors */ + unsigned short nbWarnings; /* number of warnings */ }; /** diff --git a/parser.c b/parser.c index 5dd17a42..28b95200 100644 --- a/parser.c +++ b/parser.c @@ -12848,6 +12848,10 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL, oldctxt); if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); + if (oldctxt != NULL) { + ctxt->nbErrors = oldctxt->nbErrors; + ctxt->nbWarnings = oldctxt->nbWarnings; + } xmlDetectSAX2(ctxt); newDoc = xmlNewDoc(BAD_CAST "1.0"); @@ -13012,6 +13016,8 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, ctxt->dict = NULL; ctxt->attsDefault = NULL; ctxt->attsSpecial = NULL; + oldctxt->nbErrors = ctxt->nbErrors; + oldctxt->nbWarnings = ctxt->nbWarnings; oldctxt->validate = ctxt->validate; oldctxt->valid = ctxt->valid; oldctxt->node_seq.maximum = ctxt->node_seq.maximum; @@ -13138,6 +13144,8 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, ctxt = xmlCreateMemoryParserCtxt((char *) string, size); if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); + ctxt->nbErrors = oldctxt->nbErrors; + ctxt->nbWarnings = oldctxt->nbWarnings; if (user_data != NULL) ctxt->userData = user_data; else @@ -13269,6 +13277,8 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy); } + oldctxt->nbErrors = ctxt->nbErrors; + oldctxt->nbWarnings = ctxt->nbWarnings; ctxt->sax = oldsax; ctxt->dict = NULL; ctxt->attsDefault = NULL; @@ -14710,6 +14720,8 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt) if (ctxt->catalogs != NULL) xmlCatalogFreeLocal(ctxt->catalogs); #endif + ctxt->nbErrors = 0; + ctxt->nbWarnings = 0; if (ctxt->lastError.code != XML_ERR_OK) xmlResetError(&ctxt->lastError); } diff --git a/testchar.c b/testchar.c index 4fd68c0d..b612cbdb 100644 --- a/testchar.c +++ b/testchar.c @@ -270,6 +270,7 @@ static int testCharRangeByte1(xmlParserCtxtPtr ctxt) { for (i = 0;i <= 0xFF;i++) { data[0] = (char) i; ctxt->charset = XML_CHAR_ENCODING_UTF8; + ctxt->nbErrors = 0; lastError = 0; c = xmlCurrentChar(ctxt, &len); @@ -305,6 +306,7 @@ static int testCharRangeByte2(xmlParserCtxtPtr ctxt) { data[0] = (char) i; data[1] = (char) j; ctxt->charset = XML_CHAR_ENCODING_UTF8; + ctxt->nbErrors = 0; lastError = 0; c = xmlCurrentChar(ctxt, &len); @@ -398,6 +400,7 @@ static int testCharRangeByte3(xmlParserCtxtPtr ctxt) { data[2] = (char) K; value = (K & 0x3F) + ((j & 0x3F) << 6) + ((i & 0xF) << 12); ctxt->charset = XML_CHAR_ENCODING_UTF8; + ctxt->nbErrors = 0; lastError = 0; c = xmlCurrentChar(ctxt, &len); @@ -500,6 +503,7 @@ static int testCharRangeByte4(xmlParserCtxtPtr ctxt) { value = (L & 0x3F) + ((K & 0x3F) << 6) + ((j & 0x3F) << 12) + ((i & 0x7) << 18); ctxt->charset = XML_CHAR_ENCODING_UTF8; + ctxt->nbErrors = 0; lastError = 0; c = xmlCurrentChar(ctxt, &len);