diff --git a/SAX2.c b/SAX2.c index c825f73c..693a9ba3 100644 --- a/SAX2.c +++ b/SAX2.c @@ -388,6 +388,8 @@ xmlSAX2ExternalSubset(void *ctx, const xmlChar *name, int oldcharset; const xmlChar *oldencoding; int oldprogressive; + unsigned long consumed; + size_t buffered; /* * Ask the Entity resolver to load the damn thing @@ -460,6 +462,18 @@ xmlSAX2ExternalSubset(void *ctx, const xmlChar *name, while (ctxt->inputNr > 1) xmlPopInput(ctxt); + + consumed = ctxt->input->consumed; + buffered = ctxt->input->cur - ctxt->input->base; + if (buffered > ULONG_MAX - consumed) + consumed = ULONG_MAX; + else + consumed += buffered; + if (consumed > ULONG_MAX - ctxt->sizeentities) + ctxt->sizeentities = ULONG_MAX; + else + ctxt->sizeentities += consumed; + xmlFreeInputStream(ctxt->input); xmlFree(ctxt->inputTab); diff --git a/entities.c b/entities.c index cdc6c1ad..0d4622f8 100644 --- a/entities.c +++ b/entities.c @@ -38,35 +38,35 @@ static xmlEntity xmlEntityLt = { NULL, NULL, NULL, NULL, NULL, NULL, BAD_CAST "<", BAD_CAST "<", 1, XML_INTERNAL_PREDEFINED_ENTITY, - NULL, NULL, NULL, NULL, 0, 1, 0 + NULL, NULL, NULL, NULL, 0, 1, 0, 0 }; static xmlEntity xmlEntityGt = { NULL, XML_ENTITY_DECL, BAD_CAST "gt", NULL, NULL, NULL, NULL, NULL, NULL, BAD_CAST ">", BAD_CAST ">", 1, XML_INTERNAL_PREDEFINED_ENTITY, - NULL, NULL, NULL, NULL, 0, 1, 0 + NULL, NULL, NULL, NULL, 0, 1, 0, 0 }; static xmlEntity xmlEntityAmp = { NULL, XML_ENTITY_DECL, BAD_CAST "amp", NULL, NULL, NULL, NULL, NULL, NULL, BAD_CAST "&", BAD_CAST "&", 1, XML_INTERNAL_PREDEFINED_ENTITY, - NULL, NULL, NULL, NULL, 0, 1, 0 + NULL, NULL, NULL, NULL, 0, 1, 0, 0 }; static xmlEntity xmlEntityQuot = { NULL, XML_ENTITY_DECL, BAD_CAST "quot", NULL, NULL, NULL, NULL, NULL, NULL, BAD_CAST "\"", BAD_CAST "\"", 1, XML_INTERNAL_PREDEFINED_ENTITY, - NULL, NULL, NULL, NULL, 0, 1, 0 + NULL, NULL, NULL, NULL, 0, 1, 0, 0 }; static xmlEntity xmlEntityApos = { NULL, XML_ENTITY_DECL, BAD_CAST "apos", NULL, NULL, NULL, NULL, NULL, NULL, BAD_CAST "'", BAD_CAST "'", 1, XML_INTERNAL_PREDEFINED_ENTITY, - NULL, NULL, NULL, NULL, 0, 1, 0 + NULL, NULL, NULL, NULL, 0, 1, 0, 0 }; /** diff --git a/include/libxml/entities.h b/include/libxml/entities.h index 797f5451..27bf9370 100644 --- a/include/libxml/entities.h +++ b/include/libxml/entities.h @@ -61,6 +61,7 @@ struct _xmlEntity { * references done from that entity * and if it contains '<' */ int flags; /* various flags */ + unsigned long expandedSize; /* expanded size */ }; /* diff --git a/include/libxml/parser.h b/include/libxml/parser.h index d104b874..c8c3d66d 100644 --- a/include/libxml/parser.h +++ b/include/libxml/parser.h @@ -61,17 +61,14 @@ struct _xmlParserInput { int length; /* length if known */ int line; /* Current line */ int col; /* Current column */ - /* - * NOTE: consumed is only tested for equality in the parser code, - * so even if there is an overflow this should not give troubles - * for parsing very large instances. - */ unsigned long consumed; /* How many xmlChars already consumed */ xmlParserInputDeallocate free; /* function to deallocate the base */ const xmlChar *encoding; /* the encoding string for entity */ const xmlChar *version; /* the version string for entity */ int standalone; /* Was that entity marked standalone */ int id; /* an unique identifier for the entity */ + unsigned long parentConsumed; /* consumed bytes from parents */ + xmlEntityPtr entity; /* entity, if any */ }; /** diff --git a/parser.c b/parser.c index 64f9dacd..214c8c20 100644 --- a/parser.c +++ b/parser.c @@ -95,9 +95,6 @@ struct _xmlStartTag { int nsNr; }; -static void -xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); - static xmlParserCtxtPtr xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData, const xmlChar *URL, const xmlChar *ID, const xmlChar *base, @@ -130,144 +127,7 @@ xmlParseElementEnd(xmlParserCtxtPtr ctxt); */ #define XML_PARSER_NON_LINEAR 10 -/* - * xmlParserEntityCheck - * - * Function to check non-linear entity expansion behaviour - * This is here to detect and stop exponential linear entity expansion - * This is not a limitation of the parser but a safety - * boundary feature. It can be disabled with the XML_PARSE_HUGE - * parser option. - */ -static int -xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, - xmlEntityPtr ent, size_t replacement) -{ - size_t consumed = 0; - int i; - - if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) - return (0); - if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) - return (1); - - /* - * This may look absurd but is needed to detect - * entities problems - */ - if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && - (ent->content != NULL) && (ent->checked == 0) && - (ctxt->errNo != XML_ERR_ENTITY_LOOP)) { - unsigned long oldnbent = ctxt->nbentities; - xmlChar *rep; - - ent->checked = 1; - - ++ctxt->depth; - rep = xmlStringDecodeEntities(ctxt, ent->content, - XML_SUBSTITUTE_REF, 0, 0, 0); - --ctxt->depth; - if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) { - ent->content[0] = 0; - } - - ent->checked = ctxt->nbentities - oldnbent + 1; - if (rep != NULL) { - xmlFree(rep); - rep = NULL; - } - } - - /* - * Prevent entity exponential check, not just replacement while - * parsing the DTD - * The check is potentially costly so do that only once in a thousand - */ - if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) && - (ctxt->nbentities % 1024 == 0)) { - for (i = 0;i < ctxt->inputNr;i++) { - consumed += ctxt->inputTab[i]->consumed + - (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base); - } - if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) { - xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); - ctxt->instate = XML_PARSER_EOF; - return (1); - } - consumed = 0; - } - - - - if (replacement != 0) { - if (replacement < XML_MAX_TEXT_LENGTH) - return(0); - - /* - * If the volume of entity copy reaches 10 times the - * amount of parsed data and over the large text threshold - * then that's very likely to be an abuse. - */ - if (ctxt->input != NULL) { - consumed = ctxt->input->consumed + - (ctxt->input->cur - ctxt->input->base); - } - consumed += ctxt->sizeentities; - - if (replacement < XML_PARSER_NON_LINEAR * consumed) - return(0); - } else if (size != 0) { - /* - * Do the check based on the replacement size of the entity - */ - if (size < XML_PARSER_BIG_ENTITY) - return(0); - - /* - * A limit on the amount of text data reasonably used - */ - if (ctxt->input != NULL) { - consumed = ctxt->input->consumed + - (ctxt->input->cur - ctxt->input->base); - } - consumed += ctxt->sizeentities; - - if ((size < XML_PARSER_NON_LINEAR * consumed) && - (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) - return (0); - } else if (ent != NULL) { - /* - * use the number of parsed entities in the replacement - */ - size = ent->checked; - - /* - * The amount of data parsed counting entities size only once - */ - if (ctxt->input != NULL) { - consumed = ctxt->input->consumed + - (ctxt->input->cur - ctxt->input->base); - } - consumed += ctxt->sizeentities; - - /* - * Check the density of entities for the amount of data - * knowing an entity reference will take at least 3 bytes - */ - if (size * 3 < consumed * XML_PARSER_NON_LINEAR) - return (0); - } else { - /* - * strange we got no data for checking - */ - if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) && - (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) || - (ctxt->nbentities <= 10000)) - return (0); - } - xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); - return (1); -} +#define XML_ENT_FIXED_COST 50 /** * xmlParserMaxDepth: @@ -864,6 +724,83 @@ xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, info1, info2, info3); } +static void +xmlSaturatedAdd(unsigned long *dst, unsigned long val) { + if (val > ULONG_MAX - *dst) + *dst = ULONG_MAX; + else + *dst += val; +} + +static void +xmlSaturatedAddSizeT(unsigned long *dst, unsigned long val) { + if (val > ULONG_MAX - *dst) + *dst = ULONG_MAX; + else + *dst += val; +} + +/** + * xmlParserEntityCheck: + * @ctxt: parser context + * @extra: sum of unexpanded entity sizes + * + * Check for non-linear entity expansion behaviour. + * + * In some cases like xmlStringDecodeEntities, this function is called + * for each, possibly nested entity and its unexpanded content length. + * + * In other cases like xmlParseReference, it's only called for each + * top-level entity with its unexpanded content length plus the sum of + * the unexpanded content lengths (plus fixed cost) of all nested + * entities. + * + * Summing the unexpanded lengths also adds the length of the reference. + * This is by design. Taking the length of the entity name into account + * discourages attacks that try to waste CPU time with abusively long + * entity names. See test/recurse/lol6.xml for example. Each call also + * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with + * short entities. + * + * Returns 1 on error, 0 on success. + */ +static int +xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra) +{ + unsigned long consumed; + + /* + * Compute total consumed bytes so far, including input streams of + * external entities. + */ + consumed = ctxt->input->parentConsumed; + xmlSaturatedAdd(&consumed, ctxt->input->consumed); + xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base); + xmlSaturatedAdd(&consumed, ctxt->sizeentities); + + /* + * Add extra cost and some fixed cost. + */ + xmlSaturatedAdd(&ctxt->sizeentcopy, extra); + xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST); + + /* + * It's important to always use saturation arithmetic when tracking + * entity sizes to make the size checks reliable. If "sizeentcopy" + * overflows, we have to abort. + */ + if ((ctxt->sizeentcopy > XML_MAX_TEXT_LENGTH) && + ((ctxt->sizeentcopy >= ULONG_MAX) || + (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) { + xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP, + "Maximum entity amplification factor exceeded"); + xmlHaltParser(ctxt); + return(1); + } + + return(0); +} + /************************************************************************ * * * Library wide options * @@ -2231,8 +2168,30 @@ xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { break; xmlParsePEReference(ctxt); } else if (CUR == 0) { + unsigned long consumed; + xmlEntityPtr ent; + if (ctxt->inputNr <= 1) break; + + consumed = ctxt->input->consumed; + xmlSaturatedAddSizeT(&consumed, + ctxt->input->cur - ctxt->input->base); + + /* + * Add to sizeentities when parsing an external entity + * for the first time. + */ + ent = ctxt->input->entity; + if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) && + ((ent->flags & XML_ENT_PARSED) == 0)) { + ent->flags |= XML_ENT_PARSED; + + xmlSaturatedAdd(&ctxt->sizeentities, consumed); + } + + xmlParserEntityCheck(ctxt, consumed); + xmlPopInput(ctxt); } else { break; @@ -2631,7 +2590,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { } /** - * xmlStringLenDecodeEntities: + * xmlStringDecodeEntitiesInt: * @ctxt: the parser context * @str: the input string * @len: the string length @@ -2639,19 +2598,12 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { * @end: an end marker xmlChar, 0 if none * @end2: an end marker xmlChar, 0 if none * @end3: an end marker xmlChar, 0 if none - * - * Takes a entity string content and process to do the adequate substitutions. - * - * [67] Reference ::= EntityRef | CharRef - * - * [69] PEReference ::= '%' Name ';' - * - * Returns A newly allocated string with the substitution done. The caller - * must deallocate it ! + * @check: whether to perform entity checks */ -xmlChar * -xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, - int what, xmlChar end, xmlChar end2, xmlChar end3) { +static xmlChar * +xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, + int what, xmlChar end, xmlChar end2, xmlChar end3, + int check) { xmlChar *buffer = NULL; size_t buffer_size = 0; size_t nbchars = 0; @@ -2662,8 +2614,8 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, xmlEntityPtr ent; int c,l; - if ((ctxt == NULL) || (str == NULL) || (len < 0)) - return(NULL); + if (str == NULL) + return(NULL); last = str + len; if (((ctxt->depth > 40) && @@ -2707,7 +2659,6 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, "String decoding Entity Reference: %.30s\n", str); ent = xmlParseStringEntityRef(ctxt, &str); - xmlParserEntityCheck(ctxt, 0, ent, 0); if (ent != NULL) ctxt->nbentities += ent->checked; if ((ent != NULL) && @@ -2723,9 +2674,12 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, goto int_error; } } else if ((ent != NULL) && (ent->content != NULL)) { + if ((check) && (xmlParserEntityCheck(ctxt, ent->length))) + goto int_error; + ctxt->depth++; - rep = xmlStringDecodeEntities(ctxt, ent->content, what, - 0, 0, 0); + rep = xmlStringDecodeEntitiesInt(ctxt, ent->content, + ent->length, what, 0, 0, 0, check); ctxt->depth--; if (rep == NULL) { ent->content[0] = 0; @@ -2736,8 +2690,6 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, while (*current != 0) { /* non input consuming loop */ buffer[nbchars++] = *current++; if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { - if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) - goto int_error; growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } } @@ -2760,7 +2712,6 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, xmlGenericError(xmlGenericErrorContext, "String decoding PE Reference: %.30s\n", str); ent = xmlParseStringPEReference(ctxt, &str); - xmlParserEntityCheck(ctxt, 0, ent, 0); if (ent != NULL) ctxt->nbentities += ent->checked; if (ent != NULL) { @@ -2781,9 +2732,13 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, ent->name, NULL); } } + + if ((check) && (xmlParserEntityCheck(ctxt, ent->length))) + goto int_error; + ctxt->depth++; - rep = xmlStringDecodeEntities(ctxt, ent->content, what, - 0, 0, 0); + rep = xmlStringDecodeEntitiesInt(ctxt, ent->content, + ent->length, what, 0, 0, 0, check); ctxt->depth--; if (rep == NULL) { if (ent->content != NULL) @@ -2794,8 +2749,6 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, while (*current != 0) { /* non input consuming loop */ buffer[nbchars++] = *current++; if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { - if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) - goto int_error; growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } } @@ -2827,6 +2780,35 @@ int_error: return(NULL); } +/** + * xmlStringLenDecodeEntities: + * @ctxt: the parser context + * @str: the input string + * @len: the string length + * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF + * @end: an end marker xmlChar, 0 if none + * @end2: an end marker xmlChar, 0 if none + * @end3: an end marker xmlChar, 0 if none + * + * Takes a entity string content and process to do the adequate substitutions. + * + * [67] Reference ::= EntityRef | CharRef + * + * [69] PEReference ::= '%' Name ';' + * + * Returns A newly allocated string with the substitution done. The caller + * must deallocate it ! + */ +xmlChar * +xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, + int what, xmlChar end, xmlChar end2, + xmlChar end3) { + if ((ctxt == NULL) || (str == NULL) || (len < 0)) + return(NULL); + return(xmlStringDecodeEntitiesInt(ctxt, str, len, what, + end, end2, end3, 0)); +} + /** * xmlStringDecodeEntities: * @ctxt: the parser context @@ -2849,8 +2831,8 @@ xmlChar * xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, xmlChar end, xmlChar end2, xmlChar end3) { if ((ctxt == NULL) || (str == NULL)) return(NULL); - return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, - end, end2, end3)); + return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what, + end, end2, end3, 0)); } /************************************************************************ @@ -3903,9 +3885,10 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { * so XML_SUBSTITUTE_REF is not set here. */ ++ctxt->depth; - ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, - 0, 0, 0); + ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF, + 0, 0, 0, /* check */ 1); --ctxt->depth; + if (orig != NULL) { *orig = buf; buf = NULL; @@ -4024,10 +4007,13 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { } else if ((ent != NULL) && (ctxt->replaceEntities != 0)) { if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { + if (xmlParserEntityCheck(ctxt, ent->length)) + goto error; + ++ctxt->depth; - rep = xmlStringDecodeEntities(ctxt, ent->content, - XML_SUBSTITUTE_REF, - 0, 0, 0); + rep = xmlStringDecodeEntitiesInt(ctxt, ent->content, + ent->length, XML_SUBSTITUTE_REF, 0, 0, 0, + /* check */ 1); --ctxt->depth; if (rep != NULL) { current = rep; @@ -4057,24 +4043,40 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { const xmlChar *cur = ent->name; /* - * This may look absurd but is needed to detect - * entities problems + * We also check for recursion and amplification + * when entities are not substituted. They're + * often expanded later. */ if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && - (ent->content != NULL) && (ent->checked == 0)) { - unsigned long oldnbent = ctxt->nbentities; + (ent->content != NULL)) { + if (ent->checked == 0) { + unsigned long oldnbent = ctxt->nbentities; + unsigned long oldCopy = ctxt->sizeentcopy; - ++ctxt->depth; - rep = xmlStringDecodeEntities(ctxt, ent->content, - XML_SUBSTITUTE_REF, 0, 0, 0); - --ctxt->depth; + ctxt->sizeentcopy = ent->length; - ent->checked = ctxt->nbentities - oldnbent + 1; - if (rep != NULL) { - xmlFree(rep); - rep = NULL; - } else { - ent->content[0] = 0; + ++ctxt->depth; + rep = xmlStringDecodeEntitiesInt(ctxt, + ent->content, ent->length, + XML_SUBSTITUTE_REF, 0, 0, 0, + /* check */ 1); + --ctxt->depth; + + ent->checked = ctxt->nbentities - oldnbent + 1; + ent->expandedSize = ctxt->sizeentcopy; + + if (rep != NULL) { + xmlFree(rep); + rep = NULL; + } else { + ent->content[0] = 0; + } + + if (xmlParserEntityCheck(ctxt, oldCopy)) + goto error; + } else { + if (xmlParserEntityCheck(ctxt, ent->expandedSize)) + goto error; } } @@ -7224,6 +7226,7 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) || (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) { unsigned long oldnbent = ctxt->nbentities; + unsigned long oldsizeentcopy = ctxt->sizeentcopy; /* * This is a bit hackish but this seems the best @@ -7236,6 +7239,9 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { else user_data = ctxt->userData; + /* Avoid overflow as much as possible */ + ctxt->sizeentcopy = 0; + /* * Check that this entity is well formed * 4.3.2: An internal general parsed entity is well-formed @@ -7261,6 +7267,7 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { } ent->flags |= XML_ENT_PARSED; + ent->expandedSize = ctxt->sizeentcopy; /* * Store the number of entities needing parsing for this entity * content and do checkings @@ -7272,7 +7279,7 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { xmlFreeNodeList(list); return; } - if (xmlParserEntityCheck(ctxt, 0, ent, 0)) { + if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) { xmlFreeNodeList(list); return; } @@ -7324,7 +7331,6 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { "Entity '%s' failed to parse\n", ent->name); if (ent->content != NULL) ent->content[0] = 0; - xmlParserEntityCheck(ctxt, 0, ent, 0); } else if (list != NULL) { xmlFreeNodeList(list); list = NULL; @@ -7369,11 +7375,16 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { ctxt->depth--; } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) { + unsigned long oldsizeentities = ctxt->sizeentities; + ctxt->depth++; ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax, user_data, ctxt->depth, ent->URI, ent->ExternalID, NULL); ctxt->depth--; + + /* Undo the change to sizeentities */ + ctxt->sizeentities = oldsizeentities; } else { ret = XML_ERR_ENTITY_PE_INTERNAL; xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, @@ -7383,6 +7394,8 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); return; } + if (xmlParserEntityCheck(ctxt, 0)) + return; } if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { @@ -7395,6 +7408,14 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { return; } + /* + * We also check for amplification if entities aren't substituted. + * They might be expanded later. + */ + if ((was_checked != 0) && + (xmlParserEntityCheck(ctxt, ent->expandedSize))) + return; + /* * If we didn't get any children for the entity being built */ @@ -7431,13 +7452,6 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { (ctxt->parseMode == XML_PARSE_READER)) { xmlNodePtr nw = NULL, cur, firstChild = NULL; - /* - * We are copying here, make sure there is no abuse - */ - ctxt->sizeentcopy += ent->length + 5; - if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) - return; - /* * when operating on a reader, the entities definitions * are always owning the entities subtree. @@ -7479,13 +7493,6 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { xmlNodePtr nw = NULL, cur, next, last, firstChild = NULL; - /* - * We are copying here, make sure there is no abuse - */ - ctxt->sizeentcopy += ent->length + 5; - if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) - return; - /* * Copy the entity child list and make it the new * entity child list. The goal is to make sure any @@ -7670,7 +7677,6 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) { ctxt->sax->reference(ctxt->userData, name); } } - xmlParserEntityCheck(ctxt, 0, ent, 0); ctxt->valid = 0; } @@ -7867,7 +7873,6 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { "Entity '%s' not defined\n", name); } - xmlParserEntityCheck(ctxt, 0, ent, 0); /* TODO ? check regressions ctxt->valid = 0; */ } @@ -8041,7 +8046,6 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt) name, NULL); ctxt->valid = 0; } - xmlParserEntityCheck(ctxt, 0, NULL, 0); } else { /* * Internal checking in case the entity quest barfed @@ -8054,9 +8058,7 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt) } else { xmlChar start[4]; xmlCharEncoding enc; - - if (xmlParserEntityCheck(ctxt, 0, entity, 0)) - return; + unsigned long parentConsumed; if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && ((ctxt->options & XML_PARSE_NOENT) == 0) && @@ -8067,12 +8069,19 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt) (ctxt->validate == 0)) return; + parentConsumed = ctxt->input->parentConsumed; + xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed); + xmlSaturatedAddSizeT(&parentConsumed, + ctxt->input->cur - ctxt->input->base); + input = xmlNewEntityInputStream(ctxt, entity); if (xmlPushInput(ctxt, input) < 0) { xmlFreeInputStream(input); return; } + input->parentConsumed = parentConsumed; + if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) { /* * Get the 4 first bytes and decode the charset @@ -8191,6 +8200,7 @@ xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { } if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { + xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed); xmlPopInput(ctxt); } else if (!IS_CHAR(c)) { xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, @@ -8200,6 +8210,7 @@ xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { return(-1); } entity->content = buf->content; + entity->length = buf->use; buf->content = NULL; xmlBufferFree(buf); @@ -8308,7 +8319,6 @@ xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { name, NULL); ctxt->valid = 0; } - xmlParserEntityCheck(ctxt, 0, NULL, 0); } else { /* * Internal checking in case the entity quest barfed @@ -9224,6 +9234,8 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt, NEXT; SKIP_BLANKS; val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); + if (val == NULL) + return (NULL); if (normalize) { /* * Sometimes a second normalisation pass for spaces is needed @@ -12972,8 +12984,15 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, * Also record the size of the entity parsed */ if (ctxt->input != NULL && oldctxt != NULL) { - oldctxt->sizeentities += ctxt->input->consumed; - oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); + unsigned long consumed = ctxt->input->consumed; + + xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base); + + xmlSaturatedAdd(&oldctxt->sizeentities, consumed); + xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities); + + xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed); + xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy); } /* * And record the last error if any @@ -13235,6 +13254,18 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, if (oldctxt != NULL) oldctxt->nbentities += ctxt->nbentities; + /* + * Also record the size of the entity parsed + */ + if (ctxt->input != NULL && oldctxt != NULL) { + unsigned long consumed = ctxt->input->consumed; + + xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base); + + xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed); + xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy); + } + /* * Also record the last error if any */ diff --git a/parserInternals.c b/parserInternals.c index c547a207..92490ebc 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -1306,8 +1306,11 @@ xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { break; case XML_EXTERNAL_GENERAL_PARSED_ENTITY: case XML_EXTERNAL_PARAMETER_ENTITY: - return(xmlLoadExternalEntity((char *) entity->URI, - (char *) entity->ExternalID, ctxt)); + input = xmlLoadExternalEntity((char *) entity->URI, + (char *) entity->ExternalID, ctxt); + if (input != NULL) + input->entity = entity; + return(input); case XML_INTERNAL_GENERAL_ENTITY: xmlErrInternal(ctxt, "Internal entity %s without content !\n", @@ -1338,6 +1341,7 @@ xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { input->cur = entity->content; input->length = entity->length; input->end = &entity->content[input->length]; + input->entity = entity; return(input); } diff --git a/testrecurse.c b/testrecurse.c index b8b6ab94..bc741239 100644 --- a/testrecurse.c +++ b/testrecurse.c @@ -160,7 +160,9 @@ static const char *start = " \ "; -static const char *segment = " &e; &f; &d;\n"; +static const char *segment = + " &e; &f; &d;\n" + " _123456789_123456789_123456789_123456789\n"; static const char *finish = ""; static int curseg = 0;