From 2099441f329c1c4638d8d5364d69cf12206f43b8 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Mon, 13 Mar 2023 17:51:13 +0100 Subject: [PATCH] parser: Stop calling xmlParserInputShrink Introduce xmlParserShrink which takes a parser context to simplify error handling. --- HTMLparser.c | 2 +- include/private/parser.h | 2 ++ parser.c | 13 ++------- parserInternals.c | 57 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 62 insertions(+), 12 deletions(-) diff --git a/HTMLparser.c b/HTMLparser.c index a4e691f0..e9084950 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -295,7 +295,7 @@ htmlNodeInfoPop(htmlParserCtxtPtr ctxt) #define SHRINK if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ - xmlParserInputShrink(ctxt->input) + xmlParserShrink(ctxt) #define GROW if ((ctxt->progressive == 0) && \ (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ diff --git a/include/private/parser.h b/include/private/parser.h index 22e1314a..628f5a90 100644 --- a/include/private/parser.h +++ b/include/private/parser.h @@ -25,5 +25,7 @@ __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr, const xmlChar *str2) LIBXML_ATTR_FORMAT(3,0); XML_HIDDEN int xmlParserGrow(xmlParserCtxtPtr ctxt); +XML_HIDDEN int +xmlParserShrink(xmlParserCtxtPtr ctxt); #endif /* XML_PARSER_H_PRIVATE__ */ diff --git a/parser.c b/parser.c index 44331d11..53289c7a 100644 --- a/parser.c +++ b/parser.c @@ -2065,16 +2065,7 @@ static int spacePop(xmlParserCtxtPtr ctxt) { #define SHRINK if ((ctxt->progressive == 0) && \ (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ - xmlSHRINK (ctxt); - -static void xmlSHRINK (xmlParserCtxtPtr ctxt) { - /* Don't shrink memory buffers. 
*/ - if ((ctxt->input->buf) && - ((ctxt->input->buf->encoder) || (ctxt->input->buf->readcallback))) - xmlParserInputShrink(ctxt->input); - if (*ctxt->input->cur == 0) - xmlParserGrow(ctxt); -} + xmlParserShrink(ctxt); #define GROW if ((ctxt->progressive == 0) && \ (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ @@ -11484,7 +11475,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { if ((ctxt->input != NULL) && (ctxt->input->cur - ctxt->input->base > 4096)) { - xmlParserInputShrink(ctxt->input); + xmlParserShrink(ctxt); } while (ctxt->instate != XML_PARSER_EOF) { diff --git a/parserInternals.c b/parserInternals.c index 558186ae..bcf120a9 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -380,6 +380,63 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) { return(ret); } +/** + * xmlParserShrink: shrinks the current input buffer of @ctxt via xmlBufShrink, then calls xmlParserInputBufferGrow when fewer than INPUT_CHUNK bytes are in use, and finally re-derives the input's base, cur and end pointers + * @ctxt: an XML parser context; returns 0 at once for memory buffers, -1 after xmlErrMemory if the buffer content is NULL, else the xmlParserInputBufferGrow result (0 when it was not called; a negative result is reported through xmlErrInternal and sets the parser state to XML_PARSER_EOF) + */ +int +xmlParserShrink(xmlParserCtxtPtr ctxt) { + xmlParserInputPtr in = ctxt->input; + xmlParserInputBufferPtr buf = in->buf; + size_t used; + int ret = 0; + + /* Don't shrink memory buffers, recognised here as: no input buffer at all, or
an input buffer with neither an encoder nor a read callback. */ + if ((buf == NULL) || + ((buf->encoder == NULL) && (buf->readcallback == NULL))) + return(0); + + used = in->cur - in->base; + /* + * Only shrink once more than INPUT_CHUNK bytes lie before cur; + * LINE_LEN of them are left out of the xmlBufShrink request + */ + if (used > INPUT_CHUNK) { + size_t res = xmlBufShrink(buf->buffer, used - LINE_LEN); + + if (res > 0) { + used -= res; /* presumably res is the number of bytes actually removed; confirm in xmlBufShrink */ + if ((res > ULONG_MAX) || + (in->consumed > ULONG_MAX - (unsigned long)res)) + in->consumed = ULONG_MAX; /* saturate rather than wrap around */ + else + in->consumed += res; + } + } + + if (xmlBufUse(buf->buffer) < INPUT_CHUNK) + ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK); + + in->base = xmlBufContent(buf->buffer); /* rebase the input pointers on the buffer's current content */ + if (in->base == NULL) { + in->base = BAD_CAST ""; /* fall back to an empty string for base, cur and end */ + in->cur = in->base; + in->end = in->base; + xmlErrMemory(ctxt, NULL); + return(-1); + } + in->cur = in->base + used; + in->end = xmlBufEnd(buf->buffer); + + /* TODO: Get error code from xmlParserInputBufferGrow; until then every negative result is raised as an internal error */ + if (ret < 0) { + xmlErrInternal(ctxt, "Growing input buffer", NULL); + ctxt->instate = XML_PARSER_EOF; + } + + return(ret); +} + /** * xmlParserInputShrink: * @in: an XML parser input