From 3e85d7b7ab511b67d54ccf5dfd2f5953050eec83 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Fri, 17 Mar 2023 13:15:35 +0100 Subject: [PATCH] parser: Rely on CUR_CHAR/NEXT to grow the input buffer The input buffer is now grown reliably when calling CUR_CHAR (xmlCurrentChar) or NEXT (xmlNextChar). This allows to remove many other invocations of GROW. --- parser.c | 252 +++++++++++++------------------------------------------ 1 file changed, 60 insertions(+), 192 deletions(-) diff --git a/parser.c b/parser.c index 98fb87cc..f69231d5 100644 --- a/parser.c +++ b/parser.c @@ -3201,7 +3201,6 @@ static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt) { int len = 0, l; int c; - int count = 0; int maxLength = (ctxt->options & XML_PARSE_HUGE) ? XML_MAX_TEXT_LENGTH : XML_MAX_NAME_LENGTH; @@ -3213,9 +3212,6 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) { /* * Handler for more complex cases */ - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); c = CUR_CHAR(l); if ((ctxt->options & XML_PARSE_OLD10) == 0) { /* @@ -3264,12 +3260,6 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) { ((c >= 0xFDF0) && (c <= 0xFFFD)) || ((c >= 0x10000) && (c <= 0xEFFFF)) )) { - if (count++ > XML_PARSER_CHUNK_SIZE) { - count = 0; - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); - } if (len <= INT_MAX - l) len += l; NEXTL(l); @@ -3291,18 +3281,14 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) { (c == '_') || (c == ':') || (IS_COMBINING(c)) || (IS_EXTENDER(c)))) { - if (count++ > XML_PARSER_CHUNK_SIZE) { - count = 0; - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); - } if (len <= INT_MAX - l) len += l; NEXTL(l); c = CUR_CHAR(l); } } + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); if (len > maxLength) { xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); return(NULL); @@ -3393,7 +3379,6 @@ static const xmlChar * xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { int len = 0, l; int c; - int count = 0; int maxLength = (ctxt->options & XML_PARSE_HUGE) ? XML_MAX_TEXT_LENGTH : XML_MAX_NAME_LENGTH; @@ -3406,9 +3391,6 @@ xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { /* * Handler for more complex cases */ - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); startPosition = CUR_PTR - BASE_PTR; c = CUR_CHAR(l); if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ @@ -3418,31 +3400,13 @@ xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ (xmlIsNameChar(ctxt, c) && (c != ':'))) { - if (count++ > XML_PARSER_CHUNK_SIZE) { - count = 0; - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); - } if (len <= INT_MAX - l) len += l; NEXTL(l); c = CUR_CHAR(l); - if (c == 0) { - count = 0; - /* - * when shrinking to extend the buffer we really need to preserve - * the part of the name we already parsed. Hence rolling back - * by current length. - */ - ctxt->input->cur -= l; - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); - ctxt->input->cur += l; - c = CUR_CHAR(l); - } } + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); if (len > maxLength) { xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); return(NULL); @@ -3668,7 +3632,6 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { xmlChar buf[XML_MAX_NAMELEN + 5]; int len = 0, l; int c; - int count = 0; int maxLength = (ctxt->options & XML_PARSE_HUGE) ? XML_MAX_TEXT_LENGTH : XML_MAX_NAME_LENGTH; @@ -3677,28 +3640,12 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { nbParseNmToken++; #endif - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); c = CUR_CHAR(l); while (xmlIsNameChar(ctxt, c)) { - if (count++ > XML_PARSER_CHUNK_SIZE) { - count = 0; - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); - } COPY_BUF(l,buf,len,c); NEXTL(l); c = CUR_CHAR(l); - if (c == 0) { - count = 0; - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); - c = CUR_CHAR(l); - } if (len >= XML_MAX_NAMELEN) { /* * Okay someone managed to make a huge token, so he's ready to pay @@ -3714,14 +3661,6 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { } memcpy(buffer, buf, len); while (xmlIsNameChar(ctxt, c)) { - if (count++ > XML_PARSER_CHUNK_SIZE) { - count = 0; - GROW; - if (ctxt->instate == XML_PARSER_EOF) { - xmlFree(buffer); - return(NULL); - } - } if (len + 10 > max) { xmlChar *tmp; @@ -3735,18 +3674,22 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { buffer = tmp; } COPY_BUF(l,buffer,len,c); - NEXTL(l); - c = CUR_CHAR(l); if (len > maxLength) { xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); xmlFree(buffer); return(NULL); } + NEXTL(l); + c = CUR_CHAR(l); } buffer[len] = 0; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); return(buffer); } } + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); if (len == 0) return(NULL); if (len > maxLength) { @@ -4237,7 +4180,6 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { XML_MAX_NAME_LENGTH; xmlChar stop; int state = ctxt->instate; - int count = 0; SHRINK; if (RAW == '"') { @@ -4272,32 +4214,21 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { } buf = tmp; } - count++; - if (count > 50) { - SHRINK; - GROW; - count = 0; - if (ctxt->instate == XML_PARSER_EOF) { - xmlFree(buf); - return(NULL); - } - } COPY_BUF(l,buf,len,cur); - NEXTL(l); - cur = CUR_CHAR(l); - if (cur == 0) { - GROW; - SHRINK; - cur = CUR_CHAR(l); - } if (len > maxLength) { xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral"); xmlFree(buf); ctxt->instate = (xmlParserInputState) state; return(NULL); } + NEXTL(l); + cur = CUR_CHAR(l); } buf[len] = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return(NULL); + } ctxt->instate = (xmlParserInputState) state; if (!IS_CHAR(cur)) { xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); @@ -4330,7 +4261,6 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { XML_MAX_NAME_LENGTH; xmlChar cur; xmlChar stop; - int count = 0; xmlParserInputState oldstate = ctxt->instate; SHRINK; @@ -4365,34 +4295,23 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { buf = tmp; } buf[len++] = cur; - count++; - if (count > 50) { - SHRINK; - GROW; - count = 0; - if (ctxt->instate == XML_PARSER_EOF) { - xmlFree(buf); - return(NULL); - } - } - NEXT; - cur = CUR; - if (cur == 0) { - GROW; - SHRINK; - cur = CUR; - } if (len > maxLength) { xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID"); xmlFree(buf); return(NULL); } + NEXT; + cur = CUR; } buf[len] = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return(NULL); + } if (cur != stop) { xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); } else { - NEXT; + NEXTL(1); } ctxt->instate = oldstate; return(buf); @@ -4611,10 +4530,8 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) { xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; int nbchar = 0; int cur, l; - int count = 0; SHRINK; - GROW; cur = CUR_CHAR(l); while ((cur != '<') && /* checked */ (cur != '&') && @@ -4625,7 +4542,6 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) { COPY_BUF(l,buf,nbchar,cur); /* move current position before possible calling of ctxt->sax->characters */ NEXTL(l); - cur = CUR_CHAR(l); if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { buf[nbchar] = 0; @@ -4650,16 +4566,12 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) { /* something really bad happened in the SAX callback */ if (ctxt->instate != XML_PARSER_CONTENT) return; + SHRINK; } - count++; - if (count > 50) { - SHRINK; - GROW; - count = 0; - if (ctxt->instate == XML_PARSER_EOF) - return; - } + cur = CUR_CHAR(l); } + if (ctxt->instate == XML_PARSER_EOF) + return; if (nbchar != 0) { buf[nbchar] = 0; /* @@ -4784,7 +4696,6 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int q, ql; int r, rl; int cur, l; - size_t count = 0; size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ? XML_MAX_HUGE_LENGTH : XML_MAX_TEXT_LENGTH; @@ -4801,7 +4712,6 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, return; } } - GROW; /* Assure there's enough input data */ q = CUR_CHAR(ql); if (q == 0) goto not_terminated; @@ -4848,37 +4758,27 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, size = new_size; } COPY_BUF(ql,buf,len,q); - q = r; - ql = rl; - r = cur; - rl = l; - - count++; - if (count > 50) { - SHRINK; - GROW; - count = 0; - if (ctxt->instate == XML_PARSER_EOF) { - xmlFree(buf); - return; - } - } - NEXTL(l); - cur = CUR_CHAR(l); - if (cur == 0) { - SHRINK; - GROW; - cur = CUR_CHAR(l); - } - if (len > maxLength) { xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, "Comment too big found", NULL); xmlFree (buf); return; } + + q = r; + ql = rl; + r = cur; + rl = l; + + NEXTL(l); + cur = CUR_CHAR(l); + } buf[len] = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return; + } if (cur == 0) { xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, "Comment not terminated \n