1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-24 13:33:01 +03:00

parser: Rely on CUR_CHAR/NEXT to grow the input buffer

The input buffer is now grown reliably when calling CUR_CHAR
(xmlCurrentChar) or NEXT (xmlNextChar). This allows to remove many
other invocations of GROW.
This commit is contained in:
Nick Wellnhofer
2023-03-17 13:15:35 +01:00
parent 62f199ed7d
commit 3e85d7b7ab

242
parser.c
View File

@@ -3201,7 +3201,6 @@ static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
int len = 0, l;
int c;
int count = 0;
int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
XML_MAX_TEXT_LENGTH :
XML_MAX_NAME_LENGTH;
@@ -3213,9 +3212,6 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
/*
* Handler for more complex cases
*/
GROW;
if (ctxt->instate == XML_PARSER_EOF)
return(NULL);
c = CUR_CHAR(l);
if ((ctxt->options & XML_PARSE_OLD10) == 0) {
/*
@@ -3264,12 +3260,6 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
((c >= 0x10000) && (c <= 0xEFFFF))
)) {
if (count++ > XML_PARSER_CHUNK_SIZE) {
count = 0;
GROW;
if (ctxt->instate == XML_PARSER_EOF)
return(NULL);
}
if (len <= INT_MAX - l)
len += l;
NEXTL(l);
@@ -3291,18 +3281,14 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
(c == '_') || (c == ':') ||
(IS_COMBINING(c)) ||
(IS_EXTENDER(c)))) {
if (count++ > XML_PARSER_CHUNK_SIZE) {
count = 0;
GROW;
if (ctxt->instate == XML_PARSER_EOF)
return(NULL);
}
if (len <= INT_MAX - l)
len += l;
NEXTL(l);
c = CUR_CHAR(l);
}
}
if (ctxt->instate == XML_PARSER_EOF)
return(NULL);
if (len > maxLength) {
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
return(NULL);
@@ -3393,7 +3379,6 @@ static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
int len = 0, l;
int c;
int count = 0;
int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
XML_MAX_TEXT_LENGTH :
XML_MAX_NAME_LENGTH;
@@ -3406,9 +3391,6 @@ xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
/*
* Handler for more complex cases
*/
GROW;
if (ctxt->instate == XML_PARSER_EOF)
return(NULL);
startPosition = CUR_PTR - BASE_PTR;
c = CUR_CHAR(l);
if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
@@ -3418,31 +3400,13 @@ xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
(xmlIsNameChar(ctxt, c) && (c != ':'))) {
if (count++ > XML_PARSER_CHUNK_SIZE) {
count = 0;
GROW;
if (ctxt->instate == XML_PARSER_EOF)
return(NULL);
}
if (len <= INT_MAX - l)
len += l;
NEXTL(l);
c = CUR_CHAR(l);
if (c == 0) {
count = 0;
/*
* when shrinking to extend the buffer we really need to preserve
* the part of the name we already parsed. Hence rolling back
* by current length.
*/
ctxt->input->cur -= l;
GROW;
}
if (ctxt->instate == XML_PARSER_EOF)
return(NULL);
ctxt->input->cur += l;
c = CUR_CHAR(l);
}
}
if (len > maxLength) {
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
return(NULL);
@@ -3668,7 +3632,6 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
xmlChar buf[XML_MAX_NAMELEN + 5];
int len = 0, l;
int c;
int count = 0;
int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
XML_MAX_TEXT_LENGTH :
XML_MAX_NAME_LENGTH;
@@ -3677,28 +3640,12 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
nbParseNmToken++;
#endif
GROW;
if (ctxt->instate == XML_PARSER_EOF)
return(NULL);
c = CUR_CHAR(l);
while (xmlIsNameChar(ctxt, c)) {
if (count++ > XML_PARSER_CHUNK_SIZE) {
count = 0;
GROW;
if (ctxt->instate == XML_PARSER_EOF)
return(NULL);
}
COPY_BUF(l,buf,len,c);
NEXTL(l);
c = CUR_CHAR(l);
if (c == 0) {
count = 0;
GROW;
if (ctxt->instate == XML_PARSER_EOF)
return(NULL);
c = CUR_CHAR(l);
}
if (len >= XML_MAX_NAMELEN) {
/*
* Okay someone managed to make a huge token, so he's ready to pay
@@ -3714,14 +3661,6 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
}
memcpy(buffer, buf, len);
while (xmlIsNameChar(ctxt, c)) {
if (count++ > XML_PARSER_CHUNK_SIZE) {
count = 0;
GROW;
if (ctxt->instate == XML_PARSER_EOF) {
xmlFree(buffer);
return(NULL);
}
}
if (len + 10 > max) {
xmlChar *tmp;
@@ -3735,18 +3674,22 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
buffer = tmp;
}
COPY_BUF(l,buffer,len,c);
NEXTL(l);
c = CUR_CHAR(l);
if (len > maxLength) {
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
xmlFree(buffer);
return(NULL);
}
NEXTL(l);
c = CUR_CHAR(l);
}
buffer[len] = 0;
if (ctxt->instate == XML_PARSER_EOF)
return(NULL);
return(buffer);
}
}
if (ctxt->instate == XML_PARSER_EOF)
return(NULL);
if (len == 0)
return(NULL);
if (len > maxLength) {
@@ -4237,7 +4180,6 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
XML_MAX_NAME_LENGTH;
xmlChar stop;
int state = ctxt->instate;
int count = 0;
SHRINK;
if (RAW == '"') {
@@ -4272,32 +4214,21 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
}
buf = tmp;
}
count++;
if (count > 50) {
SHRINK;
GROW;
count = 0;
if (ctxt->instate == XML_PARSER_EOF) {
xmlFree(buf);
return(NULL);
}
}
COPY_BUF(l,buf,len,cur);
NEXTL(l);
cur = CUR_CHAR(l);
if (cur == 0) {
GROW;
SHRINK;
cur = CUR_CHAR(l);
}
if (len > maxLength) {
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
xmlFree(buf);
ctxt->instate = (xmlParserInputState) state;
return(NULL);
}
NEXTL(l);
cur = CUR_CHAR(l);
}
buf[len] = 0;
if (ctxt->instate == XML_PARSER_EOF) {
xmlFree(buf);
return(NULL);
}
ctxt->instate = (xmlParserInputState) state;
if (!IS_CHAR(cur)) {
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
@@ -4330,7 +4261,6 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
XML_MAX_NAME_LENGTH;
xmlChar cur;
xmlChar stop;
int count = 0;
xmlParserInputState oldstate = ctxt->instate;
SHRINK;
@@ -4365,34 +4295,23 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
buf = tmp;
}
buf[len++] = cur;
count++;
if (count > 50) {
SHRINK;
GROW;
count = 0;
if (ctxt->instate == XML_PARSER_EOF) {
xmlFree(buf);
return(NULL);
}
}
NEXT;
cur = CUR;
if (cur == 0) {
GROW;
SHRINK;
cur = CUR;
}
if (len > maxLength) {
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
xmlFree(buf);
return(NULL);
}
NEXT;
cur = CUR;
}
buf[len] = 0;
if (ctxt->instate == XML_PARSER_EOF) {
xmlFree(buf);
return(NULL);
}
if (cur != stop) {
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
} else {
NEXT;
NEXTL(1);
}
ctxt->instate = oldstate;
return(buf);
@@ -4611,10 +4530,8 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
int nbchar = 0;
int cur, l;
int count = 0;
SHRINK;
GROW;
cur = CUR_CHAR(l);
while ((cur != '<') && /* checked */
(cur != '&') &&
@@ -4625,7 +4542,6 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
COPY_BUF(l,buf,nbchar,cur);
/* move current position before possible calling of ctxt->sax->characters */
NEXTL(l);
cur = CUR_CHAR(l);
if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
buf[nbchar] = 0;
@@ -4650,16 +4566,12 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
/* something really bad happened in the SAX callback */
if (ctxt->instate != XML_PARSER_CONTENT)
return;
}
count++;
if (count > 50) {
SHRINK;
GROW;
count = 0;
}
cur = CUR_CHAR(l);
}
if (ctxt->instate == XML_PARSER_EOF)
return;
}
}
if (nbchar != 0) {
buf[nbchar] = 0;
/*
@@ -4784,7 +4696,6 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
int q, ql;
int r, rl;
int cur, l;
size_t count = 0;
size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
XML_MAX_HUGE_LENGTH :
XML_MAX_TEXT_LENGTH;
@@ -4801,7 +4712,6 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
return;
}
}
GROW; /* Assure there's enough input data */
q = CUR_CHAR(ql);
if (q == 0)
goto not_terminated;
@@ -4848,37 +4758,27 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
size = new_size;
}
COPY_BUF(ql,buf,len,q);
q = r;
ql = rl;
r = cur;
rl = l;
count++;
if (count > 50) {
SHRINK;
GROW;
count = 0;
if (ctxt->instate == XML_PARSER_EOF) {
xmlFree(buf);
return;
}
}
NEXTL(l);
cur = CUR_CHAR(l);
if (cur == 0) {
SHRINK;
GROW;
cur = CUR_CHAR(l);
}
if (len > maxLength) {
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
"Comment too big found", NULL);
xmlFree (buf);
return;
}
q = r;
ql = rl;
r = cur;
rl = l;
NEXTL(l);
cur = CUR_CHAR(l);
}
buf[len] = 0;
if (ctxt->instate == XML_PARSER_EOF) {
xmlFree(buf);
return;
}
if (cur == 0) {
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
"Comment not terminated \n<!--%.50s\n", buf);
@@ -5222,7 +5122,6 @@ xmlParsePI(xmlParserCtxtPtr ctxt) {
int cur, l;
const xmlChar *target;
xmlParserInputState state;
int count = 0;
if ((RAW == '<') && (NXT(1) == '?')) {
int inputid = ctxt->input->id;
@@ -5285,24 +5184,7 @@ xmlParsePI(xmlParserCtxtPtr ctxt) {
buf = tmp;
size = new_size;
}
count++;
if (count > 50) {
SHRINK;
GROW;
if (ctxt->instate == XML_PARSER_EOF) {
xmlFree(buf);
return;
}
count = 0;
}
COPY_BUF(l,buf,len,cur);
NEXTL(l);
cur = CUR_CHAR(l);
if (cur == 0) {
SHRINK;
GROW;
cur = CUR_CHAR(l);
}
if (len > maxLength) {
xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
"PI %s too big found", target);
@@ -5310,8 +5192,14 @@ xmlParsePI(xmlParserCtxtPtr ctxt) {
ctxt->instate = state;
return;
}
NEXTL(l);
cur = CUR_CHAR(l);
}
buf[len] = 0;
if (ctxt->instate == XML_PARSER_EOF) {
xmlFree(buf);
return;
}
if (cur != '?') {
xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
"ParsePI: PI %s never end ...\n", target);
@@ -8143,7 +8031,6 @@ xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
xmlParserInputPtr input;
xmlBufferPtr buf;
int l, c;
int count = 0;
if ((ctxt == NULL) || (entity == NULL) ||
((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
@@ -8189,26 +8076,13 @@ xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
(IS_CHAR(c))) {
xmlBufferAdd(buf, ctxt->input->cur, l);
if (count++ > XML_PARSER_CHUNK_SIZE) {
count = 0;
GROW;
if (ctxt->instate == XML_PARSER_EOF) {
xmlBufferFree(buf);
return(-1);
}
}
NEXTL(l);
c = CUR_CHAR(l);
if (c == 0) {
count = 0;
GROW;
}
if (ctxt->instate == XML_PARSER_EOF) {
xmlBufferFree(buf);
return(-1);
}
c = CUR_CHAR(l);
}
}
if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
@@ -9850,7 +9724,6 @@ xmlParseCDSect(xmlParserCtxtPtr ctxt) {
int r, rl;
int s, sl;
int cur, l;
int count = 0;
int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
XML_MAX_HUGE_LENGTH :
XML_MAX_TEXT_LENGTH;
@@ -9896,28 +9769,23 @@ xmlParseCDSect(xmlParserCtxtPtr ctxt) {
size *= 2;
}
COPY_BUF(rl,buf,len,r);
r = s;
rl = sl;
s = cur;
sl = l;
count++;
if (count > 50) {
SHRINK;
GROW;
if (ctxt->instate == XML_PARSER_EOF) {
goto out;
}
count = 0;
}
NEXTL(l);
cur = CUR_CHAR(l);
if (len > maxLength) {
xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
"CData section too big found\n");
goto out;
}
r = s;
rl = sl;
s = cur;
sl = l;
NEXTL(l);
cur = CUR_CHAR(l);
}
buf[len] = 0;
if (ctxt->instate == XML_PARSER_EOF) {
xmlFree(buf);
return;
}
if (cur != '>') {
xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
"CData section not finished\n%.50s\n", buf);