diff --git a/parserInternals.c b/parserInternals.c index af7ba45c..a805a13f 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -1340,7 +1340,16 @@ xmlInputSetEncodingHandler(xmlParserInputPtr input, input->consumed += processed; in->rawconsumed = processed; - nbchars = 4000 /* MINLEN */; + /* + * If we're push-parsing, we must convert the whole buffer. + * + * If we're pull-parsing, we could be parsing from a huge + * memory buffer which we don't want to convert completely. + */ + if (input->flags & XML_INPUT_PROGRESSIVE) + nbchars = SIZE_MAX; + else + nbchars = 4000 /* MINLEN */; res = xmlCharEncInput(in, &nbchars); if (res < 0) code = in->error; diff --git a/testparser.c b/testparser.c index 6dfb7ea8..561f644f 100644 --- a/testparser.c +++ b/testparser.c @@ -340,6 +340,36 @@ testHugeEncodedChunk(void) { xmlFreeParserCtxt(ctxt); xmlFree(chunk); + /* + * Test the push parser with + * + * - a single call to xmlParseChunk, + * - a non-UTF8 encoding, + * - a chunk larger than MINLEN (4000 bytes). + * + * This verifies that the whole buffer is processed in the initial + * charset conversion. + */ + buf = xmlBufferCreate(); + xmlBufferCat(buf, + BAD_CAST "\n"); + xmlBufferCat(buf, BAD_CAST ""); + /* 20,000 characters */ + for (i = 0; i < 2000; i++) + xmlBufferCat(buf, BAD_CAST "0123456789"); + xmlBufferCat(buf, BAD_CAST ""); + chunk = xmlBufferDetach(buf); + xmlBufferFree(buf); + + ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL); + + xmlParseChunk(ctxt, (char *) chunk, xmlStrlen(chunk), 1); + + err = ctxt->wellFormed ? 0 : 1; + xmlFreeDoc(ctxt->myDoc); + xmlFreeParserCtxt(ctxt); + xmlFree(chunk); + return err; } #endif