mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2026-01-26 21:41:34 +03:00
encoding: Add sizeOut argument to xmlCharEncInput
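When push parsing, we want to convert as much of the input as possible. When pull parsing memory buffers, we want to convert data chunk by chunk to save memory. The new sizeOut argument lets each caller choose between the two: on entry it holds the maximum number of output bytes to produce (SIZE_MAX for no limit), and on return it holds the number of bytes actually written.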
This commit is contained in:
127
encoding.c
127
encoding.c
@@ -1537,75 +1537,104 @@ xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
|
|||||||
/**
|
/**
|
||||||
* xmlCharEncInput:
|
* xmlCharEncInput:
|
||||||
* @input: a parser input buffer
|
* @input: a parser input buffer
|
||||||
|
* @sizeOut: pointer to output size
|
||||||
|
*
|
||||||
|
* @sizeOut should be set to the maximum output size (or SIZE_MAX).
|
||||||
|
* After return, it is set to the number of bytes written.
|
||||||
*
|
*
|
||||||
* Generic front-end for the encoding handler on parser input
|
* Generic front-end for the encoding handler on parser input
|
||||||
*
|
*
|
||||||
* Returns the number of bytes written or an XML_ENC_ERR code.
|
* Returns an XML_ENC_ERR code.
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
xmlCharEncInput(xmlParserInputBufferPtr input)
|
xmlCharEncInput(xmlParserInputBufferPtr input, size_t *sizeOut)
|
||||||
{
|
{
|
||||||
|
xmlBufPtr out, in;
|
||||||
|
const xmlChar *dataIn;
|
||||||
|
size_t availIn;
|
||||||
|
size_t maxOut;
|
||||||
|
size_t totalIn, totalOut;
|
||||||
int ret;
|
int ret;
|
||||||
size_t avail;
|
|
||||||
size_t toconv;
|
|
||||||
int c_in;
|
|
||||||
int c_out;
|
|
||||||
xmlBufPtr in;
|
|
||||||
xmlBufPtr out;
|
|
||||||
const xmlChar *inData;
|
|
||||||
size_t inTotal = 0;
|
|
||||||
|
|
||||||
if ((input == NULL) || (input->encoder == NULL) ||
|
|
||||||
(input->buffer == NULL) || (input->raw == NULL))
|
|
||||||
return(XML_ENC_ERR_INTERNAL);
|
|
||||||
out = input->buffer;
|
out = input->buffer;
|
||||||
in = input->raw;
|
in = input->raw;
|
||||||
|
|
||||||
toconv = xmlBufUse(in);
|
maxOut = *sizeOut;
|
||||||
if (toconv == 0)
|
totalOut = 0;
|
||||||
return (0);
|
|
||||||
inData = xmlBufContent(in);
|
|
||||||
inTotal = 0;
|
|
||||||
|
|
||||||
do {
|
*sizeOut = 0;
|
||||||
c_in = toconv > INT_MAX / 2 ? INT_MAX / 2 : toconv;
|
|
||||||
|
|
||||||
avail = xmlBufAvail(out);
|
availIn = xmlBufUse(in);
|
||||||
if (avail > INT_MAX)
|
if (availIn == 0)
|
||||||
avail = INT_MAX;
|
return(0);
|
||||||
if (avail < 4096) {
|
dataIn = xmlBufContent(in);
|
||||||
|
totalIn = 0;
|
||||||
|
|
||||||
|
while (1) {
|
||||||
|
size_t availOut;
|
||||||
|
int completeOut, completeIn;
|
||||||
|
int c_out, c_in;
|
||||||
|
|
||||||
|
availOut = xmlBufAvail(out);
|
||||||
|
if (availOut > INT_MAX / 2)
|
||||||
|
availOut = INT_MAX / 2;
|
||||||
|
|
||||||
|
if (availOut < maxOut) {
|
||||||
|
c_out = availOut;
|
||||||
|
completeOut = 0;
|
||||||
|
} else {
|
||||||
|
c_out = maxOut;
|
||||||
|
completeOut = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (availIn > INT_MAX / 2) {
|
||||||
|
c_in = INT_MAX / 2;
|
||||||
|
completeIn = 0;
|
||||||
|
} else {
|
||||||
|
c_in = availIn;
|
||||||
|
completeIn = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
|
||||||
|
dataIn, &c_in);
|
||||||
|
|
||||||
|
totalIn += c_in;
|
||||||
|
dataIn += c_in;
|
||||||
|
availIn -= c_in;
|
||||||
|
|
||||||
|
totalOut += c_out;
|
||||||
|
maxOut -= c_out;
|
||||||
|
xmlBufAddLen(out, c_out);
|
||||||
|
|
||||||
|
if ((ret != XML_ENC_ERR_SUCCESS) && (ret != XML_ENC_ERR_SPACE)) {
|
||||||
|
input->error = xmlEncConvertError(ret);
|
||||||
|
return(ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((completeOut) && (completeIn))
|
||||||
|
break;
|
||||||
|
if ((completeOut) && (ret == XML_ENC_ERR_SPACE))
|
||||||
|
break;
|
||||||
|
if ((completeIn) && (ret == XML_ENC_ERR_SUCCESS))
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (ret == XML_ENC_ERR_SPACE) {
|
||||||
if (xmlBufGrow(out, 4096) < 0) {
|
if (xmlBufGrow(out, 4096) < 0) {
|
||||||
input->error = XML_ERR_NO_MEMORY;
|
input->error = XML_ERR_NO_MEMORY;
|
||||||
return(XML_ENC_ERR_MEMORY);
|
return(XML_ENC_ERR_MEMORY);
|
||||||
}
|
}
|
||||||
avail = xmlBufAvail(out);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
c_in = toconv;
|
|
||||||
c_out = avail;
|
|
||||||
ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
|
|
||||||
inData, &c_in);
|
|
||||||
inTotal += c_in;
|
|
||||||
inData += c_in;
|
|
||||||
toconv -= c_in;
|
|
||||||
xmlBufAddLen(out, c_out);
|
|
||||||
} while (ret == XML_ENC_ERR_SPACE);
|
|
||||||
|
|
||||||
xmlBufShrink(in, inTotal);
|
|
||||||
|
|
||||||
if (input->rawconsumed > ULONG_MAX - (unsigned long)c_in)
|
|
||||||
input->rawconsumed = ULONG_MAX;
|
|
||||||
else
|
|
||||||
input->rawconsumed += c_in;
|
|
||||||
|
|
||||||
if (((ret != 0) && (c_out == 0)) ||
|
|
||||||
(ret == XML_ENC_ERR_MEMORY)) {
|
|
||||||
if (input->error == 0)
|
|
||||||
input->error = xmlEncConvertError(ret);
|
|
||||||
return(ret);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return (c_out);
|
xmlBufShrink(in, totalIn);
|
||||||
|
|
||||||
|
if (input->rawconsumed > ULONG_MAX - (unsigned long) totalIn)
|
||||||
|
input->rawconsumed = ULONG_MAX;
|
||||||
|
else
|
||||||
|
input->rawconsumed += totalIn;
|
||||||
|
|
||||||
|
*sizeOut = totalOut;
|
||||||
|
return(XML_ERR_OK);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ XML_HIDDEN int
|
|||||||
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
|
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
|
||||||
int *outlen, const unsigned char *in, int *inlen);
|
int *outlen, const unsigned char *in, int *inlen);
|
||||||
XML_HIDDEN int
|
XML_HIDDEN int
|
||||||
xmlCharEncInput(xmlParserInputBufferPtr input);
|
xmlCharEncInput(xmlParserInputBufferPtr input, size_t *sizeOut);
|
||||||
XML_HIDDEN int
|
XML_HIDDEN int
|
||||||
xmlCharEncOutput(xmlOutputBufferPtr output, int init);
|
xmlCharEncOutput(xmlOutputBufferPtr output, int init);
|
||||||
|
|
||||||
|
|||||||
41
parser.c
41
parser.c
@@ -11561,14 +11561,18 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
|
|||||||
int terminate) {
|
int terminate) {
|
||||||
size_t curBase;
|
size_t curBase;
|
||||||
size_t maxLength;
|
size_t maxLength;
|
||||||
|
size_t pos;
|
||||||
int end_in_lf = 0;
|
int end_in_lf = 0;
|
||||||
|
int res;
|
||||||
|
|
||||||
if ((ctxt == NULL) || (size < 0))
|
if ((ctxt == NULL) || (size < 0))
|
||||||
return(XML_ERR_ARGUMENT);
|
return(XML_ERR_ARGUMENT);
|
||||||
|
if ((chunk == NULL) && (size > 0))
|
||||||
|
return(XML_ERR_ARGUMENT);
|
||||||
|
if ((ctxt->input == NULL) || (ctxt->input->buf == NULL))
|
||||||
|
return(XML_ERR_ARGUMENT);
|
||||||
if (ctxt->disableSAX != 0)
|
if (ctxt->disableSAX != 0)
|
||||||
return(ctxt->errNo);
|
return(ctxt->errNo);
|
||||||
if (ctxt->input == NULL)
|
|
||||||
return(XML_ERR_INTERNAL_ERROR);
|
|
||||||
|
|
||||||
ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
|
ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
|
||||||
if (ctxt->instate == XML_PARSER_START)
|
if (ctxt->instate == XML_PARSER_START)
|
||||||
@@ -11579,18 +11583,17 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
|
|||||||
size--;
|
size--;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
|
/*
|
||||||
(ctxt->input->buf != NULL)) {
|
* Also push an empty chunk to make sure that the raw buffer
|
||||||
size_t pos = ctxt->input->cur - ctxt->input->base;
|
* will be flushed if there is an encoder.
|
||||||
int res;
|
*/
|
||||||
|
pos = ctxt->input->cur - ctxt->input->base;
|
||||||
res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
|
res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
|
||||||
xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
|
xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
|
||||||
if (res < 0) {
|
if (res < 0) {
|
||||||
xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
|
xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
|
||||||
xmlHaltParser(ctxt);
|
xmlHaltParser(ctxt);
|
||||||
return(ctxt->errNo);
|
return(ctxt->errNo);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
xmlParseTryOrFinish(ctxt, terminate);
|
xmlParseTryOrFinish(ctxt, terminate);
|
||||||
@@ -11608,11 +11611,8 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
|
|||||||
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
|
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
|
||||||
return(ctxt->errNo);
|
return(ctxt->errNo);
|
||||||
|
|
||||||
if ((end_in_lf == 1) && (ctxt->input != NULL) &&
|
if (end_in_lf == 1) {
|
||||||
(ctxt->input->buf != NULL)) {
|
pos = ctxt->input->cur - ctxt->input->base;
|
||||||
size_t pos = ctxt->input->cur - ctxt->input->base;
|
|
||||||
int res;
|
|
||||||
|
|
||||||
res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
|
res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
|
||||||
xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
|
xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
|
||||||
if (res < 0) {
|
if (res < 0) {
|
||||||
@@ -11639,8 +11639,7 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
|
|||||||
xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
|
xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
|
||||||
"Start tag expected, '<' not found\n");
|
"Start tag expected, '<' not found\n");
|
||||||
}
|
}
|
||||||
} else if ((ctxt->input->buf != NULL) &&
|
} else if ((ctxt->input->buf->encoder != NULL) &&
|
||||||
(ctxt->input->buf->encoder != NULL) &&
|
|
||||||
(ctxt->input->buf->error == 0) &&
|
(ctxt->input->buf->error == 0) &&
|
||||||
(!xmlBufIsEmpty(ctxt->input->buf->raw))) {
|
(!xmlBufIsEmpty(ctxt->input->buf->raw))) {
|
||||||
xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
|
xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
|
||||||
|
|||||||
@@ -1273,7 +1273,6 @@ xmlInputSetEncodingHandler(xmlParserInputPtr input,
|
|||||||
xmlCharEncodingHandlerPtr handler) {
|
xmlCharEncodingHandlerPtr handler) {
|
||||||
xmlParserInputBufferPtr in;
|
xmlParserInputBufferPtr in;
|
||||||
xmlBufPtr buf;
|
xmlBufPtr buf;
|
||||||
int nbchars;
|
|
||||||
int code = XML_ERR_OK;
|
int code = XML_ERR_OK;
|
||||||
|
|
||||||
if ((input == NULL) || (input->buf == NULL)) {
|
if ((input == NULL) || (input->buf == NULL)) {
|
||||||
@@ -1326,6 +1325,8 @@ xmlInputSetEncodingHandler(xmlParserInputPtr input,
|
|||||||
*/
|
*/
|
||||||
if (input->end > input->base) {
|
if (input->end > input->base) {
|
||||||
size_t processed;
|
size_t processed;
|
||||||
|
size_t nbchars;
|
||||||
|
int res;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Shrink the current input buffer.
|
* Shrink the current input buffer.
|
||||||
@@ -1336,8 +1337,9 @@ xmlInputSetEncodingHandler(xmlParserInputPtr input,
|
|||||||
input->consumed += processed;
|
input->consumed += processed;
|
||||||
in->rawconsumed = processed;
|
in->rawconsumed = processed;
|
||||||
|
|
||||||
nbchars = xmlCharEncInput(in);
|
nbchars = 4000 /* MINLEN */;
|
||||||
if (nbchars < 0)
|
res = xmlCharEncInput(in, &nbchars);
|
||||||
|
if (res < 0)
|
||||||
code = in->error;
|
code = in->error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
28
xmlIO.c
28
xmlIO.c
@@ -48,6 +48,10 @@
|
|||||||
#include "private/error.h"
|
#include "private/error.h"
|
||||||
#include "private/io.h"
|
#include "private/io.h"
|
||||||
|
|
||||||
|
#ifndef SIZE_MAX
|
||||||
|
#define SIZE_MAX ((size_t) -1)
|
||||||
|
#endif
|
||||||
|
|
||||||
/* #define VERBOSE_FAILURE */
|
/* #define VERBOSE_FAILURE */
|
||||||
|
|
||||||
#define MINLEN 4000
|
#define MINLEN 4000
|
||||||
@@ -2105,7 +2109,7 @@ xmlOutputBufferCreateFilenameDefault(xmlOutputBufferCreateFilenameFunc func)
|
|||||||
int
|
int
|
||||||
xmlParserInputBufferPush(xmlParserInputBufferPtr in,
|
xmlParserInputBufferPush(xmlParserInputBufferPtr in,
|
||||||
int len, const char *buf) {
|
int len, const char *buf) {
|
||||||
int nbchars = 0;
|
size_t nbchars = 0;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (len < 0) return(0);
|
if (len < 0) return(0);
|
||||||
@@ -2130,9 +2134,11 @@ xmlParserInputBufferPush(xmlParserInputBufferPtr in,
|
|||||||
/*
|
/*
|
||||||
* convert as much as possible to the parser reading buffer.
|
* convert as much as possible to the parser reading buffer.
|
||||||
*/
|
*/
|
||||||
nbchars = xmlCharEncInput(in);
|
nbchars = SIZE_MAX;
|
||||||
if (nbchars < 0)
|
if (xmlCharEncInput(in, &nbchars) < 0)
|
||||||
return(-1);
|
return(-1);
|
||||||
|
if (nbchars > INT_MAX)
|
||||||
|
nbchars = INT_MAX;
|
||||||
} else {
|
} else {
|
||||||
nbchars = len;
|
nbchars = len;
|
||||||
ret = xmlBufAdd(in->buffer, (xmlChar *) buf, nbchars);
|
ret = xmlBufAdd(in->buffer, (xmlChar *) buf, nbchars);
|
||||||
@@ -2229,9 +2235,19 @@ xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (in->encoder != NULL) {
|
if (in->encoder != NULL) {
|
||||||
res = xmlCharEncInput(in);
|
size_t sizeOut;
|
||||||
if (res < 0)
|
|
||||||
|
/*
|
||||||
|
* Don't convert whole buffer when reading from memory.
|
||||||
|
*/
|
||||||
|
if (in->readcallback == NULL)
|
||||||
|
sizeOut = len;
|
||||||
|
else
|
||||||
|
sizeOut = SIZE_MAX;
|
||||||
|
|
||||||
|
if (xmlCharEncInput(in, &sizeOut) < 0)
|
||||||
return(-1);
|
return(-1);
|
||||||
|
res = sizeOut;
|
||||||
}
|
}
|
||||||
return(res);
|
return(res);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user