mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-10-21 14:53:44 +03:00
encoding: Detect truncated multi-byte sequences with ICU
Unlike iconv or the internal converters, ICU consumes truncated multi-byte sequences at the end of an input buffer. We currently check for a non-empty raw input buffer to detect truncated sequences, so this fails with ICU. It might be possible to inspect the pivot buffer pointers, but it seems cleaner to implement a `flush` flag for some encoding and I/O functions. After flushing, we can check for U_TRUNCATED_CHAR_FOUND with ICU, or detect remaining input with other converters. Also fix detection of truncated sequences for HTML, XML content and DTDs with iconv.
This commit is contained in:
25
HTMLparser.c
25
HTMLparser.c
@@ -4385,6 +4385,11 @@ htmlCtxtParseContentInternal(htmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
|
||||
|
||||
htmlParseContent(ctxt);
|
||||
|
||||
/*
|
||||
* Only check for truncated multi-byte sequences
|
||||
*/
|
||||
xmlParserCheckEOF(ctxt, XML_ERR_INTERNAL_ERROR);
|
||||
|
||||
/* TODO: Use xmlCtxtIsCatastrophicError */
|
||||
if (ctxt->errNo != XML_ERR_NO_MEMORY) {
|
||||
xmlNodePtr cur;
|
||||
@@ -4509,11 +4514,9 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
|
||||
htmlParseContent(ctxt);
|
||||
|
||||
/*
|
||||
* autoclose
|
||||
* Only check for truncated multi-byte sequences
|
||||
*/
|
||||
if (CUR == 0)
|
||||
htmlAutoCloseOnEnd(ctxt);
|
||||
|
||||
xmlParserCheckEOF(ctxt, XML_ERR_INTERNAL_ERROR);
|
||||
|
||||
/*
|
||||
* SAX: end of the document processing.
|
||||
@@ -5237,12 +5240,15 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
||||
int
|
||||
htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
|
||||
int terminate) {
|
||||
if ((ctxt == NULL) || (ctxt->input == NULL))
|
||||
if ((ctxt == NULL) ||
|
||||
(ctxt->input == NULL) || (ctxt->input->buf == NULL) ||
|
||||
(size < 0) ||
|
||||
((size > 0) && (chunk == NULL)))
|
||||
return(XML_ERR_ARGUMENT);
|
||||
if (PARSER_STOPPED(ctxt) != 0)
|
||||
return(ctxt->errNo);
|
||||
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
|
||||
(ctxt->input->buf != NULL)) {
|
||||
|
||||
if (size > 0) {
|
||||
size_t pos = ctxt->input->cur - ctxt->input->base;
|
||||
int res;
|
||||
|
||||
@@ -5261,6 +5267,11 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
|
||||
if ((terminate) && (ctxt->instate != XML_PARSER_EOF)) {
|
||||
htmlAutoCloseOnEnd(ctxt);
|
||||
|
||||
/*
|
||||
* Only check for truncated multi-byte sequences
|
||||
*/
|
||||
xmlParserCheckEOF(ctxt, XML_ERR_INTERNAL_ERROR);
|
||||
|
||||
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
|
||||
ctxt->sax->endDocument(ctxt->userData);
|
||||
|
||||
|
Reference in New Issue
Block a user