1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-23 01:52:48 +03:00

encoding: Detect truncated multi-byte sequences with ICU

Unlike iconv or the internal converters, ICU consumes truncated
multi-byte sequences at the end of an input buffer. We currently check
for a non-empty raw input buffer to detect truncated sequences, so this
check misses them with ICU.

It might be possible to inspect the pivot buffer pointers, but it seems
cleaner to implement a `flush` flag for some encoding and I/O functions.
After flushing, we can check for U_TRUNCATED_CHAR_FOUND with ICU, or
detect remaining input with other converters.

Also fix detection of truncated sequences for HTML, XML content and
DTDs with iconv.
This commit is contained in:
Nick Wellnhofer
2025-03-10 02:18:51 +01:00
parent 76c6ddfef9
commit 69b83bb68e
14 changed files with 287 additions and 133 deletions

View File

@@ -7300,9 +7300,7 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
while (ctxt->inputNr > oldInputNr)
xmlPopPE(ctxt);
if (RAW != 0) {
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
}
xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED);
}
/**
@@ -9875,8 +9873,7 @@ xmlParseContent(xmlParserCtxtPtr ctxt) {
xmlParseContentInternal(ctxt);
if (ctxt->input->cur < ctxt->input->end)
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
xmlParserCheckEOF(ctxt, XML_ERR_NOT_WELL_BALANCED);
}
/**
@@ -10737,16 +10734,7 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
*/
xmlParseMisc(ctxt);
if (ctxt->input->cur < ctxt->input->end) {
if (ctxt->wellFormed)
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
} else if ((ctxt->input->buf != NULL) &&
(ctxt->input->buf->encoder != NULL) &&
(ctxt->input->buf->error == 0) &&
(!xmlBufIsEmpty(ctxt->input->buf->raw))) {
xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
"Truncated multi-byte sequence at EOF\n");
}
xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
}
ctxt->instate = XML_PARSER_EOF;
@@ -11596,11 +11584,8 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
"Start tag expected, '<' not found\n");
}
} else if ((ctxt->input->buf->encoder != NULL) &&
(ctxt->input->buf->error == 0) &&
(!xmlBufIsEmpty(ctxt->input->buf->raw))) {
xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
"Truncated multi-byte sequence at EOF\n");
} else {
xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
}
if (ctxt->instate != XML_PARSER_EOF) {
ctxt->instate = XML_PARSER_EOF;