mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-10-23 01:52:48 +03:00
encoding: Detect truncated multi-byte sequences with ICU
Unlike iconv or the internal converters, ICU consumes truncated multi-byte sequences at the end of an input buffer. We currently check for a non-empty raw input buffer to detect truncated sequences, so this check fails with ICU. It might be possible to inspect the pivot buffer pointers, but it seems cleaner to implement a `flush` flag for some encoding and I/O functions. After flushing, we can check for U_TRUNCATED_CHAR_FOUND with ICU, or detect remaining input with other converters. Also fix detection of truncated sequences for HTML, XML content and DTDs with iconv.
This commit is contained in:
25
parser.c
25
parser.c
@@ -7300,9 +7300,7 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
|
||||
while (ctxt->inputNr > oldInputNr)
|
||||
xmlPopPE(ctxt);
|
||||
|
||||
if (RAW != 0) {
|
||||
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
|
||||
}
|
||||
xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -9875,8 +9873,7 @@ xmlParseContent(xmlParserCtxtPtr ctxt) {
|
||||
|
||||
xmlParseContentInternal(ctxt);
|
||||
|
||||
if (ctxt->input->cur < ctxt->input->end)
|
||||
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
|
||||
xmlParserCheckEOF(ctxt, XML_ERR_NOT_WELL_BALANCED);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -10737,16 +10734,7 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
|
||||
*/
|
||||
xmlParseMisc(ctxt);
|
||||
|
||||
if (ctxt->input->cur < ctxt->input->end) {
|
||||
if (ctxt->wellFormed)
|
||||
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
|
||||
} else if ((ctxt->input->buf != NULL) &&
|
||||
(ctxt->input->buf->encoder != NULL) &&
|
||||
(ctxt->input->buf->error == 0) &&
|
||||
(!xmlBufIsEmpty(ctxt->input->buf->raw))) {
|
||||
xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
|
||||
"Truncated multi-byte sequence at EOF\n");
|
||||
}
|
||||
xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
|
||||
}
|
||||
|
||||
ctxt->instate = XML_PARSER_EOF;
|
||||
@@ -11596,11 +11584,8 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
|
||||
xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
|
||||
"Start tag expected, '<' not found\n");
|
||||
}
|
||||
} else if ((ctxt->input->buf->encoder != NULL) &&
|
||||
(ctxt->input->buf->error == 0) &&
|
||||
(!xmlBufIsEmpty(ctxt->input->buf->raw))) {
|
||||
xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
|
||||
"Truncated multi-byte sequence at EOF\n");
|
||||
} else {
|
||||
xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
|
||||
}
|
||||
if (ctxt->instate != XML_PARSER_EOF) {
|
||||
ctxt->instate = XML_PARSER_EOF;
|
||||
|
Reference in New Issue
Block a user