1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-24 13:33:01 +03:00

Print error messages for truncated UTF-8 sequences

Before, truncated UTF-8 sequences at the end of a file were treated as
EOF. Create an error message containing the offending bytes.

xmlStringCurrentChar would also print characters from the input stream,
not the string it's working on.
This commit is contained in:
Nick Wellnhofer
2017-06-10 17:01:27 +02:00
parent fb2f518cc6
commit 79c8a6b105
13 changed files with 66 additions and 27 deletions

View File

@@ -709,16 +709,6 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
}
return((int) *ctxt->input->cur);
encoding_error:
/*
* An encoding problem may arise from a truncated input buffer
* splitting a character in the middle. In that case do not raise
* an error but return 0 to indicate an end of stream problem
*/
if (ctxt->input->end - ctxt->input->cur < 4) {
*len = 0;
return(0);
}
/*
* If we detect a UTF-8 error, that probably means that the
* input encoding didn't get properly advertised in the
@@ -729,9 +719,21 @@ encoding_error:
{
char buffer[150];
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
ctxt->input->cur[0], ctxt->input->cur[1],
ctxt->input->cur[2], ctxt->input->cur[3]);
if (ctxt->input->cur[1] == 0) {
snprintf(&buffer[0], 149, "Bytes: 0x%02X EOF\n",
ctxt->input->cur[0]);
} else if (ctxt->input->cur[2] == 0) {
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X EOF\n",
ctxt->input->cur[0], ctxt->input->cur[1]);
} else if (ctxt->input->cur[3] == 0) {
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X EOF\n",
ctxt->input->cur[0], ctxt->input->cur[1],
ctxt->input->cur[2]);
} else {
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
ctxt->input->cur[0], ctxt->input->cur[1],
ctxt->input->cur[2], ctxt->input->cur[3]);
}
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
"Input is not proper UTF-8, indicate encoding !\n%s",
BAD_CAST buffer, NULL);
@@ -821,17 +823,6 @@ xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
*len = 1;
return ((int) *cur);
encoding_error:
/*
* An encoding problem may arise from a truncated input buffer
* splitting a character in the middle. In that case do not raise
* an error but return 0 to indicate an end of stream problem
*/
if ((ctxt == NULL) || (ctxt->input == NULL) ||
(ctxt->input->end - ctxt->input->cur < 4)) {
*len = 0;
return(0);
}
/*
* If we detect a UTF-8 error, that probably means that the
* input encoding didn't get properly advertised in the
@@ -842,9 +833,19 @@ encoding_error:
{
char buffer[150];
snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
ctxt->input->cur[0], ctxt->input->cur[1],
ctxt->input->cur[2], ctxt->input->cur[3]);
if (cur[1] == 0) {
snprintf(&buffer[0], 149, "Bytes: 0x%02X EOF\n",
cur[0]);
} else if (cur[2] == 0) {
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X EOF\n",
cur[0], cur[1]);
} else if (cur[3] == 0) {
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X EOF\n",
cur[0], cur[1], cur[2]);
} else {
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
cur[0], cur[1], cur[2], cur[3]);
}
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
"Input is not proper UTF-8, indicate encoding !\n%s",
BAD_CAST buffer, NULL);