1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-24 13:33:01 +03:00

Print error messages for truncated UTF-8 sequences

Previously, a truncated UTF-8 sequence at the end of a file was silently
treated as EOF. Now an error is reported, with a message containing the
offending bytes.

xmlStringCurrentChar's error message also used to print bytes from the
parser's input stream rather than from the string it is working on.
This commit is contained in:
Nick Wellnhofer
2017-06-10 17:01:27 +02:00
parent fb2f518cc6
commit 79c8a6b105
13 changed files with 66 additions and 27 deletions

View File

@@ -709,16 +709,6 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
} }
return((int) *ctxt->input->cur); return((int) *ctxt->input->cur);
encoding_error: encoding_error:
/*
* An encoding problem may arise from a truncated input buffer
* splitting a character in the middle. In that case do not raise
* an error but return 0 to indicate an end of stream problem
*/
if (ctxt->input->end - ctxt->input->cur < 4) {
*len = 0;
return(0);
}
/* /*
* If we detect an UTF8 error that probably mean that the * If we detect an UTF8 error that probably mean that the
* input encoding didn't get properly advertised in the * input encoding didn't get properly advertised in the
@@ -729,9 +719,21 @@ encoding_error:
{ {
char buffer[150]; char buffer[150];
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", if (ctxt->input->cur[1] == 0) {
ctxt->input->cur[0], ctxt->input->cur[1], snprintf(&buffer[0], 149, "Bytes: 0x%02X EOF\n",
ctxt->input->cur[2], ctxt->input->cur[3]); ctxt->input->cur[0]);
} else if (ctxt->input->cur[2] == 0) {
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X EOF\n",
ctxt->input->cur[0], ctxt->input->cur[1]);
} else if (ctxt->input->cur[3] == 0) {
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X EOF\n",
ctxt->input->cur[0], ctxt->input->cur[1],
ctxt->input->cur[2]);
} else {
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
ctxt->input->cur[0], ctxt->input->cur[1],
ctxt->input->cur[2], ctxt->input->cur[3]);
}
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
"Input is not proper UTF-8, indicate encoding !\n%s", "Input is not proper UTF-8, indicate encoding !\n%s",
BAD_CAST buffer, NULL); BAD_CAST buffer, NULL);
@@ -821,17 +823,6 @@ xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
*len = 1; *len = 1;
return ((int) *cur); return ((int) *cur);
encoding_error: encoding_error:
/*
* An encoding problem may arise from a truncated input buffer
* splitting a character in the middle. In that case do not raise
* an error but return 0 to indicate an end of stream problem
*/
if ((ctxt == NULL) || (ctxt->input == NULL) ||
(ctxt->input->end - ctxt->input->cur < 4)) {
*len = 0;
return(0);
}
/* /*
* If we detect an UTF8 error that probably mean that the * If we detect an UTF8 error that probably mean that the
* input encoding didn't get properly advertised in the * input encoding didn't get properly advertised in the
@@ -842,9 +833,19 @@ encoding_error:
{ {
char buffer[150]; char buffer[150];
snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", if (cur[1] == 0) {
ctxt->input->cur[0], ctxt->input->cur[1], snprintf(&buffer[0], 149, "Bytes: 0x%02X EOF\n",
ctxt->input->cur[2], ctxt->input->cur[3]); cur[0]);
} else if (cur[2] == 0) {
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X EOF\n",
cur[0], cur[1]);
} else if (cur[3] == 0) {
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X EOF\n",
cur[0], cur[1], cur[2]);
} else {
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
cur[0], cur[1], cur[2], cur[3]);
}
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
"Input is not proper UTF-8, indicate encoding !\n%s", "Input is not proper UTF-8, indicate encoding !\n%s",
BAD_CAST buffer, NULL); BAD_CAST buffer, NULL);

View File

View File

@@ -0,0 +1,7 @@
./test/errors/partial_utf8_1.xml:1: parser error : Input is not proper UTF-8, indicate encoding !
Bytes: 0xC2 EOF
<a><3E>
^
./test/errors/partial_utf8_1.xml:1: parser error : Premature end of data in tag a line 1
<a><3E>
^

View File

@@ -0,0 +1,4 @@
./test/errors/partial_utf8_1.xml:1: parser error : Extra content at the end of the document
<a><3E>
^
./test/errors/partial_utf8_1.xml : failed to parse

View File

View File

@@ -0,0 +1,7 @@
./test/errors/partial_utf8_2.xml:1: parser error : Input is not proper UTF-8, indicate encoding !
Bytes: 0xE3 0xA0 EOF
<a><3E><>
^
./test/errors/partial_utf8_2.xml:1: parser error : Premature end of data in tag a line 1
<a><3E><>
^

View File

@@ -0,0 +1,5 @@
./test/errors/partial_utf8_2.xml:1: parser error : Input is not proper UTF-8, indicate encoding !
Bytes: 0xE3 0xA0 EOF
<a><3E><>
^
./test/errors/partial_utf8_2.xml : failed to parse

View File

View File

@@ -0,0 +1,7 @@
./test/errors/partial_utf8_3.xml:1: parser error : Input is not proper UTF-8, indicate encoding !
Bytes: 0xF2 0xA0 0xA0 EOF
<a><3E><><EFBFBD>
^
./test/errors/partial_utf8_3.xml:1: parser error : Premature end of data in tag a line 1
<a><3E><><EFBFBD>
^

View File

@@ -0,0 +1,5 @@
./test/errors/partial_utf8_3.xml:1: parser error : Input is not proper UTF-8, indicate encoding !
Bytes: 0xF2 0xA0 0xA0 EOF
<a><3E><><EFBFBD>
^
./test/errors/partial_utf8_3.xml : failed to parse

View File

@@ -0,0 +1 @@
<a><EFBFBD>

View File

@@ -0,0 +1 @@
<a><EFBFBD><EFBFBD>

View File

@@ -0,0 +1 @@
<a><EFBFBD><EFBFBD><EFBFBD>