mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-10-24 13:33:01 +03:00
Print error messages for truncated UTF-8 sequences
Previously, a truncated UTF-8 sequence at the end of a file was silently treated as EOF. Now an error message containing the offending bytes is reported instead. In addition, xmlStringCurrentChar used to print bytes from the parser's input stream rather than from the string it was actually working on; it now reports the bytes of that string.
This commit is contained in:
@@ -709,16 +709,6 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
|
||||
}
|
||||
return((int) *ctxt->input->cur);
|
||||
encoding_error:
|
||||
/*
|
||||
* An encoding problem may arise from a truncated input buffer
|
||||
* splitting a character in the middle. In that case do not raise
|
||||
* an error but return 0 to indicate an end of stream problem
|
||||
*/
|
||||
if (ctxt->input->end - ctxt->input->cur < 4) {
|
||||
*len = 0;
|
||||
return(0);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we detect an UTF8 error that probably mean that the
|
||||
* input encoding didn't get properly advertised in the
|
||||
@@ -729,9 +719,21 @@ encoding_error:
|
||||
{
|
||||
char buffer[150];
|
||||
|
||||
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
|
||||
ctxt->input->cur[0], ctxt->input->cur[1],
|
||||
ctxt->input->cur[2], ctxt->input->cur[3]);
|
||||
if (ctxt->input->cur[1] == 0) {
|
||||
snprintf(&buffer[0], 149, "Bytes: 0x%02X EOF\n",
|
||||
ctxt->input->cur[0]);
|
||||
} else if (ctxt->input->cur[2] == 0) {
|
||||
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X EOF\n",
|
||||
ctxt->input->cur[0], ctxt->input->cur[1]);
|
||||
} else if (ctxt->input->cur[3] == 0) {
|
||||
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X EOF\n",
|
||||
ctxt->input->cur[0], ctxt->input->cur[1],
|
||||
ctxt->input->cur[2]);
|
||||
} else {
|
||||
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
|
||||
ctxt->input->cur[0], ctxt->input->cur[1],
|
||||
ctxt->input->cur[2], ctxt->input->cur[3]);
|
||||
}
|
||||
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
|
||||
"Input is not proper UTF-8, indicate encoding !\n%s",
|
||||
BAD_CAST buffer, NULL);
|
||||
@@ -821,17 +823,6 @@ xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
|
||||
*len = 1;
|
||||
return ((int) *cur);
|
||||
encoding_error:
|
||||
|
||||
/*
|
||||
* An encoding problem may arise from a truncated input buffer
|
||||
* splitting a character in the middle. In that case do not raise
|
||||
* an error but return 0 to indicate an end of stream problem
|
||||
*/
|
||||
if ((ctxt == NULL) || (ctxt->input == NULL) ||
|
||||
(ctxt->input->end - ctxt->input->cur < 4)) {
|
||||
*len = 0;
|
||||
return(0);
|
||||
}
|
||||
/*
|
||||
* If we detect an UTF8 error that probably mean that the
|
||||
* input encoding didn't get properly advertised in the
|
||||
@@ -842,9 +833,19 @@ encoding_error:
|
||||
{
|
||||
char buffer[150];
|
||||
|
||||
snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
|
||||
ctxt->input->cur[0], ctxt->input->cur[1],
|
||||
ctxt->input->cur[2], ctxt->input->cur[3]);
|
||||
if (cur[1] == 0) {
|
||||
snprintf(&buffer[0], 149, "Bytes: 0x%02X EOF\n",
|
||||
cur[0]);
|
||||
} else if (cur[2] == 0) {
|
||||
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X EOF\n",
|
||||
cur[0], cur[1]);
|
||||
} else if (cur[3] == 0) {
|
||||
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X EOF\n",
|
||||
cur[0], cur[1], cur[2]);
|
||||
} else {
|
||||
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
|
||||
cur[0], cur[1], cur[2], cur[3]);
|
||||
}
|
||||
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
|
||||
"Input is not proper UTF-8, indicate encoding !\n%s",
|
||||
BAD_CAST buffer, NULL);
|
||||
|
0
result/errors/partial_utf8_1.xml
Normal file
0
result/errors/partial_utf8_1.xml
Normal file
7
result/errors/partial_utf8_1.xml.err
Normal file
7
result/errors/partial_utf8_1.xml.err
Normal file
@@ -0,0 +1,7 @@
|
||||
./test/errors/partial_utf8_1.xml:1: parser error : Input is not proper UTF-8, indicate encoding !
|
||||
Bytes: 0xC2 EOF
|
||||
<a><3E>
|
||||
^
|
||||
./test/errors/partial_utf8_1.xml:1: parser error : Premature end of data in tag a line 1
|
||||
<a><3E>
|
||||
^
|
4
result/errors/partial_utf8_1.xml.str
Normal file
4
result/errors/partial_utf8_1.xml.str
Normal file
@@ -0,0 +1,4 @@
|
||||
./test/errors/partial_utf8_1.xml:1: parser error : Extra content at the end of the document
|
||||
<a><3E>
|
||||
^
|
||||
./test/errors/partial_utf8_1.xml : failed to parse
|
0
result/errors/partial_utf8_2.xml
Normal file
0
result/errors/partial_utf8_2.xml
Normal file
7
result/errors/partial_utf8_2.xml.err
Normal file
7
result/errors/partial_utf8_2.xml.err
Normal file
@@ -0,0 +1,7 @@
|
||||
./test/errors/partial_utf8_2.xml:1: parser error : Input is not proper UTF-8, indicate encoding !
|
||||
Bytes: 0xE3 0xA0 EOF
|
||||
<a><3E><>
|
||||
^
|
||||
./test/errors/partial_utf8_2.xml:1: parser error : Premature end of data in tag a line 1
|
||||
<a><3E><>
|
||||
^
|
5
result/errors/partial_utf8_2.xml.str
Normal file
5
result/errors/partial_utf8_2.xml.str
Normal file
@@ -0,0 +1,5 @@
|
||||
./test/errors/partial_utf8_2.xml:1: parser error : Input is not proper UTF-8, indicate encoding !
|
||||
Bytes: 0xE3 0xA0 EOF
|
||||
<a><3E><>
|
||||
^
|
||||
./test/errors/partial_utf8_2.xml : failed to parse
|
0
result/errors/partial_utf8_3.xml
Normal file
0
result/errors/partial_utf8_3.xml
Normal file
7
result/errors/partial_utf8_3.xml.err
Normal file
7
result/errors/partial_utf8_3.xml.err
Normal file
@@ -0,0 +1,7 @@
|
||||
./test/errors/partial_utf8_3.xml:1: parser error : Input is not proper UTF-8, indicate encoding !
|
||||
Bytes: 0xF2 0xA0 0xA0 EOF
|
||||
<a><3E><><EFBFBD>
|
||||
^
|
||||
./test/errors/partial_utf8_3.xml:1: parser error : Premature end of data in tag a line 1
|
||||
<a><3E><><EFBFBD>
|
||||
^
|
5
result/errors/partial_utf8_3.xml.str
Normal file
5
result/errors/partial_utf8_3.xml.str
Normal file
@@ -0,0 +1,5 @@
|
||||
./test/errors/partial_utf8_3.xml:1: parser error : Input is not proper UTF-8, indicate encoding !
|
||||
Bytes: 0xF2 0xA0 0xA0 EOF
|
||||
<a><3E><><EFBFBD>
|
||||
^
|
||||
./test/errors/partial_utf8_3.xml : failed to parse
|
1
test/errors/partial_utf8_1.xml
Normal file
1
test/errors/partial_utf8_1.xml
Normal file
@@ -0,0 +1 @@
|
||||
<a><EFBFBD>
|
1
test/errors/partial_utf8_2.xml
Normal file
1
test/errors/partial_utf8_2.xml
Normal file
@@ -0,0 +1 @@
|
||||
<a><EFBFBD><EFBFBD>
|
1
test/errors/partial_utf8_3.xml
Normal file
1
test/errors/partial_utf8_3.xml
Normal file
@@ -0,0 +1 @@
|
||||
<a><EFBFBD><EFBFBD><EFBFBD>
|
Reference in New Issue
Block a user