1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-24 13:33:01 +03:00

Fix UTF-8 decoder in HTML parser

Reject sequences starting with a continuation byte, as well as overlong
sequences, as the XML parser does.

Also fixes an infinite loop in connection with previous commit 50078922,
since htmlCurrentChar would return 0 even when not at the end of the
buffer.

Found by OSS-Fuzz.
This commit is contained in:
Nick Wellnhofer
2020-07-15 12:54:25 +02:00
parent beb7d71a8f
commit 1493130ef2

View File

@@ -439,6 +439,8 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
c = *cur; c = *cur;
if (c & 0x80) { if (c & 0x80) {
if ((c & 0x40) == 0)
goto encoding_error;
if (cur[1] == 0) { if (cur[1] == 0) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
cur = ctxt->input->cur; cur = ctxt->input->cur;
@@ -467,18 +469,24 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
val |= (cur[1] & 0x3f) << 12; val |= (cur[1] & 0x3f) << 12;
val |= (cur[2] & 0x3f) << 6; val |= (cur[2] & 0x3f) << 6;
val |= cur[3] & 0x3f; val |= cur[3] & 0x3f;
if (val < 0x10000)
goto encoding_error;
} else { } else {
/* 3-byte code */ /* 3-byte code */
*len = 3; *len = 3;
val = (cur[0] & 0xf) << 12; val = (cur[0] & 0xf) << 12;
val |= (cur[1] & 0x3f) << 6; val |= (cur[1] & 0x3f) << 6;
val |= cur[2] & 0x3f; val |= cur[2] & 0x3f;
if (val < 0x800)
goto encoding_error;
} }
} else { } else {
/* 2-byte code */ /* 2-byte code */
*len = 2; *len = 2;
val = (cur[0] & 0x1f) << 6; val = (cur[0] & 0x1f) << 6;
val |= cur[1] & 0x3f; val |= cur[1] & 0x3f;
if (val < 0x80)
goto encoding_error;
} }
if (!IS_CHAR(val)) { if (!IS_CHAR(val)) {
htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,