mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-07-08 23:22:04 +03:00
html: Handle incomplete UTF-8 when push-parsing
For now, incomplete UTF-8 is always treated as an error in push mode. Eventually, we could pass chunked data to the character handler when push-parsing; in that case, we would have to handle incomplete sequences.
This commit is contained in:
16
HTMLparser.c
16
HTMLparser.c
@ -379,7 +379,8 @@ htmlMaskMatch(htmlAsciiMask mask, unsigned c) {
|
||||
}
|
||||
|
||||
static int
|
||||
htmlValidateUtf8(xmlParserCtxtPtr ctxt, const xmlChar *str, size_t len) {
|
||||
htmlValidateUtf8(xmlParserCtxtPtr ctxt, const xmlChar *str, size_t len,
|
||||
int partial) {
|
||||
unsigned c = str[0];
|
||||
int size;
|
||||
|
||||
@ -424,7 +425,8 @@ htmlValidateUtf8(xmlParserCtxtPtr ctxt, const xmlChar *str, size_t len) {
|
||||
return(size);
|
||||
|
||||
incomplete:
|
||||
return(0);
|
||||
if (partial)
|
||||
return(0);
|
||||
|
||||
invalid:
|
||||
/* Only report the first error */
|
||||
@ -2424,7 +2426,7 @@ htmlParseHTMLName(htmlParserCtxtPtr ctxt, int attr) {
|
||||
buf[nbchar++] = c;
|
||||
}
|
||||
} else {
|
||||
size = htmlValidateUtf8(ctxt, in, avail);
|
||||
size = htmlValidateUtf8(ctxt, in, avail, /* partial */ 0);
|
||||
|
||||
if (size > 0) {
|
||||
if (nbchar + size <= HTML_PARSER_BUFFER_SIZE) {
|
||||
@ -2811,7 +2813,7 @@ htmlParseData(htmlParserCtxtPtr ctxt, htmlAsciiMask mask,
|
||||
goto restart;
|
||||
}
|
||||
|
||||
size = htmlValidateUtf8(ctxt, in, avail);
|
||||
size = htmlValidateUtf8(ctxt, in, avail, /* partial */ 0);
|
||||
|
||||
if (size <= 0) {
|
||||
skip = 1;
|
||||
@ -3260,7 +3262,11 @@ htmlParseCharData(htmlParserCtxtPtr ctxt) {
|
||||
goto restart;
|
||||
}
|
||||
|
||||
size = htmlValidateUtf8(ctxt, in, avail);
|
||||
/*
|
||||
* We should handle partial data to allow the push
|
||||
* parser to pass incomplete chunks.
|
||||
*/
|
||||
size = htmlValidateUtf8(ctxt, in, avail, /* partial */ 0);
|
||||
|
||||
if (size <= 0) {
|
||||
skip = 1;
|
||||
|
Reference in New Issue
Block a user