From 8a103793f2cf3686112e6a3f2d17ac1987573c29 Mon Sep 17 00:00:00 2001 From: Adiel Mittmann Date: Tue, 25 Aug 2009 11:27:13 +0200 Subject: [PATCH] Non-ASCII character may be split at buffer end * HTMLparser.c: in htmlCurrentChar, make sure to reset the current pointer from ctxt->input->cur after each call to xmlParserInputGrow --- HTMLparser.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/HTMLparser.c b/HTMLparser.c index c64590e0..d06724c7 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -384,19 +384,25 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { c = *cur; if (c & 0x80) { - if (cur[1] == 0) + if (cur[1] == 0) { xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + cur = ctxt->input->cur; + } if ((cur[1] & 0xc0) != 0x80) goto encoding_error; if ((c & 0xe0) == 0xe0) { - if (cur[2] == 0) + if (cur[2] == 0) { xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + cur = ctxt->input->cur; + } if ((cur[2] & 0xc0) != 0x80) goto encoding_error; if ((c & 0xf0) == 0xf0) { - if (cur[3] == 0) + if (cur[3] == 0) { xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + cur = ctxt->input->cur; + } if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) goto encoding_error;