diff --git a/HTMLparser.c b/HTMLparser.c
index a48b2318..07de5b2b 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -372,43 +372,6 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)
xmlParserGrow(ctxt);
- if ((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) {
- xmlChar * guess;
-
- /*
- * Assume it's a fixed length encoding (1) with
- * a compatible encoding for the ASCII set, since
- * HTML constructs only use < 128 chars
- */
- if (*ctxt->input->cur < 0x80) {
- if (*ctxt->input->cur == 0) {
- if (ctxt->input->cur < ctxt->input->end) {
- htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
- "Char 0x%X out of allowed range\n", 0);
- *len = 1;
- return(' ');
- } else {
- *len = 0;
- return(0);
- }
- }
- *len = 1;
- return(*ctxt->input->cur);
- }
-
- /*
- * Humm this is bad, do an automatic flow conversion
- */
- guess = htmlFindEncoding(ctxt);
- if (guess == NULL) {
- xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
- } else {
- xmlSwitchEncodingName(ctxt, (const char *) guess);
- xmlFree(guess);
- }
- ctxt->input->flags |= XML_INPUT_HAS_ENCODING;
- }
-
/*
* We are supposed to handle UTF8, check it's valid
* From rfc2044: encoding of the Unicode values on UTF-8:
@@ -422,9 +385,40 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
*/
cur = ctxt->input->cur;
c = *cur;
- if (c & 0x80) {
+ if (c < 0x80) {
+ if (c == 0) {
+ if (ctxt->input->cur < ctxt->input->end) {
+ htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
+ "Char 0x%X out of allowed range\n", 0);
+ *len = 1;
+ return(' ');
+ } else {
+ *len = 0;
+ return(0);
+ }
+ }
+
+ *len = 1;
+ return(c);
+ } else {
size_t avail;
+ if ((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) {
+ xmlChar * guess;
+
+ guess = htmlFindEncoding(ctxt);
+ if (guess == NULL) {
+ xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
+ } else {
+ xmlSwitchEncodingName(ctxt, (const char *) guess);
+ xmlFree(guess);
+ }
+ ctxt->input->flags |= XML_INPUT_HAS_ENCODING;
+
+ cur = ctxt->input->cur;
+ c = *cur;
+ }
+
if ((c & 0x40) == 0)
goto encoding_error;
@@ -469,21 +463,6 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
"Char 0x%X out of allowed range\n", val);
}
return(val);
- } else {
- if (*ctxt->input->cur == 0) {
- if (ctxt->input->cur < ctxt->input->end) {
- htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
- "Char 0x%X out of allowed range\n", 0);
- *len = 1;
- return(' ');
- } else {
- *len = 0;
- return(0);
- }
- }
- /* 1-byte code */
- *len = 1;
- return(*ctxt->input->cur);
}
encoding_error: