1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-26 00:37:43 +03:00

Rework control flow in htmlCurrentChar

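Don't call xmlCurrentChar after switching encodings. Instead, move the
encoding-detection block ahead of the UTF-8 decoder so that, once the
encoding has been switched, control falls through to the normal UTF-8
handling.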
This commit is contained in:
Nick Wellnhofer
2020-07-15 14:22:08 +02:00
parent 922bebccdd
commit dfd4e33048

View File

@@ -414,6 +414,10 @@ htmlFindEncoding(xmlParserCtxtPtr ctxt) {
static int static int
htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
const unsigned char *cur;
unsigned char c;
unsigned int val;
if (ctxt->instate == XML_PARSER_EOF) if (ctxt->instate == XML_PARSER_EOF)
return(0); return(0);
@@ -421,7 +425,41 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
*len = 0; *len = 0;
return(ctxt->token); return(ctxt->token);
} }
if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
xmlChar * guess;
xmlCharEncodingHandlerPtr handler;
/*
* Assume it's a fixed length encoding (1) with
* a compatible encoding for the ASCII set, since
* HTML constructs only use < 128 chars
*/
if ((int) *ctxt->input->cur < 0x80) {
*len = 1;
return((int) *ctxt->input->cur);
}
/*
* Humm this is bad, do an automatic flow conversion
*/
guess = htmlFindEncoding(ctxt);
if (guess == NULL) {
xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
} else {
if (ctxt->input->encoding != NULL)
xmlFree((xmlChar *) ctxt->input->encoding);
ctxt->input->encoding = guess;
handler = xmlFindCharEncodingHandler((const char *) guess);
if (handler != NULL) {
xmlSwitchToEncoding(ctxt, handler);
} else {
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
"Unsupported encoding %s", guess, NULL);
}
}
ctxt->charset = XML_CHAR_ENCODING_UTF8;
}
/* /*
* We are supposed to handle UTF8, check it's valid * We are supposed to handle UTF8, check it's valid
* From rfc2044: encoding of the Unicode values on UTF-8: * From rfc2044: encoding of the Unicode values on UTF-8:
@@ -433,10 +471,7 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
* *
* Check for the 0x110000 limit too * Check for the 0x110000 limit too
*/ */
const unsigned char *cur = ctxt->input->cur; cur = ctxt->input->cur;
unsigned char c;
unsigned int val;
c = *cur; c = *cur;
if (c & 0x80) { if (c & 0x80) {
if ((c & 0x40) == 0) if ((c & 0x40) == 0)
@@ -505,42 +540,6 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
*len = 1; *len = 1;
return((int) *ctxt->input->cur); return((int) *ctxt->input->cur);
} }
}
/*
* Assume it's a fixed length encoding (1) with
* a compatible encoding for the ASCII set, since
* XML constructs only use < 128 chars
*/
*len = 1;
if ((int) *ctxt->input->cur < 0x80)
return((int) *ctxt->input->cur);
/*
* Humm this is bad, do an automatic flow conversion
*/
{
xmlChar * guess;
xmlCharEncodingHandlerPtr handler;
guess = htmlFindEncoding(ctxt);
if (guess == NULL) {
xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
} else {
if (ctxt->input->encoding != NULL)
xmlFree((xmlChar *) ctxt->input->encoding);
ctxt->input->encoding = guess;
handler = xmlFindCharEncodingHandler((const char *) guess);
if (handler != NULL) {
xmlSwitchToEncoding(ctxt, handler);
} else {
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
"Unsupported encoding %s", guess, NULL);
}
}
ctxt->charset = XML_CHAR_ENCODING_UTF8;
}
return(xmlCurrentChar(ctxt, len));
encoding_error: encoding_error:
/* /*