From b1a416bf525e57ed77d3dbb2bf08e01e442cec7f Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Thu, 27 Jun 2024 12:00:45 +0200 Subject: [PATCH] encoding: Restore old lookup order in xmlOpenCharEncodingHandler When looking up encodings with xmlLookupCharEncodingHandler, the returned handler can have a different name than requested (capitalization, internal aliases). This should eventually be fixed. For now we revert part of commit 5b893fa9, start the lookup with xmlFindHandler and add an explicit check for UTF-8. Should fix the encoding name issue mentioned in #749. --- encoding.c | 36 ++++++++++++++++++++---------------- testparser.c | 3 ++- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/encoding.c b/encoding.c index d2024d6e..73752d04 100644 --- a/encoding.c +++ b/encoding.c @@ -1161,7 +1161,7 @@ xmlParseCharEncoding(const char* name) if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS); if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP); - return(XML_CHAR_ENCODING_NONE); + return(XML_CHAR_ENCODING_ERROR); } /** @@ -1936,7 +1936,9 @@ int xmlOpenCharEncodingHandler(const char *name, int output, xmlCharEncodingHandler **out) { const char *nalias; + const char *norig; xmlCharEncoding enc; + int ret; if (out == NULL) return(XML_ERR_ARGUMENT); @@ -1945,30 +1947,32 @@ xmlOpenCharEncodingHandler(const char *name, int output, if (name == NULL) return(XML_ERR_ARGUMENT); + if ((xmlStrcasecmp(BAD_CAST name, BAD_CAST "UTF-8") == 0) || + (xmlStrcasecmp(BAD_CAST name, BAD_CAST "UTF8") == 0)) + return(XML_ERR_OK); + /* * Do the alias resolution */ + norig = name; nalias = xmlGetEncodingAlias(name); if (nalias != NULL) name = nalias; + ret = xmlFindHandler(name, output, out); + if (*out != NULL) + return(0); + if (ret != XML_ERR_UNSUPPORTED_ENCODING) + return(ret); + /* - * UTF-16 needs the built-in handler which is only available via - * xmlFindHandler. + * Fallback using the canonical names + * + * TODO: We should make sure that the name of the returned + * handler equals norig. */ - if (xmlStrcasecmp(BAD_CAST name, BAD_CAST "UTF16") == 0) { - name = "UTF-16"; - } else if (xmlStrcasecmp(BAD_CAST name, BAD_CAST "UTF-16") != 0) { - enc = xmlParseCharEncoding(name); - if (enc != XML_CHAR_ENCODING_NONE) { - int res = xmlLookupCharEncodingHandler(enc, out); - - if (res != XML_ERR_UNSUPPORTED_ENCODING) - return(res); - } - } - - return(xmlFindHandler(name, output, out)); + enc = xmlParseCharEncoding(norig); + return(xmlLookupCharEncodingHandler(enc, out)); } /** diff --git a/testparser.c b/testparser.c index 68ab9444..cc7fef0b 100644 --- a/testparser.c +++ b/testparser.c @@ -50,7 +50,8 @@ testUnsupportedEncoding(void) { xmlFreeDoc(doc); error = xmlGetLastError(); - if (error->code != XML_ERR_UNSUPPORTED_ENCODING || + if (error == NULL || + error->code != XML_ERR_UNSUPPORTED_ENCODING || error->level != XML_ERR_WARNING || strcmp(error->message, "Unsupported encoding: #unsupported\n") != 0) {