From 5b893fa9994295c6e2ced909d84c7f562f861c0e Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sat, 22 Jun 2024 19:15:17 +0200 Subject: [PATCH] encoding: Fix encoding lookup with xmlOpenCharEncodingHandler Make xmlOpenCharEncodingHandler call xmlParseCharEncoding first so we prefer our own handlers for names like "UTF8". Only UTF-16 needs an exception. Make callers check the return value. For UTF-8, a NULL encoding doesn't mean an error. Remove unnecessary UTF-8 check from htmlFindOutputEncoder. Don't try to look up ASCII handler since the HTML handler is always available. Fix return code of xmlParseCharEncoding. Should fix #744. --- HTMLtree.c | 19 +++++++------------ encoding.c | 29 ++++++++++++++++------------- xmlsave.c | 18 +++++++++++------- 3 files changed, 34 insertions(+), 32 deletions(-) diff --git a/HTMLtree.c b/HTMLtree.c index 6e8baf488..8fa022c11 100644 --- a/HTMLtree.c +++ b/HTMLtree.c @@ -385,22 +385,17 @@ htmlFindOutputEncoder(const char *encoding) { xmlCharEncodingHandler *handler = NULL; if (encoding != NULL) { - xmlCharEncoding enc; + int res; - enc = xmlParseCharEncoding(encoding); - if (enc != XML_CHAR_ENCODING_UTF8) { - xmlOpenCharEncodingHandler(encoding, /* output */ 1, &handler); - if (handler == NULL) - htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); - } + res = xmlOpenCharEncodingHandler(encoding, /* output */ 1, + &handler); + if (res != XML_ERR_OK) + htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); } else { /* - * Fallback to HTML or ASCII when the encoding is unspecified + * Fallback to HTML when the encoding is unspecified */ - if (handler == NULL) - xmlOpenCharEncodingHandler("HTML", /* output */ 1, &handler); - if (handler == NULL) - xmlOpenCharEncodingHandler("ascii", /* output */ 1, &handler); + xmlOpenCharEncodingHandler("HTML", /* output */ 1, &handler); } return(handler); diff --git a/encoding.c b/encoding.c index edfa1c457..de14c596a 100644 --- a/encoding.c +++ b/encoding.c @@ -1161,7 +1161,7 @@ xmlParseCharEncoding(const char* name) if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS); if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP); - return(XML_CHAR_ENCODING_ERROR); + return(XML_CHAR_ENCODING_NONE); } /** @@ -1931,9 +1931,7 @@ int xmlOpenCharEncodingHandler(const char *name, int output, xmlCharEncodingHandler **out) { const char *nalias; - const char *norig; xmlCharEncoding enc; - int ret; if (out == NULL) return(XML_ERR_ARGUMENT); @@ -1945,22 +1943,27 @@ xmlOpenCharEncodingHandler(const char *name, int output, /* * Do the alias resolution */ - norig = name; nalias = xmlGetEncodingAlias(name); if (nalias != NULL) name = nalias; - ret = xmlFindHandler(name, output, out); - if (*out != NULL) - return(0); - if (ret != XML_ERR_UNSUPPORTED_ENCODING) - return(ret); - /* - * Fallback using the canonical names + * UTF-16 needs the built-in handler which is only available via + * xmlFindHandler. */ - enc = xmlParseCharEncoding(norig); - return(xmlLookupCharEncodingHandler(enc, out)); + if (xmlStrcasecmp(BAD_CAST name, BAD_CAST "UTF16") == 0) { + name = "UTF-16"; + } else if (xmlStrcasecmp(BAD_CAST name, BAD_CAST "UTF-16") != 0) { + enc = xmlParseCharEncoding(name); + if (enc != XML_CHAR_ENCODING_NONE) { + int res = xmlLookupCharEncodingHandler(enc, out); + + if (res != XML_ERR_UNSUPPORTED_ENCODING) + return(res); + } + } + + return(xmlFindHandler(name, output, out)); } /** diff --git a/xmlsave.c b/xmlsave.c index 5bd3445e6..8e30229d9 100644 --- a/xmlsave.c +++ b/xmlsave.c @@ -343,7 +343,7 @@ xmlNewSaveCtxt(const char *encoding, int options) res = xmlOpenCharEncodingHandler(encoding, /* output */ 1, &ret->handler); - if (ret->handler == NULL) { + if (res != XML_ERR_OK) { xmlSaveErr(NULL, res, NULL, encoding); xmlFreeSaveCtxt(ret); return(NULL); @@ -801,7 +801,7 @@ static int xmlSaveSwitchEncoding(xmlSaveCtxtPtr ctxt, const char *encoding) { int res; res = xmlOpenCharEncodingHandler(encoding, /* output */ 1, &handler); - if (handler == NULL) { + if (res != XML_ERR_OK) { xmlSaveErr(buf, res, NULL, encoding); return(-1); } @@ -2669,7 +2669,7 @@ xmlDocDumpFormatMemoryEnc(xmlDocPtr out_doc, xmlChar **doc_txt_ptr, res = xmlOpenCharEncodingHandler(txt_encoding, /* output */ 1, &conv_hdlr); - if (conv_hdlr == NULL) { + if (res != XML_ERR_OK) { xmlSaveErr(NULL, res, NULL, txt_encoding); return; } @@ -2784,8 +2784,10 @@ xmlDocFormatDump(FILE *f, xmlDocPtr cur, int format) { encoding = (const char *) cur->encoding; if (encoding != NULL) { - xmlOpenCharEncodingHandler(encoding, /* output */ 1, &handler); - if (handler == NULL) { + int res; + + res = xmlOpenCharEncodingHandler(encoding, /* output */ 1, &handler); + if (res != XML_ERR_OK) { xmlFree((char *) cur->encoding); cur->encoding = NULL; encoding = NULL; @@ -2921,8 +2923,10 @@ xmlSaveFormatFileEnc( const char * filename, xmlDocPtr cur, encoding = (const char *) cur->encoding; if (encoding != NULL) { - xmlOpenCharEncodingHandler(encoding, /* output */ 1, &handler); - if (handler == NULL) + int res; + + res = xmlOpenCharEncodingHandler(encoding, /* output */ 1, &handler); + if (res != XML_ERR_OK) return(-1); }