From ee501f5449e14a25115a46e8bedec728ca00a89d Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sat, 13 Oct 2018 15:23:35 +0200 Subject: [PATCH] Stop using doc->charset outside parser code doc->charset does not specify the in-memory encoding which is always UTF-8. --- HTMLtree.c | 38 ++++---------------------------------- c14n.c | 9 --------- include/libxml/tree.h | 2 +- xmlsave.c | 3 --- 4 files changed, 5 insertions(+), 47 deletions(-) diff --git a/HTMLtree.c b/HTMLtree.c index 2fd0c9c5..6a2f43d8 100644 --- a/HTMLtree.c +++ b/HTMLtree.c @@ -570,16 +570,7 @@ htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) { xmlCharEncoding enc; enc = xmlParseCharEncoding(encoding); - if (enc != cur->charset) { - if (cur->charset != XML_CHAR_ENCODING_UTF8) { - /* - * Not supported yet - */ - *mem = NULL; - *size = 0; - return; - } - + if (enc != XML_CHAR_ENCODING_UTF8) { handler = xmlFindCharEncodingHandler(encoding); if (handler == NULL) htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); @@ -1101,14 +1092,7 @@ htmlDocDump(FILE *f, xmlDocPtr cur) { xmlCharEncoding enc; enc = xmlParseCharEncoding(encoding); - if (enc != cur->charset) { - if (cur->charset != XML_CHAR_ENCODING_UTF8) { - /* - * Not supported yet - */ - return(-1); - } - + if (enc != XML_CHAR_ENCODING_UTF8) { handler = xmlFindCharEncodingHandler(encoding); if (handler == NULL) htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); @@ -1160,14 +1144,7 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) { xmlCharEncoding enc; enc = xmlParseCharEncoding(encoding); - if (enc != cur->charset) { - if (cur->charset != XML_CHAR_ENCODING_UTF8) { - /* - * Not supported yet - */ - return(-1); - } - + if (enc != XML_CHAR_ENCODING_UTF8) { handler = xmlFindCharEncodingHandler(encoding); if (handler == NULL) htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); @@ -1221,14 +1198,7 @@ htmlSaveFileFormat(const char *filename, xmlDocPtr cur, xmlCharEncoding enc; enc = xmlParseCharEncoding(encoding); - if (enc != cur->charset) { - if (cur->charset != XML_CHAR_ENCODING_UTF8) { - /* - * Not supported yet - */ - return(-1); - } - + if (enc != XML_CHAR_ENCODING_UTF8) { handler = xmlFindCharEncodingHandler(encoding); if (handler == NULL) htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); diff --git a/c14n.c b/c14n.c index c04ce666..d80ae8b7 100644 --- a/c14n.c +++ b/c14n.c @@ -1797,15 +1797,6 @@ xmlC14NNewCtx(xmlDocPtr doc, return (NULL); } - /* - * Validate the XML document encoding value, if provided. - */ - if (doc->charset != XML_CHAR_ENCODING_UTF8) { - xmlC14NErr(ctx, (xmlNodePtr) doc, XML_C14N_REQUIRES_UTF8, - "xmlC14NNewCtx: source document not in UTF8\n"); - return (NULL); - } - /* * Allocate a new xmlC14NCtxPtr and fill the fields. */ diff --git a/include/libxml/tree.h b/include/libxml/tree.h index 4a9b3bc6..626ed6ae 100644 --- a/include/libxml/tree.h +++ b/include/libxml/tree.h @@ -575,7 +575,7 @@ struct _xmlDoc { void *ids; /* Hash table for ID attributes if any */ void *refs; /* Hash table for IDREFs attributes if any */ const xmlChar *URL; /* The URI for that document */ - int charset; /* encoding of the in-memory content + int charset; /* Internal flag for charset handling, actually an xmlCharEncoding */ struct _xmlDict *dict; /* dict used to allocate names or NULL */ void *psvi; /* for type/PSVI informations */ diff --git a/xmlsave.c b/xmlsave.c index 6c7418ec..7a05d832 100644 --- a/xmlsave.c +++ b/xmlsave.c @@ -1123,9 +1123,6 @@ xmlDocContentDumpOutput(xmlSaveCtxtPtr ctxt, xmlDocPtr cur) { cur->encoding = BAD_CAST ctxt->encoding; } else if (cur->encoding != NULL) { encoding = cur->encoding; - } else if (cur->charset != XML_CHAR_ENCODING_UTF8) { - encoding = (const xmlChar *) - xmlGetCharEncodingName((xmlCharEncoding) cur->charset); } if (((cur->type == XML_HTML_DOCUMENT_NODE) &&