1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2026-01-26 21:41:34 +03:00
Files
libxml2/include/private/html.h
Nick Wellnhofer 46f05ea4d5 html: Rework meta charset handling
Don't use encoding from meta tags when serializing. Only use the value
in `doc->encoding`, matching the XML serializer. This is the actual
encoding used when parsing.

Stop modifying the input document by setting meta tags before
serializing. Meta tags are now injected during serialization.

Add full support for <meta charset=""> which is also used when adding
meta tags.

Align with HTML5 and implement the "algorithm for extracting a character
encoding from a meta element". Only modify the encoding substring in
Content-Type meta tags.

Only switch encoding once when parsing.

Fix htmlSaveFileFormat so that a NULL encoding no longer declares a
misleading UTF-8 charset.

Fixes #909.
2025-05-11 20:29:25 +02:00

32 lines
727 B
C

#ifndef XML_HTML_H_PRIVATE__
#define XML_HTML_H_PRIVATE__
#include <libxml/xmlversion.h>
#ifdef LIBXML_HTML_ENABLED
/*
 * IS_WS_HTML(c): nonzero when c is one of the five whitespace code points
 * accepted here: 0x20 (space), 0x09 (tab), 0x0A (line feed),
 * 0x0C (form feed) or 0x0D (carriage return). 0x0B (vertical tab) lies
 * inside the 0x09..0x0D range but is explicitly rejected.
 *
 * The argument is expanded several times, so it must not have side effects.
 */
#define IS_WS_HTML(c) \
    (((c) == 0x20) || \
     !(((c) < 0x09) || ((c) > 0x0D) || ((c) == 0x0B)))
/*
 * Three size_t positions, passed by pointer as the `off` argument of
 * htmlParseContentType().
 *
 * NOTE(review): the meaning of each field is defined by the code that
 * fills it in, which is not part of this header. The per-field notes below
 * are inferred from the names only and must be confirmed against that code.
 */
typedef struct {
/* presumably the offset at which the encoding name begins -- confirm */
size_t start;
/* presumably the offset at which the encoding name ends; whether this is
 * inclusive or one-past-the-end is not visible here -- confirm */
size_t end;
/* presumably the overall length of the examined string -- confirm */
size_t size;
} htmlMetaEncodingOffsets;
/*
 * Library-internal declaration (marked XML_HIDDEN): takes a parser context
 * and a parser input and returns a node pointer.
 *
 * NOTE(review): presumably parses `input` as HTML content using `ctxt` and
 * returns the resulting node(s). Ownership of `input`, the value returned
 * on failure and any requirements on the state of `ctxt` are not visible
 * from this header -- confirm in the definition.
 */
XML_HIDDEN xmlNodePtr
htmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt, xmlParserInputPtr input);
/*
 * Library-internal declaration (marked XML_HIDDEN): takes a read-only
 * xmlChar string `val` and a writable htmlMetaEncodingOffsets `off`, and
 * returns an int.
 *
 * NOTE(review): presumably scans a Content-Type style value for its
 * charset/encoding part and records where it was found in `off`. The
 * meaning of the return value, and whether `off` is written when nothing
 * is found, are not visible from this header -- confirm in the definition.
 */
XML_HIDDEN int
htmlParseContentType(const xmlChar *val, htmlMetaEncodingOffsets *off);
/*
 * Library-internal declaration (marked XML_HIDDEN): takes an output buffer,
 * a document, a node, an encoding name and an integer format flag; returns
 * nothing.
 *
 * NOTE(review): presumably serializes `cur` (a node of `doc`) as HTML into
 * `buf`. Whether `encoding` may be NULL, which values `format` accepts and
 * how write errors are reported are not visible from this header --
 * confirm in the definition.
 */
XML_HIDDEN void
htmlNodeDumpInternal(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
const char *encoding, int format);
#endif /* LIBXML_HTML_ENABLED */
#endif /* XML_HTML_H_PRIVATE__ */