1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-31 21:50:33 +03:00

html: Call lower-level escaping functions

Removes the need to pass a document around.
This commit is contained in:
Nick Wellnhofer
2025-05-09 20:26:33 +02:00
parent 63535d3922
commit 971038e59f
5 changed files with 40 additions and 36 deletions

View File

@@ -24,10 +24,12 @@
#include <libxml/uri.h> #include <libxml/uri.h>
#include "private/buf.h" #include "private/buf.h"
#include "private/entities.h"
#include "private/error.h" #include "private/error.h"
#include "private/html.h" #include "private/html.h"
#include "private/io.h" #include "private/io.h"
#include "private/save.h" #include "private/save.h"
#include "private/tree.h"
/************************************************************************ /************************************************************************
* * * *
@@ -404,14 +406,14 @@ htmlFindOutputEncoder(const char *encoding, xmlCharEncodingHandler **out) {
* Serialize an HTML document to an xmlBuf. * Serialize an HTML document to an xmlBuf.
* *
* @param buf the xmlBufPtr output * @param buf the xmlBufPtr output
* @param doc the document * @param doc the document (unused)
* @param cur the current node * @param cur the current node
* @param format whether formatting newlines should be added * @param format whether formatting newlines should be added
* @returns the number of bytes written or -1 in case of error * @returns the number of bytes written or -1 in case of error
*/ */
static size_t static size_t
htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur, htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc ATTRIBUTE_UNUSED,
int format) { xmlNodePtr cur, int format) {
size_t use; size_t use;
size_t ret; size_t ret;
xmlOutputBufferPtr outbuf; xmlOutputBufferPtr outbuf;
@@ -434,7 +436,7 @@ htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur,
outbuf->written = 0; outbuf->written = 0;
use = xmlBufUse(buf); use = xmlBufUse(buf);
htmlNodeDumpInternal(outbuf, doc, cur, NULL, format); htmlNodeDumpInternal(outbuf, cur, NULL, format);
if (outbuf->error) if (outbuf->error)
ret = (size_t) -1; ret = (size_t) -1;
else else
@@ -482,14 +484,14 @@ htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
* changed in a future version. * changed in a future version.
* *
* @param out the FILE pointer * @param out the FILE pointer
* @param doc the document * @param doc the document (unused)
* @param cur the current node * @param cur the current node
* @param encoding the document encoding (optional) * @param encoding the document encoding (optional)
* @param format whether formatting newlines should be added * @param format whether formatting newlines should be added
* @returns the number of bytes written or -1 in case of failure. * @returns the number of bytes written or -1 in case of failure.
*/ */
int int
htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc, htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc ATTRIBUTE_UNUSED,
xmlNodePtr cur, const char *encoding, int format) { xmlNodePtr cur, const char *encoding, int format) {
xmlOutputBufferPtr buf; xmlOutputBufferPtr buf;
xmlCharEncodingHandlerPtr handler; xmlCharEncodingHandlerPtr handler;
@@ -506,7 +508,7 @@ htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
if (buf == NULL) if (buf == NULL)
return(-1); return(-1);
htmlNodeDumpInternal(buf, doc, cur, NULL, format); htmlNodeDumpInternal(buf, cur, NULL, format);
ret = xmlOutputBufferClose(buf); ret = xmlOutputBufferClose(buf);
return(ret); return(ret);
@@ -636,18 +638,17 @@ htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
* Serialize an HTML attribute. * Serialize an HTML attribute.
* *
* @param buf the HTML buffer output * @param buf the HTML buffer output
* @param doc the document
* @param cur the attribute pointer * @param cur the attribute pointer
*/ */
static void static void
htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) { htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlAttrPtr cur) {
xmlChar *value; xmlChar *value;
/* /*
* The html output method should not escape a & character * The html output method should not escape a & character
* occurring in an attribute value immediately followed by * occurring in an attribute value immediately followed by
* a { character (see Section B.7.1 of the HTML 4.0 Recommendation). * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
* This is implemented in xmlEncodeEntitiesReentrant * This is implemented in xmlEscapeText.
*/ */
if (cur == NULL) { if (cur == NULL) {
@@ -660,7 +661,10 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
} }
xmlOutputBufferWriteString(buf, (const char *)cur->name); xmlOutputBufferWriteString(buf, (const char *)cur->name);
if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) { if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
value = xmlNodeListGetString(doc, cur->children, 0); int flags = XML_ESCAPE_HTML | XML_ESCAPE_ATTR;
value = xmlNodeListGetStringInternal(cur->children, /* escape */ 1,
flags);
if (value) { if (value) {
xmlOutputBufferWriteString(buf, "="); xmlOutputBufferWriteString(buf, "=");
if ((cur->ns == NULL) && (cur->parent != NULL) && if ((cur->ns == NULL) && (cur->parent != NULL) &&
@@ -708,13 +712,12 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
* tags containing the character encoding. * tags containing the character encoding.
* *
* @param buf the HTML buffer output * @param buf the HTML buffer output
* @param doc the document
* @param cur the current node * @param cur the current node
* @param encoding the encoding string (optional) * @param encoding the encoding string (optional)
* @param format whether formatting newlines should be added * @param format whether formatting newlines should be added
*/ */
void void
htmlNodeDumpInternal(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, htmlNodeDumpInternal(xmlOutputBufferPtr buf, xmlNodePtr cur,
const char *encoding, int format) { const char *encoding, int format) {
xmlNodePtr root, parent, metaHead = NULL; xmlNodePtr root, parent, metaHead = NULL;
xmlAttrPtr attr; xmlAttrPtr attr;
@@ -758,7 +761,7 @@ htmlNodeDumpInternal(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
* case. * case.
*/ */
if ((cur->parent != parent) && (cur->children != NULL)) { if ((cur->parent != parent) && (cur->children != NULL)) {
htmlNodeDumpInternal(buf, doc, cur, encoding, format); htmlNodeDumpInternal(buf, cur, encoding, format);
break; break;
} }
@@ -814,7 +817,7 @@ htmlNodeDumpInternal(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
attr = cur->properties; attr = cur->properties;
while (attr != NULL) { while (attr != NULL) {
if ((!isMeta) || (attr != menc.attr)) { if ((!isMeta) || (attr != menc.attr)) {
htmlAttrDumpOutput(buf, doc, attr); htmlAttrDumpOutput(buf, attr);
} else { } else {
xmlChar *newVal; xmlChar *newVal;
@@ -897,7 +900,7 @@ htmlNodeDumpInternal(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
} }
case XML_ATTRIBUTE_NODE: case XML_ATTRIBUTE_NODE:
htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur); htmlAttrDumpOutput(buf, (xmlAttrPtr) cur);
break; break;
case HTML_TEXT_NODE: case HTML_TEXT_NODE:
@@ -910,7 +913,7 @@ htmlNodeDumpInternal(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
(xmlStrcasecmp(parent->name, BAD_CAST "style"))))) { (xmlStrcasecmp(parent->name, BAD_CAST "style"))))) {
xmlChar *buffer; xmlChar *buffer;
buffer = xmlEncodeEntitiesReentrant(doc, cur->content); buffer = xmlEscapeText(cur->content, XML_ESCAPE_HTML);
if (buffer == NULL) { if (buffer == NULL) {
buf->error = XML_ERR_NO_MEMORY; buf->error = XML_ERR_NO_MEMORY;
return; return;
@@ -1017,15 +1020,16 @@ htmlNodeDumpInternal(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
* Serialize an HTML node to an output buffer. * Serialize an HTML node to an output buffer.
* *
* @param buf the HTML buffer output * @param buf the HTML buffer output
* @param doc the document * @param doc the document (unused)
* @param cur the current node * @param cur the current node
* @param encoding the encoding string (unused) * @param encoding the encoding string (unused)
* @param format whether formatting newlines should be added * @param format whether formatting newlines should be added
*/ */
void void
htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf,
xmlDocPtr doc ATTRIBUTE_UNUSED, xmlNodePtr cur,
const char *encoding ATTRIBUTE_UNUSED, int format) { const char *encoding ATTRIBUTE_UNUSED, int format) {
htmlNodeDumpInternal(buf, doc, cur, NULL, format); htmlNodeDumpInternal(buf, cur, NULL, format);
} }
/** /**
@@ -1034,14 +1038,14 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
* of htmlNodeDumpFormatOutput(). * of htmlNodeDumpFormatOutput().
* *
* @param buf the HTML buffer output * @param buf the HTML buffer output
* @param doc the document * @param doc the document (unused)
* @param cur the current node * @param cur the current node
* @param encoding the encoding string (unused) * @param encoding the encoding string (unused)
*/ */
void void
htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc ATTRIBUTE_UNUSED,
const char *encoding ATTRIBUTE_UNUSED) { xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED) {
htmlNodeDumpInternal(buf, doc, cur, NULL, 1); htmlNodeDumpInternal(buf, cur, NULL, 1);
} }
/** /**
@@ -1056,7 +1060,7 @@ void
htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
const char *encoding ATTRIBUTE_UNUSED, const char *encoding ATTRIBUTE_UNUSED,
int format) { int format) {
htmlNodeDumpInternal(buf, cur, (xmlNodePtr) cur, NULL, format); htmlNodeDumpInternal(buf, (xmlNodePtr) cur, NULL, format);
} }
/** /**
@@ -1071,7 +1075,7 @@ htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
void void
htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
const char *encoding ATTRIBUTE_UNUSED) { const char *encoding ATTRIBUTE_UNUSED) {
htmlNodeDumpInternal(buf, cur, (xmlNodePtr) cur, NULL, 1); htmlNodeDumpInternal(buf, (xmlNodePtr) cur, NULL, 1);
} }
/************************************************************************ /************************************************************************

View File

@@ -22,7 +22,7 @@ XML_HIDDEN int
htmlParseContentType(const xmlChar *val, htmlMetaEncodingOffsets *off); htmlParseContentType(const xmlChar *val, htmlMetaEncodingOffsets *off);
XML_HIDDEN void XML_HIDDEN void
htmlNodeDumpInternal(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, htmlNodeDumpInternal(xmlOutputBufferPtr buf, xmlNodePtr cur,
const char *encoding, int format); const char *encoding, int format);
#endif /* LIBXML_HTML_ENABLED */ #endif /* LIBXML_HTML_ENABLED */

View File

@@ -19,4 +19,7 @@ xmlStaticCopyNodeList(xmlNodePtr node, xmlDocPtr doc, xmlNodePtr parent);
XML_HIDDEN const xmlChar * XML_HIDDEN const xmlChar *
xmlSplitQName4(const xmlChar *name, xmlChar **prefixPtr); xmlSplitQName4(const xmlChar *name, xmlChar **prefixPtr);
XML_HIDDEN xmlChar *
xmlNodeListGetStringInternal(const xmlNode *node, int escape, int flags);
#endif /* XML_TREE_H_PRIVATE__ */ #endif /* XML_TREE_H_PRIVATE__ */

2
tree.c
View File

@@ -1444,7 +1444,7 @@ xmlStringGetNodeList(const xmlDoc *doc, const xmlChar *value) {
* @param flags escape flags * @param flags escape flags
* @returns a pointer to the string. * @returns a pointer to the string.
*/ */
static xmlChar * xmlChar *
xmlNodeListGetStringInternal(const xmlNode *node, int escape, int flags) { xmlNodeListGetStringInternal(const xmlNode *node, int escape, int flags) {
xmlBufPtr buf; xmlBufPtr buf;
xmlChar *ret; xmlChar *ret;

View File

@@ -1025,17 +1025,14 @@ static int
htmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { htmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
int switched_encoding = 0; int switched_encoding = 0;
int format = 0; int format = 0;
xmlDocPtr doc;
xmlInitParser(); xmlInitParser();
doc = cur->doc;
if (ctxt->encoding == NULL) { if (ctxt->encoding == NULL) {
const char *encoding = NULL; const char *encoding = NULL;
if (doc != NULL) if (cur->doc != NULL)
encoding = (char *) doc->encoding; encoding = (char *) cur->doc->encoding;
if (encoding == NULL) if (encoding == NULL)
encoding = "HTML"; encoding = "HTML";
@@ -1048,7 +1045,7 @@ htmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
if (ctxt->options & XML_SAVE_FORMAT) if (ctxt->options & XML_SAVE_FORMAT)
format = 1; format = 1;
htmlNodeDumpInternal(ctxt->buf, doc, cur, (char *) ctxt->encoding, format); htmlNodeDumpInternal(ctxt->buf, cur, (char *) ctxt->encoding, format);
if (switched_encoding) { if (switched_encoding) {
xmlSaveClearEncoding(ctxt); xmlSaveClearEncoding(ctxt);
@@ -1366,8 +1363,8 @@ xmlSaveDocInternal(xmlSaveCtxtPtr ctxt, xmlDocPtr cur,
if (ctxt->options & XML_SAVE_FORMAT) if (ctxt->options & XML_SAVE_FORMAT)
format = 1; format = 1;
htmlNodeDumpInternal(buf, cur, (htmlNodePtr) cur, htmlNodeDumpInternal(buf, (htmlNodePtr) cur, (char *) ctxt->encoding,
(char *) ctxt->encoding, format); format);
#else #else
return(-1); return(-1);
#endif #endif