1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-24 13:33:01 +03:00

html: Always serialize attributes with double quotes

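Align with HTML5, whose serialization algorithm always wraps attribute values in double quotes and escapes embedded double quotes as &quot;.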
This commit is contained in:
Nick Wellnhofer
2025-05-11 21:38:16 +02:00
parent 5c4cc456a4
commit 825f3a9d0c
6 changed files with 72 additions and 54 deletions

View File

@@ -642,59 +642,76 @@ htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
*/ */
static void static void
htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlAttrPtr cur) { htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlAttrPtr cur) {
xmlChar *value;
if (cur == NULL) {
return;
}
xmlOutputBufferWrite(buf, 1, " "); xmlOutputBufferWrite(buf, 1, " ");
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
xmlOutputBufferWrite(buf, 1, ":"); xmlOutputBufferWrite(buf, 1, ":");
} }
xmlOutputBufferWriteString(buf, (const char *)cur->name); xmlOutputBufferWriteString(buf, (const char *)cur->name);
if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
int flags = XML_ESCAPE_HTML | XML_ESCAPE_ATTR;
value = xmlNodeListGetStringInternal(cur->children, /* escape */ 1, if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
flags); xmlNodePtr child;
if (value) { int isUri;
xmlOutputBufferWrite(buf, 1, "=");
if ((cur->ns == NULL) && (cur->parent != NULL) && xmlOutputBufferWrite(buf, 2, "=\"");
isUri = (cur->ns == NULL) && (cur->parent != NULL) &&
(cur->parent->ns == NULL) && (cur->parent->ns == NULL) &&
((!xmlStrcasecmp(cur->name, BAD_CAST "href")) || ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
(!xmlStrcasecmp(cur->name, BAD_CAST "action")) || (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
(!xmlStrcasecmp(cur->name, BAD_CAST "src")) || (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
((!xmlStrcasecmp(cur->name, BAD_CAST "name")) && ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
(!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) { (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))));
xmlChar *escaped;
xmlChar *tmp = value;
while (IS_BLANK_CH(*tmp)) tmp++; for (child = cur->children; child != NULL; child = child->next) {
if (child->type == XML_TEXT_NODE) {
const xmlChar *content = child->content;
if (content == NULL)
continue;
if (!isUri) {
xmlSerializeText(buf, content,
XML_ESCAPE_HTML | XML_ESCAPE_ATTR);
} else {
xmlChar *escaped;
const xmlChar *tmp = content;
while (IS_WS_HTML(*tmp)) tmp++;
/* /*
* Angle brackets are technically illegal in URIs, but they're * See appendix "B.2.1 Non-ASCII characters in URI
* used in server side includes, for example. Curly brackets * attribute values" in the HTML 4.01 spec.
* are illegal as well and often used in templates. *
* Angle brackets are technically illegal in URIs, but
* they're used in server side includes, for example.
* Curly brackets are illegal as well and often used in
* templates.
*
* Don't escape non-whitespace, printable ASCII chars for * Don't escape non-whitespace, printable ASCII chars for
* improved interoperability. Only escape space, control * improved interoperability. Only escape space, control
* and non-ASCII chars. * and non-ASCII chars.
*/ */
escaped = xmlURIEscapeStr(tmp, escaped = xmlURIEscapeStr(tmp,
BAD_CAST "\"#$%&+,/:;<=>?@[\\]^`{|}"); BAD_CAST "\"#$%&+,/:;<=>?@[\\]^`{|}");
if (escaped != NULL) { if (escaped == NULL) {
xmlOutputBufferWriteQuotedString(buf, escaped); buf->error = XML_ERR_NO_MEMORY;
break;
}
xmlSerializeText(buf, escaped,
XML_ESCAPE_HTML | XML_ESCAPE_ATTR);
xmlFree(escaped); xmlFree(escaped);
} else {
buf->error = XML_ERR_NO_MEMORY;
} }
} else { } else if (child->type == XML_ENTITY_REF_NODE) {
xmlOutputBufferWriteQuotedString(buf, value); /* TODO: We should probably expand entity refs */
xmlOutputBufferWrite(buf, 1, "&");
xmlOutputBufferWriteString(buf, (char *) child->name);
xmlOutputBufferWrite(buf, 1, ";");
} }
xmlFree(value);
} else {
buf->error = XML_ERR_NO_MEMORY;
} }
xmlOutputBufferWrite(buf, 1, "\"");
} }
} }
@@ -823,8 +840,10 @@ htmlNodeDumpInternal(xmlOutputBufferPtr buf, xmlNodePtr cur,
buf->error = XML_ERR_NO_MEMORY; buf->error = XML_ERR_NO_MEMORY;
return; return;
} }
xmlOutputBufferWrite(buf, 1, "="); xmlOutputBufferWrite(buf, 2, "=\"");
xmlOutputBufferWriteQuotedString(buf, newVal); xmlSerializeText(buf, newVal,
XML_ESCAPE_HTML | XML_ESCAPE_ATTR);
xmlOutputBufferWrite(buf, 1, "\"");
xmlFree(newVal); xmlFree(newVal);
} }
attr = attr->next; attr = attr->next;

View File

@@ -329,7 +329,7 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0,
event of a bad flash, just flip a switch on the RDI and boot up your event of a bad flash, just flip a switch on the RDI and boot up your
system, and flash again. This is also good as a failsafe in case you system, and flash again. This is also good as a failsafe in case you
don't believe in Virus Protecting your computer. (Thanks to Fred for don't believe in Virus Protecting your computer. (Thanks to Fred for
link)<br><a href="http://www.ioss.com.tw/eg/rd1/RD1info0004.PDF" target="_NEW">Manufacturers Brochure</a> (PDF Format)<br><a href="http://192.216.185.10/mwave/doc/A06950.html" target='_BLANK"'>Another info page</a><br><a href="http://192.216.185.10/mwave/ProdMB-AC-MW.hmx?UID=&amp;CID=&amp;updepts=MB&amp;DNAME=%3Cb%3EMotherboards%3C%2Fb%3E&amp;Back=ProdMB-AC-MW.hmx?" target="_BLANK">Available for about $20</a><br><br><img src="doc3_files/rd1.jpg"></font><br><br><a name="newsitem963875853,12731,"></a> link)<br><a href="http://www.ioss.com.tw/eg/rd1/RD1info0004.PDF" target="_NEW">Manufacturers Brochure</a> (PDF Format)<br><a href="http://192.216.185.10/mwave/doc/A06950.html" target="_BLANK&quot;">Another info page</a><br><a href="http://192.216.185.10/mwave/ProdMB-AC-MW.hmx?UID=&amp;CID=&amp;updepts=MB&amp;DNAME=%3Cb%3EMotherboards%3C%2Fb%3E&amp;Back=ProdMB-AC-MW.hmx?" target="_BLANK">Available for about $20</a><br><br><img src="doc3_files/rd1.jpg"></font><br><br><a name="newsitem963875853,12731,"></a>
<table bgcolor="#003399" width="100%"> <table bgcolor="#003399" width="100%">
<tbody> <tbody>
<tr> <tr>

View File

@@ -1,6 +1,6 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"> <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
<html> <html>
<head><meta http-equiv='\"content-type\"' content='\"text/html;charset=utf-8\"'></head> <head><meta http-equiv="\&quot;content-type\&quot;" content="\&quot;text/html;charset=utf-8\&quot;"></head>
<body> <body>
</body> </body>
</html> </html>

View File

@@ -692,8 +692,8 @@ testHtmlUpdateMetaEncoding(void) {
" <" MHE " content=\"text/html; charset = \">\n" " <" MHE " content=\"text/html; charset = \">\n"
" <" MHE " content=\"text/html; charset = ' utf-8 '\">\n" " <" MHE " content=\"text/html; charset = ' utf-8 '\">\n"
" <" MHE " content=\"text/html; charset = ' foo \">\n" " <" MHE " content=\"text/html; charset = ' foo \">\n"
" <" MHE " content='text/html; charset = \" utf-8 \"'>\n" " <" MHE " content=\"text/html; charset = &quot; utf-8 &quot;\">\n"
" <" MHE " content='text/html; charset = \" foo '>\n" " <" MHE " content=\"text/html; charset = &quot; foo \">\n"
" <" MHE " content=\"charset ; charset = utf-8; baz\">\n" " <" MHE " content=\"charset ; charset = utf-8; baz\">\n"
" <" MHE " content=\"text/html\">\n" " <" MHE " content=\"text/html\">\n"
" <" MHE " content=\"\">\n" " <" MHE " content=\"\">\n"

View File

@@ -62,5 +62,5 @@ gen_tab('xmlEscapeTabAttr', '\t\n\r"&<>', True)
print('#ifdef LIBXML_HTML_ENABLED\n') print('#ifdef LIBXML_HTML_ENABLED\n')
gen_tab('htmlEscapeTab', '&<>', False) gen_tab('htmlEscapeTab', '&<>', False)
gen_tab('htmlEscapeTabAttr', '&', False) # TODO: Add '"' gen_tab('htmlEscapeTabAttr', '"&', False)
print('#endif /* LIBXML_HTML_ENABLED */') print('#endif /* LIBXML_HTML_ENABLED */')

View File

@@ -220,7 +220,7 @@ static const signed char htmlEscapeTab[128] = {
static const signed char htmlEscapeTabAttr[128] = { static const signed char htmlEscapeTabAttr[128] = {
0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
@@ -2687,8 +2687,7 @@ xmlOutputBufferWriteString(xmlOutputBufferPtr out, const char *str) {
* double quotes. * double quotes.
* *
* This should only be used to escape system IDs. Currently, * This should only be used to escape system IDs. Currently,
* we also use it for public IDs and original entity values, * we also use it for public IDs and original entity values.
* as well as HTML attributes.
* *
* @param buf output buffer * @param buf output buffer
* @param string the string to add * @param string the string to add