1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-24 13:33:01 +03:00

html: Always serialize attributes with double quotes

Align with HTML5.
This commit is contained in:
Nick Wellnhofer
2025-05-11 21:38:16 +02:00
parent 5c4cc456a4
commit 825f3a9d0c
6 changed files with 72 additions and 54 deletions

View File

@@ -642,59 +642,76 @@ htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
*/
/*
 * Serializes a single attribute node `cur` into the output buffer `buf`:
 * a leading space, an optional "prefix:" taken from cur->ns, the attribute
 * name, and (for non-boolean attributes that have children) the value.
 *
 * NOTE(review): this listing appears to interleave the pre-change and
 * post-change lines of a diff whose +/- markers were lost, so it is not
 * compilable as shown. Lines using `value`/`flags` and
 * xmlOutputBufferWriteQuotedString() look like the removed variant; lines
 * using `child`/`isUri` and xmlSerializeText() look like the added variant.
 * Confirm against the real source before relying on these comments.
 */
static void
htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlAttrPtr cur) {
xmlChar *value;
/* Nothing to write for a missing attribute. */
if (cur == NULL) {
return;
}
/* Separator before the attribute, then the optional namespace prefix. */
xmlOutputBufferWrite(buf, 1, " ");
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
xmlOutputBufferWrite(buf, 1, ":");
}
xmlOutputBufferWriteString(buf, (const char *)cur->name);
/*
 * A value is only emitted when the attribute has child nodes and
 * htmlIsBooleanAttr() does not report the name as a boolean attribute.
 */
if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
int flags = XML_ESCAPE_HTML | XML_ESCAPE_ATTR;
value = xmlNodeListGetStringInternal(cur->children, /* escape */ 1,
flags);
if (value) {
xmlOutputBufferWrite(buf, 1, "=");
if ((cur->ns == NULL) && (cur->parent != NULL) &&
if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
xmlNodePtr child;
int isUri;
/* Open the value with '=' followed by a double quote. */
xmlOutputBufferWrite(buf, 2, "=\"");
/*
 * isUri is set for un-namespaced attributes on un-namespaced parents
 * whose name matches (case-insensitively) "href", "action" or "src",
 * or "name" when the parent element is "a".
 */
isUri = (cur->ns == NULL) && (cur->parent != NULL) &&
(cur->parent->ns == NULL) &&
((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
(!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
(!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
(!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
xmlChar *escaped;
xmlChar *tmp = value;
(!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))));
while (IS_BLANK_CH(*tmp)) tmp++;
/* Walk the attribute's child list and serialize each node in order. */
for (child = cur->children; child != NULL; child = child->next) {
if (child->type == XML_TEXT_NODE) {
const xmlChar *content = child->content;
/* Text nodes without content contribute nothing. */
if (content == NULL)
continue;
if (!isUri) {
/* Non-URI text goes straight through the HTML attribute escaper. */
xmlSerializeText(buf, content,
XML_ESCAPE_HTML | XML_ESCAPE_ATTR);
} else {
xmlChar *escaped;
const xmlChar *tmp = content;
/* Skip leading characters matched by IS_WS_HTML before URI-escaping. */
while (IS_WS_HTML(*tmp)) tmp++;
/*
* Angle brackets are technically illegal in URIs, but they're
* used in server side includes, for example. Curly brackets
* are illegal as well and often used in templates.
* See appendix "B.2.1 Non-ASCII characters in URI
* attribute values" in the HTML 4.01 spec.
*
* Angle brackets are technically illegal in URIs, but
* they're used in server side includes, for example.
* Curly brackets are illegal as well and often used in
* templates.
*
* Don't escape non-whitespace, printable ASCII chars for
* improved interoperability. Only escape space, control
* and non-ASCII chars.
*/
/* The second argument lists characters passed to xmlURIEscapeStr as exceptions. */
escaped = xmlURIEscapeStr(tmp,
BAD_CAST "\"#$%&+,/:;<=>?@[\\]^`{|}");
if (escaped != NULL) {
xmlOutputBufferWriteQuotedString(buf, escaped);
/* A NULL result is recorded on the buffer as an out-of-memory error. */
if (escaped == NULL) {
buf->error = XML_ERR_NO_MEMORY;
break;
}
/* The URI-escaped string is still run through the attribute escaper. */
xmlSerializeText(buf, escaped,
XML_ESCAPE_HTML | XML_ESCAPE_ATTR);
xmlFree(escaped);
} else {
buf->error = XML_ERR_NO_MEMORY;
}
} else {
xmlOutputBufferWriteQuotedString(buf, value);
} else if (child->type == XML_ENTITY_REF_NODE) {
/* TODO: We should probably expand entity refs */
/* Entity references are written back verbatim as "&name;". */
xmlOutputBufferWrite(buf, 1, "&");
xmlOutputBufferWriteString(buf, (char *) child->name);
xmlOutputBufferWrite(buf, 1, ";");
}
xmlFree(value);
} else {
buf->error = XML_ERR_NO_MEMORY;
}
/* Close the value with the matching double quote. */
xmlOutputBufferWrite(buf, 1, "\"");
}
}
@@ -823,8 +840,10 @@ htmlNodeDumpInternal(xmlOutputBufferPtr buf, xmlNodePtr cur,
buf->error = XML_ERR_NO_MEMORY;
return;
}
xmlOutputBufferWrite(buf, 1, "=");
xmlOutputBufferWriteQuotedString(buf, newVal);
xmlOutputBufferWrite(buf, 2, "=\"");
xmlSerializeText(buf, newVal,
XML_ESCAPE_HTML | XML_ESCAPE_ATTR);
xmlOutputBufferWrite(buf, 1, "\"");
xmlFree(newVal);
}
attr = attr->next;

View File

@@ -329,7 +329,7 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0,
event of a bad flash, just flip a switch on the RDI and boot up your
system, and flash again. This is also good as a failsafe in case you
don't believe in Virus Protecting your computer. (Thanks to Fred for
link)<br><a href="http://www.ioss.com.tw/eg/rd1/RD1info0004.PDF" target="_NEW">Manufacturers Brochure</a> (PDF Format)<br><a href="http://192.216.185.10/mwave/doc/A06950.html" target='_BLANK"'>Another info page</a><br><a href="http://192.216.185.10/mwave/ProdMB-AC-MW.hmx?UID=&amp;CID=&amp;updepts=MB&amp;DNAME=%3Cb%3EMotherboards%3C%2Fb%3E&amp;Back=ProdMB-AC-MW.hmx?" target="_BLANK">Available for about $20</a><br><br><img src="doc3_files/rd1.jpg"></font><br><br><a name="newsitem963875853,12731,"></a>
link)<br><a href="http://www.ioss.com.tw/eg/rd1/RD1info0004.PDF" target="_NEW">Manufacturers Brochure</a> (PDF Format)<br><a href="http://192.216.185.10/mwave/doc/A06950.html" target="_BLANK&quot;">Another info page</a><br><a href="http://192.216.185.10/mwave/ProdMB-AC-MW.hmx?UID=&amp;CID=&amp;updepts=MB&amp;DNAME=%3Cb%3EMotherboards%3C%2Fb%3E&amp;Back=ProdMB-AC-MW.hmx?" target="_BLANK">Available for about $20</a><br><br><img src="doc3_files/rd1.jpg"></font><br><br><a name="newsitem963875853,12731,"></a>
<table bgcolor="#003399" width="100%">
<tbody>
<tr>

View File

@@ -1,6 +1,6 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
<html>
<head><meta http-equiv='\"content-type\"' content='\"text/html;charset=utf-8\"'></head>
<head><meta http-equiv="\&quot;content-type\&quot;" content="\&quot;text/html;charset=utf-8\&quot;"></head>
<body>
</body>
</html>

View File

@@ -692,8 +692,8 @@ testHtmlUpdateMetaEncoding(void) {
" <" MHE " content=\"text/html; charset = \">\n"
" <" MHE " content=\"text/html; charset = ' utf-8 '\">\n"
" <" MHE " content=\"text/html; charset = ' foo \">\n"
" <" MHE " content='text/html; charset = \" utf-8 \"'>\n"
" <" MHE " content='text/html; charset = \" foo '>\n"
" <" MHE " content=\"text/html; charset = &quot; utf-8 &quot;\">\n"
" <" MHE " content=\"text/html; charset = &quot; foo \">\n"
" <" MHE " content=\"charset ; charset = utf-8; baz\">\n"
" <" MHE " content=\"text/html\">\n"
" <" MHE " content=\"\">\n"

View File

@@ -62,5 +62,5 @@ gen_tab('xmlEscapeTabAttr', '\t\n\r"&<>', True)
print('#ifdef LIBXML_HTML_ENABLED\n')
gen_tab('htmlEscapeTab', '&<>', False)
gen_tab('htmlEscapeTabAttr', '&', False) # TODO: Add '"'
gen_tab('htmlEscapeTabAttr', '"&', False)
print('#endif /* LIBXML_HTML_ENABLED */')

View File

@@ -220,7 +220,7 @@ static const signed char htmlEscapeTab[128] = {
static const signed char htmlEscapeTabAttr[128] = {
0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
@@ -2687,8 +2687,7 @@ xmlOutputBufferWriteString(xmlOutputBufferPtr out, const char *str) {
* double quotes.
*
* This should only be used to escape system IDs. Currently,
* we also use it for public IDs and original entity values,
* as well as HTML attributes.
* we also use it for public IDs and original entity values.
*
* @param buf output buffer
* @param string the string to add