diff --git a/HTMLtree.c b/HTMLtree.c index ef2b534f..c8f9a3c9 100644 --- a/HTMLtree.c +++ b/HTMLtree.c @@ -642,59 +642,76 @@ htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, */ static void htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlAttrPtr cur) { - xmlChar *value; - - if (cur == NULL) { - return; - } xmlOutputBufferWrite(buf, 1, " "); + if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); - xmlOutputBufferWrite(buf, 1, ":"); + xmlOutputBufferWrite(buf, 1, ":"); } xmlOutputBufferWriteString(buf, (const char *)cur->name); + if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) { - int flags = XML_ESCAPE_HTML | XML_ESCAPE_ATTR; + xmlNodePtr child; + int isUri; - value = xmlNodeListGetStringInternal(cur->children, /* escape */ 1, - flags); - if (value) { - xmlOutputBufferWrite(buf, 1, "="); - if ((cur->ns == NULL) && (cur->parent != NULL) && - (cur->parent->ns == NULL) && - ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) || - (!xmlStrcasecmp(cur->name, BAD_CAST "action")) || - (!xmlStrcasecmp(cur->name, BAD_CAST "src")) || - ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) && - (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) { - xmlChar *escaped; - xmlChar *tmp = value; + xmlOutputBufferWrite(buf, 2, "=\""); - while (IS_BLANK_CH(*tmp)) tmp++; + isUri = (cur->ns == NULL) && (cur->parent != NULL) && + (cur->parent->ns == NULL) && + ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) || + (!xmlStrcasecmp(cur->name, BAD_CAST "action")) || + (!xmlStrcasecmp(cur->name, BAD_CAST "src")) || + ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) && + (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a")))); - /* - * Angle brackets are technically illegal in URIs, but they're - * used in server side includes, for example. Curly brackets - * are illegal as well and often used in templates. - * Don't escape non-whitespace, printable ASCII chars for - * improved interoperability. Only escape space, control - * and non-ASCII chars. - */ - escaped = xmlURIEscapeStr(tmp, - BAD_CAST "\"#$%&+,/:;<=>?@[\\]^`{|}"); - if (escaped != NULL) { - xmlOutputBufferWriteQuotedString(buf, escaped); - xmlFree(escaped); - } else { - buf->error = XML_ERR_NO_MEMORY; - } - } else { - xmlOutputBufferWriteQuotedString(buf, value); - } - xmlFree(value); - } else { - buf->error = XML_ERR_NO_MEMORY; - } + for (child = cur->children; child != NULL; child = child->next) { + if (child->type == XML_TEXT_NODE) { + const xmlChar *content = child->content; + + if (content == NULL) + continue; + + if (!isUri) { + xmlSerializeText(buf, content, + XML_ESCAPE_HTML | XML_ESCAPE_ATTR); + } else { + xmlChar *escaped; + const xmlChar *tmp = content; + + while (IS_WS_HTML(*tmp)) tmp++; + + /* + * See appendix "B.2.1 Non-ASCII characters in URI + * attribute values" in the HTML 4.01 spec. + * + * Angle brackets are technically illegal in URIs, but + * they're used in server side includes, for example. + * Curly brackets are illegal as well and often used in + * templates. + * + * Don't escape non-whitespace, printable ASCII chars for + * improved interoperability. Only escape space, control + * and non-ASCII chars. + */ + escaped = xmlURIEscapeStr(tmp, + BAD_CAST "\"#$%&+,/:;<=>?@[\\]^`{|}"); + if (escaped == NULL) { + buf->error = XML_ERR_NO_MEMORY; + break; + } + xmlSerializeText(buf, escaped, + XML_ESCAPE_HTML | XML_ESCAPE_ATTR); + xmlFree(escaped); + } + } else if (child->type == XML_ENTITY_REF_NODE) { + /* TODO: We should probably expand entity refs */ + xmlOutputBufferWrite(buf, 1, "&"); + xmlOutputBufferWriteString(buf, (char *) child->name); + xmlOutputBufferWrite(buf, 1, ";"); + } + } + + xmlOutputBufferWrite(buf, 1, "\""); } } @@ -823,8 +840,10 @@ htmlNodeDumpInternal(xmlOutputBufferPtr buf, xmlNodePtr cur, buf->error = XML_ERR_NO_MEMORY; return; } - xmlOutputBufferWrite(buf, 1, "="); - xmlOutputBufferWriteQuotedString(buf, newVal); + xmlOutputBufferWrite(buf, 2, "=\""); + xmlSerializeText(buf, newVal, + XML_ESCAPE_HTML | XML_ESCAPE_ATTR); + xmlOutputBufferWrite(buf, 1, "\""); xmlFree(newVal); } attr = attr->next; diff --git a/result/HTML/doc3.htm b/result/HTML/doc3.htm index 23de7681..0bf184df 100644 --- a/result/HTML/doc3.htm +++ b/result/HTML/doc3.htm @@ -329,7 +329,7 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0, event of a bad flash, just flip a switch on the RDI and boot up your system, and flash again. This is also good as a failsafe in case you don't believe in Virus Protecting your computer. (Thanks to Fred for - link)
Manufacturers Brochure (PDF Format)
Another info page
Available for about $20



+ link)
Manufacturers Brochure (PDF Format)
Another info page
Available for about $20



diff --git a/result/HTML/issue318.html b/result/HTML/issue318.html index b3a01e43..dd31c265 100644 --- a/result/HTML/issue318.html +++ b/result/HTML/issue318.html @@ -1,6 +1,6 @@ - + diff --git a/testparser.c b/testparser.c index 24264bcc..318b3b52 100644 --- a/testparser.c +++ b/testparser.c @@ -692,8 +692,8 @@ testHtmlUpdateMetaEncoding(void) { " <" MHE " content=\"text/html; charset = \">\n" " <" MHE " content=\"text/html; charset = ' utf-8 '\">\n" " <" MHE " content=\"text/html; charset = ' foo \">\n" - " <" MHE " content='text/html; charset = \" utf-8 \"'>\n" - " <" MHE " content='text/html; charset = \" foo '>\n" + " <" MHE " content=\"text/html; charset = " utf-8 "\">\n" + " <" MHE " content=\"text/html; charset = " foo \">\n" " <" MHE " content=\"charset ; charset = utf-8; baz\">\n" " <" MHE " content=\"text/html\">\n" " <" MHE " content=\"\">\n" diff --git a/tools/genEscape.py b/tools/genEscape.py index eab8df64..fbd12c90 100755 --- a/tools/genEscape.py +++ b/tools/genEscape.py @@ -62,5 +62,5 @@ gen_tab('xmlEscapeTabAttr', '\t\n\r"&<>', True) print('#ifdef LIBXML_HTML_ENABLED\n') gen_tab('htmlEscapeTab', '&<>', False) -gen_tab('htmlEscapeTabAttr', '&', False) # TODO: Add '"' +gen_tab('htmlEscapeTabAttr', '"&', False) print('#endif /* LIBXML_HTML_ENABLED */') diff --git a/xmlIO.c b/xmlIO.c index 4b3d2964..7cd76854 100644 --- a/xmlIO.c +++ b/xmlIO.c @@ -220,7 +220,7 @@ static const signed char htmlEscapeTab[128] = { static const signed char htmlEscapeTabAttr[128] = { 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 26, -1, -1, -1, 33, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, @@ -2687,8 +2687,7 @@ xmlOutputBufferWriteString(xmlOutputBufferPtr out, const char *str) { * double quotes. * * This should only be used to escape system IDs. Currently, - * we also use it for public IDs and original entity values, - * as well as HTML attributes. + * we also use it for public IDs and original entity values. * * @param buf output buffer * @param string the string to add