1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-24 13:33:01 +03:00

Improve HTML escaping of attributes on output

Handle the special case of &{...} constructs, as hinted in the spec
  http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1
and special values such as comments <!-- ... --> used for server-side includes.
This is limited to attribute values in HTML content.
This commit is contained in:
Daniel Veillard
2012-09-05 11:45:32 +08:00
parent 857104cd49
commit 7d4c529a33
4 changed files with 98 additions and 13 deletions

View File

@@ -690,9 +690,10 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
xmlChar *value; xmlChar *value;
/* /*
* TODO: The html output method should not escape a & character * The html output method should not escape a & character
* occurring in an attribute value immediately followed by * occurring in an attribute value immediately followed by
* a { character (see Section B.7.1 of the HTML 4.0 Recommendation). * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
* This is implemented in xmlEncodeEntitiesReentrant
*/ */
if (cur == NULL) { if (cur == NULL) {
@@ -720,7 +721,11 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
while (IS_BLANK_CH(*tmp)) tmp++; while (IS_BLANK_CH(*tmp)) tmp++;
escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+"); /*
* the < and > have already been escaped at the entity level,
* and URI-escaping them again here would break server side includes
*/
escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+<>");
if (escaped != NULL) { if (escaped != NULL) {
xmlBufWriteQuotedString(buf->buffer, escaped); xmlBufWriteQuotedString(buf->buffer, escaped);
xmlFree(escaped); xmlFree(escaped);

View File

@@ -537,11 +537,11 @@ xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) {
buffer_size = new_size; \ buffer_size = new_size; \
} }
/** /**
* xmlEncodeEntitiesReentrant: * xmlEncodeEntitiesInternal:
* @doc: the document containing the string * @doc: the document containing the string
* @input: A string to convert to XML. * @input: A string to convert to XML.
* @attr: are we handling an attribute value
* *
* Do a global encoding of a string, replacing the predefined entities * Do a global encoding of a string, replacing the predefined entities
* and non ASCII values with their entities and CharRef counterparts. * and non ASCII values with their entities and CharRef counterparts.
@@ -550,8 +550,8 @@ xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) {
* *
* Returns A newly allocated string with the substitution done. * Returns A newly allocated string with the substitution done.
*/ */
xmlChar * static xmlChar *
xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) { xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
const xmlChar *cur = input; const xmlChar *cur = input;
xmlChar *buffer = NULL; xmlChar *buffer = NULL;
xmlChar *out = NULL; xmlChar *out = NULL;
@@ -568,7 +568,7 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
buffer_size = 1000; buffer_size = 1000;
buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
if (buffer == NULL) { if (buffer == NULL) {
xmlEntitiesErrMemory("xmlEncodeEntitiesReentrant: malloc failed"); xmlEntitiesErrMemory("xmlEncodeEntities: malloc failed");
return(NULL); return(NULL);
} }
out = buffer; out = buffer;
@@ -585,6 +585,27 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
* By default one have to encode at least '<', '>', '"' and '&' ! * By default one have to encode at least '<', '>', '"' and '&' !
*/ */
if (*cur == '<') { if (*cur == '<') {
const xmlChar *end;
/*
* Special handling of server side include in HTML attributes
*/
if (html && attr &&
(cur[1] == '!') && (cur[2] == '-') && (cur[3] == '-') &&
((end = xmlStrstr(cur, BAD_CAST "-->")) != NULL)) {
while (cur != end) {
*out++ = *cur++;
indx = out - buffer;
if (indx + 100 > buffer_size) {
growBufferReentrant();
out = &buffer[indx];
}
}
*out++ = *cur++;
*out++ = *cur++;
*out++ = *cur++;
continue;
}
*out++ = '&'; *out++ = '&';
*out++ = 'l'; *out++ = 'l';
*out++ = 't'; *out++ = 't';
@@ -595,6 +616,22 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
*out++ = 't'; *out++ = 't';
*out++ = ';'; *out++ = ';';
} else if (*cur == '&') { } else if (*cur == '&') {
/*
* Special handling of &{...} construct from HTML 4, see
* http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1
*/
if (html && attr && (cur[1] == '{') && (strchr(cur, '}'))) {
while (*cur != '}') {
*out++ = *cur++;
indx = out - buffer;
if (indx + 100 > buffer_size) {
growBufferReentrant();
out = &buffer[indx];
}
}
*out++ = *cur++;
continue;
}
*out++ = '&'; *out++ = '&';
*out++ = 'a'; *out++ = 'a';
*out++ = 'm'; *out++ = 'm';
@@ -627,7 +664,7 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
if (*cur < 0xC0) { if (*cur < 0xC0) {
xmlEntitiesErr(XML_CHECK_NOT_UTF8, xmlEntitiesErr(XML_CHECK_NOT_UTF8,
"xmlEncodeEntitiesReentrant : input not UTF-8"); "xmlEncodeEntities: input not UTF-8");
if (doc != NULL) if (doc != NULL)
doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1"); doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
snprintf(buf, sizeof(buf), "&#%d;", *cur); snprintf(buf, sizeof(buf), "&#%d;", *cur);
@@ -660,7 +697,7 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
} }
if ((l == 1) || (!IS_CHAR(val))) { if ((l == 1) || (!IS_CHAR(val))) {
xmlEntitiesErr(XML_ERR_INVALID_CHAR, xmlEntitiesErr(XML_ERR_INVALID_CHAR,
"xmlEncodeEntitiesReentrant : char out of range\n"); "xmlEncodeEntities: char out of range\n");
if (doc != NULL) if (doc != NULL)
doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1"); doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
snprintf(buf, sizeof(buf), "&#%d;", *cur); snprintf(buf, sizeof(buf), "&#%d;", *cur);
@@ -694,11 +731,44 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
return(buffer); return(buffer);
mem_error: mem_error:
xmlEntitiesErrMemory("xmlEncodeEntitiesReentrant: realloc failed"); xmlEntitiesErrMemory("xmlEncodeEntities: realloc failed");
xmlFree(buffer); xmlFree(buffer);
return(NULL); return(NULL);
} }
/**
 * xmlEncodeAttributeEntities:
 * @doc:  the document containing the string
 * @input:  the attribute value to convert to XML
 *
 * Attribute-value variant of the entity encoder. The work is delegated
 * to xmlEncodeEntitiesInternal() with its attr flag set to 1, which
 * replaces the predefined entities and non ASCII values with their
 * entities and CharRef counterparts. With the attr flag set, the
 * internal encoder additionally copies server side include comments
 * (<!-- ... -->) and &{...} constructs through unescaped when it is
 * working on HTML content.
 *
 * Returns A newly allocated string with the substitution done.
 */
xmlChar *
xmlEncodeAttributeEntities(xmlDocPtr doc, const xmlChar *input) {
    xmlChar *encoded;

    /* 1 == the input is an attribute value */
    encoded = xmlEncodeEntitiesInternal(doc, input, 1);
    return(encoded);
}
/**
 * xmlEncodeEntitiesReentrant:
 * @doc:  the document containing the string
 * @input:  the string to convert to XML
 *
 * General purpose variant of the entity encoder. The work is delegated
 * to xmlEncodeEntitiesInternal() with its attr flag set to 0, which
 * replaces the predefined entities and non ASCII values with their
 * entities and CharRef counterparts.
 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
 * must be deallocated.
 *
 * Returns A newly allocated string with the substitution done.
 */
xmlChar *
xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
    xmlChar *encoded;

    /* 0 == the input is not an attribute value */
    encoded = xmlEncodeEntitiesInternal(doc, input, 0);
    return(encoded);
}
/** /**
* xmlEncodeSpecialChars: * xmlEncodeSpecialChars:
* @doc: the document containing the string * @doc: the document containing the string

1
save.h
View File

@@ -25,6 +25,7 @@ void xmlBufDumpNotationTable(xmlBufPtr buf, xmlNotationTablePtr table);
void xmlBufDumpElementDecl(xmlBufPtr buf, xmlElementPtr elem); void xmlBufDumpElementDecl(xmlBufPtr buf, xmlElementPtr elem);
void xmlBufDumpAttributeDecl(xmlBufPtr buf, xmlAttributePtr attr); void xmlBufDumpAttributeDecl(xmlBufPtr buf, xmlAttributePtr attr);
void xmlBufDumpEntityDecl(xmlBufPtr buf, xmlEntityPtr ent); void xmlBufDumpEntityDecl(xmlBufPtr buf, xmlEntityPtr ent);
xmlChar *xmlEncodeAttributeEntities(xmlDocPtr doc, const xmlChar *input);
#endif #endif
#ifdef __cplusplus #ifdef __cplusplus

11
tree.c
View File

@@ -42,6 +42,7 @@
#endif #endif
#include "buf.h" #include "buf.h"
#include "save.h"
int __xmlRegisterCallbacks = 0; int __xmlRegisterCallbacks = 0;
@@ -1661,9 +1662,14 @@ xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int inLine)
xmlNodePtr node = list; xmlNodePtr node = list;
xmlChar *ret = NULL; xmlChar *ret = NULL;
xmlEntityPtr ent; xmlEntityPtr ent;
int attr;
if (list == NULL) if (list == NULL)
return (NULL); return (NULL);
if ((list->parent != NULL) && (list->parent->type == XML_ATTRIBUTE_NODE))
attr = 1;
else
attr = 0;
while (node != NULL) { while (node != NULL) {
if ((node->type == XML_TEXT_NODE) || if ((node->type == XML_TEXT_NODE) ||
@@ -1673,7 +1679,10 @@ xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int inLine)
} else { } else {
xmlChar *buffer; xmlChar *buffer;
buffer = xmlEncodeEntitiesReentrant(doc, node->content); if (attr)
buffer = xmlEncodeAttributeEntities(doc, node->content);
else
buffer = xmlEncodeEntitiesReentrant(doc, node->content);
if (buffer != NULL) { if (buffer != NULL) {
ret = xmlStrcat(ret, buffer); ret = xmlStrcat(ret, buffer);
xmlFree(buffer); xmlFree(buffer);