aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniel Veillard <veillard@redhat.com>2012-09-05 11:45:32 +0800
committerDaniel Veillard <veillard@redhat.com>2012-09-05 12:11:43 +0800
commit7d4c529a334845621e2f805c8ed0e154b3350cec (patch)
tree541645b371f66a4e28a9da123c70e53f4dfe7299
parent857104cd494765daf0749ae4f7dd49563811b669 (diff)
downloadandroid_external_libxml2-7d4c529a334845621e2f805c8ed0e154b3350cec.tar.gz
android_external_libxml2-7d4c529a334845621e2f805c8ed0e154b3350cec.tar.bz2
android_external_libxml2-7d4c529a334845621e2f805c8ed0e154b3350cec.zip
Improve HTML escaping of attribute on output
Handle special cases of &{...} constructs, as hinted in the spec (http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1), and special values such as comments <!-- ... --> used for server-side includes. This is limited to attribute values in HTML content.
-rw-r--r--HTMLtree.c13
-rw-r--r--entities.c86
-rw-r--r--save.h1
-rw-r--r--tree.c11
4 files changed, 98 insertions, 13 deletions
diff --git a/HTMLtree.c b/HTMLtree.c
index 1f9dbece..530fd530 100644
--- a/HTMLtree.c
+++ b/HTMLtree.c
@@ -690,9 +690,10 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
xmlChar *value;
/*
- * TODO: The html output method should not escape a & character
- * occurring in an attribute value immediately followed by
- * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
+ * The html output method should not escape a & character
+ * occurring in an attribute value immediately followed by
+ * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
+ * This is implemented in xmlEncodeAttributeEntities
*/
if (cur == NULL) {
@@ -720,7 +721,11 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
while (IS_BLANK_CH(*tmp)) tmp++;
- escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
+ /*
+ * The < and > have already been escaped at the entity level,
+ * and escaping them again here breaks server-side includes.
+ */
+ escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+<>");
if (escaped != NULL) {
xmlBufWriteQuotedString(buf->buffer, escaped);
xmlFree(escaped);
diff --git a/entities.c b/entities.c
index 7d068206..f9e9cd9d 100644
--- a/entities.c
+++ b/entities.c
@@ -537,11 +537,11 @@ xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) {
buffer_size = new_size; \
}
-
/**
- * xmlEncodeEntitiesReentrant:
+ * xmlEncodeEntitiesInternal:
* @doc: the document containing the string
* @input: A string to convert to XML.
+ * @attr: are we handling an attribute value
*
* Do a global encoding of a string, replacing the predefined entities
* and non ASCII values with their entities and CharRef counterparts.
@@ -550,8 +550,8 @@ xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) {
*
* Returns A newly allocated string with the substitution done.
*/
-xmlChar *
-xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
+static xmlChar *
+xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
const xmlChar *cur = input;
xmlChar *buffer = NULL;
xmlChar *out = NULL;
@@ -568,7 +568,7 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
buffer_size = 1000;
buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
if (buffer == NULL) {
- xmlEntitiesErrMemory("xmlEncodeEntitiesReentrant: malloc failed");
+ xmlEntitiesErrMemory("xmlEncodeEntities: malloc failed");
return(NULL);
}
out = buffer;
@@ -585,6 +585,27 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
* By default one have to encode at least '<', '>', '"' and '&' !
*/
if (*cur == '<') {
+ const xmlChar *end;
+
+ /*
+ * Special handling of server side include in HTML attributes
+ */
+ if (html && attr &&
+ (cur[1] == '!') && (cur[2] == '-') && (cur[3] == '-') &&
+ ((end = xmlStrstr(cur, BAD_CAST "-->")) != NULL)) {
+ while (cur != end) {
+ *out++ = *cur++;
+ indx = out - buffer;
+ if (indx + 100 > buffer_size) {
+ growBufferReentrant();
+ out = &buffer[indx];
+ }
+ }
+ *out++ = *cur++;
+ *out++ = *cur++;
+ *out++ = *cur++;
+ continue;
+ }
*out++ = '&';
*out++ = 'l';
*out++ = 't';
@@ -595,6 +616,22 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
*out++ = 't';
*out++ = ';';
} else if (*cur == '&') {
+ /*
+ * Special handling of &{...} construct from HTML 4, see
+ * http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1
+ */
+ if (html && attr && (cur[1] == '{') && (strchr(cur, '}'))) {
+ while (*cur != '}') {
+ *out++ = *cur++;
+ indx = out - buffer;
+ if (indx + 100 > buffer_size) {
+ growBufferReentrant();
+ out = &buffer[indx];
+ }
+ }
+ *out++ = *cur++;
+ continue;
+ }
*out++ = '&';
*out++ = 'a';
*out++ = 'm';
@@ -627,7 +664,7 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
if (*cur < 0xC0) {
xmlEntitiesErr(XML_CHECK_NOT_UTF8,
- "xmlEncodeEntitiesReentrant : input not UTF-8");
+ "xmlEncodeEntities: input not UTF-8");
if (doc != NULL)
doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
snprintf(buf, sizeof(buf), "&#%d;", *cur);
@@ -660,7 +697,7 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
}
if ((l == 1) || (!IS_CHAR(val))) {
xmlEntitiesErr(XML_ERR_INVALID_CHAR,
- "xmlEncodeEntitiesReentrant : char out of range\n");
+ "xmlEncodeEntities: char out of range\n");
if (doc != NULL)
doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
snprintf(buf, sizeof(buf), "&#%d;", *cur);
@@ -694,12 +731,45 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
return(buffer);
mem_error:
- xmlEntitiesErrMemory("xmlEncodeEntitiesReentrant: realloc failed");
+ xmlEntitiesErrMemory("xmlEncodeEntities: realloc failed");
xmlFree(buffer);
return(NULL);
}
/**
+ * xmlEncodeAttributeEntities:
+ * @doc: the document containing the string
+ * @input: A string to convert to XML.
+ *
+ * Do a global encoding of a string, replacing the predefined entities
+ * and non ASCII values with their entities and CharRef counterparts for
+ * attribute values.
+ *
+ * Returns A newly allocated string with the substitution done.
+ */
+xmlChar *
+xmlEncodeAttributeEntities(xmlDocPtr doc, const xmlChar *input) {
+ return xmlEncodeEntitiesInternal(doc, input, 1);
+}
+
+/**
+ * xmlEncodeEntitiesReentrant:
+ * @doc: the document containing the string
+ * @input: A string to convert to XML.
+ *
+ * Do a global encoding of a string, replacing the predefined entities
+ * and non ASCII values with their entities and CharRef counterparts.
+ * Contrary to xmlEncodeEntities, this routine is reentrant, and result
+ * must be deallocated.
+ *
+ * Returns A newly allocated string with the substitution done.
+ */
+xmlChar *
+xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
+ return xmlEncodeEntitiesInternal(doc, input, 0);
+}
+
+/**
* xmlEncodeSpecialChars:
* @doc: the document containing the string
* @input: A string to convert to XML.
diff --git a/save.h b/save.h
index f24f273f..2c32a103 100644
--- a/save.h
+++ b/save.h
@@ -25,6 +25,7 @@ void xmlBufDumpNotationTable(xmlBufPtr buf, xmlNotationTablePtr table);
void xmlBufDumpElementDecl(xmlBufPtr buf, xmlElementPtr elem);
void xmlBufDumpAttributeDecl(xmlBufPtr buf, xmlAttributePtr attr);
void xmlBufDumpEntityDecl(xmlBufPtr buf, xmlEntityPtr ent);
+xmlChar *xmlEncodeAttributeEntities(xmlDocPtr doc, const xmlChar *input);
#endif
#ifdef __cplusplus
diff --git a/tree.c b/tree.c
index df6f608c..145a6896 100644
--- a/tree.c
+++ b/tree.c
@@ -42,6 +42,7 @@
#endif
#include "buf.h"
+#include "save.h"
int __xmlRegisterCallbacks = 0;
@@ -1661,9 +1662,14 @@ xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int inLine)
xmlNodePtr node = list;
xmlChar *ret = NULL;
xmlEntityPtr ent;
+ int attr;
if (list == NULL)
return (NULL);
+ if ((list->parent != NULL) && (list->parent->type == XML_ATTRIBUTE_NODE))
+ attr = 1;
+ else
+ attr = 0;
while (node != NULL) {
if ((node->type == XML_TEXT_NODE) ||
@@ -1673,7 +1679,10 @@ xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int inLine)
} else {
xmlChar *buffer;
- buffer = xmlEncodeEntitiesReentrant(doc, node->content);
+ if (attr)
+ buffer = xmlEncodeAttributeEntities(doc, node->content);
+ else
+ buffer = xmlEncodeEntitiesReentrant(doc, node->content);
if (buffer != NULL) {
ret = xmlStrcat(ret, buffer);
xmlFree(buffer);