diff options
author | Daniel Veillard <veillard@redhat.com> | 2012-09-05 11:45:32 +0800 |
---|---|---|
committer | Daniel Veillard <veillard@redhat.com> | 2012-09-05 12:11:43 +0800 |
commit | 7d4c529a334845621e2f805c8ed0e154b3350cec (patch) | |
tree | 541645b371f66a4e28a9da123c70e53f4dfe7299 | |
parent | 857104cd494765daf0749ae4f7dd49563811b669 (diff) | |
download | android_external_libxml2-7d4c529a334845621e2f805c8ed0e154b3350cec.tar.gz android_external_libxml2-7d4c529a334845621e2f805c8ed0e154b3350cec.tar.bz2 android_external_libxml2-7d4c529a334845621e2f805c8ed0e154b3350cec.zip |
Improve HTML escaping of attribute on output
Handle special cases of &{...} constructs as hinted in the spec
http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1
and special values as comment <!-- ... --> used for server side includes
This is limited to attribute values in HTML content.
-rw-r--r-- | HTMLtree.c | 13 | ||||
-rw-r--r-- | entities.c | 86 | ||||
-rw-r--r-- | save.h | 1 | ||||
-rw-r--r-- | tree.c | 11 |
4 files changed, 98 insertions, 13 deletions
@@ -690,9 +690,10 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, xmlChar *value; /* - * TODO: The html output method should not escape a & character - * occurring in an attribute value immediately followed by - * a { character (see Section B.7.1 of the HTML 4.0 Recommendation). + * The html output method should not escape a & character + * occurring in an attribute value immediately followed by + * a { character (see Section B.7.1 of the HTML 4.0 Recommendation). + * This is implemented in xmlEncodeEntitiesReentrant */ if (cur == NULL) { @@ -720,7 +721,11 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, while (IS_BLANK_CH(*tmp)) tmp++; - escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+"); + /* + * the < and > have already been escaped at the entity level + * And doing so here breaks server side includes + */ + escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+<>"); if (escaped != NULL) { xmlBufWriteQuotedString(buf->buffer, escaped); xmlFree(escaped); @@ -537,11 +537,11 @@ xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) { buffer_size = new_size; \ } - /** - * xmlEncodeEntitiesReentrant: + * xmlEncodeEntitiesInternal: * @doc: the document containing the string * @input: A string to convert to XML. + * @attr: are we handling an atrbute value * * Do a global encoding of a string, replacing the predefined entities * and non ASCII values with their entities and CharRef counterparts. @@ -550,8 +550,8 @@ xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) { * * Returns A newly allocated string with the substitution done. */ -xmlChar * -xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) { +static xmlChar * +xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) { const xmlChar *cur = input; xmlChar *buffer = NULL; xmlChar *out = NULL; @@ -568,7 +568,7 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) { buffer_size = 1000; buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); if (buffer == NULL) { - xmlEntitiesErrMemory("xmlEncodeEntitiesReentrant: malloc failed"); + xmlEntitiesErrMemory("xmlEncodeEntities: malloc failed"); return(NULL); } out = buffer; @@ -585,6 +585,27 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) { * By default one have to encode at least '<', '>', '"' and '&' ! */ if (*cur == '<') { + const xmlChar *end; + + /* + * Special handling of server side include in HTML attributes + */ + if (html && attr && + (cur[1] == '!') && (cur[2] == '-') && (cur[3] == '-') && + ((end = xmlStrstr(cur, BAD_CAST "-->")) != NULL)) { + while (cur != end) { + *out++ = *cur++; + indx = out - buffer; + if (indx + 100 > buffer_size) { + growBufferReentrant(); + out = &buffer[indx]; + } + } + *out++ = *cur++; + *out++ = *cur++; + *out++ = *cur++; + continue; + } *out++ = '&'; *out++ = 'l'; *out++ = 't'; @@ -595,6 +616,22 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) { *out++ = 't'; *out++ = ';'; } else if (*cur == '&') { + /* + * Special handling of &{...} construct from HTML 4, see + * http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1 + */ + if (html && attr && (cur[1] == '{') && (strchr(cur, '}'))) { + while (*cur != '}') { + *out++ = *cur++; + indx = out - buffer; + if (indx + 100 > buffer_size) { + growBufferReentrant(); + out = &buffer[indx]; + } + } + *out++ = *cur++; + continue; + } *out++ = '&'; *out++ = 'a'; *out++ = 'm'; @@ -627,7 +664,7 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) { if (*cur < 0xC0) { xmlEntitiesErr(XML_CHECK_NOT_UTF8, - "xmlEncodeEntitiesReentrant : input not UTF-8"); + "xmlEncodeEntities: input not UTF-8"); if (doc != NULL) doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1"); snprintf(buf, sizeof(buf), "&#%d;", *cur); @@ -660,7 +697,7 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) { } if ((l == 1) || (!IS_CHAR(val))) { xmlEntitiesErr(XML_ERR_INVALID_CHAR, - "xmlEncodeEntitiesReentrant : char out of range\n"); + "xmlEncodeEntities: char out of range\n"); if (doc != NULL) doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1"); snprintf(buf, sizeof(buf), "&#%d;", *cur); @@ -694,12 +731,45 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) { return(buffer); mem_error: - xmlEntitiesErrMemory("xmlEncodeEntitiesReentrant: realloc failed"); + xmlEntitiesErrMemory("xmlEncodeEntities: realloc failed"); xmlFree(buffer); return(NULL); } /** + * xmlEncodeAttributeEntities: + * @doc: the document containing the string + * @input: A string to convert to XML. + * + * Do a global encoding of a string, replacing the predefined entities + * and non ASCII values with their entities and CharRef counterparts for + * attribute values. + * + * Returns A newly allocated string with the substitution done. + */ +xmlChar * +xmlEncodeAttributeEntities(xmlDocPtr doc, const xmlChar *input) { + return xmlEncodeEntitiesInternal(doc, input, 1); +} + +/** + * xmlEncodeEntitiesReentrant: + * @doc: the document containing the string + * @input: A string to convert to XML. + * + * Do a global encoding of a string, replacing the predefined entities + * and non ASCII values with their entities and CharRef counterparts. + * Contrary to xmlEncodeEntities, this routine is reentrant, and result + * must be deallocated. + * + * Returns A newly allocated string with the substitution done. + */ +xmlChar * +xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) { + return xmlEncodeEntitiesInternal(doc, input, 0); +} + +/** * xmlEncodeSpecialChars: * @doc: the document containing the string * @input: A string to convert to XML. @@ -25,6 +25,7 @@ void xmlBufDumpNotationTable(xmlBufPtr buf, xmlNotationTablePtr table); void xmlBufDumpElementDecl(xmlBufPtr buf, xmlElementPtr elem); void xmlBufDumpAttributeDecl(xmlBufPtr buf, xmlAttributePtr attr); void xmlBufDumpEntityDecl(xmlBufPtr buf, xmlEntityPtr ent); +xmlChar *xmlEncodeAttributeEntities(xmlDocPtr doc, const xmlChar *input); #endif #ifdef __cplusplus @@ -42,6 +42,7 @@ #endif #include "buf.h" +#include "save.h" int __xmlRegisterCallbacks = 0; @@ -1661,9 +1662,14 @@ xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int inLine) xmlNodePtr node = list; xmlChar *ret = NULL; xmlEntityPtr ent; + int attr; if (list == NULL) return (NULL); + if ((list->parent != NULL) && (list->parent->type == XML_ATTRIBUTE_NODE)) + attr = 1; + else + attr = 0; while (node != NULL) { if ((node->type == XML_TEXT_NODE) || @@ -1673,7 +1679,10 @@ xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int inLine) } else { xmlChar *buffer; - buffer = xmlEncodeEntitiesReentrant(doc, node->content); + if (attr) + buffer = xmlEncodeAttributeEntities(doc, node->content); + else + buffer = xmlEncodeEntitiesReentrant(doc, node->content); if (buffer != NULL) { ret = xmlStrcat(ret, buffer); xmlFree(buffer); |