diff options
author | Daniel Veillard <veillard@redhat.com> | 2012-09-05 11:45:32 +0800 |
---|---|---|
committer | Daniel Veillard <veillard@redhat.com> | 2012-09-05 12:11:43 +0800 |
commit | 7d4c529a334845621e2f805c8ed0e154b3350cec (patch) | |
tree | 541645b371f66a4e28a9da123c70e53f4dfe7299 /entities.c | |
parent | 857104cd494765daf0749ae4f7dd49563811b669 (diff) | |
download | android_external_libxml2-7d4c529a334845621e2f805c8ed0e154b3350cec.tar.gz android_external_libxml2-7d4c529a334845621e2f805c8ed0e154b3350cec.tar.bz2 android_external_libxml2-7d4c529a334845621e2f805c8ed0e154b3350cec.zip |
Improve HTML escaping of attribute on output
Handle special cases of &{...} constructs as hinted in the spec
http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1
and special values as comment <!-- ... --> used for server side includes
This is limited to attribute values in HTML content.
Diffstat (limited to 'entities.c')
-rw-r--r-- | entities.c | 86 |
1 files changed, 78 insertions, 8 deletions
@@ -537,11 +537,11 @@ xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) { buffer_size = new_size; \ } - /** - * xmlEncodeEntitiesReentrant: + * xmlEncodeEntitiesInternal: * @doc: the document containing the string * @input: A string to convert to XML. + * @attr: are we handling an atrbute value * * Do a global encoding of a string, replacing the predefined entities * and non ASCII values with their entities and CharRef counterparts. @@ -550,8 +550,8 @@ xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) { * * Returns A newly allocated string with the substitution done. */ -xmlChar * -xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) { +static xmlChar * +xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) { const xmlChar *cur = input; xmlChar *buffer = NULL; xmlChar *out = NULL; @@ -568,7 +568,7 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) { buffer_size = 1000; buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); if (buffer == NULL) { - xmlEntitiesErrMemory("xmlEncodeEntitiesReentrant: malloc failed"); + xmlEntitiesErrMemory("xmlEncodeEntities: malloc failed"); return(NULL); } out = buffer; @@ -585,6 +585,27 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) { * By default one have to encode at least '<', '>', '"' and '&' ! */ if (*cur == '<') { + const xmlChar *end; + + /* + * Special handling of server side include in HTML attributes + */ + if (html && attr && + (cur[1] == '!') && (cur[2] == '-') && (cur[3] == '-') && + ((end = xmlStrstr(cur, BAD_CAST "-->")) != NULL)) { + while (cur != end) { + *out++ = *cur++; + indx = out - buffer; + if (indx + 100 > buffer_size) { + growBufferReentrant(); + out = &buffer[indx]; + } + } + *out++ = *cur++; + *out++ = *cur++; + *out++ = *cur++; + continue; + } *out++ = '&'; *out++ = 'l'; *out++ = 't'; @@ -595,6 +616,22 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) { *out++ = 't'; *out++ = ';'; } else if (*cur == '&') { + /* + * Special handling of &{...} construct from HTML 4, see + * http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1 + */ + if (html && attr && (cur[1] == '{') && (strchr(cur, '}'))) { + while (*cur != '}') { + *out++ = *cur++; + indx = out - buffer; + if (indx + 100 > buffer_size) { + growBufferReentrant(); + out = &buffer[indx]; + } + } + *out++ = *cur++; + continue; + } *out++ = '&'; *out++ = 'a'; *out++ = 'm'; @@ -627,7 +664,7 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) { if (*cur < 0xC0) { xmlEntitiesErr(XML_CHECK_NOT_UTF8, - "xmlEncodeEntitiesReentrant : input not UTF-8"); + "xmlEncodeEntities: input not UTF-8"); if (doc != NULL) doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1"); snprintf(buf, sizeof(buf), "&#%d;", *cur); @@ -660,7 +697,7 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) { } if ((l == 1) || (!IS_CHAR(val))) { xmlEntitiesErr(XML_ERR_INVALID_CHAR, - "xmlEncodeEntitiesReentrant : char out of range\n"); + "xmlEncodeEntities: char out of range\n"); if (doc != NULL) doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1"); snprintf(buf, sizeof(buf), "&#%d;", *cur); @@ -694,12 +731,45 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) { return(buffer); mem_error: - xmlEntitiesErrMemory("xmlEncodeEntitiesReentrant: realloc failed"); + xmlEntitiesErrMemory("xmlEncodeEntities: realloc failed"); xmlFree(buffer); return(NULL); } /** + * xmlEncodeAttributeEntities: + * @doc: the document containing the string + * @input: A string to convert to XML. + * + * Do a global encoding of a string, replacing the predefined entities + * and non ASCII values with their entities and CharRef counterparts for + * attribute values. + * + * Returns A newly allocated string with the substitution done. + */ +xmlChar * +xmlEncodeAttributeEntities(xmlDocPtr doc, const xmlChar *input) { + return xmlEncodeEntitiesInternal(doc, input, 1); +} + +/** + * xmlEncodeEntitiesReentrant: + * @doc: the document containing the string + * @input: A string to convert to XML. + * + * Do a global encoding of a string, replacing the predefined entities + * and non ASCII values with their entities and CharRef counterparts. + * Contrary to xmlEncodeEntities, this routine is reentrant, and result + * must be deallocated. + * + * Returns A newly allocated string with the substitution done. + */ +xmlChar * +xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) { + return xmlEncodeEntitiesInternal(doc, input, 0); +} + +/** * xmlEncodeSpecialChars: * @doc: the document containing the string * @input: A string to convert to XML. |