aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Daniel Veillard <veillard@redhat.com> 2010-01-29 20:47:08 +0100
committer: Daniel Veillard <veillard@redhat.com> 2010-01-29 20:47:08 +0100
commit: e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7 (patch)
tree: 82aa4d72813c39a23930b860acb4ef95261f2456
parent: ddb01cbf61104e1f746c2224316881e4e4b2b233 (diff)
downloadandroid_external_libxml2-e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7.tar.gz
android_external_libxml2-e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7.tar.bz2
android_external_libxml2-e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7.zip
Fix xmlParseInNodeContext for HTML content
xmlParseInNodeContext notices that the enclosing document is an HTML document, so it invokes the HTML parser for that fragment, and the HTML parser, finding a "<p>hello world!</p>" document, automatically augments it with defaulted <html> and <body> elements. This defaulting should be turned off in the HTML parser for this to work, but there is no such HTML parser option. There is an htmlOmittedDefaultValue global variable that you could use, but really we should not rely on global variables for processing options anymore; the best solution is to add an HTML_PARSE_NOIMPLIED option. * include/libxml/HTMLparser.h: add the HTML_PARSE_NOIMPLIED parser flag * HTMLparser.c: do not add implied elements if HTML_PARSE_NOIMPLIED is set * parser.c: add HTML_PARSE_NOIMPLIED to options for xmlParseInNodeContext on HTML documents
-rw-r--r-- HTMLparser.c 2
-rw-r--r-- include/libxml/HTMLparser.h 1
-rw-r--r-- parser.c 8
3 files changed, 10 insertions, 1 deletions
diff --git a/HTMLparser.c b/HTMLparser.c
index 9e275a29..3d4831ce 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -1394,6 +1394,8 @@ static void
htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
int i;
+ if (ctxt->options & HTML_PARSE_NOIMPLIED)
+ return;
if (!htmlOmittedDefaultValue)
return;
if (xmlStrEqual(newtag, BAD_CAST"html"))
diff --git a/include/libxml/HTMLparser.h b/include/libxml/HTMLparser.h
index 05905e4b..cde0ac6d 100644
--- a/include/libxml/HTMLparser.h
+++ b/include/libxml/HTMLparser.h
@@ -182,6 +182,7 @@ typedef enum {
HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */
HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */
HTML_PARSE_NONET = 1<<11,/* Forbid network access */
+ HTML_PARSE_NOIMPLIED= 1<<13,/* Do not add implied html/body... elements */
HTML_PARSE_COMPACT = 1<<16 /* compact small text nodes */
} htmlParserOption;
diff --git a/parser.c b/parser.c
index c779c1d0..a63c6682 100644
--- a/parser.c
+++ b/parser.c
@@ -12870,8 +12870,14 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
if (doc->type == XML_DOCUMENT_NODE)
ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
#ifdef LIBXML_HTML_ENABLED
- else if (doc->type == XML_HTML_DOCUMENT_NODE)
+ else if (doc->type == XML_HTML_DOCUMENT_NODE) {
ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
+ /*
+ * When parsing in context, it makes no sense to add implied
+ * elements like html/body/etc...
+ */
+ options |= HTML_PARSE_NOIMPLIED;
+ }
#endif
else
return(XML_ERR_INTERNAL_ERROR);