aboutsummaryrefslogtreecommitdiffstats
path: root/result
diff options
context:
space:
mode:
author: Denis Pauk <pauk.denis@gmail.com> 2012-05-10 15:34:57 +0800
committer: Daniel Veillard <veillard@redhat.com> 2012-05-10 15:34:57 +0800
commit: 868d92da8915fc5dc5e329d93cc7882370a28475 (patch)
tree: 4b39353761e2137b6adce484378b4434c223ef74 /result
parent: 1eabc31401b7b8c3b5273993778f37eeef37a055 (diff)
downloadandroid_external_libxml2-868d92da8915fc5dc5e329d93cc7882370a28475.tar.gz
android_external_libxml2-868d92da8915fc5dc5e329d93cc7882370a28475.tar.bz2
android_external_libxml2-868d92da8915fc5dc5e329d93cc7882370a28475.zip
Add HTML parser support for HTML5 meta charset encoding declaration
For https://bugzilla.gnome.org/show_bug.cgi?id=655218 (see http://www.w3.org/TR/2011/WD-html5-20110525/semantics.html#the-meta-element): """ The charset attribute specifies the character encoding used by the document. This is a character encoding declaration. If the attribute is present in an XML document, its value must be an ASCII case-insensitive match for the string "UTF-8" (and the document is therefore forced to use UTF-8 as its encoding). """ However, while <meta http-equiv="Content-Type" content="text/html; charset=utf8"> works, <meta charset="utf8"> does not. While the libxml2 HTML parser is not tuned for HTML5, this is a simple addition. Also added a testcase.
Diffstat (limited to 'result')
-rw-r--r--result/HTML/html5_enc.html7
-rw-r--r--result/HTML/html5_enc.html.err0
-rw-r--r--result/HTML/html5_enc.html.sax30
3 files changed, 37 insertions, 0 deletions
diff --git a/result/HTML/html5_enc.html b/result/HTML/html5_enc.html
new file mode 100644
index 00000000..596d54d7
--- /dev/null
+++ b/result/HTML/html5_enc.html
@@ -0,0 +1,7 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+<head><meta charset="iso-8859-1"></head>
+<body>
+ <p>tr&egrave;s</p>
+</body>
+</html>
diff --git a/result/HTML/html5_enc.html.err b/result/HTML/html5_enc.html.err
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/result/HTML/html5_enc.html.err
diff --git a/result/HTML/html5_enc.html.sax b/result/HTML/html5_enc.html.sax
new file mode 100644
index 00000000..292be575
--- /dev/null
+++ b/result/HTML/html5_enc.html.sax
@@ -0,0 +1,30 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.startElement(html)
+SAX.ignorableWhitespace(
+, 1)
+SAX.startElement(head)
+SAX.ignorableWhitespace(
+, 1)
+SAX.startElement(meta, charset='iso-8859-1')
+SAX.endElement(meta)
+SAX.ignorableWhitespace(
+, 1)
+SAX.endElement(head)
+SAX.ignorableWhitespace(
+, 1)
+SAX.startElement(body)
+SAX.characters(
+ , 3)
+SAX.startElement(p)
+SAX.characters(tr&egrave;s, 5)
+SAX.endElement(p)
+SAX.characters(
+, 1)
+SAX.endElement(body)
+SAX.ignorableWhitespace(
+, 1)
+SAX.endElement(html)
+SAX.ignorableWhitespace(
+, 1)
+SAX.endDocument()