aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Daniel Veillard <veillard@src.gnome.org> 2003-07-16 21:18:19 +0000
committer: Daniel Veillard <veillard@src.gnome.org> 2003-07-16 21:18:19 +0000
commit: 2dcb937a9ae59e9865a6957d40974db7a9033534 (patch)
tree: c0667a84bbb81e1624b97a4cf87652cc30387a71
parent: 2009c4ed4d2069b0727827a0d47133e24a6146fc (diff)
download: android_external_libxml2-2dcb937a9ae59e9865a6957d40974db7a9033534.tar.gz
android_external_libxml2-2dcb937a9ae59e9865a6957d40974db7a9033534.tar.bz2
android_external_libxml2-2dcb937a9ae59e9865a6957d40974db7a9033534.zip
patch from Dodji Seketeli about UTF16 BOM when using the push XML parser.
* parserInternals.c: patch from Dodji Seketeli about UTF16 BOM when using the push XML parser.
* result/utf16bom.xml result/noent/utf16bom.xml test/utf16bom.xml: added the test to the regression suite.
Daniel
-rw-r--r-- ChangeLog 7
-rw-r--r-- parserInternals.c 17
-rw-r--r-- result/noent/utf16bom.xml bin 0 -> 258 bytes
-rw-r--r-- result/utf16bom.xml bin 0 -> 258 bytes
-rw-r--r-- test/utf16bom.xml bin 0 -> 256 bytes
5 files changed, 24 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index 26107dfc..8a8411fb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+Wed Jul 16 23:15:53 CEST 2003 Daniel Veillard <daniel@veillard.com>
+
+ * parserInternals.c: patch from Dodji Seketeli about UTF16 BOM
+ when using the push XML parser.
+ * result/utf16bom.xml result/noent/utf16bom.xml test/utf16bom.xml:
+ added the test to the regression suite.
+
Tue Jul 15 22:03:13 CEST 2003 Daniel Veillard <daniel@veillard.com>
* globals.c: add xmlThrDefMutex = NULL in xmlCleanupGlobals()
diff --git a/parserInternals.c b/parserInternals.c
index 8e57cdaa..9c71570a 100644
--- a/parserInternals.c
+++ b/parserInternals.c
@@ -1621,6 +1621,23 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
ctxt->input->cur += 3;
}
return(0);
+ case XML_CHAR_ENCODING_UTF16LE:
+ case XML_CHAR_ENCODING_UTF16BE:
+ /* The raw input characters are encoded
+ * in UTF-16. As we expect this function
+ * to be called after xmlCharEncInFunc, we expect
+ * ctxt->input->cur to contain UTF-8 encoded characters.
+ * So the raw UTF-16 Byte Order Mark
+ * has also been converted into
+ * a UTF-8 BOM. Let's skip that BOM.
+ */
+ if ((ctxt->input != NULL) &&
+ (ctxt->input->cur[0] == 0xEF) &&
+ (ctxt->input->cur[1] == 0xBB) &&
+ (ctxt->input->cur[2] == 0xBF)) {
+ ctxt->input->cur += 3;
+ }
+ break ;
default:
break;
}
diff --git a/result/noent/utf16bom.xml b/result/noent/utf16bom.xml
new file mode 100644
index 00000000..6ea296e2
--- /dev/null
+++ b/result/noent/utf16bom.xml
Binary files differ
diff --git a/result/utf16bom.xml b/result/utf16bom.xml
new file mode 100644
index 00000000..6ea296e2
--- /dev/null
+++ b/result/utf16bom.xml
Binary files differ
diff --git a/test/utf16bom.xml b/test/utf16bom.xml
new file mode 100644
index 00000000..1916dc1e
--- /dev/null
+++ b/test/utf16bom.xml
Binary files differ