diff options
| author | Daniel Veillard <veillard@src.gnome.org> | 2003-07-16 21:18:19 +0000 |
|---|---|---|
| committer | Daniel Veillard <veillard@src.gnome.org> | 2003-07-16 21:18:19 +0000 |
| commit | 2dcb937a9ae59e9865a6957d40974db7a9033534 (patch) | |
| tree | c0667a84bbb81e1624b97a4cf87652cc30387a71 | |
| parent | 2009c4ed4d2069b0727827a0d47133e24a6146fc (diff) | |
| download | android_external_libxml2-2dcb937a9ae59e9865a6957d40974db7a9033534.tar.gz android_external_libxml2-2dcb937a9ae59e9865a6957d40974db7a9033534.tar.bz2 android_external_libxml2-2dcb937a9ae59e9865a6957d40974db7a9033534.zip | |
patch from Dodji Seketeli about UTF16 BOM when using the push XML parser.
* parserInternals.c: patch from Dodji Seketeli about UTF16 BOM
when using the push XML parser.
* result/utf16bom.xml result/noent/utf16bom.xml test/utf16bom.xml:
added the test to the regression suite.
Daniel
| mode | file | changes |
|---|---|---|
| -rw-r--r-- | ChangeLog | 7 |
| -rw-r--r-- | parserInternals.c | 17 |
| -rw-r--r-- | result/noent/utf16bom.xml | bin 0 -> 258 bytes |
| -rw-r--r-- | result/utf16bom.xml | bin 0 -> 258 bytes |
| -rw-r--r-- | test/utf16bom.xml | bin 0 -> 256 bytes |
5 files changed, 24 insertions, 0 deletions
```diff
@@ -1,3 +1,10 @@
+Wed Jul 16 23:15:53 CEST 2003 Daniel Veillard <daniel@veillard.com>
+
+        * parserInternals.c: patch from Dodji Seketeli about UTF16 BOM
+          when using the push XML parser.
+        * result/utf16bom.xml result/noent/utf16bom.xml test/utf16bom.xml:
+          added the test to the regression suite.
+
 Tue Jul 15 22:03:13 CEST 2003 Daniel Veillard <daniel@veillard.com>
 
         * globals.c: add xmlThrDefMutex = NULL in xmlCleanupGlobals()
diff --git a/parserInternals.c b/parserInternals.c
index 8e57cdaa..9c71570a 100644
--- a/parserInternals.c
+++ b/parserInternals.c
@@ -1621,6 +1621,23 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
                     ctxt->input->cur += 3;
                 }
                 return(0);
+            case XML_CHAR_ENCODING_UTF16LE:
+            case XML_CHAR_ENCODING_UTF16BE:
+                /*The raw input characters are encoded
+                 *in UTF-16. As we expect this function
+                 *to be called after xmlCharEncInFunc, we expect
+                 *ctxt->input->cur to contain UTF-8 encoded characters.
+                 *So the raw UTF16 Byte Order Mark
+                 *has also been converted into
+                 *an UTF-8 BOM. Let's skip that BOM.
+                 */
+                if ((ctxt->input != NULL) &&
+                    (ctxt->input->cur[0] == 0xEF) &&
+                    (ctxt->input->cur[1] == 0xBB) &&
+                    (ctxt->input->cur[2] == 0xBF)) {
+                    ctxt->input->cur += 3;
+                }
+                break ;
             default:
                 break;
         }
diff --git a/result/noent/utf16bom.xml b/result/noent/utf16bom.xml
Binary files differ
new file mode 100644
index 00000000..6ea296e2
--- /dev/null
+++ b/result/noent/utf16bom.xml
diff --git a/result/utf16bom.xml b/result/utf16bom.xml
Binary files differ
new file mode 100644
index 00000000..6ea296e2
--- /dev/null
+++ b/result/utf16bom.xml
diff --git a/test/utf16bom.xml b/test/utf16bom.xml
Binary files differ
new file mode 100644
index 00000000..1916dc1e
--- /dev/null
+++ b/test/utf16bom.xml
```
