about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bruno Haible <bruno@clisp.org> 2011-08-07 04:54:14 +0200
committer Bruno Haible <bruno@clisp.org> 2011-08-07 04:54:28 +0200
commit ac379c197217b32954792294269d3ec4fb4c48db (patch)
tree 4b6e2fd56dd156d40a6473dd7f46cdcfaee25484
parent 33b05e19ce93d8c9353939f40d843ba08b4e1564 (diff)
download android-libiconv-ac379c197217b32954792294269d3ec4fb4c48db.tar.gz
android-libiconv-ac379c197217b32954792294269d3ec4fb4c48db.tar.bz2
android-libiconv-ac379c197217b32954792294269d3ec4fb4c48db.zip
Upgrade the GB18030 converter to the version from 2005.
-rw-r--r-- ChangeLog 17
-rw-r--r-- NEWS 3
-rw-r--r-- lib/gb18030ext.h 66
-rw-r--r-- tests/GB18030-BMP.TXT 50
-rw-r--r-- tests/GB18030.IRREVERSIBLE.TXT 50
5 files changed, 142 insertions, 44 deletions
diff --git a/ChangeLog b/ChangeLog
index 0f07076..c6cdcec 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,22 @@
2011-08-06 Bruno Haible <bruno@clisp.org>
+ Upgrade the GB18030 converter to the version from 2005.
+ * lib/gb18030ext.h (gb18030ext_2uni_pagefe): Change element type to
+ 'unsigned int'. Change values for 0xFE51..0xFE53, 0xFE59, 0xFE61,
+ 0xFE66, 0xFE67, 0xFE6C, 0xFE6D, 0xFE76, 0xFE7E, 0xFE90, 0xFE91, 0xFEA0.
+ (gb18030ext_mbtowc): Change type of wc to 'unsigned int'. Change values
+ for 0xA6D9..0xA6DF, 0xA6EC..0xA6ED, 0xA6F3, 0xA8BC.
+ (gb18030ext_page9f, gb18030ext_pagefe): New constant arrays.
+ (gb18030ext_wctomb): Change values for U+1E3F, U+9FB4..U+9FBB,
+ U+FE10..U+FE19, U+20087, U+20089, U+200CC, U+215D7, U+2298F, U+241FE.
+ * tests/GB18030-BMP.TXT: Change values for 0xA6D9..0xA6DF,
+ 0xA6EC..0xA6ED, 0xA6F3, 0xA8BC, 0xFE51..0xFE53, 0xFE59, 0xFE61, 0xFE66,
+ 0xFE67, 0xFE6C, 0xFE6D, 0xFE76, 0xFE7E, 0xFE90, 0xFE91, 0xFEA0, to map
+ to now-assigned Unicode codepoints.
+ * tests/GB18030.IRREVERSIBLE.TXT: New file.
+
+2011-08-06 Bruno Haible <bruno@clisp.org>
+
Fix conversion bug in CP1258 converter.
* lib/vietcomb.h (viet_comp_table_data): Remove entry for
U+00A5 U+0301. Fix entry for U+00A8 U+0301.
diff --git a/NEWS b/NEWS
index c0f5573..3fc8dda 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,8 @@
* The 'iconv' program now produces its output as soon as it can. It no longer
unnecessarily waits for more input.
+* Updated the GB18030 converter to map 25 characters to code points that have
+ been added to Unicode since 2000, rather than to code points in the Private Use
+ Area.
* Updated the BIG5-HKSCS converter. The old BIG5-HKSCS converter is renamed to
BIG5-HKSCS:2004. A new converter BIG5-HKSCS:2008 is added. BIG5-HKSCS is now
an alias for BIG5-HKSCS:2008.
diff --git a/lib/gb18030ext.h b/lib/gb18030ext.h
index 14b0e45..5e59419 100644
--- a/lib/gb18030ext.h
+++ b/lib/gb18030ext.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 1999-2001, 2005 Free Software Foundation, Inc.
+ * Copyright (C) 1999-2001, 2005, 2011 Free Software Foundation, Inc.
* This file is part of the GNU LIBICONV Library.
*
* The GNU LIBICONV Library is free software; you can redistribute it
@@ -27,20 +27,20 @@ static const unsigned short gb18030ext_2uni_pagea9[13] = {
0x303e, 0x2ff0, 0x2ff1, 0x2ff2, 0x2ff3, 0x2ff4, 0x2ff5, 0x2ff6,
0x2ff7, 0x2ff8, 0x2ff9, 0x2ffa, 0x2ffb,
};
-static const unsigned short gb18030ext_2uni_pagefe[96] = {
+static const unsigned int gb18030ext_2uni_pagefe[96] = {
/* 0xfe */
- 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
- 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
- 0x2e81, 0xe816, 0xe817, 0xe818, 0x2e84, 0x3473, 0x3447, 0x2e88,
- 0x2e8b, 0xe81e, 0x359e, 0x361a, 0x360e, 0x2e8c, 0x2e97, 0x396e,
- 0x3918, 0xe826, 0x39cf, 0x39df, 0x3a73, 0x39d0, 0xe82b, 0xe82c,
- 0x3b4e, 0x3c6e, 0x3ce0, 0x2ea7, 0xe831, 0xe832, 0x2eaa, 0x4056,
- 0x415f, 0x2eae, 0x4337, 0x2eb3, 0x2eb6, 0x2eb7, 0xe83b, 0x43b1,
- 0x43ac, 0x2ebb, 0x43dd, 0x44d6, 0x4661, 0x464c, 0xe843, 0x4723,
- 0x4729, 0x477c, 0x478d, 0x2eca, 0x4947, 0x497a, 0x497d, 0x4982,
- 0x4983, 0x4985, 0x4986, 0x499f, 0x499b, 0x49b7, 0x49b6, 0xe854,
- 0xe855, 0x4ca3, 0x4c9f, 0x4ca0, 0x4ca1, 0x4c77, 0x4ca2, 0x4d13,
- 0x4d14, 0x4d15, 0x4d16, 0x4d17, 0x4d18, 0x4d19, 0x4dae, 0xe864,
+ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
+ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
+ 0x2e81, 0x20087, 0x20089, 0x200cc, 0x2e84, 0x3473, 0x3447, 0x2e88,
+ 0x2e8b, 0x9fb4, 0x359e, 0x361a, 0x360e, 0x2e8c, 0x2e97, 0x396e,
+ 0x3918, 0x9fb5, 0x39cf, 0x39df, 0x3a73, 0x39d0, 0x9fb6, 0x9fb7,
+ 0x3b4e, 0x3c6e, 0x3ce0, 0x2ea7, 0x215d7, 0x9fb8, 0x2eaa, 0x4056,
+ 0x415f, 0x2eae, 0x4337, 0x2eb3, 0x2eb6, 0x2eb7, 0x2298f, 0x43b1,
+ 0x43ac, 0x2ebb, 0x43dd, 0x44d6, 0x4661, 0x464c, 0x9fb9, 0x4723,
+ 0x4729, 0x477c, 0x478d, 0x2eca, 0x4947, 0x497a, 0x497d, 0x4982,
+ 0x4983, 0x4985, 0x4986, 0x499f, 0x499b, 0x49b7, 0x49b6, 0x9fba,
+ 0x241fe, 0x4ca3, 0x4c9f, 0x4ca0, 0x4ca1, 0x4c77, 0x4ca2, 0x4d13,
+ 0x4d14, 0x4d15, 0x4d16, 0x4d17, 0x4d18, 0x4d19, 0x4dae, 0x9fbb,
};
static int
@@ -52,7 +52,7 @@ gb18030ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
unsigned char c2 = s[1];
if ((c2 >= 0x40 && c2 < 0x7f) || (c2 >= 0x80 && c2 < 0xff)) {
unsigned int i = 190 * (c1 - 0x81) + (c2 - (c2 >= 0x80 ? 0x41 : 0x40));
- unsigned short wc = 0xfffd;
+ unsigned int wc = 0xfffd;
switch (c1) {
case 0xa2:
if (i >= 6376 && i <= 6381) /* 0xA2AB..0xA2B0 */
@@ -77,12 +77,14 @@ gb18030ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
case 0xa6:
if (i >= 7150 && i <= 7157) /* 0xA6B9..0xA6C0 */
wc = 0xe785 + (i - 7150);
+ else if (i >= 7183 && i <= 7184) /* 0xA6DA..0xA6DB */
+ wc = 0xfe12 - (i - 7183);
else if (i >= 7182 && i <= 7190) /* 0xA6D9..0xA6DF */
- wc = 0xe78d + (i - 7182);
+ wc = 0xfe10 + (i - 7182);
else if (i >= 7201 && i <= 7202) /* 0xA6EC..0xA6ED */
- wc = 0xe794 + (i - 7201);
+ wc = 0xfe17 + (i - 7201);
else if (i == 7208) /* 0xA6F3 */
- wc = 0xe796;
+ wc = 0xfe19;
else if (i >= 7211 && i <= 7219) /* 0xA6F6..0xA6FE */
wc = 0xe797 + (i - 7211);
break;
@@ -96,7 +98,7 @@ gb18030ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
if (i >= 7495 && i <= 7505) /* 0xA896..0xA8A0 */
wc = 0xe7bc + (i - 7495);
else if (i == 7533) /* 0xA8BC */
- wc = 0xe7c7;
+ wc = 0x1e3f;
else if (i == 7536) /* 0xA8BF */
wc = 0x01f9;
else if (i >= 7538 && i <= 7541) /* 0xA8C1..0xA8C4 */
@@ -230,6 +232,14 @@ static const unsigned short gb18030ext_page4d[16] = {
0x0000, 0x0000, 0x0000, 0xfe98, 0xfe99, 0xfe9a, 0xfe9b, 0xfe9c, /*0x10-0x17*/
0xfe9d, 0xfe9e, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x18-0x1f*/
};
+static const unsigned short gb18030ext_page9f[16] = {
+ 0x0000, 0x0000, 0x0000, 0x0000, 0xfe59, 0xfe61, 0xfe66, 0xfe67, /*0xb0-0xb7*/
+ 0xfe6d, 0xfe7e, 0xfe90, 0xfea0, 0x0000, 0x0000, 0x0000, 0x0000, /*0xb8-0xbf*/
+};
+static const unsigned short gb18030ext_pagefe[16] = {
+ 0xa6d9, 0xa6db, 0xa6da, 0xa6dc, 0xa6dd, 0xa6de, 0xa6df, 0xa6ec, /*0x10-0x17*/
+ 0xa6ed, 0xa6f3, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x18-0x1f*/
+};
static int
gb18030ext_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
@@ -238,6 +248,8 @@ gb18030ext_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
unsigned short c = 0;
if (wc == 0x01f9)
c = 0xa8bf;
+ else if (wc == 0x1e3f)
+ c = 0xa8bc;
else if (wc == 0x20ac)
c = 0xa2e3;
else if (wc >= 0x2e80 && wc < 0x2ed0)
@@ -290,6 +302,22 @@ gb18030ext_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
c = gb18030ext_page4d[wc-0x4d10];
else if (wc == 0x4dae)
c = 0xfe9f;
+ else if (wc >= 0x9fb4 && wc < 0x9fbc)
+ c = gb18030ext_page9f[wc-0x9fb0];
+ else if (wc >= 0xfe10 && wc < 0xfe1a)
+ c = gb18030ext_pagefe[wc-0xfe10];
+ else if (wc == 0x20087)
+ c = 0xfe51;
+ else if (wc == 0x20089)
+ c = 0xfe52;
+ else if (wc == 0x200cc)
+ c = 0xfe53;
+ else if (wc == 0x215d7)
+ c = 0xfe6c;
+ else if (wc == 0x2298f)
+ c = 0xfe76;
+ else if (wc == 0x241fe)
+ c = 0xfe91;
if (c != 0) {
r[0] = (c >> 8); r[1] = (c & 0xff);
return 2;
diff --git a/tests/GB18030-BMP.TXT b/tests/GB18030-BMP.TXT
index 4767d3e..0006e4b 100644
--- a/tests/GB18030-BMP.TXT
+++ b/tests/GB18030-BMP.TXT
@@ -46728,13 +46728,13 @@
0xA6D6 0x03C7
0xA6D7 0x03C8
0xA6D8 0x03C9
-0xA6D9 0xE78D
-0xA6DA 0xE78E
-0xA6DB 0xE78F
-0xA6DC 0xE790
-0xA6DD 0xE791
-0xA6DE 0xE792
-0xA6DF 0xE793
+0xA6D9 0xFE10
+0xA6DA 0xFE12
+0xA6DB 0xFE11
+0xA6DC 0xFE13
+0xA6DD 0xFE14
+0xA6DE 0xFE15
+0xA6DF 0xFE16
0xA6E0 0xFE35
0xA6E1 0xFE36
0xA6E2 0xFE39
@@ -46747,14 +46747,14 @@
0xA6E9 0xFE42
0xA6EA 0xFE43
0xA6EB 0xFE44
-0xA6EC 0xE794
-0xA6ED 0xE795
+0xA6EC 0xFE17
+0xA6ED 0xFE18
0xA6EE 0xFE3B
0xA6EF 0xFE3C
0xA6F0 0xFE37
0xA6F1 0xFE38
0xA6F2 0xFE31
-0xA6F3 0xE796
+0xA6F3 0xFE19
0xA6F4 0xFE33
0xA6F5 0xFE34
0xA6F6 0xE797
@@ -47079,7 +47079,7 @@
0xA8B9 0x00FC
0xA8BA 0x00EA
0xA8BB 0x0251
-0xA8BC 0xE7C7
+0xA8BC 0x1E3F
0xA8BD 0x0144
0xA8BE 0x0148
0xA8BF 0x01F9
@@ -63313,15 +63313,15 @@
0xFE4E 0xFA28
0xFE4F 0xFA29
0xFE50 0x2E81
-0xFE51 0xE816
-0xFE52 0xE817
-0xFE53 0xE818
+0xFE51 0x20087
+0xFE52 0x20089
+0xFE53 0x200CC
0xFE54 0x2E84
0xFE55 0x3473
0xFE56 0x3447
0xFE57 0x2E88
0xFE58 0x2E8B
-0xFE59 0xE81E
+0xFE59 0x9FB4
0xFE5A 0x359E
0xFE5B 0x361A
0xFE5C 0x360E
@@ -63329,19 +63329,19 @@
0xFE5E 0x2E97
0xFE5F 0x396E
0xFE60 0x3918
-0xFE61 0xE826
+0xFE61 0x9FB5
0xFE62 0x39CF
0xFE63 0x39DF
0xFE64 0x3A73
0xFE65 0x39D0
-0xFE66 0xE82B
-0xFE67 0xE82C
+0xFE66 0x9FB6
+0xFE67 0x9FB7
0xFE68 0x3B4E
0xFE69 0x3C6E
0xFE6A 0x3CE0
0xFE6B 0x2EA7
-0xFE6C 0xE831
-0xFE6D 0xE832
+0xFE6C 0x215D7
+0xFE6D 0x9FB8
0xFE6E 0x2EAA
0xFE6F 0x4056
0xFE70 0x415F
@@ -63350,7 +63350,7 @@
0xFE73 0x2EB3
0xFE74 0x2EB6
0xFE75 0x2EB7
-0xFE76 0xE83B
+0xFE76 0x2298F
0xFE77 0x43B1
0xFE78 0x43AC
0xFE79 0x2EBB
@@ -63358,7 +63358,7 @@
0xFE7B 0x44D6
0xFE7C 0x4661
0xFE7D 0x464C
-0xFE7E 0xE843
+0xFE7E 0x9FB9
0xFE80 0x4723
0xFE81 0x4729
0xFE82 0x477C
@@ -63375,8 +63375,8 @@
0xFE8D 0x499B
0xFE8E 0x49B7
0xFE8F 0x49B6
-0xFE90 0xE854
-0xFE91 0xE855
+0xFE90 0x9FBA
+0xFE91 0x241FE
0xFE92 0x4CA3
0xFE93 0x4C9F
0xFE94 0x4CA0
@@ -63391,7 +63391,7 @@
0xFE9D 0x4D18
0xFE9E 0x4D19
0xFE9F 0x4DAE
-0xFEA0 0xE864
+0xFEA0 0x9FBB
0xFEA1 0xE468
0xFEA2 0xE469
0xFEA3 0xE46A
diff --git a/tests/GB18030.IRREVERSIBLE.TXT b/tests/GB18030.IRREVERSIBLE.TXT
new file mode 100644
index 0000000..1dd1904
--- /dev/null
+++ b/tests/GB18030.IRREVERSIBLE.TXT
@@ -0,0 +1,50 @@
+0x8135F437 0x1E3F
+0x82359037 0x9FB4
+0x82359038 0x9FB5
+0x82359039 0x9FB6
+0x82359130 0x9FB7
+0x82359131 0x9FB8
+0x82359132 0x9FB9
+0x82359133 0x9FBA
+0x82359134 0x9FBB
+0x84318236 0xFE10
+0x84318237 0xFE11
+0x84318238 0xFE12
+0x84318239 0xFE13
+0x84318330 0xFE14
+0x84318331 0xFE15
+0x84318332 0xFE16
+0x84318333 0xFE17
+0x84318334 0xFE18
+0x84318335 0xFE19
+0x95329031 0x20087
+0x95329033 0x20089
+0x95329730 0x200CC
+0x9536B937 0x215D7
+0x9630BA35 0x2298F
+0x9635B630 0x241FE
+0xA6D9 0xE78D
+0xA6DA 0xE78E
+0xA6DB 0xE78F
+0xA6DC 0xE790
+0xA6DD 0xE791
+0xA6DE 0xE792
+0xA6DF 0xE793
+0xA6EC 0xE794
+0xA6ED 0xE795
+0xA6F3 0xE796
+0xA8BC 0xE7C7
+0xFE51 0xE816
+0xFE52 0xE817
+0xFE53 0xE818
+0xFE59 0xE81E
+0xFE61 0xE826
+0xFE66 0xE82B
+0xFE67 0xE82C
+0xFE6C 0xE831
+0xFE6D 0xE832
+0xFE76 0xE83B
+0xFE7E 0xE843
+0xFE90 0xE854
+0xFE91 0xE855
+0xFEA0 0xE864