aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Bruno Haible <bruno@clisp.org>, 2008-09-07 23:28:41 +0000
committer: Bruno Haible <bruno@clisp.org>, 2009-06-21 13:06:23 +0200
commit: bb8f7987b00e303f690b59eb62c8eede974c2dd1 (patch)
tree: c10e9b65ad8e7df7c2b7413ba4337700be801f3f
parent: 93ddeebe1c3e13c18049eaadcfce1b6c2e16b8f4 (diff)
download: android-libiconv-bb8f7987b00e303f690b59eb62c8eede974c2dd1.tar.gz
android-libiconv-bb8f7987b00e303f690b59eb62c8eede974c2dd1.tar.bz2
android-libiconv-bb8f7987b00e303f690b59eb62c8eede974c2dd1.zip
More consistent behaviour when invalid input is preceded by a shift sequence.
-rw-r--r--ChangeLog34
-rw-r--r--NEWS4
-rw-r--r--lib/converters.h12
-rw-r--r--lib/hz.h10
-rw-r--r--lib/iso2022_cn.h27
-rw-r--r--lib/iso2022_cnext.h49
-rw-r--r--lib/iso2022_jp.h24
-rw-r--r--lib/iso2022_jp1.h28
-rw-r--r--lib/iso2022_jp2.h55
-rw-r--r--lib/iso2022_jp3.h30
-rw-r--r--lib/iso2022_kr.h19
-rw-r--r--lib/loop_unicode.h25
-rw-r--r--lib/ucs2.h5
-rw-r--r--lib/ucs4.h8
-rw-r--r--lib/utf16.h10
-rw-r--r--lib/utf16be.h9
-rw-r--r--lib/utf16le.h9
-rw-r--r--lib/utf32.h8
-rw-r--r--lib/utf7.h14
-rw-r--r--tests/Makefile.in12
-rw-r--r--tests/test-shiftseq.c119
21 files changed, 371 insertions, 140 deletions
diff --git a/ChangeLog b/ChangeLog
index e292717..aea1bde 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,39 @@
2008-09-07 Bruno Haible <bruno@clisp.org>
+ Make behaviour when encountering invalid input after a shift sequence
+ more consistent.
+ * lib/converters.h (RET_SHIFT_ILSEQ): New macro.
+ (RET_ILSEQ): Define in terms of RET_SHIFT_ILSEQ.
+ (RET_TOOFEW): Change to avoid collisions with RET_SHIFT_ILSEQ.
+ (DECODE_SHIFT_ILSEQ, DECODE_TOOFEW): New macros.
+ * lib/loop_unicode.h (unicode_loop_convert): Take into account the
+ shift count contained in the negative return values.
+ * lib/hz.h (hz_mbtowc): When encountering invalid input, store the
+ modified state and return RET_SHIFT_ILSEQ.
+ * lib/iso2022_cn.h (iso2022_cn_mbtowc): Likewise.
+ * lib/iso2022_cnext.h (iso2022_cn_ext_mbtowc): Likewise.
+ * lib/iso2022_jp.h (iso2022_jp_mbtowc): Likewise.
+ * lib/iso2022_jp1.h (iso2022_jp1_mbtowc): Likewise.
+ * lib/iso2022_jp2.h (iso2022_jp2_mbtowc): Likewise.
+ * lib/iso2022_jp3.h (iso2022_jp3_mbtowc): Likewise.
+ * lib/iso2022_kr.h (iso2022_kr_mbtowc): Likewise.
+ * lib/ucs2.h (ucs2_mbtowc): Likewise.
+ * lib/ucs4.h (ucs4_mbtowc): Likewise.
+ * lib/utf16.h (utf16_mbtowc): Likewise.
+ * lib/utf32.h (utf32_mbtowc): Likewise.
+ * lib/utf7.h (utf7_mbtowc): Likewise.
+ * lib/utf16be.h (utf16be_mbtowc): When encountering
+ invalid input, return RET_SHIFT_ILSEQ.
+ * lib/utf16le.h (utf16le_mbtowc): Likewise.
+ * tests/test-shiftseq.c: New file.
+ * tests/Makefile.in (check): Run test-shiftseq.
+ (test-shiftseq, test-shiftseq.@OBJEXT@): New rules.
+ (clean): Remove test-shiftseq executable.
+ Reported by Roman Rybalko <roman_rybalko@users.sourceforge.net>
+ at <http://savannah.gnu.org/bugs/?24216>.
+
+2008-09-07 Bruno Haible <bruno@clisp.org>
+
* man/iconv.3: Clarify the processing of shift-sequences.
2008-09-07 Bruno Haible <bruno@clisp.org>
diff --git a/NEWS b/NEWS
index 997af69..3aa2cfc 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,10 @@ New in 1.13:
* The library and the iconv program now understand platform dependent aliases,
for better compatibility with the platform's own iconv_open function.
Examples: "646" on Solaris, "iso88591" on HP-UX, "IBM-1252" on AIX.
+* For stateful encodings, when the input ends with a shift sequence followed
+ by invalid input, the iconv function now increments the input pointer past
+ the shift sequence before returning (size_t)(-1) with errno = EILSEQ. This
+ is also how GNU libc's iconv() behaves.
New in 1.12:
* The iconv program is now licensed under the GPL version 3, instead of the
diff --git a/lib/converters.h b/lib/converters.h
index 7bc032b..4d93230 100644
--- a/lib/converters.h
+++ b/lib/converters.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 1999-2002, 2004-2007 Free Software Foundation, Inc.
+ * Copyright (C) 1999-2002, 2004-2008 Free Software Foundation, Inc.
* This file is part of the GNU LIBICONV Library.
*
* The GNU LIBICONV Library is free software; you can redistribute it
@@ -51,10 +51,16 @@ struct mbtowc_funcs {
*/
};
+/* Return code if invalid input after a shift sequence of n bytes was read.
+ (xxx_mbtowc) */
+#define RET_SHIFT_ILSEQ(n) (-1-2*(n))
/* Return code if invalid. (xxx_mbtowc) */
-#define RET_ILSEQ -1
+#define RET_ILSEQ RET_SHIFT_ILSEQ(0)
/* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc) */
-#define RET_TOOFEW(n) (-2-(n))
+#define RET_TOOFEW(n) (-2-2*(n))
+/* Retrieve the n from the encoded RET_... value. */
+#define DECODE_SHIFT_ILSEQ(r) ((unsigned int)(RET_SHIFT_ILSEQ(0) - (r)) / 2)
+#define DECODE_TOOFEW(r) ((unsigned int)(RET_TOOFEW(0) - (r)) / 2)
/*
* Data type for conversion unicode -> multibyte
diff --git a/lib/hz.h b/lib/hz.h
index ed47485..db0b4b1 100644
--- a/lib/hz.h
+++ b/lib/hz.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 1999-2001 Free Software Foundation, Inc.
+ * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc.
* This file is part of the GNU LIBICONV Library.
*
* The GNU LIBICONV Library is free software; you can redistribute it
@@ -67,7 +67,7 @@ hz_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
continue;
}
}
- return RET_ILSEQ;
+ goto ilseq;
}
break;
}
@@ -81,7 +81,7 @@ hz_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
goto none;
ret = gb2312_mbtowc(conv,pwc,s,2);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 2) abort();
conv->istate = state;
return count+2;
@@ -90,6 +90,10 @@ hz_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
none:
conv->istate = state;
return RET_TOOFEW(count);
+
+ilseq:
+ conv->istate = state;
+ return RET_SHIFT_ILSEQ(count);
}
static int
diff --git a/lib/iso2022_cn.h b/lib/iso2022_cn.h
index c41cfa4..d7e3e39 100644
--- a/lib/iso2022_cn.h
+++ b/lib/iso2022_cn.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 1999-2001 Free Software Foundation, Inc.
+ * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc.
* This file is part of the GNU LIBICONV Library.
*
* The GNU LIBICONV Library is free software; you can redistribute it
@@ -92,26 +92,26 @@ iso2022_cn_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
if (s[1] == 'N') {
switch (state3) {
case STATE3_NONE:
- return RET_ILSEQ;
+ goto ilseq;
case STATE3_DESIGNATED_CNS11643_2:
if (s[2] < 0x80 && s[3] < 0x80) {
int ret = cns11643_2_mbtowc(conv,pwc,s+2,2);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 2) abort();
COMBINE_STATE;
conv->istate = state;
return count+4;
} else
- return RET_ILSEQ;
+ goto ilseq;
default: abort();
}
}
- return RET_ILSEQ;
+ goto ilseq;
}
if (c == SO) {
if (state2 != STATE2_DESIGNATED_GB2312 && state2 != STATE2_DESIGNATED_CNS11643_1)
- return RET_ILSEQ;
+ goto ilseq;
state1 = STATE_TWOBYTE;
s++; count++;
if (n < count+1)
@@ -132,7 +132,7 @@ iso2022_cn_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
if (c < 0x80) {
int ret = ascii_mbtowc(conv,pwc,s,1);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 1) abort();
if (*pwc == 0x000a || *pwc == 0x000d) {
state2 = STATE2_NONE; state3 = STATE3_NONE;
@@ -141,7 +141,7 @@ iso2022_cn_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
conv->istate = state;
return count+1;
} else
- return RET_ILSEQ;
+ goto ilseq;
case STATE_TWOBYTE:
if (n < count+2)
goto none;
@@ -149,7 +149,7 @@ iso2022_cn_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
int ret;
switch (state2) {
case STATE2_NONE:
- return RET_ILSEQ;
+ goto ilseq;
case STATE2_DESIGNATED_GB2312:
ret = gb2312_mbtowc(conv,pwc,s,2); break;
case STATE2_DESIGNATED_CNS11643_1:
@@ -157,13 +157,13 @@ iso2022_cn_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
default: abort();
}
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 2) abort();
COMBINE_STATE;
conv->istate = state;
return count+2;
} else
- return RET_ILSEQ;
+ goto ilseq;
default: abort();
}
@@ -171,6 +171,11 @@ none:
COMBINE_STATE;
conv->istate = state;
return RET_TOOFEW(count);
+
+ilseq:
+ COMBINE_STATE;
+ conv->istate = state;
+ return RET_SHIFT_ILSEQ(count);
}
static int
diff --git a/lib/iso2022_cnext.h b/lib/iso2022_cnext.h
index 276d820..f848870 100644
--- a/lib/iso2022_cnext.h
+++ b/lib/iso2022_cnext.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 1999-2001 Free Software Foundation, Inc.
+ * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc.
* This file is part of the GNU LIBICONV Library.
*
* The GNU LIBICONV Library is free software; you can redistribute it
@@ -146,88 +146,88 @@ iso2022_cn_ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
if (s[1] == 'N') {
switch (state3) {
case STATE3_NONE:
- return RET_ILSEQ;
+ goto ilseq;
case STATE3_DESIGNATED_CNS11643_2:
if (s[2] < 0x80 && s[3] < 0x80) {
int ret = cns11643_2_mbtowc(conv,pwc,s+2,2);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 2) abort();
COMBINE_STATE;
conv->istate = state;
return count+4;
} else
- return RET_ILSEQ;
+ goto ilseq;
default: abort();
}
}
if (s[1] == 'O') {
switch (state4) {
case STATE4_NONE:
- return RET_ILSEQ;
+ goto ilseq;
case STATE4_DESIGNATED_CNS11643_3:
if (s[2] < 0x80 && s[3] < 0x80) {
int ret = cns11643_3_mbtowc(conv,pwc,s+2,2);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 2) abort();
COMBINE_STATE;
conv->istate = state;
return count+4;
} else
- return RET_ILSEQ;
+ goto ilseq;
case STATE4_DESIGNATED_CNS11643_4:
if (s[2] < 0x80 && s[3] < 0x80) {
int ret = cns11643_4_mbtowc(conv,pwc,s+2,2);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 2) abort();
COMBINE_STATE;
conv->istate = state;
return count+4;
} else
- return RET_ILSEQ;
+ goto ilseq;
case STATE4_DESIGNATED_CNS11643_5:
if (s[2] < 0x80 && s[3] < 0x80) {
int ret = cns11643_5_mbtowc(conv,pwc,s+2,2);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 2) abort();
COMBINE_STATE;
conv->istate = state;
return count+4;
} else
- return RET_ILSEQ;
+ goto ilseq;
case STATE4_DESIGNATED_CNS11643_6:
if (s[2] < 0x80 && s[3] < 0x80) {
int ret = cns11643_6_mbtowc(conv,pwc,s+2,2);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 2) abort();
COMBINE_STATE;
conv->istate = state;
return count+4;
} else
- return RET_ILSEQ;
+ goto ilseq;
case STATE4_DESIGNATED_CNS11643_7:
if (s[2] < 0x80 && s[3] < 0x80) {
int ret = cns11643_7_mbtowc(conv,pwc,s+2,2);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 2) abort();
COMBINE_STATE;
conv->istate = state;
return count+4;
} else
- return RET_ILSEQ;
+ goto ilseq;
default: abort();
}
}
- return RET_ILSEQ;
+ goto ilseq;
}
if (c == SO) {
if (state2 != STATE2_DESIGNATED_GB2312 && state2 != STATE2_DESIGNATED_CNS11643_1 && state2 != STATE2_DESIGNATED_ISO_IR_165)
- return RET_ILSEQ;
+ goto ilseq;
state1 = STATE_TWOBYTE;
s++; count++;
if (n < count+1)
@@ -248,7 +248,7 @@ iso2022_cn_ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
if (c < 0x80) {
int ret = ascii_mbtowc(conv,pwc,s,1);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 1) abort();
if (*pwc == 0x000a || *pwc == 0x000d) {
state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE;
@@ -257,7 +257,7 @@ iso2022_cn_ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
conv->istate = state;
return count+1;
} else
- return RET_ILSEQ;
+ goto ilseq;
case STATE_TWOBYTE:
if (n < count+2)
goto none;
@@ -265,7 +265,7 @@ iso2022_cn_ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
int ret;
switch (state2) {
case STATE2_NONE:
- return RET_ILSEQ;
+ goto ilseq;
case STATE2_DESIGNATED_GB2312:
ret = gb2312_mbtowc(conv,pwc,s,2); break;
case STATE2_DESIGNATED_CNS11643_1:
@@ -275,13 +275,13 @@ iso2022_cn_ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
default: abort();
}
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 2) abort();
COMBINE_STATE;
conv->istate = state;
return count+2;
} else
- return RET_ILSEQ;
+ goto ilseq;
default: abort();
}
@@ -289,6 +289,11 @@ none:
COMBINE_STATE;
conv->istate = state;
return RET_TOOFEW(count);
+
+ilseq:
+ COMBINE_STATE;
+ conv->istate = state;
+ return RET_SHIFT_ILSEQ(count);
}
static int
diff --git a/lib/iso2022_jp.h b/lib/iso2022_jp.h
index 46aa8c8..1c8abec 100644
--- a/lib/iso2022_jp.h
+++ b/lib/iso2022_jp.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 1999-2001 Free Software Foundation, Inc.
+ * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc.
* This file is part of the GNU LIBICONV Library.
*
* The GNU LIBICONV Library is free software; you can redistribute it
@@ -59,7 +59,7 @@ iso2022_jp_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
goto none;
continue;
}
- return RET_ILSEQ;
+ goto ilseq;
}
if (s[1] == '$') {
if (s[2] == '@' || s[2] == 'B') {
@@ -70,9 +70,9 @@ iso2022_jp_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
goto none;
continue;
}
- return RET_ILSEQ;
+ goto ilseq;
}
- return RET_ILSEQ;
+ goto ilseq;
}
break;
}
@@ -81,40 +81,44 @@ iso2022_jp_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
if (c < 0x80) {
int ret = ascii_mbtowc(conv,pwc,s,1);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 1) abort();
conv->istate = state;
return count+1;
} else
- return RET_ILSEQ;
+ goto ilseq;
case STATE_JISX0201ROMAN:
if (c < 0x80) {
int ret = jisx0201_mbtowc(conv,pwc,s,1);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 1) abort();
conv->istate = state;
return count+1;
} else
- return RET_ILSEQ;
+ goto ilseq;
case STATE_JISX0208:
if (n < count+2)
goto none;
if (s[0] < 0x80 && s[1] < 0x80) {
int ret = jisx0208_mbtowc(conv,pwc,s,2);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 2) abort();
conv->istate = state;
return count+2;
} else
- return RET_ILSEQ;
+ goto ilseq;
default: abort();
}
none:
conv->istate = state;
return RET_TOOFEW(count);
+
+ilseq:
+ conv->istate = state;
+ return RET_SHIFT_ILSEQ(count);
}
static int
diff --git a/lib/iso2022_jp1.h b/lib/iso2022_jp1.h
index a43a105..c309474 100644
--- a/lib/iso2022_jp1.h
+++ b/lib/iso2022_jp1.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 1999-2001 Free Software Foundation, Inc.
+ * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc.
* This file is part of the GNU LIBICONV Library.
*
* The GNU LIBICONV Library is free software; you can redistribute it
@@ -60,7 +60,7 @@ iso2022_jp1_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
goto none;
continue;
}
- return RET_ILSEQ;
+ goto ilseq;
}
if (s[1] == '$') {
if (s[2] == '@' || s[2] == 'B') {
@@ -82,9 +82,9 @@ iso2022_jp1_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
continue;
}
}
- return RET_ILSEQ;
+ goto ilseq;
}
- return RET_ILSEQ;
+ goto ilseq;
}
break;
}
@@ -93,52 +93,56 @@ iso2022_jp1_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
if (c < 0x80) {
int ret = ascii_mbtowc(conv,pwc,s,1);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 1) abort();
conv->istate = state;
return count+1;
} else
- return RET_ILSEQ;
+ goto ilseq;
case STATE_JISX0201ROMAN:
if (c < 0x80) {
int ret = jisx0201_mbtowc(conv,pwc,s,1);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 1) abort();
conv->istate = state;
return count+1;
} else
- return RET_ILSEQ;
+ goto ilseq;
case STATE_JISX0208:
if (n < count+2)
goto none;
if (s[0] < 0x80 && s[1] < 0x80) {
int ret = jisx0208_mbtowc(conv,pwc,s,2);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 2) abort();
conv->istate = state;
return count+2;
} else
- return RET_ILSEQ;
+ goto ilseq;
case STATE_JISX0212:
if (n < count+2)
goto none;
if (s[0] < 0x80 && s[1] < 0x80) {
int ret = jisx0212_mbtowc(conv,pwc,s,2);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 2) abort();
conv->istate = state;
return count+2;
} else
- return RET_ILSEQ;
+ goto ilseq;
default: abort();
}
none:
conv->istate = state;
return RET_TOOFEW(count);
+
+ilseq:
+ conv->istate = state;
+ return RET_SHIFT_ILSEQ(count);
}
static int
diff --git a/lib/iso2022_jp2.h b/lib/iso2022_jp2.h
index 4fcf439..5e3ca41 100644
--- a/lib/iso2022_jp2.h
+++ b/lib/iso2022_jp2.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 1999-2001 Free Software Foundation, Inc.
+ * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc.
* This file is part of the GNU LIBICONV Library.
*
* The GNU LIBICONV Library is free software; you can redistribute it
@@ -84,7 +84,7 @@ iso2022_jp2_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
goto none;
continue;
}
- return RET_ILSEQ;
+ goto ilseq;
}
if (s[1] == '$') {
if (s[2] == '@' || s[2] == 'B') {
@@ -119,9 +119,9 @@ iso2022_jp2_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
goto none;
continue;
}
- return RET_ILSEQ;
+ goto ilseq;
}
- return RET_ILSEQ;
+ goto ilseq;
}
if (s[1] == '.') {
if (n < count+3)
@@ -140,40 +140,40 @@ iso2022_jp2_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
goto none;
continue;
}
- return RET_ILSEQ;
+ goto ilseq;
}
if (s[1] == 'N') {
switch (state2) {
case STATE_G2_NONE:
- return RET_ILSEQ;
+ goto ilseq;
case STATE_G2_ISO8859_1:
if (s[2] < 0x80) {
unsigned char buf = s[2]+0x80;
int ret = iso8859_1_mbtowc(conv,pwc,&buf,1);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 1) abort();
COMBINE_STATE;
conv->istate = state;
return count+3;
} else
- return RET_ILSEQ;
+ goto ilseq;
case STATE_G2_ISO8859_7:
if (s[2] < 0x80) {
unsigned char buf = s[2]+0x80;
int ret = iso8859_7_mbtowc(conv,pwc,&buf,1);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 1) abort();
COMBINE_STATE;
conv->istate = state;
return count+3;
} else
- return RET_ILSEQ;
+ goto ilseq;
default: abort();
}
}
- return RET_ILSEQ;
+ goto ilseq;
}
break;
}
@@ -182,7 +182,7 @@ iso2022_jp2_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
if (c < 0x80) {
int ret = ascii_mbtowc(conv,pwc,s,1);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 1) abort();
if (*pwc == 0x000a || *pwc == 0x000d)
state2 = STATE_G2_NONE;
@@ -190,12 +190,12 @@ iso2022_jp2_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
conv->istate = state;
return count+1;
} else
- return RET_ILSEQ;
+ goto ilseq;
case STATE_JISX0201ROMAN:
if (c < 0x80) {
int ret = jisx0201_mbtowc(conv,pwc,s,1);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 1) abort();
if (*pwc == 0x000a || *pwc == 0x000d)
state2 = STATE_G2_NONE;
@@ -203,71 +203,71 @@ iso2022_jp2_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
conv->istate = state;
return count+1;
} else
- return RET_ILSEQ;
+ goto ilseq;
case STATE_JISX0201KATAKANA:
if (c < 0x80) {
unsigned char buf = c+0x80;
int ret = jisx0201_mbtowc(conv,pwc,&buf,1);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 1) abort();
COMBINE_STATE;
conv->istate = state;
return count+1;
} else
- return RET_ILSEQ;
+ goto ilseq;
case STATE_JISX0208:
if (n < count+2)
goto none;
if (s[0] < 0x80 && s[1] < 0x80) {
int ret = jisx0208_mbtowc(conv,pwc,s,2);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 2) abort();
COMBINE_STATE;
conv->istate = state;
return count+2;
} else
- return RET_ILSEQ;
+ goto ilseq;
case STATE_JISX0212:
if (n < count+2)
goto none;
if (s[0] < 0x80 && s[1] < 0x80) {
int ret = jisx0212_mbtowc(conv,pwc,s,2);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 2) abort();
COMBINE_STATE;
conv->istate = state;
return count+2;
} else
- return RET_ILSEQ;
+ goto ilseq;
case STATE_GB2312:
if (n < count+2)
goto none;
if (s[0] < 0x80 && s[1] < 0x80) {
int ret = gb2312_mbtowc(conv,pwc,s,2);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 2) abort();
COMBINE_STATE;
conv->istate = state;
return count+2;
} else
- return RET_ILSEQ;
+ goto ilseq;
case STATE_KSC5601:
if (n < count+2)
goto none;
if (s[0] < 0x80 && s[1] < 0x80) {
int ret = ksc5601_mbtowc(conv,pwc,s,2);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 2) abort();
COMBINE_STATE;
conv->istate = state;
return count+2;
} else
- return RET_ILSEQ;
+ goto ilseq;
default: abort();
}
@@ -275,6 +275,11 @@ none:
COMBINE_STATE;
conv->istate = state;
return RET_TOOFEW(count);
+
+ilseq:
+ COMBINE_STATE;
+ conv->istate = state;
+ return RET_SHIFT_ILSEQ(count);
}
#undef COMBINE_STATE
diff --git a/lib/iso2022_jp3.h b/lib/iso2022_jp3.h
index 75ef4f9..58cea1b 100644
--- a/lib/iso2022_jp3.h
+++ b/lib/iso2022_jp3.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 1999-2004 Free Software Foundation, Inc.
+ * Copyright (C) 1999-2004, 2008 Free Software Foundation, Inc.
* This file is part of the GNU LIBICONV Library.
*
* The GNU LIBICONV Library is free software; you can redistribute it
@@ -81,7 +81,7 @@ iso2022_jp3_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
goto none;
continue;
}
- return RET_ILSEQ;
+ goto ilseq;
}
if (s[1] == '$') {
if (s[2] == '@' || s[2] == 'B') {
@@ -110,9 +110,9 @@ iso2022_jp3_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
continue;
}
}
- return RET_ILSEQ;
+ goto ilseq;
}
- return RET_ILSEQ;
+ goto ilseq;
}
break;
}
@@ -121,45 +121,45 @@ iso2022_jp3_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
if (c < 0x80) {
int ret = ascii_mbtowc(conv,pwc,s,1);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 1) abort();
conv->istate = state;
return count+1;
} else
- return RET_ILSEQ;
+ goto ilseq;
case STATE_JISX0201ROMAN:
if (c < 0x80) {
int ret = jisx0201_mbtowc(conv,pwc,s,1);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 1) abort();
conv->istate = state;
return count+1;
} else
- return RET_ILSEQ;
+ goto ilseq;
case STATE_JISX0201KATAKANA:
if (c < 0x80) {
unsigned char buf = c+0x80;
int ret = jisx0201_mbtowc(conv,pwc,&buf,1);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 1) abort();
conv->istate = state;
return count+1;
} else
- return RET_ILSEQ;
+ goto ilseq;
case STATE_JISX0208:
if (n < count+2)
goto none;
if (s[0] < 0x80 && s[1] < 0x80) {
int ret = jisx0208_mbtowc(conv,pwc,s,2);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 2) abort();
conv->istate = state;
return count+2;
} else
- return RET_ILSEQ;
+ goto ilseq;
case STATE_JISX02131:
case STATE_JISX02132:
if (n < count+2)
@@ -182,12 +182,16 @@ iso2022_jp3_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
return count+2;
}
}
- return RET_ILSEQ;
+ goto ilseq;
default: abort();
}
none:
conv->istate = state;
return RET_TOOFEW(count);
+
+ ilseq:
+ conv->istate = state;
+ return RET_SHIFT_ILSEQ(count);
}
}
diff --git a/lib/iso2022_kr.h b/lib/iso2022_kr.h
index d610b8c..8045d4e 100644
--- a/lib/iso2022_kr.h
+++ b/lib/iso2022_kr.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 1999-2001 Free Software Foundation, Inc.
+ * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc.
* This file is part of the GNU LIBICONV Library.
*
* The GNU LIBICONV Library is free software; you can redistribute it
@@ -72,11 +72,11 @@ iso2022_kr_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
}
}
}
- return RET_ILSEQ;
+ goto ilseq;
}
if (c == SO) {
if (state2 != STATE2_DESIGNATED_KSC5601)
- return RET_ILSEQ;
+ goto ilseq;
state1 = STATE_TWOBYTE;
s++; count++;
if (n < count+1)
@@ -97,7 +97,7 @@ iso2022_kr_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
if (c < 0x80) {
int ret = ascii_mbtowc(conv,pwc,s,1);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 1) abort();
#if 0 /* Accept ISO-2022-KR according to CJK.INF. */
if (*pwc == 0x000a || *pwc == 0x000d)
@@ -107,7 +107,7 @@ iso2022_kr_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
conv->istate = state;
return count+1;
} else
- return RET_ILSEQ;
+ goto ilseq;
case STATE_TWOBYTE:
if (n < count+2)
goto none;
@@ -115,13 +115,13 @@ iso2022_kr_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
if (s[0] < 0x80 && s[1] < 0x80) {
int ret = ksc5601_mbtowc(conv,pwc,s,2);
if (ret == RET_ILSEQ)
- return RET_ILSEQ;
+ goto ilseq;
if (ret != 2) abort();
COMBINE_STATE;
conv->istate = state;
return count+2;
} else
- return RET_ILSEQ;
+ goto ilseq;
default: abort();
}
@@ -129,6 +129,11 @@ none:
COMBINE_STATE;
conv->istate = state;
return RET_TOOFEW(count);
+
+ilseq:
+ COMBINE_STATE;
+ conv->istate = state;
+ return RET_SHIFT_ILSEQ(count);
}
static int
diff --git a/lib/loop_unicode.h b/lib/loop_unicode.h
index c5b0bf4..1c787b5 100644
--- a/lib/loop_unicode.h
+++ b/lib/loop_unicode.h
@@ -283,57 +283,62 @@ static size_t unicode_loop_convert (iconv_t icd,
int outcount;
incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft);
if (incount < 0) {
- if (incount == RET_ILSEQ) {
- /* Case 1: invalid input */
+ if ((unsigned int)(-1-incount) % 2 == (unsigned int)(-1-RET_ILSEQ) % 2) {
+ /* Case 1: invalid input, possibly after a shift sequence */
+ incount = DECODE_SHIFT_ILSEQ(incount);
if (cd->discard_ilseq) {
switch (cd->iindex) {
case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
case ei_utf32: case ei_utf32be: case ei_utf32le:
case ei_ucs4internal: case ei_ucs4swapped:
- incount = 4; break;
+ incount += 4; break;
case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
case ei_utf16: case ei_utf16be: case ei_utf16le:
case ei_ucs2internal: case ei_ucs2swapped:
- incount = 2; break;
+ incount += 2; break;
default:
- incount = 1; break;
+ incount += 1; break;
}
goto outcount_zero;
}
#ifndef LIBICONV_PLUG
else if (cd->fallbacks.mb_to_uc_fallback != NULL) {
+ unsigned int incount2;
struct mb_to_uc_fallback_locals locals;
switch (cd->iindex) {
case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
case ei_utf32: case ei_utf32be: case ei_utf32le:
case ei_ucs4internal: case ei_ucs4swapped:
- incount = 4; break;
+ incount2 = 4; break;
case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
case ei_utf16: case ei_utf16be: case ei_utf16le:
case ei_ucs2internal: case ei_ucs2swapped:
- incount = 2; break;
+ incount2 = 2; break;
default:
- incount = 1; break;
+ incount2 = 1; break;
}
locals.l_cd = cd;
locals.l_outbuf = outptr;
locals.l_outbytesleft = outleft;
locals.l_errno = 0;
- cd->fallbacks.mb_to_uc_fallback((const char*)inptr, incount,
+ cd->fallbacks.mb_to_uc_fallback((const char*)inptr+incount, incount2,
mb_to_uc_write_replacement,
&locals,
cd->fallbacks.data);
if (locals.l_errno != 0) {
+ inptr += incount; inleft -= incount;
errno = locals.l_errno;
result = -1;
break;
}
+ incount += incount2;
outptr = locals.l_outbuf;
outleft = locals.l_outbytesleft;
result += 1;
goto outcount_zero;
}
#endif
+ inptr += incount; inleft -= incount;
errno = EILSEQ;
result = -1;
break;
@@ -345,7 +350,7 @@ static size_t unicode_loop_convert (iconv_t icd,
break;
}
/* Case 3: k bytes read, but only a shift sequence */
- incount = -2-incount;
+ incount = DECODE_TOOFEW(incount);
} else {
/* Case 4: k bytes read, making up a wide character */
if (outleft == 0) {
diff --git a/lib/ucs2.h b/lib/ucs2.h
index 6afc175..206b8cc 100644
--- a/lib/ucs2.h
+++ b/lib/ucs2.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 1999-2001 Free Software Foundation, Inc.
+ * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc.
* This file is part of the GNU LIBICONV Library.
*
* The GNU LIBICONV Library is free software; you can redistribute it
@@ -37,7 +37,8 @@ ucs2_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
} else if (wc == 0xfffe) {
state ^= 1;
} else if (wc >= 0xd800 && wc < 0xe000) {
- return RET_ILSEQ;
+ conv->istate = state;
+ return RET_SHIFT_ILSEQ(count);
} else {
*pwc = wc;
conv->istate = state;
diff --git a/lib/ucs4.h b/lib/ucs4.h
index f480ba8..00d08d2 100644
--- a/lib/ucs4.h
+++ b/lib/ucs4.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 1999-2001 Free Software Foundation, Inc.
+ * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc.
* This file is part of the GNU LIBICONV Library.
*
* The GNU LIBICONV Library is free software; you can redistribute it
@@ -42,8 +42,10 @@ ucs4_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
*pwc = wc;
conv->istate = state;
return count;
- } else
- return RET_ILSEQ;
+ } else {
+ conv->istate = state;
+ return RET_SHIFT_ILSEQ(count);
+ }
}
conv->istate = state;
return RET_TOOFEW(count);
diff --git a/lib/utf16.h b/lib/utf16.h
index c13a233..99b5e2c 100644
--- a/lib/utf16.h
+++ b/lib/utf16.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 1999-2001 Free Software Foundation, Inc.
+ * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc.
* This file is part of the GNU LIBICONV Library.
*
* The GNU LIBICONV Library is free software; you can redistribute it
@@ -45,14 +45,14 @@ utf16_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
if (n >= 4) {
ucs4_t wc2 = (state ? s[2] + (s[3] << 8) : (s[2] << 8) + s[3]);
if (!(wc2 >= 0xdc00 && wc2 < 0xe000))
- return RET_ILSEQ;
+ goto ilseq;
*pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00);
conv->istate = state;
return count+4;
} else
break;
} else if (wc >= 0xdc00 && wc < 0xe000) {
- return RET_ILSEQ;
+ goto ilseq;
} else {
*pwc = wc;
conv->istate = state;
@@ -62,6 +62,10 @@ utf16_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
}
conv->istate = state;
return RET_TOOFEW(count);
+
+ilseq:
+ conv->istate = state;
+ return RET_SHIFT_ILSEQ(count);
}
/* We output UTF-16 in big-endian order, with byte-order mark.
diff --git a/lib/utf16be.h b/lib/utf16be.h
index 62a28c6..a6d90ff 100644
--- a/lib/utf16be.h
+++ b/lib/utf16be.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 1999-2001 Free Software Foundation, Inc.
+ * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc.
* This file is part of the GNU LIBICONV Library.
*
* The GNU LIBICONV Library is free software; you can redistribute it
@@ -34,18 +34,21 @@ utf16be_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
if (n >= 4) {
ucs4_t wc2 = (s[2] << 8) + s[3];
if (!(wc2 >= 0xdc00 && wc2 < 0xe000))
- return RET_ILSEQ;
+ goto ilseq;
*pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00);
return count+4;
}
} else if (wc >= 0xdc00 && wc < 0xe000) {
- return RET_ILSEQ;
+ goto ilseq;
} else {
*pwc = wc;
return count+2;
}
}
return RET_TOOFEW(count);
+
+ilseq:
+ return RET_SHIFT_ILSEQ(count);
}
static int
diff --git a/lib/utf16le.h b/lib/utf16le.h
index fb339a2..5bb2b02 100644
--- a/lib/utf16le.h
+++ b/lib/utf16le.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 1999-2001 Free Software Foundation, Inc.
+ * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc.
* This file is part of the GNU LIBICONV Library.
*
* The GNU LIBICONV Library is free software; you can redistribute it
@@ -34,18 +34,21 @@ utf16le_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
if (n >= 4) {
ucs4_t wc2 = s[2] + (s[3] << 8);
if (!(wc2 >= 0xdc00 && wc2 < 0xe000))
- return RET_ILSEQ;
+ goto ilseq;
*pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00);
return count+4;
}
} else if (wc >= 0xdc00 && wc < 0xe000) {
- return RET_ILSEQ;
+ goto ilseq;
} else {
*pwc = wc;
return count+2;
}
}
return RET_TOOFEW(count);
+
+ilseq:
+ return RET_SHIFT_ILSEQ(count);
}
static int
diff --git a/lib/utf32.h b/lib/utf32.h
index 4be0db7..bc579ae 100644
--- a/lib/utf32.h
+++ b/lib/utf32.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 1999-2001 Free Software Foundation, Inc.
+ * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc.
* This file is part of the GNU LIBICONV Library.
*
* The GNU LIBICONV Library is free software; you can redistribute it
@@ -49,8 +49,10 @@ utf32_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
*pwc = wc;
conv->istate = state;
return count;
- } else
- return RET_ILSEQ;
+ } else {
+ conv->istate = state;
+ return RET_SHIFT_ILSEQ(count);
+ }
}
s += 4; n -= 4;
}
diff --git a/lib/utf7.h b/lib/utf7.h
index 4cce995..888bfb4 100644
--- a/lib/utf7.h
+++ b/lib/utf7.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 1999-2001 Free Software Foundation, Inc.
+ * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc.
* This file is part of the GNU LIBICONV Library.
*
* The GNU LIBICONV Library is free software; you can redistribute it
@@ -99,7 +99,7 @@ inactive:
state = 1;
goto active;
}
- return RET_ILSEQ;
+ goto ilseq;
}
}
@@ -127,9 +127,9 @@ active:
else {
/* c terminates base64 encoding */
if (base64state & -4)
- return RET_ILSEQ; /* data must be 0, otherwise illegal */
+ goto ilseq; /* data must be 0, otherwise illegal */
if (base64count)
- return RET_ILSEQ; /* partial UTF-16 characters are invalid */
+ goto ilseq; /* partial UTF-16 characters are invalid */
if (c == '-') {
s++; count++;
}
@@ -168,7 +168,7 @@ active:
ucs4_t wc1 = wc >> 16;
ucs4_t wc2 = wc & 0xffff;
if (!(wc1 >= 0xd800 && wc1 < 0xdc00)) abort();
- if (!(wc2 >= 0xdc00 && wc2 < 0xe000)) return RET_ILSEQ;
+ if (!(wc2 >= 0xdc00 && wc2 < 0xe000)) goto ilseq;
*pwc = 0x10000 + ((wc1 - 0xd800) << 10) + (wc2 - 0xdc00);
} else {
*pwc = wc;
@@ -180,6 +180,10 @@ active:
none:
conv->istate = state;
return RET_TOOFEW(count);
+
+ilseq:
+ conv->istate = state;
+ return RET_SHIFT_ILSEQ(count);
}
/*
diff --git a/tests/Makefile.in b/tests/Makefile.in
index 0c5b6dd..bd236e2 100644
--- a/tests/Makefile.in
+++ b/tests/Makefile.in
@@ -28,7 +28,7 @@ top_builddir = ..
all :
-check : all table-from table-to ../src/iconv_no_i18n
+check : all table-from table-to ../src/iconv_no_i18n test-shiftseq
$(srcdir)/check-stateless $(srcdir) ASCII
# /* General multi-byte encodings */
$(CC) $(LDFLAGS) $(INCLUDES) $(CFLAGS) $(CPPFLAGS) $(srcdir)/genutf8.c -o genutf8 && \
@@ -154,6 +154,8 @@ check : all table-from table-to ../src/iconv_no_i18n
$(srcdir)/check-translitfailure $(srcdir) TranslitFail1 ISO-8859-1 ASCII
# /* substitution */
$(srcdir)/check-subst
+# /* shift sequence before invalid multibyte character */
+ ./test-shiftseq
check-extra: check-extra-@USE_EXTRA_ENCODINGS@
check-extra-no:
@@ -208,6 +210,12 @@ table-to : table-to.@OBJEXT@ ../lib/libiconv.la
table-to.@OBJEXT@ : $(srcdir)/table-to.c
$(CC) -c $(INCLUDES) $(CFLAGS) $(CPPFLAGS) $(srcdir)/table-to.c
+test-shiftseq : test-shiftseq.@OBJEXT@ ../lib/libiconv.la
+ $(LIBTOOL_LINK) $(CC) $(LDFLAGS) $(CFLAGS) test-shiftseq.@OBJEXT@ ../lib/libiconv.la -o $@
+
+test-shiftseq.@OBJEXT@ : $(srcdir)/test-shiftseq.c
+ $(CC) -c $(INCLUDES) $(CFLAGS) $(CPPFLAGS) $(srcdir)/test-shiftseq.c
+
# The following rule is necessary to avoid a toplevel "make -n check" failure.
../lib/libiconv.la :
cd ../lib && $(MAKE) libiconv.la
@@ -215,7 +223,7 @@ table-to.@OBJEXT@ : $(srcdir)/table-to.c
mostlyclean : clean
clean : force
- $(RM) *.@OBJEXT@ *.lo table-from table-from@EXEEXT@ table-to table-to@EXEEXT@ tmp-* genutf8 genutf8@EXEEXT@ UTF-8.TXT gengb18030z gengb18030z@EXEEXT@ GB18030.TXT core *.stackdump
+ $(RM) *.@OBJEXT@ *.lo table-from table-from@EXEEXT@ table-to table-to@EXEEXT@ test-shiftseq test-shiftseq@EXEEXT@ tmp-* genutf8 genutf8@EXEEXT@ UTF-8.TXT gengb18030z gengb18030z@EXEEXT@ GB18030.TXT core *.stackdump
$(RM) -r .libs _libs
distclean : clean
diff --git a/tests/test-shiftseq.c b/tests/test-shiftseq.c
new file mode 100644
index 0000000..f887f0c
--- /dev/null
+++ b/tests/test-shiftseq.c
@@ -0,0 +1,119 @@
+/* Copyright (C) 2008 Free Software Foundation, Inc.
+ This file is part of the GNU LIBICONV Library.
+
+ The GNU LIBICONV Library is free software; you can redistribute it
+ and/or modify it under the terms of the GNU Library General Public
+ License as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+
+ The GNU LIBICONV Library is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU LIBICONV Library; see the file COPYING.LIB.
+ If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
+ Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#include "config.h"
+
+#include <stdlib.h>
+#include <iconv.h>
+#include <errno.h>
+
+/* This test checks that the behaviour of iconv() in the situation of an
+ invalid multibyte character after a shift sequence is consistent whether
+ the entire buffer is passed at once, or whether it is passed in two
+ subsequent calls. Based on a bug report from
+ Roman Rybalko <roman_rybalko@users.sourceforge.net>
+ at <http://savannah.gnu.org/bugs/?24216>. */
+
+/* Scenario 1: pass the entire UTF-7 input to iconv() in a single call.
+   The input is a shift sequence ('+') followed by an invalid multibyte
+   character.  The call must fail with EILSEQ, having consumed only the
+   shift character and having produced no output; otherwise abort().  */
+void main1 (void)
+{
+  static const char input[] = "+2D/YQNhB";
+  iconv_t cd;
+  char buf[20];
+
+  const char * inptr;
+  size_t inleft;
+  char * outptr;
+  size_t outleft;
+
+  cd = iconv_open ("UTF-8", "UTF-7");
+  /* Fail cleanly if the converter is unavailable, rather than handing an
+     invalid descriptor to iconv().  */
+  if (cd == (iconv_t)(-1))
+    abort();
+  {
+    size_t r;
+
+    inptr = input;
+    /* The whole string, without its terminating NUL.  */
+    inleft = sizeof (input) - 1;
+    outptr = buf;
+    outleft = sizeof (buf);
+    r = iconv (cd, (char **) &inptr, &inleft, &outptr, &outleft);
+    /* Reference results (errno 84 is EILSEQ):
+       glibc:    r = -1 errno = 84 inconsumed = 4 outproduced = 0
+       libiconv: r = -1 errno = 84 inconsumed = 1 outproduced = 0  */
+    if (!(r == (size_t)(-1) && errno == EILSEQ
+          && inptr - input == 1 && outptr - buf == 0))
+      abort();
+  }
+  /* Release the conversion descriptor.  */
+  iconv_close (cd);
+}
+
+/* Scenario 2: pass the same UTF-7 input to iconv() in two subsequent calls.
+   The first call sees only the first 5 bytes and must report an incomplete
+   input (EINVAL); the second call sees the rest and must report EILSEQ.
+   In both cases only the shift character may have been consumed and no
+   output may have been produced; otherwise abort().  */
+void main2 (void)
+{
+  static const char input[] = "+2D/YQNhB";
+  iconv_t cd;
+  char buf[20];
+
+  const char * inptr;
+  size_t inleft;
+  char * outptr;
+  size_t outleft;
+
+  cd = iconv_open ("UTF-8", "UTF-7");
+  /* Fail cleanly if the converter is unavailable, rather than handing an
+     invalid descriptor to iconv().  */
+  if (cd == (iconv_t)(-1))
+    abort();
+  {
+    size_t r;
+
+    inptr = input;
+    /* Only a prefix of the input is made available in the first call.  */
+    inleft = 5;
+    outptr = buf;
+    outleft = sizeof (buf);
+    r = iconv (cd, (char **) &inptr, &inleft, &outptr, &outleft);
+    /* Reference results:
+       glibc:    r = -1 errno = 84 (EILSEQ) inconsumed = 4 outproduced = 0
+       libiconv: r = -1 errno = 22 (EINVAL) inconsumed = 1 outproduced = 0  */
+    if (!(r == (size_t)(-1) && errno == EINVAL
+          && inptr - input == 1 && outptr - buf == 0))
+      abort();
+
+    /* Everything that is left of the string, without its terminating NUL.
+       The length is derived from the array size so that iconv() is never
+       told about bytes beyond the end of input[].  */
+    inleft = (input + sizeof (input) - 1) - inptr;
+    r = iconv (cd, (char **) &inptr, &inleft, &outptr, &outleft);
+    /* Reference results:
+       glibc:    r = -1 errno = 84 (EILSEQ) inconsumed = 4 outproduced = 0
+       libiconv: r = -1 errno = 84 (EILSEQ) inconsumed = 1 outproduced = 0  */
+    if (!(r == (size_t)(-1) && errno == EILSEQ
+          && inptr - input == 1 && outptr - buf == 0))
+      abort();
+  }
+  /* Release the conversion descriptor.  */
+  iconv_close (cd);
+}
+
+/* Runs both scenarios in order.  Each one calls abort() when iconv()
+   behaves unexpectedly, so reaching the return means the test passed.  */
+int main (void)
+{
+  main1 ();
+  main2 ();
+  return 0;
+}