diff options
author | Elliott Hughes <enh@google.com> | 2016-10-24 14:50:31 -0700 |
---|---|---|
committer | Elliott Hughes <enh@google.com> | 2016-10-24 14:50:31 -0700 |
commit | 53de874c3cd5b8aa243546bb1c206f5a709db1fd (patch) | |
tree | 982316adf0c878fb85bf7c4579a04c4f631424dc | |
parent | bd839d100d1c4802cf53f3d27bb73e6032719762 (diff) | |
download | android_bionic-53de874c3cd5b8aa243546bb1c206f5a709db1fd.tar.gz android_bionic-53de874c3cd5b8aa243546bb1c206f5a709db1fd.tar.bz2 android_bionic-53de874c3cd5b8aa243546bb1c206f5a709db1fd.zip |
The default locale "" should be a UTF-8 locale.
"ls -q" (or "adb shell -tt ls") was mangling non-ASCII because mbrtowc
was returning multibyte characters as their individual bytes. This was
because toybox asks for "" rather than "C.UTF-8", and for some reason
we were interpreting that as "C" rather than "C.UTF-8".
Test: bionic tests, ls
Change-Id: Ic60e3b90cd5fe689e5489fad0d5d91062b9594ed
-rw-r--r-- | libc/bionic/locale.cpp | 23 | ||||
-rw-r--r-- | libc/bionic/wchar.cpp | 2 | ||||
-rw-r--r-- | tests/locale_test.cpp | 5 |
3 files changed, 18 insertions, 12 deletions
diff --git a/libc/bionic/locale.cpp b/libc/bionic/locale.cpp index e51b38c2f..113118d22 100644 --- a/libc/bionic/locale.cpp +++ b/libc/bionic/locale.cpp @@ -37,7 +37,8 @@ #include "private/bionic_macros.h" -// We currently support a single locale, the "C" locale (also known as "POSIX"). +// We only support two locales, the "C" locale (also known as "POSIX"), +// and the "C.UTF-8" locale (also known as "en_US.UTF-8"). static bool __bionic_current_locale_is_utf8 = true; @@ -100,12 +101,16 @@ static void __locale_init() { g_locale.int_n_sign_posn = CHAR_MAX; } -static bool __is_supported_locale(const char* locale) { - return (strcmp(locale, "") == 0 || - strcmp(locale, "C") == 0 || - strcmp(locale, "C.UTF-8") == 0 || - strcmp(locale, "en_US.UTF-8") == 0 || - strcmp(locale, "POSIX") == 0); +static bool __is_supported_locale(const char* locale_name) { + return (strcmp(locale_name, "") == 0 || + strcmp(locale_name, "C") == 0 || + strcmp(locale_name, "C.UTF-8") == 0 || + strcmp(locale_name, "en_US.UTF-8") == 0 || + strcmp(locale_name, "POSIX") == 0); +} + +static bool __is_utf8_locale(const char* locale_name) { + return (*locale_name == '\0' || strstr(locale_name, "UTF-8")); } lconv* localeconv() { @@ -133,7 +138,7 @@ locale_t newlocale(int category_mask, const char* locale_name, locale_t /*base*/ return NULL; } - return new __locale_t(strstr(locale_name, "UTF-8") != NULL ? 4 : 1); + return new __locale_t(__is_utf8_locale(locale_name) ? 4 : 1); } char* setlocale(int category, const char* locale_name) { @@ -150,7 +155,7 @@ char* setlocale(int category, const char* locale_name) { errno = ENOENT; return NULL; } - __bionic_current_locale_is_utf8 = (strstr(locale_name, "UTF-8") != NULL); + __bionic_current_locale_is_utf8 = __is_utf8_locale(locale_name); } return const_cast<char*>(__bionic_current_locale_is_utf8 ? "C.UTF-8" : "C"); diff --git a/libc/bionic/wchar.cpp b/libc/bionic/wchar.cpp index d28888d36..7717e10c6 100644 --- a/libc/bionic/wchar.cpp +++ b/libc/bionic/wchar.cpp @@ -61,7 +61,7 @@ size_t mbrtowc(wchar_t* pwc, const char* s, size_t n, mbstate_t* ps) { static mbstate_t __private_state; mbstate_t* state = (ps == NULL) ? &__private_state : ps; - // Our wchar_t is UTF-32 + // Our wchar_t is UTF-32. return mbrtoc32(reinterpret_cast<char32_t*>(pwc), s, n, state); } diff --git a/tests/locale_test.cpp b/tests/locale_test.cpp index f308af534..8b38c407e 100644 --- a/tests/locale_test.cpp +++ b/tests/locale_test.cpp @@ -59,9 +59,10 @@ TEST(locale, setlocale) { EXPECT_EQ(EINVAL, errno); #if defined(__BIONIC__) - // The "" locale is implementation-defined. For bionic, it's the C locale. + // The "" locale is implementation-defined. For bionic, it's the C.UTF-8 locale, which is + // pretty much all we support anyway. // glibc will give us something like "en_US.UTF-8", depending on the user's configuration. - EXPECT_STREQ("C", setlocale(LC_ALL, "")); + EXPECT_STREQ("C.UTF-8", setlocale(LC_ALL, "")); #endif EXPECT_STREQ("C", setlocale(LC_ALL, "C")); EXPECT_STREQ("C", setlocale(LC_ALL, "POSIX")); |