about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Elliott Hughes <enh@google.com> 2016-10-24 14:50:31 -0700
committer: Elliott Hughes <enh@google.com> 2016-10-24 14:50:31 -0700
commit: 53de874c3cd5b8aa243546bb1c206f5a709db1fd (patch)
tree: 982316adf0c878fb85bf7c4579a04c4f631424dc
parent: bd839d100d1c4802cf53f3d27bb73e6032719762 (diff)
download: android_bionic-53de874c3cd5b8aa243546bb1c206f5a709db1fd.tar.gz
android_bionic-53de874c3cd5b8aa243546bb1c206f5a709db1fd.tar.bz2
android_bionic-53de874c3cd5b8aa243546bb1c206f5a709db1fd.zip
The default locale "" should be a UTF-8 locale.
"ls -q" (or "adb shell -tt ls") was mangling non-ASCII because mbrtowc was returning multibyte characters as their individual bytes. This was because toybox asks for "" rather than "C.UTF-8", and for some reason we were interpreting that as "C" rather than "C.UTF-8".

Test: bionic tests, ls
Change-Id: Ic60e3b90cd5fe689e5489fad0d5d91062b9594ed
-rw-r--r-- libc/bionic/locale.cpp 23
-rw-r--r-- libc/bionic/wchar.cpp 2
-rw-r--r-- tests/locale_test.cpp 5
3 files changed, 18 insertions, 12 deletions
diff --git a/libc/bionic/locale.cpp b/libc/bionic/locale.cpp
index e51b38c2f..113118d22 100644
--- a/libc/bionic/locale.cpp
+++ b/libc/bionic/locale.cpp
@@ -37,7 +37,8 @@
#include "private/bionic_macros.h"
-// We currently support a single locale, the "C" locale (also known as "POSIX").
+// We only support two locales, the "C" locale (also known as "POSIX"),
+// and the "C.UTF-8" locale (also known as "en_US.UTF-8").
static bool __bionic_current_locale_is_utf8 = true;
@@ -100,12 +101,16 @@ static void __locale_init() {
g_locale.int_n_sign_posn = CHAR_MAX;
}
-static bool __is_supported_locale(const char* locale) {
- return (strcmp(locale, "") == 0 ||
- strcmp(locale, "C") == 0 ||
- strcmp(locale, "C.UTF-8") == 0 ||
- strcmp(locale, "en_US.UTF-8") == 0 ||
- strcmp(locale, "POSIX") == 0);
+static bool __is_supported_locale(const char* locale_name) {
+ return (strcmp(locale_name, "") == 0 ||
+ strcmp(locale_name, "C") == 0 ||
+ strcmp(locale_name, "C.UTF-8") == 0 ||
+ strcmp(locale_name, "en_US.UTF-8") == 0 ||
+ strcmp(locale_name, "POSIX") == 0);
+}
+
+static bool __is_utf8_locale(const char* locale_name) {
+ return (*locale_name == '\0' || strstr(locale_name, "UTF-8"));
}
lconv* localeconv() {
@@ -133,7 +138,7 @@ locale_t newlocale(int category_mask, const char* locale_name, locale_t /*base*/
return NULL;
}
- return new __locale_t(strstr(locale_name, "UTF-8") != NULL ? 4 : 1);
+ return new __locale_t(__is_utf8_locale(locale_name) ? 4 : 1);
}
char* setlocale(int category, const char* locale_name) {
@@ -150,7 +155,7 @@ char* setlocale(int category, const char* locale_name) {
errno = ENOENT;
return NULL;
}
- __bionic_current_locale_is_utf8 = (strstr(locale_name, "UTF-8") != NULL);
+ __bionic_current_locale_is_utf8 = __is_utf8_locale(locale_name);
}
return const_cast<char*>(__bionic_current_locale_is_utf8 ? "C.UTF-8" : "C");
diff --git a/libc/bionic/wchar.cpp b/libc/bionic/wchar.cpp
index d28888d36..7717e10c6 100644
--- a/libc/bionic/wchar.cpp
+++ b/libc/bionic/wchar.cpp
@@ -61,7 +61,7 @@ size_t mbrtowc(wchar_t* pwc, const char* s, size_t n, mbstate_t* ps) {
static mbstate_t __private_state;
mbstate_t* state = (ps == NULL) ? &__private_state : ps;
- // Our wchar_t is UTF-32
+ // Our wchar_t is UTF-32.
return mbrtoc32(reinterpret_cast<char32_t*>(pwc), s, n, state);
}
diff --git a/tests/locale_test.cpp b/tests/locale_test.cpp
index f308af534..8b38c407e 100644
--- a/tests/locale_test.cpp
+++ b/tests/locale_test.cpp
@@ -59,9 +59,10 @@ TEST(locale, setlocale) {
EXPECT_EQ(EINVAL, errno);
#if defined(__BIONIC__)
- // The "" locale is implementation-defined. For bionic, it's the C locale.
+ // The "" locale is implementation-defined. For bionic, it's the C.UTF-8 locale, which is
+ // pretty much all we support anyway.
// glibc will give us something like "en_US.UTF-8", depending on the user's configuration.
- EXPECT_STREQ("C", setlocale(LC_ALL, ""));
+ EXPECT_STREQ("C.UTF-8", setlocale(LC_ALL, ""));
#endif
EXPECT_STREQ("C", setlocale(LC_ALL, "C"));
EXPECT_STREQ("C", setlocale(LC_ALL, "POSIX"));