aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Elliott Hughes <enh@google.com> 2019-09-27 14:42:39 -0700
committer Michael Bestas <mkbestas@lineageos.org> 2019-12-11 19:06:53 +0200
commit edb3c4489ee8f2f0cf1fbca5f7111086f7318aac (patch)
tree 0b0d3b1e9a0dbc4ab5093286589275d2fc74c791
parent f21a5b97f8d849c074f82ee7f7ea70ab2285d8bc (diff)
download android_bionic-staging/lineage-17.0_merge-android-10.0.0_r9.tar.gz
android_bionic-staging/lineage-17.0_merge-android-10.0.0_r9.tar.bz2
android_bionic-staging/lineage-17.0_merge-android-10.0.0_r9.zip
Optimize tolower(3)/toupper(3) from <ctype.h>.
staging/lineage-17.0_merge-android-10.0.0_r9

The tables in the BSD tolower/toupper are slower for ASCII than just doing the bit twiddling.

We can't actually remove the tables on LP32, so move them into the "cruft" we keep around for backwards compatibility (but remove them for LP64 where they were never exposed).

I noticed that the new bit-twiddling tolower(3) was performing better on arm64 than toupper(3). The 0xdf constant was requiring an extra MOV, and there isn't a BIC that takes an immediate value. Since we've already done the comparison to check that we're in the right range (where the bit is always set), though, we can EOR 0x20 to get the same result as the missing BIC 0x20 in just one instruction.

I've applied that same optimization to towupper(3) too.

Before:

  BM_ctype_tolower_n          3.30 ns  3.30 ns  212353035
  BM_ctype_tolower_y          3.31 ns  3.30 ns  211234204
  BM_ctype_toupper_n          3.30 ns  3.29 ns  214161246
  BM_ctype_toupper_y          3.29 ns  3.28 ns  207643473
  BM_wctype_towupper_ascii_n  3.53 ns  3.53 ns  195944444
  BM_wctype_towupper_ascii_y  3.48 ns  3.48 ns  199233248

After:

  BM_ctype_tolower_n          2.93 ns  2.92 ns  242373703
  BM_ctype_tolower_y          2.88 ns  2.87 ns  245365309
  BM_ctype_toupper_n          2.93 ns  2.93 ns  243049353
  BM_ctype_toupper_y          2.89 ns  2.89 ns  245072521
  BM_wctype_towupper_ascii_n  3.34 ns  3.33 ns  212951912
  BM_wctype_towupper_ascii_y  3.29 ns  3.29 ns  214651254

(Why do both the "y" and "n" variants speed up with the EOR change? Because the compiler transforms the code so that we unconditionally do the bit twiddling and then use CSEL to decide whether or not to actually use the result.)

We also save 1028 bytes of data in the LP64 libc.so.

Test: ran the bionic benchmarks and tests
Change-Id: I7829339f8cb89a58efe539c2a01c51807413aa2d
-rw-r--r-- libc/Android.bp 3
-rw-r--r-- libc/bionic/ctype.cpp 12
-rw-r--r-- libc/bionic/ndk_cruft_data.cpp 94
-rw-r--r-- libc/bionic/wctype.cpp 4
-rw-r--r-- libc/upstream-openbsd/android/include/openbsd-compat.h 7
-rw-r--r-- libc/upstream-openbsd/lib/libc/gen/tolower_.c 61
-rw-r--r-- libc/upstream-openbsd/lib/libc/gen/toupper_.c 62
7 files changed, 110 insertions, 133 deletions
diff --git a/libc/Android.bp b/libc/Android.bp
index 8103354f4..6031b1661 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -24,6 +24,7 @@ libc_common_src_files = [
// ========================================================
libc_common_src_files += [
"bionic/ndk_cruft.cpp",
+ "bionic/ndk_cruft_data.cpp",
]
libc_common_src_files_32 = [
@@ -386,8 +387,6 @@ cc_library_static {
"upstream-openbsd/lib/libc/gen/getprogname.c",
"upstream-openbsd/lib/libc/gen/isctype.c",
"upstream-openbsd/lib/libc/gen/setprogname.c",
- "upstream-openbsd/lib/libc/gen/tolower_.c",
- "upstream-openbsd/lib/libc/gen/toupper_.c",
"upstream-openbsd/lib/libc/gen/verr.c",
"upstream-openbsd/lib/libc/gen/verrx.c",
"upstream-openbsd/lib/libc/gen/vwarn.c",
diff --git a/libc/bionic/ctype.cpp b/libc/bionic/ctype.cpp
index 2b31d5268..b72935ba4 100644
--- a/libc/bionic/ctype.cpp
+++ b/libc/bionic/ctype.cpp
@@ -83,3 +83,15 @@ int toupper_l(int c, locale_t) {
int tolower_l(int c, locale_t) {
return tolower(c);
}
+
+int tolower(int c) {
+ if (c >= 'A' && c <= 'Z') return c | 0x20;
+ return c;
+}
+
+int toupper(int c) {
+ // Using EOR rather than AND makes no difference on arm, but saves an
+ // instruction on arm64.
+ if (c >= 'a' && c <= 'z') return c ^ 0x20;
+ return c;
+}
diff --git a/libc/bionic/ndk_cruft_data.cpp b/libc/bionic/ndk_cruft_data.cpp
new file mode 100644
index 000000000..e512b4a57
--- /dev/null
+++ b/libc/bionic/ndk_cruft_data.cpp
@@ -0,0 +1,94 @@
+
+// Ancient NDKs' <ctype.h> contained inline references to these tables.
+
+#if !defined(__LP64__)
+
+/* $OpenBSD: tolower_.c,v 1.11 2015/09/19 04:02:21 guenther Exp $ */
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+static const short _C_tolower_[] = {
+ -1,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+ 0x40, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
+ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
+ 'x', 'y', 'z', 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
+ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
+ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
+ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
+};
+
+const short *_tolower_tab_ = _C_tolower_;
+
+/* $OpenBSD: toupper_.c,v 1.12 2015/09/19 04:02:21 guenther Exp $ */
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+static const short _C_toupper_[] = {
+ -1,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
+ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
+ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+ 0x60, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
+ 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
+ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
+ 'X', 'Y', 'Z', 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
+ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
+ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
+ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
+};
+
+const short *_toupper_tab_ = _C_toupper_;
+
+#endif
diff --git a/libc/bionic/wctype.cpp b/libc/bionic/wctype.cpp
index 061f55a90..082dadaf4 100644
--- a/libc/bionic/wctype.cpp
+++ b/libc/bionic/wctype.cpp
@@ -129,7 +129,9 @@ wint_t towlower(wint_t wc) {
wint_t towupper(wint_t wc) {
if (wc < 0x80) {
- if (wc >= 'a' && wc <= 'z') return wc & 0xdf;
+ // Using EOR rather than AND makes no difference on arm, but saves an
+ // instruction on arm64.
+ if (wc >= 'a' && wc <= 'z') return wc ^ 0x20;
return wc;
}
diff --git a/libc/upstream-openbsd/android/include/openbsd-compat.h b/libc/upstream-openbsd/android/include/openbsd-compat.h
index c99e2ce9d..2c8c7359f 100644
--- a/libc/upstream-openbsd/android/include/openbsd-compat.h
+++ b/libc/upstream-openbsd/android/include/openbsd-compat.h
@@ -80,13 +80,6 @@ extern const char* __progname;
__LIBC_HIDDEN__ extern const char* __bionic_get_shell_path();
#define _PATH_BSHELL __bionic_get_shell_path()
-/* LP32 NDK ctype.h contained references to these. */
-__LIBC32_LEGACY_PUBLIC__ extern const short* _tolower_tab_;
-__LIBC32_LEGACY_PUBLIC__ extern const short* _toupper_tab_;
-
-__LIBC_HIDDEN__ extern const char _C_ctype_[];
-__LIBC_HIDDEN__ extern const short _C_toupper_[];
-__LIBC_HIDDEN__ extern const short _C_tolower_[];
__LIBC_HIDDEN__ extern char* __findenv(const char*, int, int*);
__LIBC_HIDDEN__ extern char* _mktemp(char*);
diff --git a/libc/upstream-openbsd/lib/libc/gen/tolower_.c b/libc/upstream-openbsd/lib/libc/gen/tolower_.c
deleted file mode 100644
index 2402c42fc..000000000
--- a/libc/upstream-openbsd/lib/libc/gen/tolower_.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/* $OpenBSD: tolower_.c,v 1.11 2015/09/19 04:02:21 guenther Exp $ */
-/*
- * Written by J.T. Conklin <jtc@netbsd.org>.
- * Public domain.
- */
-
-#define _ANSI_LIBRARY
-#include <ctype.h>
-#include <stdio.h>
-#include "ctype_private.h"
-
-const short _C_tolower_[1 + CTYPE_NUM_CHARS] = {
- EOF,
- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
- 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
- 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
- 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
- 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
- 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
- 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
- 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
- 0x40, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
- 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
- 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
- 'x', 'y', 'z', 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
- 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
- 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
- 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
- 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
- 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
- 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
- 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
- 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
- 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
- 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
- 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
- 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
- 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
- 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
- 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
- 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
- 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
- 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
- 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
- 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
-};
-
-const short *_tolower_tab_ = _C_tolower_;
-#if 0
-DEF_STRONG(_tolower_tab_);
-#endif
-
-#undef tolower
-int
-tolower(int c)
-{
- if ((unsigned int)c > 255)
- return(c);
- return((_tolower_tab_ + 1)[c]);
-}
-DEF_STRONG(tolower);
diff --git a/libc/upstream-openbsd/lib/libc/gen/toupper_.c b/libc/upstream-openbsd/lib/libc/gen/toupper_.c
deleted file mode 100644
index 8408f9e0e..000000000
--- a/libc/upstream-openbsd/lib/libc/gen/toupper_.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/* $OpenBSD: toupper_.c,v 1.12 2015/09/19 04:02:21 guenther Exp $ */
-/*
- * Written by J.T. Conklin <jtc@netbsd.org>.
- * Public domain.
- */
-
-#define _ANSI_LIBRARY
-#include <ctype.h>
-#include <stdio.h>
-
-#include "ctype_private.h"
-
-const short _C_toupper_[1 + CTYPE_NUM_CHARS] = {
- EOF,
- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
- 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
- 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
- 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
- 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
- 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
- 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
- 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
- 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
- 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
- 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
- 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
- 0x60, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
- 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
- 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
- 'X', 'Y', 'Z', 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
- 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
- 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
- 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
- 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
- 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
- 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
- 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
- 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
- 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
- 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
- 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
- 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
- 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
- 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
- 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
- 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
-};
-
-const short *_toupper_tab_ = _C_toupper_;
-#if 0
-DEF_STRONG(_toupper_tab_);
-#endif
-
-#undef toupper
-int
-toupper(int c)
-{
- if ((unsigned int)c > 255)
- return(c);
- return((_toupper_tab_ + 1)[c]);
-}
-DEF_STRONG(toupper);