summaryrefslogtreecommitdiffstats
path: root/libs
diff options
context:
space:
mode:
author: Roozbeh Pournader <roozbeh@google.com> 2015-07-15 12:19:19 -0700
committer: Roozbeh Pournader <roozbeh@google.com> 2015-07-15 16:31:07 -0700
commit: cdd19dadd11a611409c24bb69e6629eab6812d98 (patch)
tree: b6b691585f0bfbac92d167a09a0116231d12597a /libs
parent: daf62d0eef9e339c9d3269e5eaa2007d25c18194 (diff)
download: android_frameworks_minikin-cdd19dadd11a611409c24bb69e6629eab6812d98.tar.gz
android_frameworks_minikin-cdd19dadd11a611409c24bb69e6629eab6812d98.tar.bz2
android_frameworks_minikin-cdd19dadd11a611409c24bb69e6629eab6812d98.zip
Use ICU to lowercase words to hyphenate.
Previously, the standard C tolower() function was used, which didn't support any characters beyond the basic ASCII letters.

Bug: 22506121
Change-Id: Ibb81121caa29be44fbb59aa98891e9faafc57592
Diffstat (limited to 'libs')
-rw-r--r-- libs/minikin/Hyphenator.cpp | 17
1 file changed, 14 insertions, 3 deletions
diff --git a/libs/minikin/Hyphenator.cpp b/libs/minikin/Hyphenator.cpp
index c50b386..3eb151b 100644
--- a/libs/minikin/Hyphenator.cpp
+++ b/libs/minikin/Hyphenator.cpp
@@ -16,9 +16,9 @@
#include <vector>
#include <memory>
-#include <cctype>
#include <algorithm>
#include <string>
+#include <unicode/uchar.h>
// HACK: for reading pattern file
#include <fcntl.h>
@@ -95,8 +95,19 @@ void Hyphenator::hyphenate(vector<uint8_t>* result, const uint16_t* word, size_t
hyphenateSoft(result, word, len);
return;
}
- // TODO: use locale-sensitive case folding from ICU.
- c = tolower(c);
+ // TODO: This uses ICU's simple character to character lowercasing, which ignores
+ // the locale, and ignores cases when lowercasing a character results in more than
+ // one character. It should be fixed to consider the locale (in order for it to work
+ // correctly for Turkish and Azerbaijani), as well as support one-to-many, and
+ // many-to-many case conversions (including non-BMP cases).
+ if (c < 0x00C0) { // U+00C0 is the lowest uppercase non-ASCII character
+ // Convert uppercase ASCII to lowercase ASCII, but keep other characters as-is
+ if (0x0041 <= c && c <= 0x005A) {
+ c += 0x0020;
+ }
+ } else {
+ c = u_tolower(c);
+ }
}
auto search = node->succ.find(c);
if (search != node->succ.end()) {