diff options
author | Roozbeh Pournader <roozbeh@google.com> | 2015-07-15 12:19:19 -0700 |
---|---|---|
committer | Roozbeh Pournader <roozbeh@google.com> | 2015-07-15 16:31:07 -0700 |
commit | cdd19dadd11a611409c24bb69e6629eab6812d98 (patch) | |
tree | b6b691585f0bfbac92d167a09a0116231d12597a /libs | |
parent | daf62d0eef9e339c9d3269e5eaa2007d25c18194 (diff) | |
download | android_frameworks_minikin-cdd19dadd11a611409c24bb69e6629eab6812d98.tar.gz android_frameworks_minikin-cdd19dadd11a611409c24bb69e6629eab6812d98.tar.bz2 android_frameworks_minikin-cdd19dadd11a611409c24bb69e6629eab6812d98.zip |
Use ICU to lowercase words to hyphenate.
Previously, the standard C tolower() function was used, which
didn't support any characters beyond the basic ASCII letters.
Bug: 22506121
Change-Id: Ibb81121caa29be44fbb59aa98891e9faafc57592
Diffstat (limited to 'libs')
-rw-r--r-- | libs/minikin/Hyphenator.cpp | 17 |
1 file changed, 14 insertions, 3 deletions
```diff
diff --git a/libs/minikin/Hyphenator.cpp b/libs/minikin/Hyphenator.cpp
index c50b386..3eb151b 100644
--- a/libs/minikin/Hyphenator.cpp
+++ b/libs/minikin/Hyphenator.cpp
@@ -16,9 +16,9 @@
 
 #include <vector>
 #include <memory>
-#include <cctype>
 #include <algorithm>
 #include <string>
+#include <unicode/uchar.h>
 
 // HACK: for reading pattern file
 #include <fcntl.h>
@@ -95,8 +95,19 @@ void Hyphenator::hyphenate(vector<uint8_t>* result, const uint16_t* word, size_t
                     hyphenateSoft(result, word, len);
                     return;
                 }
-                // TODO: use locale-sensitive case folding from ICU.
-                c = tolower(c);
+                // TODO: This uses ICU's simple character to character lowercasing, which ignores
+                // the locale, and ignores cases when lowercasing a character results in more than
+                // one character. It should be fixed to consider the locale (in order for it to work
+                // correctly for Turkish and Azerbaijani), as well as support one-to-many, and
+                // many-to-many case conversions (including non-BMP cases).
+                if (c < 0x00C0) { // U+00C0 is the lowest uppercase non-ASCII character
+                    // Convert uppercase ASCII to lowercase ASCII, but keep other characters as-is
+                    if (0x0041 <= c && c <= 0x005A) {
+                        c += 0x0020;
+                    }
+                } else {
+                    c = u_tolower(c);
+                }
             }
             auto search = node->succ.find(c);
             if (search != node->succ.end()) {
```