diff options
author | Roozbeh Pournader <roozbeh@google.com> | 2016-05-26 01:44:33 +0000 |
---|---|---|
committer | android-build-merger <android-build-merger@google.com> | 2016-05-26 01:44:33 +0000 |
commit | 8614b0420cef56449c0db72f5098360c31867b06 (patch) | |
tree | 83100501579fcb63a6cbcb18c7c6dfe856d85a69 | |
parent | 3bee494eead989500c38d98f7f863986e95e25e0 (diff) | |
parent | 74b56175e5d41c1c1dc992208842b5576973d452 (diff) | |
download | android_frameworks_minikin-8614b0420cef56449c0db72f5098360c31867b06.tar.gz android_frameworks_minikin-8614b0420cef56449c0db72f5098360c31867b06.tar.bz2 android_frameworks_minikin-8614b0420cef56449c0db72f5098360c31867b06.zip |
Do not break after Myanmar viramas
am: 74b56175e5
* commit '74b56175e5d41c1c1dc992208842b5576973d452':
Do not break after Myanmar viramas
Change-Id: Idb9303889ac87853a730cdb25fba7faaaf352b93
-rw-r--r-- | libs/minikin/WordBreaker.cpp | 10 | ||||
-rw-r--r-- | tests/WordBreakerTests.cpp | 13 |
2 files changed, 22 insertions, 1 deletion
diff --git a/libs/minikin/WordBreaker.cpp b/libs/minikin/WordBreaker.cpp index 34e7a93..38f03ca 100644 --- a/libs/minikin/WordBreaker.cpp +++ b/libs/minikin/WordBreaker.cpp @@ -76,12 +76,20 @@ static bool isBreakValid(const uint16_t* buf, size_t bufEnd, size_t i) { if (codePoint == CHAR_SOFT_HYPHEN) { return false; } + // For Myanmar kinzi sequences, created by <consonant, ASAT, VIRAMA, consonant>. This is to go + // around a bug in ICU line breaking: http://bugs.icu-project.org/trac/ticket/12561. To avoid + // too much looking around in the strings, we simply avoid breaking after any Myanmar virama, + // where no line break could be imagined, since the Myanmar virama is a pure stacker. + if (codePoint == 0x1039) { // MYANMAR SIGN VIRAMA + return false; + } + uint32_t next_codepoint; size_t next_offset = i; U16_NEXT(buf, next_offset, bufEnd, next_codepoint); // Proposed change to LB24 from http://www.unicode.org/L2/L2016/16043r-line-break-pr-po.txt - //(AL | HL) × (PR | PO) + // (AL | HL) × (PR | PO) int32_t lineBreak = u_getIntPropertyValue(codePoint, UCHAR_LINE_BREAK); if (lineBreak == U_LB_ALPHABETIC || lineBreak == U_LB_HEBREW_LETTER) { lineBreak = u_getIntPropertyValue(next_codepoint, UCHAR_LINE_BREAK); diff --git a/tests/WordBreakerTests.cpp b/tests/WordBreakerTests.cpp index 9fa9da3..8ed87cc 100644 --- a/tests/WordBreakerTests.cpp +++ b/tests/WordBreakerTests.cpp @@ -85,6 +85,19 @@ TEST_F(WordBreakerTest, postfixAndPrefix) { EXPECT_EQ((ssize_t)NELEM(buf), breaker.wordEnd()); } +TEST_F(WordBreakerTest, MyanmarKinzi) { + uint16_t buf[] = {0x1004, 0x103A, 0x1039, 0x1000, 0x102C}; // NGA, ASAT, VIRAMA, KA, AA + WordBreaker breaker; + icu::Locale burmese("my"); + breaker.setLocale(burmese); + breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, breaker.current()); + + EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end of string + EXPECT_EQ(0, breaker.wordStart()); + EXPECT_EQ((ssize_t)NELEM(buf), breaker.wordEnd()); +} + TEST_F(WordBreakerTest, zwjEmojiSequences) 
{ uint16_t buf[] = { // man + zwj + heart + zwj + man |