Do not break after Myanmar viramas

This is to work around a bug in ICU's line breaker, which thinks there is a valid line break between a Myanmar kinzi and a consonant. See http://bugs.icu-project.org/trac/ticket/12561 for the ICU bug.

Bug: 28964845
Change-Id: I076ac15077e5627cbccf6732900bcc60d8596dda
author: Roozbeh Pournader <roozbeh@google.com> 2016-05-25 16:46:56 -0700
committer: Roozbeh Pournader <roozbeh@google.com> 2016-05-25 16:46:56 -0700
commit: 74b56175e5d41c1c1dc992208842b5576973d452 (patch)
tree: 83100501579fcb63a6cbcb18c7c6dfe856d85a69
parent: 77f488345316fba46c271fc04bea470819ae1712 (diff)
download: android_frameworks_minikin-74b56175e5d41c1c1dc992208842b5576973d452.tar.gz
android_frameworks_minikin-74b56175e5d41c1c1dc992208842b5576973d452.tar.bz2
android_frameworks_minikin-74b56175e5d41c1c1dc992208842b5576973d452.zip
2 files changed, 22 insertions, 1 deletions
diff --git a/libs/minikin/WordBreaker.cpp b/libs/minikin/WordBreaker.cpp
index 34e7a93..38f03ca 100644
--- a/libs/minikin/WordBreaker.cpp
+++ b/libs/minikin/WordBreaker.cpp
@@ -76,12 +76,20 @@ static bool isBreakValid(const uint16_t* buf, size_t bufEnd, size_t i) {
     if (codePoint == CHAR_SOFT_HYPHEN) {
         return false;
     }
+    // For Myanmar kinzi sequences, created by <consonant, ASAT, VIRAMA, consonant>. This is to go
+    // around a bug in ICU line breaking: http://bugs.icu-project.org/trac/ticket/12561. To avoid
+    // too much looking around in the strings, we simply avoid breaking after any Myanmar virama,
+    // where no line break could be imagined, since the Myanmar virama is a pure stacker.
+    if (codePoint == 0x1039) {  // MYANMAR SIGN VIRAMA
+        return false;
+    }
+
     uint32_t next_codepoint;
     size_t next_offset = i;
     U16_NEXT(buf, next_offset, bufEnd, next_codepoint);
 
     // Proposed change to LB24 from http://www.unicode.org/L2/L2016/16043r-line-break-pr-po.txt
-    //(AL | HL) × (PR | PO)
+    // (AL | HL) × (PR | PO)
     int32_t lineBreak = u_getIntPropertyValue(codePoint, UCHAR_LINE_BREAK);
     if (lineBreak == U_LB_ALPHABETIC || lineBreak == U_LB_HEBREW_LETTER) {
         lineBreak = u_getIntPropertyValue(next_codepoint, UCHAR_LINE_BREAK);
diff --git a/tests/WordBreakerTests.cpp b/tests/WordBreakerTests.cpp
index 9fa9da3..8ed87cc 100644
--- a/tests/WordBreakerTests.cpp
+++ b/tests/WordBreakerTests.cpp
@@ -85,6 +85,19 @@ TEST_F(WordBreakerTest, postfixAndPrefix) {
     EXPECT_EQ((ssize_t)NELEM(buf), breaker.wordEnd());
 }
 
+TEST_F(WordBreakerTest, MyanmarKinzi) {
+    uint16_t buf[] = {0x1004, 0x103A, 0x1039, 0x1000, 0x102C};  // NGA, ASAT, VIRAMA, KA, AA
+    WordBreaker breaker;
+    icu::Locale burmese("my");
+    breaker.setLocale(burmese);
+    breaker.setText(buf, NELEM(buf));
+    EXPECT_EQ(0, breaker.current());
+
+    EXPECT_EQ((ssize_t)NELEM(buf), breaker.next());  // end of string: no earlier break
+    EXPECT_EQ(0, breaker.wordStart());
+    EXPECT_EQ((ssize_t)NELEM(buf), breaker.wordEnd());
+}
+
 TEST_F(WordBreakerTest, zwjEmojiSequences) {
     uint16_t buf[] = {
         // man + zwj + heart + zwj + man
author	Roozbeh Pournader <roozbeh@google.com>	2016-05-25 16:46:56 -0700
committer	Roozbeh Pournader <roozbeh@google.com>	2016-05-25 16:46:56 -0700
commit	74b56175e5d41c1c1dc992208842b5576973d452 (patch)
tree	83100501579fcb63a6cbcb18c7c6dfe856d85a69
parent	77f488345316fba46c271fc04bea470819ae1712 (diff)
download	android_frameworks_minikin-74b56175e5d41c1c1dc992208842b5576973d452.tar.gz android_frameworks_minikin-74b56175e5d41c1c1dc992208842b5576973d452.tar.bz2 android_frameworks_minikin-74b56175e5d41c1c1dc992208842b5576973d452.zip