From 2e98eb6be123ec64070f7524ff24df538695d7c8 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Thu, 29 Oct 2015 14:06:07 -0700 Subject: Tailor grapheme boundaries so sequence emoji are one grapheme - DO NOT MERGE Make it so it's not possible to position the cursor inside an emoji formed by a sequence including zero-width joiners. Bug: 25368653 Change-Id: I67ec0874cd1505f3c82ab91492ffc3d39a52fae6 --- libs/minikin/GraphemeBreak.cpp | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/libs/minikin/GraphemeBreak.cpp b/libs/minikin/GraphemeBreak.cpp index f8f386c..56d5b23 100644 --- a/libs/minikin/GraphemeBreak.cpp +++ b/libs/minikin/GraphemeBreak.cpp @@ -22,6 +22,19 @@ namespace android { +// Returns true if the character appears before or after zwj in a zwj emoji sequence. See +// http://www.unicode.org/emoji/charts/emoji-zwj-sequences.html +bool isZwjEmoji(uint32_t c) { + return (c == 0x2764 // HEAVY BLACK HEART + || c == 0x1F468 // MAN + || c == 0x1F469 // WOMAN + || c == 0x1F48B // KISS MARK + || c == 0x1F466 // BOY + || c == 0x1F467 // GIRL + || c == 0x1F441 // EYE + || c == 0x1F5E8); // LEFT SPEECH BUBBLE +} + bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t count, size_t offset) { // This implementation closely follows Unicode Standard Annex #29 on @@ -93,6 +106,19 @@ bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t co && u_getIntPropertyValue(c2, UCHAR_GENERAL_CATEGORY) == U_OTHER_LETTER) { return false; } + // Tailoring: make emoji sequences with ZWJ a single grapheme cluster + if (c1 == 0x200D && isZwjEmoji(c2) && offset_back > start) { + // look at character before ZWJ to see that both can participate in an emoji zwj sequence + uint32_t c0 = 0; + U16_PREV(buf, start, offset_back, c0); + if (c0 == 0xFE0F && offset_back > start) { + // skip over emoji variation selector + U16_PREV(buf, start, offset_back, c0); + } + if (isZwjEmoji(c0)) { + return false; + } + } // Rule GB10, Any / Any return true; } -- cgit v1.2.3