From adfa580f1f067c846509b4346e5be2cb19177c1b Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Mon, 22 Feb 2016 13:28:44 -0800 Subject: Suppress grapheme cluster breaks in emoji with modifiers An emoji with a modifier should be treated as a single grapheme, i.e. it should not be possible to place the cursor between the base and modifier. This patch implements the proposed Rule GB9c from Mark Davis's proposal entitled "Fixing breaking properties for emoji", L2/16-011R3. The patch also skips over variation sequences attached the to the preceding character, for computing grapheme cluster boundaries. Bug: 26829153 Change-Id: Iff5bc2bb8e5246223a017c7cf33acfbf63817f16 --- libs/minikin/GraphemeBreak.cpp | 53 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'libs') diff --git a/libs/minikin/GraphemeBreak.cpp b/libs/minikin/GraphemeBreak.cpp index 7865d1d..4141091 100644 --- a/libs/minikin/GraphemeBreak.cpp +++ b/libs/minikin/GraphemeBreak.cpp @@ -77,6 +77,48 @@ bool isZwjEmoji(uint32_t c) { || c == 0x1F5E8); // LEFT SPEECH BUBBLE } +// Based on Modifiers from http://www.unicode.org/L2/L2016/16011-data-file.txt +bool isEmojiModifier(uint32_t c) { + return (0x1F3FB <= c && c <= 0x1F3FF); +} + +// Based on Emoji_Modifier_Base from +// http://www.unicode.org/Public/emoji/3.0/emoji-data.txt +bool isEmojiBase(uint32_t c) { + if (0x261D <= c && c <= 0x270D) { + return (c == 0x261D || c == 0x26F9 || (0x270A <= c && c <= 0x270D)); + } else if (0x1F385 <= c && c <= 0x1F93E) { + return (c == 0x1F385 + || (0x1F3C3 <= c || c <= 0x1F3C4) + || (0x1F3CA <= c || c <= 0x1F3CB) + || (0x1F442 <= c || c <= 0x1F443) + || (0x1F446 <= c || c <= 0x1F450) + || (0x1F466 <= c || c <= 0x1F469) + || c == 0x1F46E + || (0x1F470 <= c || c <= 0x1F478) + || c == 0x1F47C + || (0x1F481 <= c || c <= 0x1F483) + || (0x1F485 <= c || c <= 0x1F487) + || c == 0x1F4AA + || c == 0x1F575 + || c == 0x1F57A + || c == 0x1F590 + || (0x1F595 <= c || c <= 0x1F596) + || (0x1F645 <= c || c <= 0x1F647) + || (0x1F64B <= c || c <= 0x1F64F) + || c == 0x1F6A3 + || (0x1F6B4 <= c || c <= 0x1F6B6) + || c == 0x1F6C0 + || (0x1F918 <= c || c <= 0x1F91E) + || c == 0x1F926 + || c == 0x1F930 + || (0x1F933 <= c || c <= 0x1F939) + || (0x1F93B <= c || c <= 0x1F93E)); + } else { + return false; + } +} + bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t count, size_t offset) { // This implementation closely follows Unicode Standard Annex #29 on @@ -165,6 +207,17 @@ bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t co return false; } } + // Proposed Rule GB9c from http://www.unicode.org/L2/L2016/16011r3-break-prop-emoji.pdf + // E_Base x E_Modifier + if (isEmojiModifier(c2)) { + if (c1 == 0xFE0F && offset_back > start) { + // skip over emoji variation selector + U16_PREV(buf, start, offset_back, c1); + } + if (isEmojiBase(c1)) { + return false; + } + } // Rule GB10, Any รท Any return true; } -- cgit v1.2.3