summaryrefslogtreecommitdiffstats
path: root/libs/minikin/GraphemeBreak.cpp
diff options
context:
space:
mode:
authorRaph Levien <raph@google.com>2016-02-22 13:28:44 -0800
committerRaph Levien <raph@google.com>2016-02-24 12:43:36 -0800
commitadfa580f1f067c846509b4346e5be2cb19177c1b (patch)
tree1f91289c4abbff4b3d971327d0973ae6d46a4aff /libs/minikin/GraphemeBreak.cpp
parent30bf8a7c05925a970f1cab87c38bec8dd97fa82e (diff)
downloadandroid_frameworks_minikin-adfa580f1f067c846509b4346e5be2cb19177c1b.tar.gz
android_frameworks_minikin-adfa580f1f067c846509b4346e5be2cb19177c1b.tar.bz2
android_frameworks_minikin-adfa580f1f067c846509b4346e5be2cb19177c1b.zip
Suppress grapheme cluster breaks in emoji with modifiers
An emoji with a modifier should be treated as a single grapheme, i.e. it should not be possible to place the cursor between the base and modifier. This patch implements the proposed Rule GB9c from Mark Davis's proposal entitled "Fixing breaking properties for emoji", L2/16-011R3. The patch also skips over variation sequences attached to the preceding character, for computing grapheme cluster boundaries. Bug: 26829153 Change-Id: Iff5bc2bb8e5246223a017c7cf33acfbf63817f16
Diffstat (limited to 'libs/minikin/GraphemeBreak.cpp')
-rw-r--r--libs/minikin/GraphemeBreak.cpp53
1 files changed, 53 insertions, 0 deletions
diff --git a/libs/minikin/GraphemeBreak.cpp b/libs/minikin/GraphemeBreak.cpp
index 7865d1d..4141091 100644
--- a/libs/minikin/GraphemeBreak.cpp
+++ b/libs/minikin/GraphemeBreak.cpp
@@ -77,6 +77,48 @@ bool isZwjEmoji(uint32_t c) {
|| c == 0x1F5E8); // LEFT SPEECH BUBBLE
}
// Returns true when |c| is one of the emoji modifier code points
// U+1F3FB..U+1F3FF (inclusive).
// Range taken from the Modifiers list in
// http://www.unicode.org/L2/L2016/16011-data-file.txt
bool isEmojiModifier(uint32_t c) {
    const uint32_t kFirstModifier = 0x1F3FB;
    const uint32_t kLastModifier = 0x1F3FF;
    if (c < kFirstModifier) {
        return false;
    }
    return c <= kLastModifier;
}
+
// Returns true when |c| is an emoji modifier base, i.e. a character that an
// emoji modifier may attach to (used for proposed rule GB9c,
// E_Base x E_Modifier).
//
// Based on Emoji_Modifier_Base from
// http://www.unicode.org/Public/emoji/3.0/emoji-data.txt
//
// The two outer range checks are only a cheap pre-filter: code points outside
// U+261D..U+270D and U+1F385..U+1F93E are rejected immediately. Inside those
// spans each sub-range must be tested with a conjunction
// (lo <= c && c <= hi); a disjunction is true for every value and would
// classify the whole span as emoji base.
bool isEmojiBase(uint32_t c) {
    if (0x261D <= c && c <= 0x270D) {
        return (c == 0x261D || c == 0x26F9 || (0x270A <= c && c <= 0x270D));
    } else if (0x1F385 <= c && c <= 0x1F93E) {
        return (c == 0x1F385
                || (0x1F3C3 <= c && c <= 0x1F3C4)
                || (0x1F3CA <= c && c <= 0x1F3CB)
                || (0x1F442 <= c && c <= 0x1F443)
                || (0x1F446 <= c && c <= 0x1F450)
                || (0x1F466 <= c && c <= 0x1F469)
                || c == 0x1F46E
                || (0x1F470 <= c && c <= 0x1F478)
                || c == 0x1F47C
                || (0x1F481 <= c && c <= 0x1F483)
                || (0x1F485 <= c && c <= 0x1F487)
                || c == 0x1F4AA
                || c == 0x1F575
                || c == 0x1F57A
                || c == 0x1F590
                || (0x1F595 <= c && c <= 0x1F596)
                || (0x1F645 <= c && c <= 0x1F647)
                || (0x1F64B <= c && c <= 0x1F64F)
                || c == 0x1F6A3
                || (0x1F6B4 <= c && c <= 0x1F6B6)
                || c == 0x1F6C0
                || (0x1F918 <= c && c <= 0x1F91E)
                || c == 0x1F926
                || c == 0x1F930
                || (0x1F933 <= c && c <= 0x1F939)
                || (0x1F93B <= c && c <= 0x1F93E));
    } else {
        return false;
    }
}
+
bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t count,
size_t offset) {
// This implementation closely follows Unicode Standard Annex #29 on
@@ -165,6 +207,17 @@ bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t co
return false;
}
}
+ // Proposed Rule GB9c from http://www.unicode.org/L2/L2016/16011r3-break-prop-emoji.pdf
+ // E_Base x E_Modifier
+ if (isEmojiModifier(c2)) {
+ if (c1 == 0xFE0F && offset_back > start) {
+ // skip over emoji variation selector
+ U16_PREV(buf, start, offset_back, c1);
+ }
+ if (isEmojiBase(c1)) {
+ return false;
+ }
+ }
// Rule GB10, Any ÷ Any
return true;
}