summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRaph Levien <raph@google.com>2014-06-15 17:33:29 -0700
committerRaph Levien <raph@google.com>2014-06-19 09:49:50 -0700
commit3d28a3fcebfed4744d1ef0307a8bdc8fc01e364c (patch)
treef0de08356315147c20a916d6dc56f66a4774a0e7
parentbb601b67dd05947f92cc23092bfb8a059c2e3377 (diff)
downloadandroid_frameworks_minikin-3d28a3fcebfed4744d1ef0307a8bdc8fc01e364c.tar.gz
android_frameworks_minikin-3d28a3fcebfed4744d1ef0307a8bdc8fc01e364c.tar.bz2
android_frameworks_minikin-3d28a3fcebfed4744d1ef0307a8bdc8fc01e364c.zip
Implement grapheme cluster breaking
This patch includes an implementation of grapheme cluster breaking, which is especially useful for repositioning the cursor for left and right arrow key presses. The implementation is closely based on Unicode TR29, and uses the ICU grapheme cluster break property, but is tailored to more closely match the existing implementation and expected behavior. Part of a fix for b/15653110 Improve behavior of arrow keys in EditText Change-Id: I8eb742f77039c9ab7b2838285018cf8a8fc88343
-rw-r--r--include/minikin/GraphemeBreak.h47
-rw-r--r--libs/minikin/Android.mk1
-rw-r--r--libs/minikin/GraphemeBreak.cpp132
3 files changed, 180 insertions, 0 deletions
diff --git a/include/minikin/GraphemeBreak.h b/include/minikin/GraphemeBreak.h
new file mode 100644
index 0000000..3120101
--- /dev/null
+++ b/include/minikin/GraphemeBreak.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINIKIN_GRAPHEME_BREAK_H
+#define MINIKIN_GRAPHEME_BREAK_H
+
+// Keep this header self-contained: the declarations below use size_t and
+// uint16_t, so do not rely on the including file to provide them.
+#include <stddef.h>
+#include <stdint.h>
+
+namespace android {
+
+// Static helpers for finding grapheme cluster boundaries in UTF-16 text.
+class GraphemeBreak {
+public:
+    // Cursor movement modes accepted by getTextRunCursor().
+    // These values must be kept in sync with CURSOR_AFTER etc in Paint.java
+    enum MoveOpt {
+        AFTER = 0,         // step forward one unit (if not at the end), then on to a break
+        AT_OR_AFTER = 1,   // stay if already on a break, otherwise move forward to one
+        BEFORE = 2,        // step back one unit (if not at the start), then back to a break
+        AT_OR_BEFORE = 3,  // stay if already on a break, otherwise move back to one
+        AT = 4             // only report whether the offset itself is a break
+    };
+
+    // Determine whether the given offset is a grapheme break.
+    // This implementation generally follows Unicode TR29 extended
+    // grapheme break, but with some tweaks to more closely match
+    // existing implementations.
+    //
+    // buf holds UTF-16 code units; the text run under consideration is
+    // buf[start .. start + count). offset is an index into buf (not relative
+    // to start). Offsets at or outside either end of the run are reported as
+    // breaks.
+    static bool isGraphemeBreak(const uint16_t* buf, size_t start, size_t count, size_t offset);
+
+    // Matches Android's Java API. Returns the cursor offset obtained by
+    // applying opt to offset within the run buf[start .. start + count).
+    // Note, return (size_t)-1 for AT to signal non-break because the return
+    // type is unsigned.
+    static size_t getTextRunCursor(const uint16_t* buf, size_t start, size_t count,
+            size_t offset, MoveOpt opt);
+};
+
+}  // namespace android
+
+#endif // MINIKIN_GRAPHEME_BREAK_H \ No newline at end of file
diff --git a/libs/minikin/Android.mk b/libs/minikin/Android.mk
index a1d88c2..fd949c2 100644
--- a/libs/minikin/Android.mk
+++ b/libs/minikin/Android.mk
@@ -23,6 +23,7 @@ LOCAL_SRC_FILES := \
CssParse.cpp \
FontCollection.cpp \
FontFamily.cpp \
+ GraphemeBreak.cpp \
Layout.cpp \
MinikinInternal.cpp \
MinikinRefCounted.cpp \
diff --git a/libs/minikin/GraphemeBreak.cpp b/libs/minikin/GraphemeBreak.cpp
new file mode 100644
index 0000000..5d8978d
--- /dev/null
+++ b/libs/minikin/GraphemeBreak.cpp
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdint.h>
+#include <unicode/uchar.h>
+#include <unicode/utf16.h>
+
+#include <minikin/GraphemeBreak.h>
+
+namespace android {
+
+// Returns true if |offset| is a grapheme cluster boundary within the run of
+// UTF-16 code units buf[start .. start + count).
+//
+// |offset| is an index into |buf| (not relative to |start|). Any offset at or
+// before the start of the run, or at or past its end, is reported as a break.
+bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t count,
+        size_t offset) {
+    // This implementation closely follows Unicode Standard Annex #29 on
+    // Unicode Text Segmentation (http://www.unicode.org/reports/tr29/),
+    // implementing a tailored version of extended grapheme clusters.
+    // The GB rules refer to section 3.1.1, Grapheme Cluster Boundary Rules.
+
+    // One past the last code unit of the run.
+    const size_t limit = start + count;
+
+    // Rule GB1, sot /; Rule GB2, / eot
+    if (offset <= start || offset >= limit) {
+        return true;
+    }
+    if (U16_IS_TRAIL(buf[offset])) {
+        // Don't break a surrogate pair, but a lone trail surrogate (one not
+        // preceded by a lead surrogate) does start on a boundary. offset > start
+        // is guaranteed by the check above, so buf[offset - 1] is inside the run.
+        return !U16_IS_LEAD(buf[offset - 1]);
+    }
+    uint32_t c1 = 0;
+    uint32_t c2 = 0;
+    size_t offset_back = offset;
+    // c1 is the code point ending at offset, c2 the code point starting there.
+    U16_PREV(buf, start, offset_back, c1);
+    // U16_NEXT takes the limit *index*, not a length relative to start.
+    U16_NEXT(buf, offset, limit, c2);
+    int32_t p1 = u_getIntPropertyValue(c1, UCHAR_GRAPHEME_CLUSTER_BREAK);
+    int32_t p2 = u_getIntPropertyValue(c2, UCHAR_GRAPHEME_CLUSTER_BREAK);
+    // Rule GB3, CR x LF
+    if (p1 == U_GCB_CR && p2 == U_GCB_LF) {
+        return false;
+    }
+    // Rule GB4, (Control | CR | LF) /
+    if (p1 == U_GCB_CONTROL || p1 == U_GCB_CR || p1 == U_GCB_LF) {
+        return true;
+    }
+    // Rule GB5, / (Control | CR | LF)
+    if (p2 == U_GCB_CONTROL || p2 == U_GCB_CR || p2 == U_GCB_LF) {
+        // exclude zero-width control characters from breaking (tailoring of TR29)
+        if (c2 == 0x00ad
+                || (c2 >= 0x200b && c2 <= 0x200f)
+                || (c2 >= 0x2028 && c2 <= 0x202e)
+                || (c2 >= 0x2060 && c2 <= 0x206f)) {
+            return false;
+        }
+        return true;
+    }
+    // Rule GB6, L x ( L | V | LV | LVT )
+    if (p1 == U_GCB_L && (p2 == U_GCB_L || p2 == U_GCB_V || p2 == U_GCB_LV || p2 == U_GCB_LVT)) {
+        return false;
+    }
+    // Rule GB7, ( LV | V ) x ( V | T )
+    if ((p1 == U_GCB_LV || p1 == U_GCB_V) && (p2 == U_GCB_V || p2 == U_GCB_T)) {
+        return false;
+    }
+    // Rule GB8, ( LVT | T ) x T
+    if ((p1 == U_GCB_LVT || p1 == U_GCB_T) && p2 == U_GCB_T) {
+        return false;
+    }
+    // Rule GB8a, Regional_Indicator x Regional_Indicator
+    if (p1 == U_GCB_REGIONAL_INDICATOR && p2 == U_GCB_REGIONAL_INDICATOR) {
+        return false;
+    }
+    // Rule GB9, x Extend; Rule GB9a, x SpacingMark
+    if (p2 == U_GCB_EXTEND || p2 == U_GCB_SPACING_MARK) {
+        if (c2 == 0xe33) {
+            // most other implementations break THAI CHARACTER SARA AM
+            // (tailoring of TR29)
+            return true;
+        }
+        return false;
+    }
+    // Cluster indic syllables together (tailoring of TR29)
+    if (u_getIntPropertyValue(c1, UCHAR_CANONICAL_COMBINING_CLASS) == 9  // virama
+            && u_getIntPropertyValue(c2, UCHAR_GENERAL_CATEGORY) == U_OTHER_LETTER) {
+        return false;
+    }
+    // Rule GB10, Any / Any
+    return true;
+}
+
+// Moves |offset| to a grapheme break inside buf[start .. start + count)
+// according to |opt| and returns the resulting offset. For AT the offset is
+// returned unchanged when it is a break, and (size_t)-1 otherwise.
+size_t GraphemeBreak::getTextRunCursor(const uint16_t* buf, size_t start, size_t count,
+        size_t offset, MoveOpt opt) {
+    if (opt == AFTER || opt == AT_OR_AFTER) {
+        // AFTER first steps off the current position, unless already at the end.
+        if (opt == AFTER && offset < start + count) {
+            ++offset;
+        }
+        // Scan forward to the nearest break.
+        while (!isGraphemeBreak(buf, start, count, offset)) {
+            ++offset;
+        }
+        return offset;
+    }
+
+    if (opt == BEFORE || opt == AT_OR_BEFORE) {
+        // BEFORE first steps off the current position, unless already at the start.
+        if (opt == BEFORE && offset > start) {
+            --offset;
+        }
+        // Scan backward to the nearest break.
+        while (!isGraphemeBreak(buf, start, count, offset)) {
+            --offset;
+        }
+        return offset;
+    }
+
+    if (opt == AT && !isGraphemeBreak(buf, start, count, offset)) {
+        // Unsigned return type, so "not a break" is signalled with (size_t)-1.
+        return (size_t)-1;
+    }
+
+    // AT on a break, or an unrecognized option: offset is returned as given.
+    return offset;
+}
+
+} // namespace android