summaryrefslogtreecommitdiffstats
path: root/libs/minikin/GraphemeBreak.cpp
diff options
context:
space:
mode:
authorRaph Levien <raph@google.com>2014-06-15 17:33:29 -0700
committerRaph Levien <raph@google.com>2014-06-19 09:49:50 -0700
commit3d28a3fcebfed4744d1ef0307a8bdc8fc01e364c (patch)
treef0de08356315147c20a916d6dc56f66a4774a0e7 /libs/minikin/GraphemeBreak.cpp
parentbb601b67dd05947f92cc23092bfb8a059c2e3377 (diff)
downloadandroid_frameworks_minikin-3d28a3fcebfed4744d1ef0307a8bdc8fc01e364c.tar.gz
android_frameworks_minikin-3d28a3fcebfed4744d1ef0307a8bdc8fc01e364c.tar.bz2
android_frameworks_minikin-3d28a3fcebfed4744d1ef0307a8bdc8fc01e364c.zip
Implement grapheme cluster breaking
This patch includes an implementation of grapheme cluster breaking, which is especially useful for repositioning the cursor for left and right arrow key presses. The implementation is closely based on Unicode TR29 and uses the ICU grapheme cluster break property, but is tailored to more closely match the existing implementation and expected behavior. Part of a fix for b/15653110, "Improve behavior of arrow keys in EditText". Change-Id: I8eb742f77039c9ab7b2838285018cf8a8fc88343
Diffstat (limited to 'libs/minikin/GraphemeBreak.cpp')
-rw-r--r--libs/minikin/GraphemeBreak.cpp132
1 files changed, 132 insertions, 0 deletions
diff --git a/libs/minikin/GraphemeBreak.cpp b/libs/minikin/GraphemeBreak.cpp
new file mode 100644
index 0000000..5d8978d
--- /dev/null
+++ b/libs/minikin/GraphemeBreak.cpp
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdint.h>
+#include <unicode/uchar.h>
+#include <unicode/utf16.h>
+
+#include <minikin/GraphemeBreak.h>
+
+namespace android {
+
+/**
+ * Reports whether a grapheme cluster boundary lies immediately before
+ * buf[offset].
+ *
+ * The text under consideration is the UTF-16 code units
+ * buf[start] .. buf[start + count - 1].
+ *
+ * @param buf    UTF-16 code units.
+ * @param start  index of the first code unit of the run.
+ * @param count  number of code units in the run.
+ * @param offset index of the candidate boundary position.
+ * @return true if a cluster may be broken before buf[offset]. Any offset at
+ *         or before start, or at or beyond start + count, is reported as a
+ *         break. Returns false when breaking would split a cluster.
+ */
+bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t count,
+        size_t offset) {
+    // This implementation closely follows Unicode Standard Annex #29 on
+    // Unicode Text Segmentation (http://www.unicode.org/reports/tr29/),
+    // implementing a tailored version of extended grapheme clusters.
+    // The GB rules refer to section 3.1.1, Grapheme Cluster Boundary Rules.
+
+    // Exclusive end index of the run; also the limit handed to U16_NEXT below.
+    const size_t end = start + count;
+
+    // Rule GB1, sot /; Rule GB2, / eot
+    if (offset <= start || offset >= end) {
+        return true;
+    }
+    if (U16_IS_TRAIL(buf[offset])) {
+        // Don't break a surrogate pair
+        return false;
+    }
+    // Decode the code point ending just before offset (c1) and the one
+    // starting at offset (c2). Separate cursors are used because the ICU
+    // macros advance/retreat the index they are given.
+    uint32_t c1 = 0;
+    uint32_t c2 = 0;
+    size_t offset_back = offset;
+    size_t offset_forward = offset;
+    U16_PREV(buf, start, offset_back, c1);
+    // U16_NEXT expects the exclusive end *index* as its limit, not a length
+    // relative to start; passing count alone mis-decodes surrogate pairs (or
+    // reads past the run) whenever start > 0.
+    U16_NEXT(buf, offset_forward, end, c2);
+    int32_t p1 = u_getIntPropertyValue(c1, UCHAR_GRAPHEME_CLUSTER_BREAK);
+    int32_t p2 = u_getIntPropertyValue(c2, UCHAR_GRAPHEME_CLUSTER_BREAK);
+    // Rule GB3, CR x LF
+    if (p1 == U_GCB_CR && p2 == U_GCB_LF) {
+        return false;
+    }
+    // Rule GB4, (Control | CR | LF) /
+    if (p1 == U_GCB_CONTROL || p1 == U_GCB_CR || p1 == U_GCB_LF) {
+        return true;
+    }
+    // Rule GB5, / (Control | CR | LF)
+    if (p2 == U_GCB_CONTROL || p2 == U_GCB_CR || p2 == U_GCB_LF) {
+        // exclude zero-width control characters from breaking (tailoring of TR29)
+        if (c2 == 0x00ad
+                || (c2 >= 0x200b && c2 <= 0x200f)
+                || (c2 >= 0x2028 && c2 <= 0x202e)
+                || (c2 >= 0x2060 && c2 <= 0x206f)) {
+            return false;
+        }
+        return true;
+    }
+    // Rule GB6, L x ( L | V | LV | LVT )
+    if (p1 == U_GCB_L && (p2 == U_GCB_L || p2 == U_GCB_V || p2 == U_GCB_LV || p2 == U_GCB_LVT)) {
+        return false;
+    }
+    // Rule GB7, ( LV | V ) x ( V | T )
+    if ((p1 == U_GCB_LV || p1 == U_GCB_V) && (p2 == U_GCB_V || p2 == U_GCB_T)) {
+        return false;
+    }
+    // Rule GB8, ( LVT | T ) x T
+    // The left-hand side is LVT, not L: a lone leading consonant followed by
+    // a trailing consonant is not covered by this rule.
+    if ((p1 == U_GCB_LVT || p1 == U_GCB_T) && p2 == U_GCB_T) {
+        return false;
+    }
+    // Rule GB8a, Regional_Indicator x Regional_Indicator
+    if (p1 == U_GCB_REGIONAL_INDICATOR && p2 == U_GCB_REGIONAL_INDICATOR) {
+        return false;
+    }
+    // Rule GB9, x Extend; Rule GB9a, x SpacingMark
+    if (p2 == U_GCB_EXTEND || p2 == U_GCB_SPACING_MARK) {
+        if (c2 == 0xe33) {
+            // most other implementations break THAI CHARACTER SARA AM
+            // (tailoring of TR29)
+            return true;
+        }
+        return false;
+    }
+    // Cluster indic syllables together (tailoring of TR29)
+    if (u_getIntPropertyValue(c1, UCHAR_CANONICAL_COMBINING_CLASS) == 9  // virama
+            && u_getIntPropertyValue(c2, UCHAR_GENERAL_CATEGORY) == U_OTHER_LETTER) {
+        return false;
+    }
+    // Rule GB10, Any / Any
+    return true;
+}
+
+/**
+ * Moves a cursor position to a grapheme cluster boundary within the run
+ * buf[start] .. buf[start + count - 1], according to opt:
+ *
+ *   AFTER         step one code unit forward (if not already at the end of
+ *                 the run), then advance to the next boundary.
+ *   AT_OR_AFTER   advance until offset is on a boundary.
+ *   BEFORE        step one code unit back (if not already at the start of
+ *                 the run), then retreat to the previous boundary.
+ *   AT_OR_BEFORE  retreat until offset is on a boundary.
+ *   AT            keep offset if it is on a boundary, otherwise yield
+ *                 (size_t)-1.
+ *
+ * @return the adjusted offset, or (size_t)-1 for AT when offset is not a
+ *         boundary. Any other opt value returns offset unchanged.
+ */
+size_t GraphemeBreak::getTextRunCursor(const uint16_t* buf, size_t start, size_t count,
+        size_t offset, MoveOpt opt) {
+    if (opt == AT) {
+        const bool onBoundary = isGraphemeBreak(buf, start, count, offset);
+        return onBoundary ? offset : static_cast<size_t>(-1);
+    }
+
+    if (opt == AFTER || opt == AT_OR_AFTER) {
+        // AFTER must make progress first, unless already at the end of the run.
+        if (opt == AFTER && offset < start + count) {
+            ++offset;
+        }
+        // isGraphemeBreak reports a break at or past the end, so this stops.
+        for (; !isGraphemeBreak(buf, start, count, offset); ++offset) {
+        }
+        return offset;
+    }
+
+    if (opt == BEFORE || opt == AT_OR_BEFORE) {
+        // BEFORE must make progress first, unless already at the start of the run.
+        if (opt == BEFORE && offset > start) {
+            --offset;
+        }
+        // isGraphemeBreak reports a break at or before the start, so this stops.
+        for (; !isGraphemeBreak(buf, start, count, offset); --offset) {
+        }
+        return offset;
+    }
+
+    return offset;
+}
+
+} // namespace android