summaryrefslogtreecommitdiffstats
path: root/libs/minikin/GraphemeBreak.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'libs/minikin/GraphemeBreak.cpp')
-rw-r--r--libs/minikin/GraphemeBreak.cpp132
1 files changed, 132 insertions, 0 deletions
diff --git a/libs/minikin/GraphemeBreak.cpp b/libs/minikin/GraphemeBreak.cpp
new file mode 100644
index 0000000..5d8978d
--- /dev/null
+++ b/libs/minikin/GraphemeBreak.cpp
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdint.h>
+#include <unicode/uchar.h>
+#include <unicode/utf16.h>
+
+#include <minikin/GraphemeBreak.h>
+
+namespace android {
+
+// Reports whether a grapheme cluster boundary lies immediately before
+// buf[offset] within the UTF-16 run buf[start .. start + count).
+//
+// buf    - UTF-16 code units; indices start .. start + count - 1 are read.
+// start  - index of the first code unit of the run.
+// count  - number of code units in the run.
+// offset - absolute index into buf (not relative to start) to be tested.
+//
+// Returns true when a cursor may be placed at offset, false when offset falls
+// inside a cluster (or inside a surrogate pair).
+bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t count,
+        size_t offset) {
+    // This implementation closely follows Unicode Standard Annex #29 on
+    // Unicode Text Segmentation (http://www.unicode.org/reports/tr29/),
+    // implementing a tailored version of extended grapheme clusters.
+    // The GB rules refer to section 3.1.1, Grapheme Cluster Boundary Rules.
+
+    const size_t end = start + count;
+
+    // Rule GB1, sot /; Rule GB2, / eot
+    if (offset <= start || offset >= end) {
+        return true;
+    }
+    if (U16_IS_TRAIL(buf[offset])) {
+        // Don't break a surrogate pair, but a lone trail surrogate (one that
+        // is not preceded by a lead surrogate) is not part of any pair, so a
+        // break is allowed before it. offset > start here, so offset - 1 is
+        // inside the run.
+        return !U16_IS_LEAD(buf[offset - 1]);
+    }
+    uint32_t c1 = 0;
+    uint32_t c2 = 0;
+    // Work on copies so the macros, which advance their index argument, do not
+    // modify the caller-visible parameter.
+    size_t offset_back = offset;
+    size_t offset_forward = offset;
+    // c1: code point ending just before offset (never reads below start).
+    U16_PREV(buf, start, offset_back, c1);
+    // c2: code point starting at offset. The third argument is the limit
+    // index, in the same coordinates as the offset, hence start + count
+    // rather than count.
+    U16_NEXT(buf, offset_forward, end, c2);
+    int32_t p1 = u_getIntPropertyValue(c1, UCHAR_GRAPHEME_CLUSTER_BREAK);
+    int32_t p2 = u_getIntPropertyValue(c2, UCHAR_GRAPHEME_CLUSTER_BREAK);
+    // Rule GB3, CR x LF
+    if (p1 == U_GCB_CR && p2 == U_GCB_LF) {
+        return false;
+    }
+    // Rule GB4, (Control | CR | LF) /
+    if (p1 == U_GCB_CONTROL || p1 == U_GCB_CR || p1 == U_GCB_LF) {
+        return true;
+    }
+    // Rule GB5, / (Control | CR | LF)
+    if (p2 == U_GCB_CONTROL || p2 == U_GCB_CR || p2 == U_GCB_LF) {
+        // exclude zero-width control characters from breaking (tailoring of TR29)
+        if (c2 == 0x00ad
+                || (c2 >= 0x200b && c2 <= 0x200f)
+                || (c2 >= 0x2028 && c2 <= 0x202e)
+                || (c2 >= 0x2060 && c2 <= 0x206f)) {
+            return false;
+        }
+        return true;
+    }
+    // Rule GB6, L x ( L | V | LV | LVT )
+    if (p1 == U_GCB_L && (p2 == U_GCB_L || p2 == U_GCB_V || p2 == U_GCB_LV || p2 == U_GCB_LVT)) {
+        return false;
+    }
+    // Rule GB7, ( LV | V ) x ( V | T )
+    if ((p1 == U_GCB_LV || p1 == U_GCB_V) && (p2 == U_GCB_V || p2 == U_GCB_T)) {
+        return false;
+    }
+    // Rule GB8, ( LVT | T ) x T
+    if ((p1 == U_GCB_LVT || p1 == U_GCB_T) && p2 == U_GCB_T) {
+        return false;
+    }
+    // Rule GB8a, Regional_Indicator x Regional_Indicator
+    if (p1 == U_GCB_REGIONAL_INDICATOR && p2 == U_GCB_REGIONAL_INDICATOR) {
+        return false;
+    }
+    // Rule GB9, x Extend; Rule GB9a, x SpacingMark
+    if (p2 == U_GCB_EXTEND || p2 == U_GCB_SPACING_MARK) {
+        if (c2 == 0xe33) {
+            // most other implementations break THAI CHARACTER SARA AM
+            // (tailoring of TR29)
+            return true;
+        }
+        return false;
+    }
+    // Cluster indic syllables together (tailoring of TR29)
+    if (u_getIntPropertyValue(c1, UCHAR_CANONICAL_COMBINING_CLASS) == 9  // virama
+            && u_getIntPropertyValue(c2, UCHAR_GENERAL_CATEGORY) == U_OTHER_LETTER) {
+        return false;
+    }
+    // Rule GB10, Any / Any
+    return true;
+}
+
+// Moves a cursor position to a grapheme cluster boundary in the UTF-16 run
+// buf[start .. start + count), as selected by opt:
+//   AFTER        - step forward one code unit (unless already at the end of
+//                  the run), then continue forward to the next boundary.
+//   AT_OR_AFTER  - keep offset if it is a boundary, else move forward to one.
+//   BEFORE       - step back one code unit (unless already at start), then
+//                  continue backward to the previous boundary.
+//   AT_OR_BEFORE - keep offset if it is a boundary, else move backward to one.
+//   AT           - keep offset if it is a boundary, else yield (size_t)-1.
+// Any other opt value returns offset unchanged.
+size_t GraphemeBreak::getTextRunCursor(const uint16_t* buf, size_t start, size_t count,
+        size_t offset, MoveOpt opt) {
+    size_t cursor = offset;
+    const bool movesForward = (opt == AFTER || opt == AT_OR_AFTER);
+    const bool movesBackward = (opt == BEFORE || opt == AT_OR_BEFORE);
+
+    if (movesForward) {
+        // Only the strict variant takes the initial step.
+        if (opt == AFTER && cursor < start + count) {
+            ++cursor;
+        }
+        // isGraphemeBreak() reports a break at or past the end of the run,
+        // which bounds this scan.
+        while (!isGraphemeBreak(buf, start, count, cursor)) {
+            ++cursor;
+        }
+    } else if (movesBackward) {
+        // Only the strict variant takes the initial step.
+        if (opt == BEFORE && cursor > start) {
+            --cursor;
+        }
+        // isGraphemeBreak() reports a break at or before start, which bounds
+        // this scan.
+        while (!isGraphemeBreak(buf, start, count, cursor)) {
+            --cursor;
+        }
+    } else if (opt == AT) {
+        if (!isGraphemeBreak(buf, start, count, cursor)) {
+            // Sentinel: the requested position is not a valid cursor position.
+            cursor = static_cast<size_t>(-1);
+        }
+    }
+    return cursor;
+}
+
+} // namespace android