diff options
Diffstat (limited to 'libs/minikin/GraphemeBreak.cpp')
-rw-r--r-- | libs/minikin/GraphemeBreak.cpp | 132 |
1 files changed, 132 insertions, 0 deletions
diff --git a/libs/minikin/GraphemeBreak.cpp b/libs/minikin/GraphemeBreak.cpp new file mode 100644 index 0000000..5d8978d --- /dev/null +++ b/libs/minikin/GraphemeBreak.cpp @@ -0,0 +1,132 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdint.h> +#include <unicode/uchar.h> +#include <unicode/utf16.h> + +#include <minikin/GraphemeBreak.h> + +namespace android { + +bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t count, + size_t offset) { + // This implementation closely follows Unicode Standard Annex #29 on + // Unicode Text Segmentation (http://www.unicode.org/reports/tr29/), + // implementing a tailored version of extended grapheme clusters. + // The GB rules refer to section 3.1.1, Grapheme Cluster Boundary Rules. + + // Rule GB1, sot /; Rule GB2, / eot + if (offset <= start || offset >= start + count) { + return true; + } + if (U16_IS_TRAIL(buf[offset])) { + // Don't break a surrogate pair + return false; + } + uint32_t c1 = 0; + uint32_t c2 = 0; + size_t offset_back = offset; + U16_PREV(buf, start, offset_back, c1); + U16_NEXT(buf, offset, count, c2); + int32_t p1 = u_getIntPropertyValue(c1, UCHAR_GRAPHEME_CLUSTER_BREAK); + int32_t p2 = u_getIntPropertyValue(c2, UCHAR_GRAPHEME_CLUSTER_BREAK); + // Rule GB3, CR x LF + if (p1 == U_GCB_CR && p2 == U_GCB_LF) { + return false; + } + // Rule GB4, (Control | CR | LF) / + if (p1 == U_GCB_CONTROL || p1 == U_GCB_CR || p1 == U_GCB_LF) { + return true; + } + // Rule GB5, / (Control | CR | LF) + if (p2 == U_GCB_CONTROL || p2 == U_GCB_CR || p2 == U_GCB_LF) { + // exclude zero-width control characters from breaking (tailoring of TR29) + if (c2 == 0x00ad + || (c2 >= 0x200b && c2 <= 0x200f) + || (c2 >= 0x2028 && c2 <= 0x202e) + || (c2 >= 0x2060 && c2 <= 0x206f)) { + return false; + } + return true; + } + // Rule GB6, L x ( L | V | LV | LVT ) + if (p1 == U_GCB_L && (p2 == U_GCB_L || p2 == U_GCB_V || p2 == U_GCB_LV || p2 == U_GCB_LVT)) { + return false; + } + // Rule GB7, ( LV | V ) x ( V | T ) + if ((p1 == U_GCB_LV || p1 == U_GCB_V) && (p2 == U_GCB_V || p2 == U_GCB_T)) { + return false; + } + // Rule GB8, ( LVT | T ) x T + if ((p1 == U_GCB_L || p1 == U_GCB_T) && p2 == U_GCB_T) { + return false; + } + // Rule GB8a, Regional_Indicator x Regional_Indicator + if (p1 == U_GCB_REGIONAL_INDICATOR && p2 == U_GCB_REGIONAL_INDICATOR) { + return false; + } + // Rule GB9, x Extend; Rule GB9a, x SpacingMark + if (p2 == U_GCB_EXTEND || p2 == U_GCB_SPACING_MARK) { + if (c2 == 0xe33) { + // most other implementations break THAI CHARACTER SARA AM + // (tailoring of TR29) + return true; + } + return false; + } + // Cluster indic syllables togeter (tailoring of TR29) + if (u_getIntPropertyValue(c1, UCHAR_CANONICAL_COMBINING_CLASS) == 9 // virama + && u_getIntPropertyValue(c2, UCHAR_GENERAL_CATEGORY) == U_OTHER_LETTER) { + return false; + } + // Rule GB10, Any / Any + return true; +} + +size_t GraphemeBreak::getTextRunCursor(const uint16_t* buf, size_t start, size_t count, + size_t offset, MoveOpt opt) { + switch (opt) { + case AFTER: + if (offset < start + count) { + offset++; + } + // fall through + case AT_OR_AFTER: + while (!isGraphemeBreak(buf, start, count, offset)) { + offset++; + } + break; + case BEFORE: + if (offset > start) { + offset--; + } + // fall through + case AT_OR_BEFORE: + while (!isGraphemeBreak(buf, start, count, offset)) { + offset--; + } + break; + case AT: + if (!isGraphemeBreak(buf, start, count, offset)) { + offset = (size_t)-1; + } + break; + } + return offset; +} + +} // namespace android |