diff options
-rw-r--r-- | include/minikin/GraphemeBreak.h | 47 | ||||
-rw-r--r-- | libs/minikin/Android.mk | 1 | ||||
-rw-r--r-- | libs/minikin/GraphemeBreak.cpp | 132 |
3 files changed, 180 insertions, 0 deletions
// File: include/minikin/GraphemeBreak.h
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINIKIN_GRAPHEME_BREAK_H
#define MINIKIN_GRAPHEME_BREAK_H

// size_t and uint16_t are used in the declarations below; include their
// headers here so this header compiles regardless of include order.
#include <stddef.h>
#include <stdint.h>

namespace android {

// Grapheme cluster boundary queries over UTF-16 text.
// All members are static; the class only serves as a scope.
class GraphemeBreak {
public:
    // Cursor movement options for getTextRunCursor().
    // These values must be kept in sync with CURSOR_AFTER etc in Paint.java
    enum MoveOpt {
        AFTER = 0,
        AT_OR_AFTER = 1,
        BEFORE = 2,
        AT_OR_BEFORE = 3,
        AT = 4
    };

    // Determine whether the given offset is a grapheme break.
    // This implementation generally follows Unicode TR29 extended
    // grapheme break, but with some tweaks to more closely match
    // existing implementations.
    //
    // buf    - UTF-16 code units.
    // start  - index in buf of the first code unit of the run.
    // count  - number of code units in the run (run is [start, start + count)).
    // offset - index in buf (not relative to start) of the position to test.
    static bool isGraphemeBreak(const uint16_t* buf, size_t start, size_t count, size_t offset);

    // Matches Android's Java API. Note, return (size_t)-1 for AT to
    // signal non-break because unsigned return type.
    static size_t getTextRunCursor(const uint16_t* buf, size_t start, size_t count,
            size_t offset, MoveOpt opt);
};

}  // namespace android

#endif  // MINIKIN_GRAPHEME_BREAK_H
\ No newline at end of file diff --git a/libs/minikin/Android.mk b/libs/minikin/Android.mk index a1d88c2..fd949c2 100644 --- a/libs/minikin/Android.mk +++ b/libs/minikin/Android.mk @@ -23,6 +23,7 @@ LOCAL_SRC_FILES := \ CssParse.cpp \ FontCollection.cpp \ FontFamily.cpp \ + GraphemeBreak.cpp \ Layout.cpp \ MinikinInternal.cpp \ MinikinRefCounted.cpp \ diff --git a/libs/minikin/GraphemeBreak.cpp b/libs/minikin/GraphemeBreak.cpp new file mode 100644 index 0000000..5d8978d --- /dev/null +++ b/libs/minikin/GraphemeBreak.cpp @@ -0,0 +1,132 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdint.h> +#include <unicode/uchar.h> +#include <unicode/utf16.h> + +#include <minikin/GraphemeBreak.h> + +namespace android { + +bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t count, + size_t offset) { + // This implementation closely follows Unicode Standard Annex #29 on + // Unicode Text Segmentation (http://www.unicode.org/reports/tr29/), + // implementing a tailored version of extended grapheme clusters. + // The GB rules refer to section 3.1.1, Grapheme Cluster Boundary Rules. 
+ + // Rule GB1, sot /; Rule GB2, / eot + if (offset <= start || offset >= start + count) { + return true; + } + if (U16_IS_TRAIL(buf[offset])) { + // Don't break a surrogate pair + return false; + } + uint32_t c1 = 0; + uint32_t c2 = 0; + size_t offset_back = offset; + U16_PREV(buf, start, offset_back, c1); + U16_NEXT(buf, offset, count, c2); + int32_t p1 = u_getIntPropertyValue(c1, UCHAR_GRAPHEME_CLUSTER_BREAK); + int32_t p2 = u_getIntPropertyValue(c2, UCHAR_GRAPHEME_CLUSTER_BREAK); + // Rule GB3, CR x LF + if (p1 == U_GCB_CR && p2 == U_GCB_LF) { + return false; + } + // Rule GB4, (Control | CR | LF) / + if (p1 == U_GCB_CONTROL || p1 == U_GCB_CR || p1 == U_GCB_LF) { + return true; + } + // Rule GB5, / (Control | CR | LF) + if (p2 == U_GCB_CONTROL || p2 == U_GCB_CR || p2 == U_GCB_LF) { + // exclude zero-width control characters from breaking (tailoring of TR29) + if (c2 == 0x00ad + || (c2 >= 0x200b && c2 <= 0x200f) + || (c2 >= 0x2028 && c2 <= 0x202e) + || (c2 >= 0x2060 && c2 <= 0x206f)) { + return false; + } + return true; + } + // Rule GB6, L x ( L | V | LV | LVT ) + if (p1 == U_GCB_L && (p2 == U_GCB_L || p2 == U_GCB_V || p2 == U_GCB_LV || p2 == U_GCB_LVT)) { + return false; + } + // Rule GB7, ( LV | V ) x ( V | T ) + if ((p1 == U_GCB_LV || p1 == U_GCB_V) && (p2 == U_GCB_V || p2 == U_GCB_T)) { + return false; + } + // Rule GB8, ( LVT | T ) x T + if ((p1 == U_GCB_L || p1 == U_GCB_T) && p2 == U_GCB_T) { + return false; + } + // Rule GB8a, Regional_Indicator x Regional_Indicator + if (p1 == U_GCB_REGIONAL_INDICATOR && p2 == U_GCB_REGIONAL_INDICATOR) { + return false; + } + // Rule GB9, x Extend; Rule GB9a, x SpacingMark + if (p2 == U_GCB_EXTEND || p2 == U_GCB_SPACING_MARK) { + if (c2 == 0xe33) { + // most other implementations break THAI CHARACTER SARA AM + // (tailoring of TR29) + return true; + } + return false; + } + // Cluster indic syllables togeter (tailoring of TR29) + if (u_getIntPropertyValue(c1, UCHAR_CANONICAL_COMBINING_CLASS) == 9 // virama + && 
u_getIntPropertyValue(c2, UCHAR_GENERAL_CATEGORY) == U_OTHER_LETTER) { + return false; + } + // Rule GB10, Any / Any + return true; +} + +size_t GraphemeBreak::getTextRunCursor(const uint16_t* buf, size_t start, size_t count, + size_t offset, MoveOpt opt) { + switch (opt) { + case AFTER: + if (offset < start + count) { + offset++; + } + // fall through + case AT_OR_AFTER: + while (!isGraphemeBreak(buf, start, count, offset)) { + offset++; + } + break; + case BEFORE: + if (offset > start) { + offset--; + } + // fall through + case AT_OR_BEFORE: + while (!isGraphemeBreak(buf, start, count, offset)) { + offset--; + } + break; + case AT: + if (!isGraphemeBreak(buf, start, count, offset)) { + offset = (size_t)-1; + } + break; + } + return offset; +} + +} // namespace android |