diff options
Diffstat (limited to 'libs')
-rw-r--r-- | libs/minikin/Android.mk | 3 | ||||
-rw-r--r-- | libs/minikin/FontCollection.cpp | 7 | ||||
-rw-r--r-- | libs/minikin/GraphemeBreak.cpp | 8 | ||||
-rw-r--r-- | libs/minikin/Hyphenator.cpp | 163 | ||||
-rw-r--r-- | libs/minikin/Layout.cpp | 196 | ||||
-rw-r--r-- | libs/minikin/LineBreaker.cpp | 450 | ||||
-rw-r--r-- | libs/minikin/Measurement.cpp | 122 |
7 files changed, 872 insertions, 77 deletions
diff --git a/libs/minikin/Android.mk b/libs/minikin/Android.mk index d9c973d..873f279 100644 --- a/libs/minikin/Android.mk +++ b/libs/minikin/Android.mk @@ -22,7 +22,10 @@ LOCAL_SRC_FILES := \ FontCollection.cpp \ FontFamily.cpp \ GraphemeBreak.cpp \ + Hyphenator.cpp \ Layout.cpp \ + LineBreaker.cpp \ + Measurement.cpp \ MinikinInternal.cpp \ MinikinRefCounted.cpp \ MinikinFontFreeType.cpp \ diff --git a/libs/minikin/FontCollection.cpp b/libs/minikin/FontCollection.cpp index e3911c5..b4bfe31 100644 --- a/libs/minikin/FontCollection.cpp +++ b/libs/minikin/FontCollection.cpp @@ -122,7 +122,7 @@ FontFamily* FontCollection::getFamilyForChar(uint32_t ch, return family; } int score = lang.match(family->lang()) * 2; - if (variant != 0 && variant == family->variant()) { + if (family->variant() == 0 || family->variant() == variant) { score++; } if (score > bestScore) { @@ -152,10 +152,13 @@ const uint32_t NBSP = 0xa0; const uint32_t ZWJ = 0x200c; const uint32_t ZWNJ = 0x200d; const uint32_t KEYCAP = 0x20e3; +const uint32_t HYPHEN = 0x2010; +const uint32_t NB_HYPHEN = 0x2011; // Characters where we want to continue using existing font run instead of // recomputing the best match in the fallback list. -static const uint32_t stickyWhitelist[] = { '!', ',', '.', ':', ';', '?', NBSP, ZWJ, ZWNJ, KEYCAP }; +static const uint32_t stickyWhitelist[] = { '!', ',', '-', '.', ':', ';', '?', NBSP, ZWJ, ZWNJ, + KEYCAP, HYPHEN, NB_HYPHEN }; static bool isStickyWhitelisted(uint32_t c) { for (size_t i = 0; i < sizeof(stickyWhitelist) / sizeof(stickyWhitelist[0]); i++) { diff --git a/libs/minikin/GraphemeBreak.cpp b/libs/minikin/GraphemeBreak.cpp index 5d8978d..f8f386c 100644 --- a/libs/minikin/GraphemeBreak.cpp +++ b/libs/minikin/GraphemeBreak.cpp @@ -41,7 +41,7 @@ bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t co uint32_t c2 = 0; size_t offset_back = offset; U16_PREV(buf, start, offset_back, c1); - U16_NEXT(buf, offset, count, c2); + U16_NEXT(buf, offset, start + count, c2); int32_t p1 = u_getIntPropertyValue(c1, UCHAR_GRAPHEME_CLUSTER_BREAK); int32_t p2 = u_getIntPropertyValue(c2, UCHAR_GRAPHEME_CLUSTER_BREAK); // Rule GB3, CR x LF @@ -54,7 +54,7 @@ bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t co } // Rule GB5, / (Control | CR | LF) if (p2 == U_GCB_CONTROL || p2 == U_GCB_CR || p2 == U_GCB_LF) { - // exclude zero-width control characters from breaking (tailoring of TR29) + // exclude zero-width control characters from breaking (tailoring of UAX #29) if (c2 == 0x00ad || (c2 >= 0x200b && c2 <= 0x200f) || (c2 >= 0x2028 && c2 <= 0x202e) @@ -83,12 +83,12 @@ bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t co if (p2 == U_GCB_EXTEND || p2 == U_GCB_SPACING_MARK) { if (c2 == 0xe33) { // most other implementations break THAI CHARACTER SARA AM - // (tailoring of TR29) + // (tailoring of UAX #29) return true; } return false; } - // Cluster indic syllables togeter (tailoring of TR29) + // Cluster indic syllables together (tailoring of UAX #29) if (u_getIntPropertyValue(c1, UCHAR_CANONICAL_COMBINING_CLASS) == 9 // virama && u_getIntPropertyValue(c2, UCHAR_GENERAL_CATEGORY) == U_OTHER_LETTER) { return false; diff --git a/libs/minikin/Hyphenator.cpp b/libs/minikin/Hyphenator.cpp new file mode 100644 index 0000000..3eb151b --- /dev/null +++ b/libs/minikin/Hyphenator.cpp @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vector> +#include <memory> +#include <algorithm> +#include <string> +#include <unicode/uchar.h> + +// HACK: for reading pattern file +#include <fcntl.h> + +#define LOG_TAG "Minikin" +#include "utils/Log.h" + +#include "minikin/Hyphenator.h" + +using std::vector; + +namespace android { + +static const uint16_t CHAR_SOFT_HYPHEN = 0x00AD; + +void Hyphenator::addPattern(const uint16_t* pattern, size_t size) { + vector<uint16_t> word; + vector<uint8_t> result; + + // start by parsing the Liang-format pattern into a word and a result vector, the + // vector right-aligned but without leading zeros. Examples: + // a1bc2d -> abcd [1, 0, 2, 0] + // abc1 -> abc [1] + // 1a2b3c4d5 -> abcd [1, 2, 3, 4, 5] + bool lastWasLetter = false; + bool haveSeenNumber = false; + for (size_t i = 0; i < size; i++) { + uint16_t c = pattern[i]; + if (isdigit(c)) { + result.push_back(c - '0'); + lastWasLetter = false; + haveSeenNumber = true; + } else { + word.push_back(c); + if (lastWasLetter && haveSeenNumber) { + result.push_back(0); + } + lastWasLetter = true; + } + } + if (lastWasLetter) { + result.push_back(0); + } + Trie* t = &root; + for (size_t i = 0; i < word.size(); i++) { + t = &t->succ[word[i]]; + } + t->result = result; +} + +// If any soft hyphen is present in the word, use soft hyphens to decide hyphenation, +// as recommended in UAX #14 (Use of Soft Hyphen) +void Hyphenator::hyphenateSoft(vector<uint8_t>* result, const uint16_t* word, size_t len) { + (*result)[0] = 0; + for (size_t i = 1; i < len; i++) { + (*result)[i] = word[i - 1] == CHAR_SOFT_HYPHEN; + } +} + +void Hyphenator::hyphenate(vector<uint8_t>* result, const uint16_t* word, size_t len) { + result->clear(); + result->resize(len); + if (len < MIN_PREFIX + MIN_SUFFIX) return; + size_t maxOffset = len - MIN_SUFFIX + 1; + for (size_t i = 0; i < len + 1; i++) { + const Trie* node = &root; + for (size_t j = i; j < len + 2; j++) { + uint16_t c; + if (j == 0 || j == len + 1) { + c = '.'; // word boundary character in pattern data files + } else { + c = word[j - 1]; + if (c == CHAR_SOFT_HYPHEN) { + hyphenateSoft(result, word, len); + return; + } + // TODO: This uses ICU's simple character to character lowercasing, which ignores + // the locale, and ignores cases when lowercasing a character results in more than + // one character. It should be fixed to consider the locale (in order for it to work + // correctly for Turkish and Azerbaijani), as well as support one-to-many, and + // many-to-many case conversions (including non-BMP cases). + if (c < 0x00C0) { // U+00C0 is the lowest uppercase non-ASCII character + // Convert uppercase ASCII to lowercase ASCII, but keep other characters as-is + if (0x0041 <= c && c <= 0x005A) { + c += 0x0020; + } + } else { + c = u_tolower(c); + } + } + auto search = node->succ.find(c); + if (search != node->succ.end()) { + node = &search->second; + } else { + break; + } + if (!node->result.empty()) { + int resultLen = node->result.size(); + int offset = j + 1 - resultLen; + int start = std::max(MIN_PREFIX - offset, 0); + int end = std::min(resultLen, (int)maxOffset - offset); + // TODO performance: this inner loop can profitably be optimized + for (int k = start; k < end; k++) { + (*result)[offset + k] = std::max((*result)[offset + k], node->result[k]); + } +#if 0 + // debug printing of matched patterns + std::string dbg; + for (size_t k = i; k <= j + 1; k++) { + int off = k - j - 2 + resultLen; + if (off >= 0 && node->result[off] != 0) { + dbg.push_back((char)('0' + node->result[off])); + } + if (k < j + 1) { + uint16_t c = (k == 0 || k == len + 1) ? '.' : word[k - 1]; + dbg.push_back((char)c); + } + } + ALOGD("%d:%d %s", i, j, dbg.c_str()); +#endif + } + } + } + // Since the above calculation does not modify values outside + // [MIN_PREFIX, len - MIN_SUFFIX], they are left as 0. + for (size_t i = MIN_PREFIX; i < maxOffset; i++) { + (*result)[i] &= 1; + } +} + +Hyphenator* Hyphenator::load(const uint16_t *patternData, size_t size) { + Hyphenator* result = new Hyphenator; + for (size_t i = 0; i < size; i++) { + size_t end = i; + while (patternData[end] != '\n') end++; + result->addPattern(patternData + i, end - i); + i = end; + } + return result; +} + +} // namespace android diff --git a/libs/minikin/Layout.cpp b/libs/minikin/Layout.cpp index db0667b..be29e3c 100644 --- a/libs/minikin/Layout.cpp +++ b/libs/minikin/Layout.cpp @@ -41,20 +41,49 @@ using std::string; using std::vector; -namespace android { +namespace minikin { -// TODO: these should move into the header file, but for now we don't want -// to cause namespace collisions with TextLayout.h -enum { - kBidi_LTR = 0, - kBidi_RTL = 1, - kBidi_Default_LTR = 2, - kBidi_Default_RTL = 3, - kBidi_Force_LTR = 4, - kBidi_Force_RTL = 5, - - kBidi_Mask = 0x7 -}; +Bitmap::Bitmap(int width, int height) : width(width), height(height) { + buf = new uint8_t[width * height](); +} + +Bitmap::~Bitmap() { + delete[] buf; +} + +void Bitmap::writePnm(std::ofstream &o) const { + o << "P5" << std::endl; + o << width << " " << height << std::endl; + o << "255" << std::endl; + o.write((const char *)buf, width * height); + o.close(); +} + +void Bitmap::drawGlyph(const android::GlyphBitmap& bitmap, int x, int y) { + int bmw = bitmap.width; + int bmh = bitmap.height; + x += bitmap.left; + y -= bitmap.top; + int x0 = std::max(0, x); + int x1 = std::min(width, x + bmw); + int y0 = std::max(0, y); + int y1 = std::min(height, y + bmh); + const unsigned char* src = bitmap.buffer + (y0 - y) * bmw + (x0 - x); + uint8_t* dst = buf + y0 * width; + for (int yy = y0; yy < y1; yy++) { + for (int xx = x0; xx < x1; xx++) { + int pixel = (int)dst[xx] + (int)src[xx - x]; + pixel = pixel > 0xff ? 0xff : pixel; + dst[xx] = pixel; + } + src += bmw; + dst += width; + } +} + +} // namespace minikin + +namespace android { const int kDirection_Mask = 0x1; @@ -81,7 +110,7 @@ public: mStart(start), mCount(count), mId(collection->getId()), mStyle(style), mSize(paint.size), mScaleX(paint.scaleX), mSkewX(paint.skewX), mLetterSpacing(paint.letterSpacing), - mPaintFlags(paint.paintFlags), mIsRtl(dir) { + mPaintFlags(paint.paintFlags), mHyphenEdit(paint.hyphenEdit), mIsRtl(dir) { } bool operator==(const LayoutCacheKey &other) const; hash_t hash() const; @@ -115,6 +144,7 @@ private: float mSkewX; float mLetterSpacing; int32_t mPaintFlags; + HyphenEdit mHyphenEdit; bool mIsRtl; // Note: any fields added to MinikinPaint must also be reflected here. // TODO: language matching (possibly integrate into style) @@ -173,13 +203,24 @@ private: static const size_t kMaxEntries = 100; }; +static unsigned int disabledDecomposeCompatibility(hb_unicode_funcs_t*, hb_codepoint_t, + hb_codepoint_t*, void*) { + return 0; +} + class LayoutEngine : public Singleton<LayoutEngine> { public: LayoutEngine() { + unicodeFunctions = hb_unicode_funcs_create(hb_icu_get_unicode_funcs()); + /* Disable the function used for compatibility decomposition */ + hb_unicode_funcs_set_decompose_compatibility_func( + unicodeFunctions, disabledDecomposeCompatibility, NULL, NULL); hbBuffer = hb_buffer_create(); + hb_buffer_set_unicode_funcs(hbBuffer, unicodeFunctions); } hb_buffer_t* hbBuffer; + hb_unicode_funcs_t* unicodeFunctions; LayoutCache layoutCache; HbFaceCache hbFaceCache; }; @@ -196,6 +237,7 @@ bool LayoutCacheKey::operator==(const LayoutCacheKey& other) const { && mSkewX == other.mSkewX && mLetterSpacing == other.mLetterSpacing && mPaintFlags == other.mPaintFlags + && mHyphenEdit == other.mHyphenEdit && mIsRtl == other.mIsRtl && mNchars == other.mNchars && !memcmp(mChars, other.mChars, mNchars * sizeof(uint16_t)); @@ -211,6 +253,7 @@ hash_t LayoutCacheKey::hash() const { hash = JenkinsHashMix(hash, hash_type(mSkewX)); hash = JenkinsHashMix(hash, hash_type(mLetterSpacing)); hash = JenkinsHashMix(hash, hash_type(mPaintFlags)); + hash = JenkinsHashMix(hash, hash_type(mHyphenEdit.hasHyphen())); hash = JenkinsHashMix(hash, hash_type(mIsRtl)); hash = JenkinsHashMixShorts(hash, mChars, mNchars); return JenkinsHashWhiten(hash); @@ -220,44 +263,6 @@ hash_t hash_type(const LayoutCacheKey& key) { return key.hash(); } -Bitmap::Bitmap(int width, int height) : width(width), height(height) { - buf = new uint8_t[width * height](); -} - -Bitmap::~Bitmap() { - delete[] buf; -} - -void Bitmap::writePnm(std::ofstream &o) const { - o << "P5" << std::endl; - o << width << " " << height << std::endl; - o << "255" << std::endl; - o.write((const char *)buf, width * height); - o.close(); -} - -void Bitmap::drawGlyph(const GlyphBitmap& bitmap, int x, int y) { - int bmw = bitmap.width; - int bmh = bitmap.height; - x += bitmap.left; - y -= bitmap.top; - int x0 = std::max(0, x); - int x1 = std::min(width, x + bmw); - int y0 = std::max(0, y); - int y1 = std::min(height, y + bmh); - const unsigned char* src = bitmap.buffer + (y0 - y) * bmw + (x0 - x); - uint8_t* dst = buf + y0 * width; - for (int yy = y0; yy < y1; yy++) { - for (int xx = x0; xx < x1; xx++) { - int pixel = (int)dst[xx] + (int)src[xx - x]; - pixel = pixel > 0xff ? 0xff : pixel; - dst[xx] = pixel; - } - src += bmw; - dst += width; - } -} - void MinikinRect::join(const MinikinRect& r) { if (isEmpty()) { set(r); @@ -312,12 +317,6 @@ static hb_bool_t harfbuzzGetGlyph(hb_font_t* hbFont, void* fontData, hb_codepoin MinikinPaint* paint = reinterpret_cast<MinikinPaint*>(fontData); MinikinFont* font = paint->font; uint32_t glyph_id; - /* HarfBuzz replaces broken input codepoints with (unsigned int) -1. - * Skia expects valid Unicode. - * Replace invalid codepoints with U+FFFD REPLACEMENT CHARACTER. - */ - if (unicode > 0x10FFFF) - unicode = 0xFFFD; bool ok = font->GetGlyph(unicode, &glyph_id); if (ok) { *glyph = glyph_id; @@ -408,7 +407,7 @@ int Layout::findFace(FakedFont face, LayoutContext* ctx) { static hb_script_t codePointToScript(hb_codepoint_t codepoint) { static hb_unicode_funcs_t* u = 0; if (!u) { - u = hb_icu_get_unicode_funcs(); + u = LayoutEngine::getInstance().unicodeFunctions; } return hb_unicode_script(u, codepoint); } @@ -518,6 +517,29 @@ static size_t getNextWordBreak(const uint16_t* chars, size_t offset, size_t len) return len; } +/** + * Disable certain scripts (mostly those with cursive connection) from having letterspacing + * applied. See https://github.com/behdad/harfbuzz/issues/64 for more details. + */ +static bool isScriptOkForLetterspacing(hb_script_t script) { + return !( + script == HB_SCRIPT_ARABIC || + script == HB_SCRIPT_NKO || + script == HB_SCRIPT_PSALTER_PAHLAVI || + script == HB_SCRIPT_MANDAIC || + script == HB_SCRIPT_MONGOLIAN || + script == HB_SCRIPT_PHAGS_PA || + script == HB_SCRIPT_DEVANAGARI || + script == HB_SCRIPT_BENGALI || + script == HB_SCRIPT_GURMUKHI || + script == HB_SCRIPT_MODI || + script == HB_SCRIPT_SHARADA || + script == HB_SCRIPT_SYLOTI_NAGRI || + script == HB_SCRIPT_TIRHUTA || + script == HB_SCRIPT_OGHAM + ); +} + void Layout::doLayout(const uint16_t* buf, size_t start, size_t count, size_t bufSize, int bidiFlags, const FontStyle &style, const MinikinPaint &paint) { AutoMutex _l(gMinikinLock); @@ -589,12 +611,15 @@ void Layout::doLayout(const uint16_t* buf, size_t start, size_t count, size_t bu void Layout::doLayoutRunCached(const uint16_t* buf, size_t start, size_t count, size_t bufSize, bool isRtl, LayoutContext* ctx, size_t dstStart) { + HyphenEdit hyphen = ctx->paint.hyphenEdit; if (!isRtl) { // left to right size_t wordstart = start == bufSize ? start : getPrevWordBreak(buf, start + 1); size_t wordend; for (size_t iter = start; iter < start + count; iter = wordend) { wordend = getNextWordBreak(buf, iter, bufSize); + // Only apply hyphen to the last word in the string. + ctx->paint.hyphenEdit = wordend >= start + count ? hyphen : HyphenEdit(); size_t wordcount = std::min(start + count, wordend) - iter; doLayoutWord(buf + wordstart, iter - wordstart, wordcount, wordend - wordstart, isRtl, ctx, iter - dstStart); @@ -607,6 +632,8 @@ void Layout::doLayoutRunCached(const uint16_t* buf, size_t start, size_t count, size_t wordend = end == 0 ? 0 : getNextWordBreak(buf, end - 1, bufSize); for (size_t iter = end; iter > start; iter = wordstart) { wordstart = getPrevWordBreak(buf, iter); + // Only apply hyphen to the last (leftmost) word in the string. + ctx->paint.hyphenEdit = iter == end ? hyphen : HyphenEdit(); size_t bufStart = std::max(start, wordstart); doLayoutWord(buf + wordstart, bufStart - wordstart, iter - bufStart, wordend - wordstart, isRtl, ctx, bufStart - dstStart); @@ -677,15 +704,6 @@ void Layout::doLayoutRun(const uint16_t* buf, size_t start, size_t count, size_t double size = ctx->paint.size; double scaleX = ctx->paint.scaleX; - double letterSpace = ctx->paint.letterSpacing * size * scaleX; - double letterSpaceHalfLeft; - if ((ctx->paint.paintFlags & LinearTextFlag) == 0) { - letterSpace = round(letterSpace); - letterSpaceHalfLeft = floor(letterSpace * 0.5); - } else { - letterSpaceHalfLeft = letterSpace * 0.5; - } - double letterSpaceHalfRight = letterSpace - letterSpaceHalfLeft; float x = mAdvance; float y = 0; @@ -715,7 +733,22 @@ void Layout::doLayoutRun(const uint16_t* buf, size_t start, size_t count, size_t srunend = srunstart; hb_script_t script = getScriptRun(buf + start, run.end, &srunend); - hb_buffer_reset(buffer); + double letterSpace = 0.0; + double letterSpaceHalfLeft = 0.0; + double letterSpaceHalfRight = 0.0; + + if (ctx->paint.letterSpacing != 0.0 && isScriptOkForLetterspacing(script)) { + letterSpace = ctx->paint.letterSpacing * size * scaleX; + if ((ctx->paint.paintFlags & LinearTextFlag) == 0) { + letterSpace = round(letterSpace); + letterSpaceHalfLeft = floor(letterSpace * 0.5); + } else { + letterSpaceHalfLeft = letterSpace * 0.5; + } + letterSpaceHalfRight = letterSpace - letterSpaceHalfLeft; + } + + hb_buffer_clear_contents(buffer); hb_buffer_set_script(buffer, script); hb_buffer_set_direction(buffer, isRtl? HB_DIRECTION_RTL : HB_DIRECTION_LTR); FontLanguage language = ctx->style.getLanguage(); @@ -724,6 +757,22 @@ void Layout::doLayoutRun(const uint16_t* buf, size_t start, size_t count, size_t hb_buffer_set_language(buffer, hb_language_from_string(lang.c_str(), -1)); } hb_buffer_add_utf16(buffer, buf, bufSize, srunstart + start, srunend - srunstart); + if (ctx->paint.hyphenEdit.hasHyphen() && srunend > srunstart) { + // TODO: check whether this is really the desired semantics. It could have the + // effect of assigning the hyphen width to a nonspacing mark + unsigned int lastCluster = start + srunend - 1; + + hb_codepoint_t hyphenChar = 0x2010; // HYPHEN + hb_codepoint_t glyph; + // Fallback to ASCII HYPHEN-MINUS if the font didn't have a glyph for HYPHEN. Note + // that we intentionally don't do anything special if the font doesn't have a + // HYPHEN-MINUS either, so a tofu could be shown, hinting towards something + // missing. + if (!hb_font_get_glyph(hbFont, hyphenChar, 0, &glyph)) { + hyphenChar = 0x002D; // HYPHEN-MINUS + } + hb_buffer_add(buffer, hyphenChar, lastCluster); + } hb_shape(hbFont, buffer, features.empty() ? NULL : &features[0], features.size()); unsigned int numGlyphs; hb_glyph_info_t* info = hb_buffer_get_glyph_infos(buffer, &numGlyphs); @@ -758,7 +807,12 @@ void Layout::doLayoutRun(const uint16_t* buf, size_t start, size_t count, size_t ctx->paint.font->GetBounds(&glyphBounds, glyph_ix, ctx->paint); glyphBounds.offset(x + xoff, y + yoff); mBounds.join(glyphBounds); - mAdvances[info[i].cluster - start] += xAdvance; + if (info[i].cluster - start < count) { + mAdvances[info[i].cluster - start] += xAdvance; + } else { + ALOGE("cluster %d (start %d) out of bounds of count %d", + info[i].cluster - start, start, count); + } x += xAdvance; } if (numGlyphs) @@ -806,7 +860,7 @@ void Layout::appendLayout(Layout* src, size_t start) { } } -void Layout::draw(Bitmap* surface, int x0, int y0, float size) const { +void Layout::draw(minikin::Bitmap* surface, int x0, int y0, float size) const { /* TODO: redo as MinikinPaint settings if (mProps.hasTag(minikinHinting)) { diff --git a/libs/minikin/LineBreaker.cpp b/libs/minikin/LineBreaker.cpp new file mode 100644 index 0000000..a832ca2 --- /dev/null +++ b/libs/minikin/LineBreaker.cpp @@ -0,0 +1,450 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define VERBOSE_DEBUG 0 + +#include <limits> + +#define LOG_TAG "Minikin" +#include <cutils/log.h> + +#include <minikin/Layout.h> +#include <minikin/LineBreaker.h> + +using std::vector; + +namespace android { + +const int CHAR_TAB = 0x0009; +const uint16_t CHAR_SOFT_HYPHEN = 0x00AD; + +// Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these +// constants are larger than any reasonable actual width score. +const float SCORE_INFTY = std::numeric_limits<float>::max(); +const float SCORE_OVERFULL = 1e12f; +const float SCORE_DESPERATE = 1e10f; + +// Multiplier for hyphen penalty on last line. +const float LAST_LINE_PENALTY_MULTIPLIER = 4.0f; +// Penalty assigned to each line break (to try to minimize number of lines) +// TODO: when we implement full justification (so spaces can shrink and stretch), this is +// probably not the most appropriate method. +const float LINE_PENALTY_MULTIPLIER = 2.0f; + +// Very long words trigger O(n^2) behavior in hyphenation, so we disable hyphenation for +// unreasonably long words. This is somewhat of a heuristic because extremely long words +// are possible in some languages. This does mean that very long real words can get +// broken by desperate breaks, with no hyphens. +const size_t LONGEST_HYPHENATED_WORD = 45; + +// When the text buffer is within this limit, capacity of vectors is retained at finish(), +// to avoid allocation. +const size_t MAX_TEXT_BUF_RETAIN = 32678; + +void LineBreaker::setLocale(const icu::Locale& locale, Hyphenator* hyphenator) { + delete mBreakIterator; + UErrorCode status = U_ZERO_ERROR; + mBreakIterator = icu::BreakIterator::createLineInstance(locale, status); + // TODO: check status + + // TODO: load actual resource dependent on locale; letting Minikin do it is a hack + mHyphenator = hyphenator; +} + +void LineBreaker::setText() { + UErrorCode status = U_ZERO_ERROR; + utext_openUChars(&mUText, mTextBuf.data(), mTextBuf.size(), &status); + mBreakIterator->setText(&mUText, status); + mBreakIterator->first(); + + // handle initial break here because addStyleRun may never be called + mBreakIterator->next(); + mCandidates.clear(); + Candidate cand = {0, 0, 0.0, 0.0, 0.0, 0.0, 0, 0}; + mCandidates.push_back(cand); + + // reset greedy breaker state + mBreaks.clear(); + mWidths.clear(); + mFlags.clear(); + mLastBreak = 0; + mBestBreak = 0; + mBestScore = SCORE_INFTY; + mPreBreak = 0; + mFirstTabIndex = INT_MAX; +} + +void LineBreaker::setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth) { + mLineWidths.setWidths(firstWidth, firstWidthLineCount, restWidth); +} + + +void LineBreaker::setIndents(const std::vector<float>& indents) { + mLineWidths.setIndents(indents); +} + +// This function determines whether a character is a space that disappears at end of line. +// It is the Unicode set: [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]], +// plus '\n'. +// Note: all such characters are in the BMP, so it's ok to use code units for this. +static bool isLineEndSpace(uint16_t c) { + return c == '\n' || c == ' ' || c == 0x1680 || (0x2000 <= c && c <= 0x200A && c != 0x2007) || + c == 0x205F || c == 0x3000; +} + +// This function determines whether a character is like U+2010 HYPHEN in +// line breaking and usage: a character immediately after which line breaks +// are allowed, but words containing it should not be automatically +// hyphenated. This is a curated set, created by manually inspecting all +// the characters that have the Unicode line breaking property of BA or HY +// and seeing which ones are hyphens. +static bool isLineBreakingHyphen(uint16_t c) { + return (c == 0x002D || // HYPHEN-MINUS + c == 0x058A || // ARMENIAN HYPHEN + c == 0x05BE || // HEBREW PUNCTUATION MAQAF + c == 0x1400 || // CANADIAN SYLLABICS HYPHEN + c == 0x2010 || // HYPHEN + c == 0x2013 || // EN DASH + c == 0x2027 || // HYPHENATION POINT + c == 0x2E17 || // DOUBLE OBLIQUE HYPHEN + c == 0x2E40); // DOUBLE HYPHEN +} + +// Ordinarily, this method measures the text in the range given. However, when paint +// is nullptr, it assumes the widths have already been calculated and stored in the +// width buffer. +// This method finds the candidate word breaks (using the ICU break iterator) and sends them +// to addCandidate. +float LineBreaker::addStyleRun(MinikinPaint* paint, const FontCollection* typeface, + FontStyle style, size_t start, size_t end, bool isRtl) { + Layout layout; // performance TODO: move layout to self object to reduce allocation cost? + float width = 0.0f; + int bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR; + + float hyphenPenalty = 0.0; + if (paint != nullptr) { + layout.setFontCollection(typeface); + layout.doLayout(mTextBuf.data(), start, end - start, mTextBuf.size(), bidiFlags, style, + *paint); + layout.getAdvances(mCharWidths.data() + start); + width = layout.getAdvance(); + + // a heuristic that seems to perform well + hyphenPenalty = 0.5 * paint->size * paint->scaleX * mLineWidths.getLineWidth(0); + if (mHyphenationFrequency == kHyphenationFrequency_Normal) { + hyphenPenalty *= 4.0; // TODO: Replace with a better value after some testing + } + + mLinePenalty = std::max(mLinePenalty, hyphenPenalty * LINE_PENALTY_MULTIPLIER); + } + + size_t current = (size_t)mBreakIterator->current(); + size_t wordEnd = start; + size_t lastBreak = start; + ParaWidth lastBreakWidth = mWidth; + ParaWidth postBreak = mWidth; + bool temporarilySkipHyphenation = false; + for (size_t i = start; i < end; i++) { + uint16_t c = mTextBuf[i]; + if (c == CHAR_TAB) { + mWidth = mPreBreak + mTabStops.nextTab(mWidth - mPreBreak); + if (mFirstTabIndex == INT_MAX) { + mFirstTabIndex = (int)i; + } + // fall back to greedy; other modes don't know how to deal with tabs + mStrategy = kBreakStrategy_Greedy; + } else { + mWidth += mCharWidths[i]; + if (!isLineEndSpace(c)) { + postBreak = mWidth; + wordEnd = i + 1; + } + } + if (i + 1 == current) { + // Override ICU's treatment of soft hyphen as a break opportunity, because we want it + // to be a hyphen break, with penalty and drawing behavior. + if (c != CHAR_SOFT_HYPHEN) { + // TODO: Add a new type of HyphenEdit for breaks whose hyphen already exists, so + // we can pass the whole word down to Hyphenator like the soft hyphen case. + bool wordEndsInHyphen = isLineBreakingHyphen(c); + if (paint != nullptr && mHyphenator != nullptr && + mHyphenationFrequency != kHyphenationFrequency_None && + !wordEndsInHyphen && !temporarilySkipHyphenation && + wordEnd > lastBreak && wordEnd - lastBreak <= LONGEST_HYPHENATED_WORD) { + mHyphenator->hyphenate(&mHyphBuf, &mTextBuf[lastBreak], wordEnd - lastBreak); + #if VERBOSE_DEBUG + std::string hyphenatedString; + for (size_t j = lastBreak; j < wordEnd; j++) { + if (mHyphBuf[j - lastBreak]) hyphenatedString.push_back('-'); + // Note: only works with ASCII, should do UTF-8 conversion here + hyphenatedString.push_back(buffer()[j]); + } + ALOGD("hyphenated string: %s", hyphenatedString.c_str()); + #endif + + // measure hyphenated substrings + for (size_t j = lastBreak; j < wordEnd; j++) { + uint8_t hyph = mHyphBuf[j - lastBreak]; + if (hyph) { + paint->hyphenEdit = hyph; + layout.doLayout(mTextBuf.data(), lastBreak, j - lastBreak, + mTextBuf.size(), bidiFlags, style, *paint); + ParaWidth hyphPostBreak = lastBreakWidth + layout.getAdvance(); + paint->hyphenEdit = 0; + layout.doLayout(mTextBuf.data(), j, wordEnd - j, + mTextBuf.size(), bidiFlags, style, *paint); + ParaWidth hyphPreBreak = postBreak - layout.getAdvance(); + addWordBreak(j, hyphPreBreak, hyphPostBreak, hyphenPenalty, hyph); + } + } + } + // Skip hyphenating the next word if and only if the present word ends in a hyphen + temporarilySkipHyphenation = wordEndsInHyphen; + + // Skip break for zero-width characters inside replacement span + if (paint != nullptr || current == end || mCharWidths[current] > 0) { + addWordBreak(current, mWidth, postBreak, 0.0, 0); + } + lastBreak = current; + lastBreakWidth = mWidth; + } + current = (size_t)mBreakIterator->next(); + } + } + + return width; +} + +// add a word break (possibly for a hyphenated fragment), and add desperate breaks if +// needed (ie when word exceeds current line width) +void LineBreaker::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, + float penalty, uint8_t hyph) { + Candidate cand; + ParaWidth width = mCandidates.back().preBreak; + if (postBreak - width > currentLineWidth()) { + // Add desperate breaks. + // Note: these breaks are based on the shaping of the (non-broken) original text; they + // are imprecise especially in the presence of kerning, ligatures, and Arabic shaping. + size_t i = mCandidates.back().offset; + width += mCharWidths[i++]; + for (; i < offset; i++) { + float w = mCharWidths[i]; + if (w > 0) { + cand.offset = i; + cand.preBreak = width; + cand.postBreak = width; + cand.penalty = SCORE_DESPERATE; + cand.hyphenEdit = 0; +#if VERBOSE_DEBUG + ALOGD("desperate cand: %d %g:%g", + mCandidates.size(), cand.postBreak, cand.preBreak); +#endif + addCandidate(cand); + width += w; + } + } + } + + cand.offset = offset; + cand.preBreak = preBreak; + cand.postBreak = postBreak; + cand.penalty = penalty; + cand.hyphenEdit = hyph; +#if VERBOSE_DEBUG + ALOGD("cand: %d %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak); +#endif + addCandidate(cand); +} + +// TODO performance: could avoid populating mCandidates if greedy only +void LineBreaker::addCandidate(Candidate cand) { + size_t candIndex = mCandidates.size(); + mCandidates.push_back(cand); + if (cand.postBreak - mPreBreak > currentLineWidth()) { + // This break would create an overfull line, pick the best break and break there (greedy) + if (mBestBreak == mLastBreak) { + mBestBreak = candIndex; + } + pushBreak(mCandidates[mBestBreak].offset, mCandidates[mBestBreak].postBreak - mPreBreak, + mCandidates[mBestBreak].hyphenEdit); + mBestScore = SCORE_INFTY; +#if VERBOSE_DEBUG + ALOGD("break: %d %g", mBreaks.back(), mWidths.back()); +#endif + mLastBreak = mBestBreak; + mPreBreak = mCandidates[mBestBreak].preBreak; + } + if (cand.penalty <= mBestScore) { + mBestBreak = candIndex; + mBestScore = cand.penalty; + } +} + +void LineBreaker::pushBreak(int offset, float width, uint8_t hyph) { + mBreaks.push_back(offset); + mWidths.push_back(width); + int flags = (mFirstTabIndex < mBreaks.back()) << kTab_Shift; + flags |= hyph; + mFlags.push_back(flags); + mFirstTabIndex = INT_MAX; +} + +void LineBreaker::addReplacement(size_t start, size_t end, float width) { + mCharWidths[start] = width; + std::fill(&mCharWidths[start + 1], &mCharWidths[end], 0.0f); + addStyleRun(nullptr, nullptr, FontStyle(), start, end, false); +} + +float LineBreaker::currentLineWidth() const { + return mLineWidths.getLineWidth(mBreaks.size()); +} + +void LineBreaker::computeBreaksGreedy() { + // All breaks but the last have been added in addCandidate already. + size_t nCand = mCandidates.size(); + if (nCand == 1 || mLastBreak != nCand - 1) { + pushBreak(mCandidates[nCand - 1].offset, mCandidates[nCand - 1].postBreak - mPreBreak, 0); + // don't need to update mBestScore, because we're done +#if VERBOSE_DEBUG + ALOGD("final break: %d %g", mBreaks.back(), mWidths.back()); +#endif + } +} + +// Follow "prev" links in mCandidates array, and copy to result arrays. +void LineBreaker::finishBreaksOptimal() { + // clear existing greedy break result + mBreaks.clear(); + mWidths.clear(); + mFlags.clear(); + size_t nCand = mCandidates.size(); + size_t prev; + for (size_t i = nCand - 1; i > 0; i = prev) { + prev = mCandidates[i].prev; + mBreaks.push_back(mCandidates[i].offset); + mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak); + mFlags.push_back(mCandidates[i].hyphenEdit); + } + std::reverse(mBreaks.begin(), mBreaks.end()); + std::reverse(mWidths.begin(), mWidths.end()); + std::reverse(mFlags.begin(), mFlags.end()); +} + +void LineBreaker::computeBreaksOptimal(bool isRectangle) { + size_t active = 0; + size_t nCand = mCandidates.size(); + float width = mLineWidths.getLineWidth(0); + for (size_t i = 1; i < nCand; i++) { + bool atEnd = i == nCand - 1; + float best = SCORE_INFTY; + size_t bestPrev = 0; + size_t lineNumberLast = 0; + + if (!isRectangle) { + size_t lineNumberLast = mCandidates[active].lineNumber; + width = mLineWidths.getLineWidth(lineNumberLast); + } + ParaWidth leftEdge = mCandidates[i].postBreak - width; + float bestHope = 0; + + for (size_t j = active; j < i; j++) { + if (!isRectangle) { + size_t lineNumber = mCandidates[j].lineNumber; + if (lineNumber != lineNumberLast) { + float widthNew = mLineWidths.getLineWidth(lineNumber); + if (widthNew != width) { + leftEdge = mCandidates[i].postBreak - width; + bestHope = 0; + width = widthNew; + } + lineNumberLast = lineNumber; + } + } + float jScore = mCandidates[j].score; + if (jScore + bestHope >= best) continue; + float delta = mCandidates[j].preBreak - leftEdge; + + // compute width score for line + + // Note: the "bestHope" optimization makes the assumption that, when delta is + // non-negative, widthScore will increase monotonically as successive candidate + // breaks are considered. + float widthScore = 0.0f; + float additionalPenalty = 0.0f; + if (delta < 0) { + widthScore = SCORE_OVERFULL; + } else if (atEnd && mStrategy != kBreakStrategy_Balanced) { + // increase penalty for hyphen on last line + additionalPenalty = LAST_LINE_PENALTY_MULTIPLIER * mCandidates[j].penalty; + } else { + widthScore = delta * delta; + } + + if (delta < 0) { + active = j + 1; + } else { + bestHope = widthScore; + } + + float score = jScore + widthScore + additionalPenalty; + if (score <= best) { + best = score; + bestPrev = j; + } + } + mCandidates[i].score = best + mCandidates[i].penalty + mLinePenalty; + mCandidates[i].prev = bestPrev; + mCandidates[i].lineNumber = mCandidates[bestPrev].lineNumber + 1; +#if VERBOSE_DEBUG + ALOGD("break %d: score=%g, prev=%d", i, mCandidates[i].score, mCandidates[i].prev); +#endif + } + finishBreaksOptimal(); +} + +size_t LineBreaker::computeBreaks() { + if (mStrategy == kBreakStrategy_Greedy) { + computeBreaksGreedy(); + } else { + computeBreaksOptimal(mLineWidths.isConstant()); + } + return mBreaks.size(); +} + +void LineBreaker::finish() { + mWidth = 0; + mCandidates.clear(); + mBreaks.clear(); + mWidths.clear(); + mFlags.clear(); + if (mTextBuf.size() > MAX_TEXT_BUF_RETAIN) { + mTextBuf.clear(); + mTextBuf.shrink_to_fit(); + mCharWidths.clear(); + mCharWidths.shrink_to_fit(); + mHyphBuf.clear(); + mHyphBuf.shrink_to_fit(); + mCandidates.shrink_to_fit(); + mBreaks.shrink_to_fit(); + mWidths.shrink_to_fit(); + mFlags.shrink_to_fit(); + } + mStrategy = kBreakStrategy_Greedy; + mHyphenationFrequency = kHyphenationFrequency_Normal; + mLinePenalty = 0.0f; +} + +} // namespace android diff --git a/libs/minikin/Measurement.cpp b/libs/minikin/Measurement.cpp new file mode 100644 index 0000000..0b68ac5 --- /dev/null +++ b/libs/minikin/Measurement.cpp @@ -0,0 +1,122 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define LOG_TAG "Minikin" +#include <cutils/log.h> + +#include <cmath> +#include <unicode/uchar.h> + +#include <minikin/GraphemeBreak.h> +#include <minikin/Measurement.h> + +namespace android { + +// These could be considered helper methods of layout, but need only be loosely coupled, so +// are separate. + +static float getRunAdvance(Layout& layout, const uint16_t* buf, size_t layoutStart, size_t start, + size_t count, size_t offset) { + float advance = 0.0f; + size_t lastCluster = start; + float clusterWidth = 0.0f; + for (size_t i = start; i < offset; i++) { + float charAdvance = layout.getCharAdvance(i - layoutStart); + if (charAdvance != 0.0f) { + advance += charAdvance; + lastCluster = i; + clusterWidth = charAdvance; + } + } + if (offset < start + count && layout.getCharAdvance(offset - layoutStart) == 0.0f) { + // In the middle of a cluster, distribute width of cluster so that each grapheme cluster + // gets an equal share. + // TODO: get caret information out of font when that's available + size_t nextCluster; + for (nextCluster = offset + 1; nextCluster < start + count; nextCluster++) { + if (layout.getCharAdvance(nextCluster - layoutStart) != 0.0f) break; + } + int numGraphemeClusters = 0; + int numGraphemeClustersAfter = 0; + for (size_t i = lastCluster; i < nextCluster; i++) { + bool isAfter = i >= offset; + if (GraphemeBreak::isGraphemeBreak(buf, start, count, i)) { + numGraphemeClusters++; + if (isAfter) { + numGraphemeClustersAfter++; + } + } + } + if (numGraphemeClusters > 0) { + advance -= clusterWidth * numGraphemeClustersAfter / numGraphemeClusters; + } + } + return advance; +} + +float getRunAdvance(Layout& layout, const uint16_t* buf, size_t start, size_t count, + size_t offset) { + return getRunAdvance(layout, buf, start, start, count, offset); +} + +/** + * Essentially the inverse of getRunAdvance. Compute the value of offset for which the + * measured caret comes closest to the provided advance param, and which is on a grapheme + * cluster boundary. + * + * The actual implementation fast-forwards through clusters to get "close", then does a finer-grain + * search within the cluster and grapheme breaks. + */ +size_t getOffsetForAdvance(Layout& layout, const uint16_t* buf, size_t start, size_t count, + float advance) { + float x = 0.0f, xLastClusterStart = 0.0f, xSearchStart = 0.0f; + size_t lastClusterStart = start, searchStart = start; + for (size_t i = start; i < start + count; i++) { + if (GraphemeBreak::isGraphemeBreak(buf, start, count, i)) { + searchStart = lastClusterStart; + xSearchStart = xLastClusterStart; + } + float width = layout.getCharAdvance(i - start); + if (width != 0.0f) { + lastClusterStart = i; + xLastClusterStart = x; + x += width; + if (x > advance) { + break; + } + } + } + size_t best = searchStart; + float bestDist = FLT_MAX; + for (size_t i = searchStart; i <= start + count; i++) { + if (GraphemeBreak::isGraphemeBreak(buf, start, count, i)) { + // "getRunAdvance(layout, buf, start, count, i) - advance" but more efficient + float delta = getRunAdvance(layout, buf, start, searchStart, count - searchStart, i) + + + xSearchStart - advance; + if (std::abs(delta) < bestDist) { + bestDist = std::abs(delta); + best = i; + } + if (delta >= 0.0f) { + break; + } + } + } + return best; +} + +} |