diff options
-rw-r--r-- | include/minikin/Hyphenator.h | 62 | ||||
-rw-r--r-- | include/minikin/Layout.h | 27 | ||||
-rw-r--r-- | include/minikin/LineBreaker.h | 250 | ||||
-rw-r--r-- | include/minikin/Measurement.h | 31 | ||||
-rw-r--r-- | include/minikin/MinikinFont.h | 16 | ||||
-rw-r--r-- | libs/minikin/Android.mk | 3 | ||||
-rw-r--r-- | libs/minikin/FontCollection.cpp | 7 | ||||
-rw-r--r-- | libs/minikin/GraphemeBreak.cpp | 8 | ||||
-rw-r--r-- | libs/minikin/Hyphenator.cpp | 163 | ||||
-rw-r--r-- | libs/minikin/Layout.cpp | 196 | ||||
-rw-r--r-- | libs/minikin/LineBreaker.cpp | 450 | ||||
-rw-r--r-- | libs/minikin/Measurement.cpp | 122 | ||||
-rw-r--r-- | sample/example.cpp | 8 |
13 files changed, 1256 insertions, 87 deletions
diff --git a/include/minikin/Hyphenator.h b/include/minikin/Hyphenator.h new file mode 100644 index 0000000..581c657 --- /dev/null +++ b/include/minikin/Hyphenator.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * An implementation of Liang's hyphenation algorithm. + */ + +#include <memory> +#include <unordered_map> + +#ifndef MINIKIN_HYPHENATOR_H +#define MINIKIN_HYPHENATOR_H + +namespace android { + +class Trie { +public: + std::vector<uint8_t> result; + std::unordered_map<uint16_t, Trie> succ; +}; + +class Hyphenator { +public: + // Note: this will also require a locale, for proper case folding behavior + static Hyphenator* load(const uint16_t* patternData, size_t size); + + // Compute the hyphenation of a word, storing the hyphenation in result vector. Each + // entry in the vector is a "hyphen edit" to be applied at the corresponding code unit + // offset in the word. Currently 0 means no hyphen and 1 means insert hyphen and break, + // but this will be expanded to other edits for nonstandard hyphenation. + // Example: word is "hyphen", result is [0 0 1 0 0 0], corresponding to "hy-phen". + void hyphenate(std::vector<uint8_t>* result, const uint16_t* word, size_t len); + +private: + void addPattern(const uint16_t* pattern, size_t size); + + void hyphenateSoft(std::vector<uint8_t>* result, const uint16_t* word, size_t len); + + // TODO: these should become parameters, as they might vary by locale, screen size, and + // possibly explicit user control. + static const int MIN_PREFIX = 2; + static const int MIN_SUFFIX = 3; + + Trie root; +}; + +} // namespace android + +#endif // MINIKIN_HYPHENATOR_H
\ No newline at end of file diff --git a/include/minikin/Layout.h b/include/minikin/Layout.h index 9f87597..cdf4aac 100644 --- a/include/minikin/Layout.h +++ b/include/minikin/Layout.h @@ -24,7 +24,7 @@ #include <minikin/FontCollection.h> #include <minikin/MinikinFontFreeType.h> -namespace android { +namespace minikin { // The Bitmap class is for debugging. We'll probably move it out // of here into a separate lightweight software rendering module @@ -34,13 +34,17 @@ public: Bitmap(int width, int height); ~Bitmap(); void writePnm(std::ofstream& o) const; - void drawGlyph(const GlyphBitmap& bitmap, int x, int y); + void drawGlyph(const android::GlyphBitmap& bitmap, int x, int y); private: int width; int height; uint8_t* buf; }; +} // namespace minikin + +namespace android { + struct LayoutGlyph { // index into mFaces and mHbFonts vectors. We could imagine // moving this into a run length representation, because it's @@ -57,6 +61,17 @@ struct LayoutGlyph { // Internal state used during layout operation class LayoutContext; +enum { + kBidi_LTR = 0, + kBidi_RTL = 1, + kBidi_Default_LTR = 2, + kBidi_Default_RTL = 3, + kBidi_Force_LTR = 4, + kBidi_Force_RTL = 5, + + kBidi_Mask = 0x7 +}; + // Lifecycle and threading assumptions for Layout: // The object is assumed to be owned by a single thread; multiple threads // may not mutate it at the same time. @@ -78,7 +93,7 @@ public: void doLayout(const uint16_t* buf, size_t start, size_t count, size_t bufSize, int bidiFlags, const FontStyle &style, const MinikinPaint &paint); - void draw(Bitmap*, int x0, int y0, float size) const; + void draw(minikin::Bitmap*, int x0, int y0, float size) const; // Deprecated. Nont needed. Remove when callers are removed. static void init(); @@ -95,9 +110,13 @@ public: float getAdvance() const; // Get advances, copying into caller-provided buffer. The size of this - // buffer must match the length of the string (nchars arg to doLayout). + // buffer must match the length of the string (count arg to doLayout). void getAdvances(float* advances); + // The i parameter is an offset within the buf relative to start, it is < count, where + // start and count are the parameters to doLayout + float getCharAdvance(size_t i) const { return mAdvances[i]; } + void getBounds(MinikinRect* rect); // Purge all caches, useful in low memory conditions diff --git a/include/minikin/LineBreaker.h b/include/minikin/LineBreaker.h new file mode 100644 index 0000000..e031fb3 --- /dev/null +++ b/include/minikin/LineBreaker.h @@ -0,0 +1,250 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A module for breaking paragraphs into lines, supporting high quality + * hyphenation and justification. + */ + +#ifndef MINIKIN_LINE_BREAKER_H +#define MINIKIN_LINE_BREAKER_H + +#include "unicode/brkiter.h" +#include "unicode/locid.h" +#include <cmath> +#include <vector> +#include "minikin/Hyphenator.h" + +namespace android { + +enum BreakStrategy { + kBreakStrategy_Greedy = 0, + kBreakStrategy_HighQuality = 1, + kBreakStrategy_Balanced = 2 +}; + +enum HyphenationFrequency { + kHyphenationFrequency_None = 0, + kHyphenationFrequency_Normal = 1, + kHyphenationFrequency_Full = 2 +}; + +// TODO: want to generalize to be able to handle array of line widths +class LineWidths { + public: + void setWidths(float firstWidth, int firstWidthLineCount, float restWidth) { + mFirstWidth = firstWidth; + mFirstWidthLineCount = firstWidthLineCount; + mRestWidth = restWidth; + } + void setIndents(const std::vector<float>& indents) { + mIndents = indents; + } + bool isConstant() const { + // technically mFirstWidthLineCount == 0 would count too, but doesn't actually happen + return mRestWidth == mFirstWidth && mIndents.empty(); + } + float getLineWidth(int line) const { + float width = (line < mFirstWidthLineCount) ? mFirstWidth : mRestWidth; + if (!mIndents.empty()) { + if ((size_t)line < mIndents.size()) { + width -= mIndents[line]; + } else { + width -= mIndents.back(); + } + } + return width; + } + private: + float mFirstWidth; + int mFirstWidthLineCount; + float mRestWidth; + std::vector<float> mIndents; +}; + +class TabStops { + public: + void set(const int* stops, size_t nStops, int tabWidth) { + if (stops != nullptr) { + mStops.assign(stops, stops + nStops); + } else { + mStops.clear(); + } + mTabWidth = tabWidth; + } + float nextTab(float widthSoFar) const { + for (size_t i = 0; i < mStops.size(); i++) { + if (mStops[i] > widthSoFar) { + return mStops[i]; + } + } + return floor(widthSoFar / mTabWidth + 1) * mTabWidth; + } + private: + std::vector<int> mStops; + int mTabWidth; +}; + +class LineBreaker { + public: + const static int kTab_Shift = 29; // keep synchronized with TAB_MASK in StaticLayout.java + + ~LineBreaker() { + utext_close(&mUText); + delete mBreakIterator; + } + + // Note: Locale persists across multiple invocations (it is not cleaned up by finish()), + // explicitly to avoid the cost of creating ICU BreakIterator objects. It should always + // be set on the first invocation, but callers are encouraged not to call again unless + // locale has actually changed. + // That logic could be here but it's better for performance that it's upstream because of + // the cost of constructing and comparing the ICU Locale object. + // Note: caller is responsible for managing lifetime of hyphenator + void setLocale(const icu::Locale& locale, Hyphenator* hyphenator); + + void resize(size_t size) { + mTextBuf.resize(size); + mCharWidths.resize(size); + } + + size_t size() const { + return mTextBuf.size(); + } + + uint16_t* buffer() { + return mTextBuf.data(); + } + + float* charWidths() { + return mCharWidths.data(); + } + + // set text to current contents of buffer + void setText(); + + void setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth); + + void setIndents(const std::vector<float>& indents); + + void setTabStops(const int* stops, size_t nStops, int tabWidth) { + mTabStops.set(stops, nStops, tabWidth); + } + + BreakStrategy getStrategy() const { return mStrategy; } + + void setStrategy(BreakStrategy strategy) { mStrategy = strategy; } + + HyphenationFrequency getHyphenationFrequency() const { return mHyphenationFrequency; } + + void setHyphenationFrequency(HyphenationFrequency frequency) { + mHyphenationFrequency = frequency; + } + + // TODO: this class is actually fairly close to being general and not tied to using + // Minikin to do the shaping of the strings. The main thing that would need to be changed + // is having some kind of callback (or virtual class, or maybe even template), which could + // easily be instantiated with Minikin's Layout. Future work for when needed. + float addStyleRun(MinikinPaint* paint, const FontCollection* typeface, FontStyle style, + size_t start, size_t end, bool isRtl); + + void addReplacement(size_t start, size_t end, float width); + + size_t computeBreaks(); + + const int* getBreaks() const { + return mBreaks.data(); + } + + const float* getWidths() const { + return mWidths.data(); + } + + const int* getFlags() const { + return mFlags.data(); + } + + void finish(); + + private: + // ParaWidth is used to hold cumulative width from beginning of paragraph. Note that for + // very large paragraphs, accuracy could degrade using only 32-bit float. Note however + // that float is used extensively on the Java side for this. This is a typedef so that + // we can easily change it based on performance/accuracy tradeoff. + typedef double ParaWidth; + + // A single candidate break + struct Candidate { + size_t offset; // offset to text buffer, in code units + size_t prev; // index to previous break + ParaWidth preBreak; + ParaWidth postBreak; + float penalty; // penalty of this break (for example, hyphen penalty) + float score; // best score found for this break + size_t lineNumber; // only updated for non-constant line widths + uint8_t hyphenEdit; + }; + + float currentLineWidth() const; + + void addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, float penalty, + uint8_t hyph); + + void addCandidate(Candidate cand); + + // push an actual break to the output. Takes care of setting flags for tab + void pushBreak(int offset, float width, uint8_t hyph); + + void computeBreaksGreedy(); + + void computeBreaksOptimal(bool isRectangular); + + void finishBreaksOptimal(); + + icu::BreakIterator* mBreakIterator = nullptr; + UText mUText = UTEXT_INITIALIZER; + std::vector<uint16_t>mTextBuf; + std::vector<float>mCharWidths; + + Hyphenator* mHyphenator; + std::vector<uint8_t> mHyphBuf; + + // layout parameters + BreakStrategy mStrategy = kBreakStrategy_Greedy; + HyphenationFrequency mHyphenationFrequency = kHyphenationFrequency_Normal; + LineWidths mLineWidths; + TabStops mTabStops; + + // result of line breaking + std::vector<int> mBreaks; + std::vector<float> mWidths; + std::vector<int> mFlags; + + ParaWidth mWidth = 0; + std::vector<Candidate> mCandidates; + float mLinePenalty = 0.0f; + + // the following are state for greedy breaker (updated while adding style runs) + size_t mLastBreak; + size_t mBestBreak; + float mBestScore; + ParaWidth mPreBreak; // prebreak of last break + int mFirstTabIndex; +}; + +} // namespace android + +#endif // MINIKIN_LINE_BREAKER_H diff --git a/include/minikin/Measurement.h b/include/minikin/Measurement.h new file mode 100644 index 0000000..fc47fa3 --- /dev/null +++ b/include/minikin/Measurement.h @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINIKIN_MEASUREMENT_H +#define MINIKIN_MEASUREMENT_H + +#include <minikin/Layout.h> + +namespace android { + +float getRunAdvance(Layout& layout, const uint16_t* buf, size_t start, size_t count, size_t offset); + +size_t getOffsetForAdvance(Layout& layout, const uint16_t* buf, size_t start, size_t count, + float advance); + +} + +#endif // MINIKIN_MEASUREMENT_H diff --git a/include/minikin/MinikinFont.h b/include/minikin/MinikinFont.h index 3f07589..7f65cd7 100644 --- a/include/minikin/MinikinFont.h +++ b/include/minikin/MinikinFont.h @@ -27,6 +27,19 @@ namespace android { +// The hyphen edit represents an edit to the string when a word is +// hyphenated. The most common hyphen edit is adding a "-" at the end +// of a syllable, but nonstandard hyphenation allows for more choices. +class HyphenEdit { +public: + HyphenEdit() : hyphen(0) { } + HyphenEdit(uint32_t hyphenInt) : hyphen(hyphenInt) { } + bool hasHyphen() const { return hyphen != 0; } + bool operator==(const HyphenEdit &other) const { return hyphen == other.hyphen; } +private: + uint32_t hyphen; +}; + class MinikinFont; // Possibly move into own .h file? @@ -36,7 +49,7 @@ struct MinikinPaint { fakery(), fontFeatureSettings() { } bool skipCache() const { - return !fontFeatureSettings.empty(); + return !fontFeatureSettings.empty(); } MinikinFont *font; @@ -46,6 +59,7 @@ struct MinikinPaint { float letterSpacing; uint32_t paintFlags; FontFakery fakery; + HyphenEdit hyphenEdit; std::string fontFeatureSettings; }; diff --git a/libs/minikin/Android.mk b/libs/minikin/Android.mk index d9c973d..873f279 100644 --- a/libs/minikin/Android.mk +++ b/libs/minikin/Android.mk @@ -22,7 +22,10 @@ LOCAL_SRC_FILES := \ FontCollection.cpp \ FontFamily.cpp \ GraphemeBreak.cpp \ + Hyphenator.cpp \ Layout.cpp \ + LineBreaker.cpp \ + Measurement.cpp \ MinikinInternal.cpp \ MinikinRefCounted.cpp \ MinikinFontFreeType.cpp \ diff --git a/libs/minikin/FontCollection.cpp b/libs/minikin/FontCollection.cpp index e3911c5..b4bfe31 100644 --- a/libs/minikin/FontCollection.cpp +++ b/libs/minikin/FontCollection.cpp @@ -122,7 +122,7 @@ FontFamily* FontCollection::getFamilyForChar(uint32_t ch, return family; } int score = lang.match(family->lang()) * 2; - if (variant != 0 && variant == family->variant()) { + if (family->variant() == 0 || family->variant() == variant) { score++; } if (score > bestScore) { @@ -152,10 +152,13 @@ const uint32_t NBSP = 0xa0; const uint32_t ZWJ = 0x200c; const uint32_t ZWNJ = 0x200d; const uint32_t KEYCAP = 0x20e3; +const uint32_t HYPHEN = 0x2010; +const uint32_t NB_HYPHEN = 0x2011; // Characters where we want to continue using existing font run instead of // recomputing the best match in the fallback list. -static const uint32_t stickyWhitelist[] = { '!', ',', '.', ':', ';', '?', NBSP, ZWJ, ZWNJ, KEYCAP }; +static const uint32_t stickyWhitelist[] = { '!', ',', '-', '.', ':', ';', '?', NBSP, ZWJ, ZWNJ, + KEYCAP, HYPHEN, NB_HYPHEN }; static bool isStickyWhitelisted(uint32_t c) { for (size_t i = 0; i < sizeof(stickyWhitelist) / sizeof(stickyWhitelist[0]); i++) { diff --git a/libs/minikin/GraphemeBreak.cpp b/libs/minikin/GraphemeBreak.cpp index 5d8978d..f8f386c 100644 --- a/libs/minikin/GraphemeBreak.cpp +++ b/libs/minikin/GraphemeBreak.cpp @@ -41,7 +41,7 @@ bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t co uint32_t c2 = 0; size_t offset_back = offset; U16_PREV(buf, start, offset_back, c1); - U16_NEXT(buf, offset, count, c2); + U16_NEXT(buf, offset, start + count, c2); int32_t p1 = u_getIntPropertyValue(c1, UCHAR_GRAPHEME_CLUSTER_BREAK); int32_t p2 = u_getIntPropertyValue(c2, UCHAR_GRAPHEME_CLUSTER_BREAK); // Rule GB3, CR x LF @@ -54,7 +54,7 @@ bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t co } // Rule GB5, / (Control | CR | LF) if (p2 == U_GCB_CONTROL || p2 == U_GCB_CR || p2 == U_GCB_LF) { - // exclude zero-width control characters from breaking (tailoring of TR29) + // exclude zero-width control characters from breaking (tailoring of UAX #29) if (c2 == 0x00ad || (c2 >= 0x200b && c2 <= 0x200f) || (c2 >= 0x2028 && c2 <= 0x202e) @@ -83,12 +83,12 @@ bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t co if (p2 == U_GCB_EXTEND || p2 == U_GCB_SPACING_MARK) { if (c2 == 0xe33) { // most other implementations break THAI CHARACTER SARA AM - // (tailoring of TR29) + // (tailoring of UAX #29) return true; } return false; } - // Cluster indic syllables togeter (tailoring of TR29) + // Cluster indic syllables together (tailoring of UAX #29) if (u_getIntPropertyValue(c1, UCHAR_CANONICAL_COMBINING_CLASS) == 9 // virama && u_getIntPropertyValue(c2, UCHAR_GENERAL_CATEGORY) == U_OTHER_LETTER) { return false; diff --git a/libs/minikin/Hyphenator.cpp b/libs/minikin/Hyphenator.cpp new file mode 100644 index 0000000..3eb151b --- /dev/null +++ b/libs/minikin/Hyphenator.cpp @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vector> +#include <memory> +#include <algorithm> +#include <string> +#include <unicode/uchar.h> + +// HACK: for reading pattern file +#include <fcntl.h> + +#define LOG_TAG "Minikin" +#include "utils/Log.h" + +#include "minikin/Hyphenator.h" + +using std::vector; + +namespace android { + +static const uint16_t CHAR_SOFT_HYPHEN = 0x00AD; + +void Hyphenator::addPattern(const uint16_t* pattern, size_t size) { + vector<uint16_t> word; + vector<uint8_t> result; + + // start by parsing the Liang-format pattern into a word and a result vector, the + // vector right-aligned but without leading zeros. Examples: + // a1bc2d -> abcd [1, 0, 2, 0] + // abc1 -> abc [1] + // 1a2b3c4d5 -> abcd [1, 2, 3, 4, 5] + bool lastWasLetter = false; + bool haveSeenNumber = false; + for (size_t i = 0; i < size; i++) { + uint16_t c = pattern[i]; + if (isdigit(c)) { + result.push_back(c - '0'); + lastWasLetter = false; + haveSeenNumber = true; + } else { + word.push_back(c); + if (lastWasLetter && haveSeenNumber) { + result.push_back(0); + } + lastWasLetter = true; + } + } + if (lastWasLetter) { + result.push_back(0); + } + Trie* t = &root; + for (size_t i = 0; i < word.size(); i++) { + t = &t->succ[word[i]]; + } + t->result = result; +} + +// If any soft hyphen is present in the word, use soft hyphens to decide hyphenation, +// as recommended in UAX #14 (Use of Soft Hyphen) +void Hyphenator::hyphenateSoft(vector<uint8_t>* result, const uint16_t* word, size_t len) { + (*result)[0] = 0; + for (size_t i = 1; i < len; i++) { + (*result)[i] = word[i - 1] == CHAR_SOFT_HYPHEN; + } +} + +void Hyphenator::hyphenate(vector<uint8_t>* result, const uint16_t* word, size_t len) { + result->clear(); + result->resize(len); + if (len < MIN_PREFIX + MIN_SUFFIX) return; + size_t maxOffset = len - MIN_SUFFIX + 1; + for (size_t i = 0; i < len + 1; i++) { + const Trie* node = &root; + for (size_t j = i; j < len + 2; j++) { + uint16_t c; + if (j == 0 || j == len + 1) { + c = '.'; // word boundary character in pattern data files + } else { + c = word[j - 1]; + if (c == CHAR_SOFT_HYPHEN) { + hyphenateSoft(result, word, len); + return; + } + // TODO: This uses ICU's simple character to character lowercasing, which ignores + // the locale, and ignores cases when lowercasing a character results in more than + // one character. It should be fixed to consider the locale (in order for it to work + // correctly for Turkish and Azerbaijani), as well as support one-to-many, and + // many-to-many case conversions (including non-BMP cases). + if (c < 0x00C0) { // U+00C0 is the lowest uppercase non-ASCII character + // Convert uppercase ASCII to lowercase ASCII, but keep other characters as-is + if (0x0041 <= c && c <= 0x005A) { + c += 0x0020; + } + } else { + c = u_tolower(c); + } + } + auto search = node->succ.find(c); + if (search != node->succ.end()) { + node = &search->second; + } else { + break; + } + if (!node->result.empty()) { + int resultLen = node->result.size(); + int offset = j + 1 - resultLen; + int start = std::max(MIN_PREFIX - offset, 0); + int end = std::min(resultLen, (int)maxOffset - offset); + // TODO performance: this inner loop can profitably be optimized + for (int k = start; k < end; k++) { + (*result)[offset + k] = std::max((*result)[offset + k], node->result[k]); + } +#if 0 + // debug printing of matched patterns + std::string dbg; + for (size_t k = i; k <= j + 1; k++) { + int off = k - j - 2 + resultLen; + if (off >= 0 && node->result[off] != 0) { + dbg.push_back((char)('0' + node->result[off])); + } + if (k < j + 1) { + uint16_t c = (k == 0 || k == len + 1) ? '.' : word[k - 1]; + dbg.push_back((char)c); + } + } + ALOGD("%d:%d %s", i, j, dbg.c_str()); +#endif + } + } + } + // Since the above calculation does not modify values outside + // [MIN_PREFIX, len - MIN_SUFFIX], they are left as 0. + for (size_t i = MIN_PREFIX; i < maxOffset; i++) { + (*result)[i] &= 1; + } +} + +Hyphenator* Hyphenator::load(const uint16_t *patternData, size_t size) { + Hyphenator* result = new Hyphenator; + for (size_t i = 0; i < size; i++) { + size_t end = i; + while (patternData[end] != '\n') end++; + result->addPattern(patternData + i, end - i); + i = end; + } + return result; +} + +} // namespace android diff --git a/libs/minikin/Layout.cpp b/libs/minikin/Layout.cpp index db0667b..be29e3c 100644 --- a/libs/minikin/Layout.cpp +++ b/libs/minikin/Layout.cpp @@ -41,20 +41,49 @@ using std::string; using std::vector; -namespace android { +namespace minikin { -// TODO: these should move into the header file, but for now we don't want -// to cause namespace collisions with TextLayout.h -enum { - kBidi_LTR = 0, - kBidi_RTL = 1, - kBidi_Default_LTR = 2, - kBidi_Default_RTL = 3, - kBidi_Force_LTR = 4, - kBidi_Force_RTL = 5, - - kBidi_Mask = 0x7 -}; +Bitmap::Bitmap(int width, int height) : width(width), height(height) { + buf = new uint8_t[width * height](); +} + +Bitmap::~Bitmap() { + delete[] buf; +} + +void Bitmap::writePnm(std::ofstream &o) const { + o << "P5" << std::endl; + o << width << " " << height << std::endl; + o << "255" << std::endl; + o.write((const char *)buf, width * height); + o.close(); +} + +void Bitmap::drawGlyph(const android::GlyphBitmap& bitmap, int x, int y) { + int bmw = bitmap.width; + int bmh = bitmap.height; + x += bitmap.left; + y -= bitmap.top; + int x0 = std::max(0, x); + int x1 = std::min(width, x + bmw); + int y0 = std::max(0, y); + int y1 = std::min(height, y + bmh); + const unsigned char* src = bitmap.buffer + (y0 - y) * bmw + (x0 - x); + uint8_t* dst = buf + y0 * width; + for (int yy = y0; yy < y1; yy++) { + for (int xx = x0; xx < x1; xx++) { + int pixel = (int)dst[xx] + (int)src[xx - x]; + pixel = pixel > 0xff ? 0xff : pixel; + dst[xx] = pixel; + } + src += bmw; + dst += width; + } +} + +} // namespace minikin + +namespace android { const int kDirection_Mask = 0x1; @@ -81,7 +110,7 @@ public: mStart(start), mCount(count), mId(collection->getId()), mStyle(style), mSize(paint.size), mScaleX(paint.scaleX), mSkewX(paint.skewX), mLetterSpacing(paint.letterSpacing), - mPaintFlags(paint.paintFlags), mIsRtl(dir) { + mPaintFlags(paint.paintFlags), mHyphenEdit(paint.hyphenEdit), mIsRtl(dir) { } bool operator==(const LayoutCacheKey &other) const; hash_t hash() const; @@ -115,6 +144,7 @@ private: float mSkewX; float mLetterSpacing; int32_t mPaintFlags; + HyphenEdit mHyphenEdit; bool mIsRtl; // Note: any fields added to MinikinPaint must also be reflected here. // TODO: language matching (possibly integrate into style) @@ -173,13 +203,24 @@ private: static const size_t kMaxEntries = 100; }; +static unsigned int disabledDecomposeCompatibility(hb_unicode_funcs_t*, hb_codepoint_t, + hb_codepoint_t*, void*) { + return 0; +} + class LayoutEngine : public Singleton<LayoutEngine> { public: LayoutEngine() { + unicodeFunctions = hb_unicode_funcs_create(hb_icu_get_unicode_funcs()); + /* Disable the function used for compatibility decomposition */ + hb_unicode_funcs_set_decompose_compatibility_func( + unicodeFunctions, disabledDecomposeCompatibility, NULL, NULL); hbBuffer = hb_buffer_create(); + hb_buffer_set_unicode_funcs(hbBuffer, unicodeFunctions); } hb_buffer_t* hbBuffer; + hb_unicode_funcs_t* unicodeFunctions; LayoutCache layoutCache; HbFaceCache hbFaceCache; }; @@ -196,6 +237,7 @@ bool LayoutCacheKey::operator==(const LayoutCacheKey& other) const { && mSkewX == other.mSkewX && mLetterSpacing == other.mLetterSpacing && mPaintFlags == other.mPaintFlags + && mHyphenEdit == other.mHyphenEdit && mIsRtl == other.mIsRtl && mNchars == other.mNchars && !memcmp(mChars, other.mChars, mNchars * sizeof(uint16_t)); @@ -211,6 +253,7 @@ hash_t LayoutCacheKey::hash() const { hash = JenkinsHashMix(hash, hash_type(mSkewX)); hash = JenkinsHashMix(hash, hash_type(mLetterSpacing)); hash = JenkinsHashMix(hash, hash_type(mPaintFlags)); + hash = JenkinsHashMix(hash, hash_type(mHyphenEdit.hasHyphen())); hash = JenkinsHashMix(hash, hash_type(mIsRtl)); hash = JenkinsHashMixShorts(hash, mChars, mNchars); return JenkinsHashWhiten(hash); @@ -220,44 +263,6 @@ hash_t hash_type(const LayoutCacheKey& key) { return key.hash(); } -Bitmap::Bitmap(int width, int height) : width(width), height(height) { - buf = new uint8_t[width * height](); -} - -Bitmap::~Bitmap() { - delete[] buf; -} - -void Bitmap::writePnm(std::ofstream &o) const { - o << "P5" << std::endl; - o << width << " " << height << std::endl; - o << "255" << std::endl; - o.write((const char *)buf, width * height); - o.close(); -} - -void Bitmap::drawGlyph(const GlyphBitmap& bitmap, int x, int y) { - int bmw = bitmap.width; - int bmh = bitmap.height; - x += bitmap.left; - y -= bitmap.top; - int x0 = std::max(0, x); - int x1 = std::min(width, x + bmw); - int y0 = std::max(0, y); - int y1 = std::min(height, y + bmh); - const unsigned char* src = bitmap.buffer + (y0 - y) * bmw + (x0 - x); - uint8_t* dst = buf + y0 * width; - for (int yy = y0; yy < y1; yy++) { - for (int xx = x0; xx < x1; xx++) { - int pixel = (int)dst[xx] + (int)src[xx - x]; - pixel = pixel > 0xff ? 0xff : pixel; - dst[xx] = pixel; - } - src += bmw; - dst += width; - } -} - void MinikinRect::join(const MinikinRect& r) { if (isEmpty()) { set(r); @@ -312,12 +317,6 @@ static hb_bool_t harfbuzzGetGlyph(hb_font_t* hbFont, void* fontData, hb_codepoin MinikinPaint* paint = reinterpret_cast<MinikinPaint*>(fontData); MinikinFont* font = paint->font; uint32_t glyph_id; - /* HarfBuzz replaces broken input codepoints with (unsigned int) -1. - * Skia expects valid Unicode. - * Replace invalid codepoints with U+FFFD REPLACEMENT CHARACTER. - */ - if (unicode > 0x10FFFF) - unicode = 0xFFFD; bool ok = font->GetGlyph(unicode, &glyph_id); if (ok) { *glyph = glyph_id; @@ -408,7 +407,7 @@ int Layout::findFace(FakedFont face, LayoutContext* ctx) { static hb_script_t codePointToScript(hb_codepoint_t codepoint) { static hb_unicode_funcs_t* u = 0; if (!u) { - u = hb_icu_get_unicode_funcs(); + u = LayoutEngine::getInstance().unicodeFunctions; } return hb_unicode_script(u, codepoint); } @@ -518,6 +517,29 @@ static size_t getNextWordBreak(const uint16_t* chars, size_t offset, size_t len) return len; } +/** + * Disable certain scripts (mostly those with cursive connection) from having letterspacing + * applied. See https://github.com/behdad/harfbuzz/issues/64 for more details. + */ +static bool isScriptOkForLetterspacing(hb_script_t script) { + return !( + script == HB_SCRIPT_ARABIC || + script == HB_SCRIPT_NKO || + script == HB_SCRIPT_PSALTER_PAHLAVI || + script == HB_SCRIPT_MANDAIC || + script == HB_SCRIPT_MONGOLIAN || + script == HB_SCRIPT_PHAGS_PA || + script == HB_SCRIPT_DEVANAGARI || + script == HB_SCRIPT_BENGALI || + script == HB_SCRIPT_GURMUKHI || + script == HB_SCRIPT_MODI || + script == HB_SCRIPT_SHARADA || + script == HB_SCRIPT_SYLOTI_NAGRI || + script == HB_SCRIPT_TIRHUTA || + script == HB_SCRIPT_OGHAM + ); +} + void Layout::doLayout(const uint16_t* buf, size_t start, size_t count, size_t bufSize, int bidiFlags, const FontStyle &style, const MinikinPaint &paint) { AutoMutex _l(gMinikinLock); @@ -589,12 +611,15 @@ void Layout::doLayout(const uint16_t* buf, size_t start, size_t count, size_t bu void Layout::doLayoutRunCached(const uint16_t* buf, size_t start, size_t count, size_t bufSize, bool isRtl, LayoutContext* ctx, size_t dstStart) { + HyphenEdit hyphen = ctx->paint.hyphenEdit; if (!isRtl) { // left to right size_t wordstart = start == bufSize ? start : getPrevWordBreak(buf, start + 1); size_t wordend; for (size_t iter = start; iter < start + count; iter = wordend) { wordend = getNextWordBreak(buf, iter, bufSize); + // Only apply hyphen to the last word in the string. + ctx->paint.hyphenEdit = wordend >= start + count ? hyphen : HyphenEdit(); size_t wordcount = std::min(start + count, wordend) - iter; doLayoutWord(buf + wordstart, iter - wordstart, wordcount, wordend - wordstart, isRtl, ctx, iter - dstStart); @@ -607,6 +632,8 @@ void Layout::doLayoutRunCached(const uint16_t* buf, size_t start, size_t count, size_t wordend = end == 0 ? 0 : getNextWordBreak(buf, end - 1, bufSize); for (size_t iter = end; iter > start; iter = wordstart) { wordstart = getPrevWordBreak(buf, iter); + // Only apply hyphen to the last (leftmost) word in the string. + ctx->paint.hyphenEdit = iter == end ? hyphen : HyphenEdit(); size_t bufStart = std::max(start, wordstart); doLayoutWord(buf + wordstart, bufStart - wordstart, iter - bufStart, wordend - wordstart, isRtl, ctx, bufStart - dstStart); @@ -677,15 +704,6 @@ void Layout::doLayoutRun(const uint16_t* buf, size_t start, size_t count, size_t double size = ctx->paint.size; double scaleX = ctx->paint.scaleX; - double letterSpace = ctx->paint.letterSpacing * size * scaleX; - double letterSpaceHalfLeft; - if ((ctx->paint.paintFlags & LinearTextFlag) == 0) { - letterSpace = round(letterSpace); - letterSpaceHalfLeft = floor(letterSpace * 0.5); - } else { - letterSpaceHalfLeft = letterSpace * 0.5; - } - double letterSpaceHalfRight = letterSpace - letterSpaceHalfLeft; float x = mAdvance; float y = 0; @@ -715,7 +733,22 @@ void Layout::doLayoutRun(const uint16_t* buf, size_t start, size_t count, size_t srunend = srunstart; hb_script_t script = getScriptRun(buf + start, run.end, &srunend); - hb_buffer_reset(buffer); + double letterSpace = 0.0; + double letterSpaceHalfLeft = 0.0; + double letterSpaceHalfRight = 0.0; + + if (ctx->paint.letterSpacing != 0.0 && isScriptOkForLetterspacing(script)) { + letterSpace = ctx->paint.letterSpacing * size * scaleX; + if ((ctx->paint.paintFlags & LinearTextFlag) == 0) { + letterSpace = round(letterSpace); + letterSpaceHalfLeft = floor(letterSpace * 0.5); + } else { + letterSpaceHalfLeft = letterSpace * 0.5; + } + letterSpaceHalfRight = letterSpace - letterSpaceHalfLeft; + } + + hb_buffer_clear_contents(buffer); hb_buffer_set_script(buffer, script); hb_buffer_set_direction(buffer, isRtl? HB_DIRECTION_RTL : HB_DIRECTION_LTR); FontLanguage language = ctx->style.getLanguage(); @@ -724,6 +757,22 @@ void Layout::doLayoutRun(const uint16_t* buf, size_t start, size_t count, size_t hb_buffer_set_language(buffer, hb_language_from_string(lang.c_str(), -1)); } hb_buffer_add_utf16(buffer, buf, bufSize, srunstart + start, srunend - srunstart); + if (ctx->paint.hyphenEdit.hasHyphen() && srunend > srunstart) { + // TODO: check whether this is really the desired semantics. It could have the + // effect of assigning the hyphen width to a nonspacing mark + unsigned int lastCluster = start + srunend - 1; + + hb_codepoint_t hyphenChar = 0x2010; // HYPHEN + hb_codepoint_t glyph; + // Fallback to ASCII HYPHEN-MINUS if the font didn't have a glyph for HYPHEN. Note + // that we intentionally don't do anything special if the font doesn't have a + // HYPHEN-MINUS either, so a tofu could be shown, hinting towards something + // missing. + if (!hb_font_get_glyph(hbFont, hyphenChar, 0, &glyph)) { + hyphenChar = 0x002D; // HYPHEN-MINUS + } + hb_buffer_add(buffer, hyphenChar, lastCluster); + } hb_shape(hbFont, buffer, features.empty() ? NULL : &features[0], features.size()); unsigned int numGlyphs; hb_glyph_info_t* info = hb_buffer_get_glyph_infos(buffer, &numGlyphs); @@ -758,7 +807,12 @@ void Layout::doLayoutRun(const uint16_t* buf, size_t start, size_t count, size_t ctx->paint.font->GetBounds(&glyphBounds, glyph_ix, ctx->paint); glyphBounds.offset(x + xoff, y + yoff); mBounds.join(glyphBounds); - mAdvances[info[i].cluster - start] += xAdvance; + if (info[i].cluster - start < count) { + mAdvances[info[i].cluster - start] += xAdvance; + } else { + ALOGE("cluster %d (start %d) out of bounds of count %d", + info[i].cluster - start, start, count); + } x += xAdvance; } if (numGlyphs) @@ -806,7 +860,7 @@ void Layout::appendLayout(Layout* src, size_t start) { } } -void Layout::draw(Bitmap* surface, int x0, int y0, float size) const { +void Layout::draw(minikin::Bitmap* surface, int x0, int y0, float size) const { /* TODO: redo as MinikinPaint settings if (mProps.hasTag(minikinHinting)) { diff --git a/libs/minikin/LineBreaker.cpp b/libs/minikin/LineBreaker.cpp new file mode 100644 index 0000000..a832ca2 --- /dev/null +++ b/libs/minikin/LineBreaker.cpp @@ -0,0 +1,450 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define VERBOSE_DEBUG 0 + +#include <limits> + +#define LOG_TAG "Minikin" +#include <cutils/log.h> + +#include <minikin/Layout.h> +#include <minikin/LineBreaker.h> + +using std::vector; + +namespace android { + +const int CHAR_TAB = 0x0009; +const uint16_t CHAR_SOFT_HYPHEN = 0x00AD; + +// Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these +// constants are larger than any reasonable actual width score. +const float SCORE_INFTY = std::numeric_limits<float>::max(); +const float SCORE_OVERFULL = 1e12f; +const float SCORE_DESPERATE = 1e10f; + +// Multiplier for hyphen penalty on last line. +const float LAST_LINE_PENALTY_MULTIPLIER = 4.0f; +// Penalty assigned to each line break (to try to minimize number of lines) +// TODO: when we implement full justification (so spaces can shrink and stretch), this is +// probably not the most appropriate method. +const float LINE_PENALTY_MULTIPLIER = 2.0f; + +// Very long words trigger O(n^2) behavior in hyphenation, so we disable hyphenation for +// unreasonably long words. This is somewhat of a heuristic because extremely long words +// are possible in some languages. This does mean that very long real words can get +// broken by desperate breaks, with no hyphens. +const size_t LONGEST_HYPHENATED_WORD = 45; + +// When the text buffer is within this limit, capacity of vectors is retained at finish(), +// to avoid allocation. +const size_t MAX_TEXT_BUF_RETAIN = 32678; + +void LineBreaker::setLocale(const icu::Locale& locale, Hyphenator* hyphenator) { + delete mBreakIterator; + UErrorCode status = U_ZERO_ERROR; + mBreakIterator = icu::BreakIterator::createLineInstance(locale, status); + // TODO: check status + + // TODO: load actual resource dependent on locale; letting Minikin do it is a hack + mHyphenator = hyphenator; +} + +void LineBreaker::setText() { + UErrorCode status = U_ZERO_ERROR; + utext_openUChars(&mUText, mTextBuf.data(), mTextBuf.size(), &status); + mBreakIterator->setText(&mUText, status); + mBreakIterator->first(); + + // handle initial break here because addStyleRun may never be called + mBreakIterator->next(); + mCandidates.clear(); + Candidate cand = {0, 0, 0.0, 0.0, 0.0, 0.0, 0, 0}; + mCandidates.push_back(cand); + + // reset greedy breaker state + mBreaks.clear(); + mWidths.clear(); + mFlags.clear(); + mLastBreak = 0; + mBestBreak = 0; + mBestScore = SCORE_INFTY; + mPreBreak = 0; + mFirstTabIndex = INT_MAX; +} + +void LineBreaker::setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth) { + mLineWidths.setWidths(firstWidth, firstWidthLineCount, restWidth); +} + + +void LineBreaker::setIndents(const std::vector<float>& indents) { + mLineWidths.setIndents(indents); +} + +// This function determines whether a character is a space that disappears at end of line. +// It is the Unicode set: [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]], +// plus '\n'. +// Note: all such characters are in the BMP, so it's ok to use code units for this. +static bool isLineEndSpace(uint16_t c) { + return c == '\n' || c == ' ' || c == 0x1680 || (0x2000 <= c && c <= 0x200A && c != 0x2007) || + c == 0x205F || c == 0x3000; +} + +// This function determines whether a character is like U+2010 HYPHEN in +// line breaking and usage: a character immediately after which line breaks +// are allowed, but words containing it should not be automatically +// hyphenated. This is a curated set, created by manually inspecting all +// the characters that have the Unicode line breaking property of BA or HY +// and seeing which ones are hyphens. +static bool isLineBreakingHyphen(uint16_t c) { + return (c == 0x002D || // HYPHEN-MINUS + c == 0x058A || // ARMENIAN HYPHEN + c == 0x05BE || // HEBREW PUNCTUATION MAQAF + c == 0x1400 || // CANADIAN SYLLABICS HYPHEN + c == 0x2010 || // HYPHEN + c == 0x2013 || // EN DASH + c == 0x2027 || // HYPHENATION POINT + c == 0x2E17 || // DOUBLE OBLIQUE HYPHEN + c == 0x2E40); // DOUBLE HYPHEN +} + +// Ordinarily, this method measures the text in the range given. However, when paint +// is nullptr, it assumes the widths have already been calculated and stored in the +// width buffer. +// This method finds the candidate word breaks (using the ICU break iterator) and sends them +// to addCandidate. +float LineBreaker::addStyleRun(MinikinPaint* paint, const FontCollection* typeface, + FontStyle style, size_t start, size_t end, bool isRtl) { + Layout layout; // performance TODO: move layout to self object to reduce allocation cost? + float width = 0.0f; + int bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR; + + float hyphenPenalty = 0.0; + if (paint != nullptr) { + layout.setFontCollection(typeface); + layout.doLayout(mTextBuf.data(), start, end - start, mTextBuf.size(), bidiFlags, style, + *paint); + layout.getAdvances(mCharWidths.data() + start); + width = layout.getAdvance(); + + // a heuristic that seems to perform well + hyphenPenalty = 0.5 * paint->size * paint->scaleX * mLineWidths.getLineWidth(0); + if (mHyphenationFrequency == kHyphenationFrequency_Normal) { + hyphenPenalty *= 4.0; // TODO: Replace with a better value after some testing + } + + mLinePenalty = std::max(mLinePenalty, hyphenPenalty * LINE_PENALTY_MULTIPLIER); + } + + size_t current = (size_t)mBreakIterator->current(); + size_t wordEnd = start; + size_t lastBreak = start; + ParaWidth lastBreakWidth = mWidth; + ParaWidth postBreak = mWidth; + bool temporarilySkipHyphenation = false; + for (size_t i = start; i < end; i++) { + uint16_t c = mTextBuf[i]; + if (c == CHAR_TAB) { + mWidth = mPreBreak + mTabStops.nextTab(mWidth - mPreBreak); + if (mFirstTabIndex == INT_MAX) { + mFirstTabIndex = (int)i; + } + // fall back to greedy; other modes don't know how to deal with tabs + mStrategy = kBreakStrategy_Greedy; + } else { + mWidth += mCharWidths[i]; + if (!isLineEndSpace(c)) { + postBreak = mWidth; + wordEnd = i + 1; + } + } + if (i + 1 == current) { + // Override ICU's treatment of soft hyphen as a break opportunity, because we want it + // to be a hyphen break, with penalty and drawing behavior. + if (c != CHAR_SOFT_HYPHEN) { + // TODO: Add a new type of HyphenEdit for breaks whose hyphen already exists, so + // we can pass the whole word down to Hyphenator like the soft hyphen case. + bool wordEndsInHyphen = isLineBreakingHyphen(c); + if (paint != nullptr && mHyphenator != nullptr && + mHyphenationFrequency != kHyphenationFrequency_None && + !wordEndsInHyphen && !temporarilySkipHyphenation && + wordEnd > lastBreak && wordEnd - lastBreak <= LONGEST_HYPHENATED_WORD) { + mHyphenator->hyphenate(&mHyphBuf, &mTextBuf[lastBreak], wordEnd - lastBreak); + #if VERBOSE_DEBUG + std::string hyphenatedString; + for (size_t j = lastBreak; j < wordEnd; j++) { + if (mHyphBuf[j - lastBreak]) hyphenatedString.push_back('-'); + // Note: only works with ASCII, should do UTF-8 conversion here + hyphenatedString.push_back(buffer()[j]); + } + ALOGD("hyphenated string: %s", hyphenatedString.c_str()); + #endif + + // measure hyphenated substrings + for (size_t j = lastBreak; j < wordEnd; j++) { + uint8_t hyph = mHyphBuf[j - lastBreak]; + if (hyph) { + paint->hyphenEdit = hyph; + layout.doLayout(mTextBuf.data(), lastBreak, j - lastBreak, + mTextBuf.size(), bidiFlags, style, *paint); + ParaWidth hyphPostBreak = lastBreakWidth + layout.getAdvance(); + paint->hyphenEdit = 0; + layout.doLayout(mTextBuf.data(), j, wordEnd - j, + mTextBuf.size(), bidiFlags, style, *paint); + ParaWidth hyphPreBreak = postBreak - layout.getAdvance(); + addWordBreak(j, hyphPreBreak, hyphPostBreak, hyphenPenalty, hyph); + } + } + } + // Skip hyphenating the next word if and only if the present word ends in a hyphen + temporarilySkipHyphenation = wordEndsInHyphen; + + // Skip break for zero-width characters inside replacement span + if (paint != nullptr || current == end || mCharWidths[current] > 0) { + addWordBreak(current, mWidth, postBreak, 0.0, 0); + } + lastBreak = current; + lastBreakWidth = mWidth; + } + current = (size_t)mBreakIterator->next(); + } + } + + return width; +} + +// add a word break (possibly for a hyphenated fragment), and add desperate breaks if +// needed (ie when word exceeds current line width) +void LineBreaker::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, + float penalty, uint8_t hyph) { + Candidate cand; + ParaWidth width = mCandidates.back().preBreak; + if (postBreak - width > currentLineWidth()) { + // Add desperate breaks. + // Note: these breaks are based on the shaping of the (non-broken) original text; they + // are imprecise especially in the presence of kerning, ligatures, and Arabic shaping. + size_t i = mCandidates.back().offset; + width += mCharWidths[i++]; + for (; i < offset; i++) { + float w = mCharWidths[i]; + if (w > 0) { + cand.offset = i; + cand.preBreak = width; + cand.postBreak = width; + cand.penalty = SCORE_DESPERATE; + cand.hyphenEdit = 0; +#if VERBOSE_DEBUG + ALOGD("desperate cand: %d %g:%g", + mCandidates.size(), cand.postBreak, cand.preBreak); +#endif + addCandidate(cand); + width += w; + } + } + } + + cand.offset = offset; + cand.preBreak = preBreak; + cand.postBreak = postBreak; + cand.penalty = penalty; + cand.hyphenEdit = hyph; +#if VERBOSE_DEBUG + ALOGD("cand: %d %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak); +#endif + addCandidate(cand); +} + +// TODO performance: could avoid populating mCandidates if greedy only +void LineBreaker::addCandidate(Candidate cand) { + size_t candIndex = mCandidates.size(); + mCandidates.push_back(cand); + if (cand.postBreak - mPreBreak > currentLineWidth()) { + // This break would create an overfull line, pick the best break and break there (greedy) + if (mBestBreak == mLastBreak) { + mBestBreak = candIndex; + } + pushBreak(mCandidates[mBestBreak].offset, mCandidates[mBestBreak].postBreak - mPreBreak, + mCandidates[mBestBreak].hyphenEdit); + mBestScore = SCORE_INFTY; +#if VERBOSE_DEBUG + ALOGD("break: %d %g", mBreaks.back(), mWidths.back()); +#endif + mLastBreak = mBestBreak; + mPreBreak = mCandidates[mBestBreak].preBreak; + } + if (cand.penalty <= mBestScore) { + mBestBreak = candIndex; + mBestScore = cand.penalty; + } +} + +void LineBreaker::pushBreak(int offset, float width, uint8_t hyph) { + mBreaks.push_back(offset); + mWidths.push_back(width); + int flags = (mFirstTabIndex < mBreaks.back()) << kTab_Shift; + flags |= hyph; + mFlags.push_back(flags); + mFirstTabIndex = INT_MAX; +} + +void LineBreaker::addReplacement(size_t start, size_t end, float width) { + mCharWidths[start] = width; + std::fill(&mCharWidths[start + 1], &mCharWidths[end], 0.0f); + addStyleRun(nullptr, nullptr, FontStyle(), start, end, false); +} + +float LineBreaker::currentLineWidth() const { + return mLineWidths.getLineWidth(mBreaks.size()); +} + +void LineBreaker::computeBreaksGreedy() { + // All breaks but the last have been added in addCandidate already. + size_t nCand = mCandidates.size(); + if (nCand == 1 || mLastBreak != nCand - 1) { + pushBreak(mCandidates[nCand - 1].offset, mCandidates[nCand - 1].postBreak - mPreBreak, 0); + // don't need to update mBestScore, because we're done +#if VERBOSE_DEBUG + ALOGD("final break: %d %g", mBreaks.back(), mWidths.back()); +#endif + } +} + +// Follow "prev" links in mCandidates array, and copy to result arrays. +void LineBreaker::finishBreaksOptimal() { + // clear existing greedy break result + mBreaks.clear(); + mWidths.clear(); + mFlags.clear(); + size_t nCand = mCandidates.size(); + size_t prev; + for (size_t i = nCand - 1; i > 0; i = prev) { + prev = mCandidates[i].prev; + mBreaks.push_back(mCandidates[i].offset); + mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak); + mFlags.push_back(mCandidates[i].hyphenEdit); + } + std::reverse(mBreaks.begin(), mBreaks.end()); + std::reverse(mWidths.begin(), mWidths.end()); + std::reverse(mFlags.begin(), mFlags.end()); +} + +void LineBreaker::computeBreaksOptimal(bool isRectangle) { + size_t active = 0; + size_t nCand = mCandidates.size(); + float width = mLineWidths.getLineWidth(0); + for (size_t i = 1; i < nCand; i++) { + bool atEnd = i == nCand - 1; + float best = SCORE_INFTY; + size_t bestPrev = 0; + size_t lineNumberLast = 0; + + if (!isRectangle) { + size_t lineNumberLast = mCandidates[active].lineNumber; + width = mLineWidths.getLineWidth(lineNumberLast); + } + ParaWidth leftEdge = mCandidates[i].postBreak - width; + float bestHope = 0; + + for (size_t j = active; j < i; j++) { + if (!isRectangle) { + size_t lineNumber = mCandidates[j].lineNumber; + if (lineNumber != lineNumberLast) { + float widthNew = mLineWidths.getLineWidth(lineNumber); + if (widthNew != width) { + leftEdge = mCandidates[i].postBreak - width; + bestHope = 0; + width = widthNew; + } + lineNumberLast = lineNumber; + } + } + float jScore = mCandidates[j].score; + if (jScore + bestHope >= best) continue; + float delta = mCandidates[j].preBreak - leftEdge; + + // compute width score for line + + // Note: the "bestHope" optimization makes the assumption that, when delta is + // non-negative, widthScore will increase monotonically as successive candidate + // breaks are considered. + float widthScore = 0.0f; + float additionalPenalty = 0.0f; + if (delta < 0) { + widthScore = SCORE_OVERFULL; + } else if (atEnd && mStrategy != kBreakStrategy_Balanced) { + // increase penalty for hyphen on last line + additionalPenalty = LAST_LINE_PENALTY_MULTIPLIER * mCandidates[j].penalty; + } else { + widthScore = delta * delta; + } + + if (delta < 0) { + active = j + 1; + } else { + bestHope = widthScore; + } + + float score = jScore + widthScore + additionalPenalty; + if (score <= best) { + best = score; + bestPrev = j; + } + } + mCandidates[i].score = best + mCandidates[i].penalty + mLinePenalty; + mCandidates[i].prev = bestPrev; + mCandidates[i].lineNumber = mCandidates[bestPrev].lineNumber + 1; +#if VERBOSE_DEBUG + ALOGD("break %d: score=%g, prev=%d", i, mCandidates[i].score, mCandidates[i].prev); +#endif + } + finishBreaksOptimal(); +} + +size_t LineBreaker::computeBreaks() { + if (mStrategy == kBreakStrategy_Greedy) { + computeBreaksGreedy(); + } else { + computeBreaksOptimal(mLineWidths.isConstant()); + } + return mBreaks.size(); +} + +void LineBreaker::finish() { + mWidth = 0; + mCandidates.clear(); + mBreaks.clear(); + mWidths.clear(); + mFlags.clear(); + if (mTextBuf.size() > MAX_TEXT_BUF_RETAIN) { + mTextBuf.clear(); + mTextBuf.shrink_to_fit(); + mCharWidths.clear(); + mCharWidths.shrink_to_fit(); + mHyphBuf.clear(); + mHyphBuf.shrink_to_fit(); + mCandidates.shrink_to_fit(); + mBreaks.shrink_to_fit(); + mWidths.shrink_to_fit(); + mFlags.shrink_to_fit(); + } + mStrategy = kBreakStrategy_Greedy; + mHyphenationFrequency = kHyphenationFrequency_Normal; + mLinePenalty = 0.0f; +} + +} // namespace android diff --git a/libs/minikin/Measurement.cpp b/libs/minikin/Measurement.cpp new file mode 100644 index 0000000..0b68ac5 --- /dev/null +++ b/libs/minikin/Measurement.cpp @@ -0,0 +1,122 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define LOG_TAG "Minikin" +#include <cutils/log.h> + +#include <cmath> +#include <unicode/uchar.h> + +#include <minikin/GraphemeBreak.h> +#include <minikin/Measurement.h> + +namespace android { + +// These could be considered helper methods of layout, but need only be loosely coupled, so +// are separate. + +static float getRunAdvance(Layout& layout, const uint16_t* buf, size_t layoutStart, size_t start, + size_t count, size_t offset) { + float advance = 0.0f; + size_t lastCluster = start; + float clusterWidth = 0.0f; + for (size_t i = start; i < offset; i++) { + float charAdvance = layout.getCharAdvance(i - layoutStart); + if (charAdvance != 0.0f) { + advance += charAdvance; + lastCluster = i; + clusterWidth = charAdvance; + } + } + if (offset < start + count && layout.getCharAdvance(offset - layoutStart) == 0.0f) { + // In the middle of a cluster, distribute width of cluster so that each grapheme cluster + // gets an equal share. + // TODO: get caret information out of font when that's available + size_t nextCluster; + for (nextCluster = offset + 1; nextCluster < start + count; nextCluster++) { + if (layout.getCharAdvance(nextCluster - layoutStart) != 0.0f) break; + } + int numGraphemeClusters = 0; + int numGraphemeClustersAfter = 0; + for (size_t i = lastCluster; i < nextCluster; i++) { + bool isAfter = i >= offset; + if (GraphemeBreak::isGraphemeBreak(buf, start, count, i)) { + numGraphemeClusters++; + if (isAfter) { + numGraphemeClustersAfter++; + } + } + } + if (numGraphemeClusters > 0) { + advance -= clusterWidth * numGraphemeClustersAfter / numGraphemeClusters; + } + } + return advance; +} + +float getRunAdvance(Layout& layout, const uint16_t* buf, size_t start, size_t count, + size_t offset) { + return getRunAdvance(layout, buf, start, start, count, offset); +} + +/** + * Essentially the inverse of getRunAdvance. Compute the value of offset for which the + * measured caret comes closest to the provided advance param, and which is on a grapheme + * cluster boundary. + * + * The actual implementation fast-forwards through clusters to get "close", then does a finer-grain + * search within the cluster and grapheme breaks. + */ +size_t getOffsetForAdvance(Layout& layout, const uint16_t* buf, size_t start, size_t count, + float advance) { + float x = 0.0f, xLastClusterStart = 0.0f, xSearchStart = 0.0f; + size_t lastClusterStart = start, searchStart = start; + for (size_t i = start; i < start + count; i++) { + if (GraphemeBreak::isGraphemeBreak(buf, start, count, i)) { + searchStart = lastClusterStart; + xSearchStart = xLastClusterStart; + } + float width = layout.getCharAdvance(i - start); + if (width != 0.0f) { + lastClusterStart = i; + xLastClusterStart = x; + x += width; + if (x > advance) { + break; + } + } + } + size_t best = searchStart; + float bestDist = FLT_MAX; + for (size_t i = searchStart; i <= start + count; i++) { + if (GraphemeBreak::isGraphemeBreak(buf, start, count, i)) { + // "getRunAdvance(layout, buf, start, count, i) - advance" but more efficient + float delta = getRunAdvance(layout, buf, start, searchStart, count - searchStart, i) + + + xSearchStart - advance; + if (std::abs(delta) < bestDist) { + bestDist = std::abs(delta); + best = i; + } + if (delta >= 0.0f) { + break; + } + } + } + return best; +} + +} diff --git a/sample/example.cpp b/sample/example.cpp index 487357a..3fbfa79 100644 --- a/sample/example.cpp +++ b/sample/example.cpp @@ -28,8 +28,8 @@ #include <minikin/Layout.h> using std::vector; - -namespace android { +using namespace android; +using namespace minikin; FT_Library library; // TODO: this should not be a global @@ -99,8 +99,6 @@ int runMinikinTest() { return 0; } -} - int main(int argc, const char** argv) { - return android::runMinikinTest(); + return runMinikinTest(); } |