summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRaph Levien <raph@google.com>2015-03-18 23:04:28 -0700
committerRaph Levien <raph@google.com>2015-03-30 09:15:53 -0700
commitdaf6a6bdbf2ff1f66496d6200cb253e2f50759d5 (patch)
tree20b32ff4bddb7fd70b72a734fea4d9ac5458dd51
parent01f526614431e3a0a6e1a48039e00b8a9b7d6fbf (diff)
downloadandroid_frameworks_minikin-daf6a6bdbf2ff1f66496d6200cb253e2f50759d5.tar.gz
android_frameworks_minikin-daf6a6bdbf2ff1f66496d6200cb253e2f50759d5.tar.bz2
android_frameworks_minikin-daf6a6bdbf2ff1f66496d6200cb253e2f50759d5.zip
Add hyphenation to line breaking
This patch adds hyphenation using the Liang hyphenation algorithm, similar to TeX. It also improves the optimized line breaker so that it works correctly and efficiently even when the line width is not constant (there is a specialization for constant width, which is probably worthwhile, but performance TODOs remain). Still to be done: * hyphenator has many shortcuts, only tested with English * interaction between punctuation and hyphenation is problematic Change-Id: I2d94a1668ebc536398b7c43fcf486333eeb7c6aa
-rw-r--r--include/minikin/Hyphenator.h62
-rw-r--r--include/minikin/LineBreaker.h45
-rw-r--r--libs/minikin/Android.mk1
-rw-r--r--libs/minikin/Hyphenator.cpp152
-rw-r--r--libs/minikin/LineBreaker.cpp196
5 files changed, 407 insertions, 49 deletions
diff --git a/include/minikin/Hyphenator.h b/include/minikin/Hyphenator.h
new file mode 100644
index 0000000..581c657
--- /dev/null
+++ b/include/minikin/Hyphenator.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * An implementation of Liang's hyphenation algorithm.
+ */
+
+#include <memory>
+#include <unordered_map>
+
+#ifndef MINIKIN_HYPHENATOR_H
+#define MINIKIN_HYPHENATOR_H
+
+namespace android {
+
+class Trie {
+public:
+ std::vector<uint8_t> result;
+ std::unordered_map<uint16_t, Trie> succ;
+};
+
+class Hyphenator {
+public:
+ // Note: this will also require a locale, for proper case folding behavior
+ static Hyphenator* load(const uint16_t* patternData, size_t size);
+
+ // Compute the hyphenation of a word, storing the hyphenation in result vector. Each
+ // entry in the vector is a "hyphen edit" to be applied at the corresponding code unit
+ // offset in the word. Currently 0 means no hyphen and 1 means insert hyphen and break,
+ // but this will be expanded to other edits for nonstandard hyphenation.
+ // Example: word is "hyphen", result is [0 0 1 0 0 0], corresponding to "hy-phen".
+ void hyphenate(std::vector<uint8_t>* result, const uint16_t* word, size_t len);
+
+private:
+ void addPattern(const uint16_t* pattern, size_t size);
+
+ void hyphenateSoft(std::vector<uint8_t>* result, const uint16_t* word, size_t len);
+
+ // TODO: these should become parameters, as they might vary by locale, screen size, and
+ // possibly explicit user control.
+ static const int MIN_PREFIX = 2;
+ static const int MIN_SUFFIX = 3;
+
+ Trie root;
+};
+
+} // namespace android
+
+#endif // MINIKIN_HYPHENATOR_H \ No newline at end of file
diff --git a/include/minikin/LineBreaker.h b/include/minikin/LineBreaker.h
index 29afba0..92e72e2 100644
--- a/include/minikin/LineBreaker.h
+++ b/include/minikin/LineBreaker.h
@@ -26,6 +26,7 @@
#include "unicode/locid.h"
#include <cmath>
#include <vector>
+#include "minikin/Hyphenator.h"
namespace android {
@@ -43,6 +44,10 @@ class LineWidths {
mFirstWidthLineCount = firstWidthLineCount;
mRestWidth = restWidth;
}
+ bool isConstant() const {
+ // technically mFirstWidthLineCount == 0 would count too, but doesn't actually happen
+ return mRestWidth == mFirstWidth;
+ }
float getLineWidth(int line) const {
return (line < mFirstWidthLineCount) ? mFirstWidth : mRestWidth;
}
@@ -77,6 +82,8 @@ class TabStops {
class LineBreaker {
public:
+ const static int kTab_Shift = 29; // keep synchronized with TAB_MASK in StaticLayout.java
+
~LineBreaker() {
utext_close(&mUText);
delete mBreakIterator;
@@ -88,13 +95,8 @@ class LineBreaker {
// locale has actually changed.
// That logic could be here but it's better for performance that it's upstream because of
// the cost of constructing and comparing the ICU Locale object.
- void setLocale(const icu::Locale& locale) {
- delete mBreakIterator;
- UErrorCode status = U_ZERO_ERROR;
- mBreakIterator = icu::BreakIterator::createLineInstance(locale, status);
- // TODO: check status
- // TODO: load hyphenator from locale
- }
+ // Note: caller is responsible for managing lifetime of hyphenator
+ void setLocale(const icu::Locale& locale, Hyphenator* hyphenator);
void resize(size_t size) {
mTextBuf.resize(size);
@@ -130,8 +132,8 @@ class LineBreaker {
// Minikin to do the shaping of the strings. The main thing that would need to be changed
// is having some kind of callback (or virtual class, or maybe even template), which could
// easily be instantiated with Minikin's Layout. Future work for when needed.
- float addStyleRun(const MinikinPaint* paint, const FontCollection* typeface,
- FontStyle style, size_t start, size_t end, bool isRtl);
+ float addStyleRun(MinikinPaint* paint, const FontCollection* typeface, FontStyle style,
+ size_t start, size_t end, bool isRtl);
void addReplacement(size_t start, size_t end, float width);
@@ -145,7 +147,7 @@ class LineBreaker {
return mWidths.data();
}
- const uint8_t* getFlags() const {
+ const int* getFlags() const {
return mFlags.data();
}
@@ -166,23 +168,40 @@ class LineBreaker {
ParaWidth postBreak;
float penalty; // penalty of this break (for example, hyphen penalty)
float score; // best score found for this break
+ size_t lineNumber; // only updated for non-constant line widths
+ uint8_t hyphenEdit;
};
float currentLineWidth() const;
- void addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, float penalty);
+ // compute shrink/stretch penalty for line
+ float computeScore(float delta, bool atEnd);
+
+ void addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, float penalty,
+ uint8_t hyph);
void addCandidate(Candidate cand);
+ // push an actual break to the output. Takes care of setting flags for tab
+ void pushBreak(int offset, float width, uint8_t hyph);
+
void computeBreaksGreedy();
- void computeBreaksOpt();
+ void computeBreaksOptimal();
+
+ // special case when LineWidth is constant (layout is rectangle)
+ void computeBreaksOptimalRect();
+
+ void finishBreaksOptimal();
icu::BreakIterator* mBreakIterator = nullptr;
UText mUText = UTEXT_INITIALIZER;
std::vector<uint16_t>mTextBuf;
std::vector<float>mCharWidths;
+ Hyphenator* mHyphenator;
+ std::vector<uint8_t> mHyphBuf;
+
// layout parameters
BreakStrategy mStrategy = kBreakStrategy_Greedy;
LineWidths mLineWidths;
@@ -191,7 +210,7 @@ class LineBreaker {
// result of line breaking
std::vector<int> mBreaks;
std::vector<float> mWidths;
- std::vector<uint8_t> mFlags;
+ std::vector<int> mFlags;
ParaWidth mWidth = 0;
std::vector<Candidate> mCandidates;
diff --git a/libs/minikin/Android.mk b/libs/minikin/Android.mk
index 5406824..34b535c 100644
--- a/libs/minikin/Android.mk
+++ b/libs/minikin/Android.mk
@@ -22,6 +22,7 @@ LOCAL_SRC_FILES := \
FontCollection.cpp \
FontFamily.cpp \
GraphemeBreak.cpp \
+ Hyphenator.cpp \
Layout.cpp \
LineBreaker.cpp \
MinikinInternal.cpp \
diff --git a/libs/minikin/Hyphenator.cpp b/libs/minikin/Hyphenator.cpp
new file mode 100644
index 0000000..c50b386
--- /dev/null
+++ b/libs/minikin/Hyphenator.cpp
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vector>
+#include <memory>
+#include <cctype>
+#include <algorithm>
+#include <string>
+
+// HACK: for reading pattern file
+#include <fcntl.h>
+
+#define LOG_TAG "Minikin"
+#include "utils/Log.h"
+
+#include "minikin/Hyphenator.h"
+
+using std::vector;
+
+namespace android {
+
+static const uint16_t CHAR_SOFT_HYPHEN = 0x00AD;
+
+void Hyphenator::addPattern(const uint16_t* pattern, size_t size) {
+ vector<uint16_t> word;
+ vector<uint8_t> result;
+
+ // start by parsing the Liang-format pattern into a word and a result vector, the
+ // vector right-aligned but without leading zeros. Examples:
+ // a1bc2d -> abcd [1, 0, 2, 0]
+ // abc1 -> abc [1]
+ // 1a2b3c4d5 -> abcd [1, 2, 3, 4, 5]
+ bool lastWasLetter = false;
+ bool haveSeenNumber = false;
+ for (size_t i = 0; i < size; i++) {
+ uint16_t c = pattern[i];
+ if (isdigit(c)) {
+ result.push_back(c - '0');
+ lastWasLetter = false;
+ haveSeenNumber = true;
+ } else {
+ word.push_back(c);
+ if (lastWasLetter && haveSeenNumber) {
+ result.push_back(0);
+ }
+ lastWasLetter = true;
+ }
+ }
+ if (lastWasLetter) {
+ result.push_back(0);
+ }
+ Trie* t = &root;
+ for (size_t i = 0; i < word.size(); i++) {
+ t = &t->succ[word[i]];
+ }
+ t->result = result;
+}
+
+// If any soft hyphen is present in the word, use soft hyphens to decide hyphenation,
+// as recommended in UAX #14 (Use of Soft Hyphen)
+void Hyphenator::hyphenateSoft(vector<uint8_t>* result, const uint16_t* word, size_t len) {
+ (*result)[0] = 0;
+ for (size_t i = 1; i < len; i++) {
+ (*result)[i] = word[i - 1] == CHAR_SOFT_HYPHEN;
+ }
+}
+
+void Hyphenator::hyphenate(vector<uint8_t>* result, const uint16_t* word, size_t len) {
+ result->clear();
+ result->resize(len);
+ if (len < MIN_PREFIX + MIN_SUFFIX) return;
+ size_t maxOffset = len - MIN_SUFFIX + 1;
+ for (size_t i = 0; i < len + 1; i++) {
+ const Trie* node = &root;
+ for (size_t j = i; j < len + 2; j++) {
+ uint16_t c;
+ if (j == 0 || j == len + 1) {
+ c = '.'; // word boundary character in pattern data files
+ } else {
+ c = word[j - 1];
+ if (c == CHAR_SOFT_HYPHEN) {
+ hyphenateSoft(result, word, len);
+ return;
+ }
+ // TODO: use locale-sensitive case folding from ICU.
+ c = tolower(c);
+ }
+ auto search = node->succ.find(c);
+ if (search != node->succ.end()) {
+ node = &search->second;
+ } else {
+ break;
+ }
+ if (!node->result.empty()) {
+ int resultLen = node->result.size();
+ int offset = j + 1 - resultLen;
+ int start = std::max(MIN_PREFIX - offset, 0);
+ int end = std::min(resultLen, (int)maxOffset - offset);
+ // TODO performance: this inner loop can profitably be optimized
+ for (int k = start; k < end; k++) {
+ (*result)[offset + k] = std::max((*result)[offset + k], node->result[k]);
+ }
+#if 0
+ // debug printing of matched patterns
+ std::string dbg;
+ for (size_t k = i; k <= j + 1; k++) {
+ int off = k - j - 2 + resultLen;
+ if (off >= 0 && node->result[off] != 0) {
+ dbg.push_back((char)('0' + node->result[off]));
+ }
+ if (k < j + 1) {
+ uint16_t c = (k == 0 || k == len + 1) ? '.' : word[k - 1];
+ dbg.push_back((char)c);
+ }
+ }
+ ALOGD("%d:%d %s", i, j, dbg.c_str());
+#endif
+ }
+ }
+ }
+ // Since the above calculation does not modify values outside
+ // [MIN_PREFIX, len - MIN_SUFFIX], they are left as 0.
+ for (size_t i = MIN_PREFIX; i < maxOffset; i++) {
+ (*result)[i] &= 1;
+ }
+}
+
+Hyphenator* Hyphenator::load(const uint16_t *patternData, size_t size) {
+ Hyphenator* result = new Hyphenator;
+ for (size_t i = 0; i < size; i++) {
+ size_t end = i;
+ while (patternData[end] != '\n') end++;
+ result->addPattern(patternData + i, end - i);
+ i = end;
+ }
+ return result;
+}
+
+} // namespace android
diff --git a/libs/minikin/LineBreaker.cpp b/libs/minikin/LineBreaker.cpp
index 99d7f69..f7f3fb3 100644
--- a/libs/minikin/LineBreaker.cpp
+++ b/libs/minikin/LineBreaker.cpp
@@ -29,6 +29,7 @@ using std::vector;
namespace android {
const int CHAR_TAB = 0x0009;
+const uint16_t CHAR_SOFT_HYPHEN = 0x00AD;
// Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these
// constants are larger than any reasonable actual width score.
@@ -40,6 +41,16 @@ const float SCORE_DESPERATE = 1e10f;
// to avoid allocation.
const size_t MAX_TEXT_BUF_RETAIN = 32678;
+void LineBreaker::setLocale(const icu::Locale& locale, Hyphenator* hyphenator) {
+ delete mBreakIterator;
+ UErrorCode status = U_ZERO_ERROR;
+ mBreakIterator = icu::BreakIterator::createLineInstance(locale, status);
+ // TODO: check status
+
+ // TODO: load actual resource dependent on locale; letting Minikin do it is a hack
+ mHyphenator = hyphenator;
+}
+
void LineBreaker::setText() {
UErrorCode status = U_ZERO_ERROR;
utext_openUChars(&mUText, mTextBuf.data(), mTextBuf.size(), &status);
@@ -49,7 +60,7 @@ void LineBreaker::setText() {
// handle initial break here because addStyleRun may never be called
mBreakIterator->next();
mCandidates.clear();
- Candidate cand = {0, 0, 0.0, 0.0, 0.0, 0.0};
+ Candidate cand = {0, 0, 0.0, 0.0, 0.0, 0.0, 0, 0};
mCandidates.push_back(cand);
// reset greedy breaker state
@@ -64,7 +75,6 @@ void LineBreaker::setText() {
}
void LineBreaker::setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
- ALOGD("width %f", firstWidth);
mLineWidths.setWidths(firstWidth, firstWidthLineCount, restWidth);
}
@@ -81,22 +91,29 @@ static bool isLineEndSpace(uint16_t c) {
// width buffer.
// This method finds the candidate word breaks (using the ICU break iterator) and sends them
// to addCandidate.
-float LineBreaker::addStyleRun(const MinikinPaint* paint, const FontCollection* typeface,
+float LineBreaker::addStyleRun(MinikinPaint* paint, const FontCollection* typeface,
FontStyle style, size_t start, size_t end, bool isRtl) {
Layout layout; // performance TODO: move layout to self object to reduce allocation cost?
float width = 0.0f;
int bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR;
+ float hyphenPenalty = 0.0;
if (paint != nullptr) {
layout.setFontCollection(typeface);
layout.doLayout(mTextBuf.data(), start, end - start, mTextBuf.size(), bidiFlags, style,
*paint);
layout.getAdvances(mCharWidths.data() + start);
width = layout.getAdvance();
+
+ // a heuristic that seems to perform well
+ hyphenPenalty = 0.5 * paint->size * paint->scaleX * mLineWidths.getLineWidth(0);
}
- ParaWidth postBreak = mWidth;
size_t current = (size_t)mBreakIterator->current();
+ size_t wordEnd = start;
+ size_t lastBreak = start;
+ ParaWidth lastBreakWidth = mWidth;
+ ParaWidth postBreak = mWidth;
for (size_t i = start; i < end; i++) {
uint16_t c = mTextBuf[i];
if (c == CHAR_TAB) {
@@ -110,14 +127,48 @@ float LineBreaker::addStyleRun(const MinikinPaint* paint, const FontCollection*
mWidth += mCharWidths[i];
if (!isLineEndSpace(c)) {
postBreak = mWidth;
+ wordEnd = i + 1;
}
}
if (i + 1 == current) {
- // TODO: hyphenation goes here
-
- // Skip break for zero-width characters.
- if (current == mTextBuf.size() || mCharWidths[current] > 0) {
- addWordBreak(current, mWidth, postBreak, 0);
+ // Override ICU's treatment of soft hyphen as a break opportunity, because we want it
+ // to be a hyphen break, with penalty and drawing behavior.
+ if (c != CHAR_SOFT_HYPHEN) {
+ if (paint != nullptr && mHyphenator != nullptr && wordEnd > lastBreak) {
+ mHyphenator->hyphenate(&mHyphBuf, &mTextBuf[lastBreak], wordEnd - lastBreak);
+ #if VERBOSE_DEBUG
+ std::string hyphenatedString;
+ for (size_t j = lastBreak; j < wordEnd; j++) {
+ if (mHyphBuf[j - lastBreak]) hyphenatedString.push_back('-');
+ // Note: only works with ASCII, should do UTF-8 conversion here
+ hyphenatedString.push_back(buffer()[j]);
+ }
+ ALOGD("hyphenated string: %s", hyphenatedString.c_str());
+ #endif
+
+ // measure hyphenated substrings
+ for (size_t j = lastBreak; j < wordEnd; j++) {
+ uint8_t hyph = mHyphBuf[j - lastBreak];
+ if (hyph) {
+ paint->hyphenEdit = hyph;
+ layout.doLayout(mTextBuf.data(), lastBreak, j - lastBreak,
+ mTextBuf.size(), bidiFlags, style, *paint);
+ ParaWidth hyphPostBreak = lastBreakWidth + layout.getAdvance();
+ paint->hyphenEdit = 0;
+ layout.doLayout(mTextBuf.data(), j, wordEnd - j,
+ mTextBuf.size(), bidiFlags, style, *paint);
+ ParaWidth hyphPreBreak = postBreak - layout.getAdvance();
+ addWordBreak(j, hyphPreBreak, hyphPostBreak, hyphenPenalty, hyph);
+ }
+ }
+ }
+
+ // Skip break for zero-width characters.
+ if (current == mTextBuf.size() || mCharWidths[current] > 0) {
+ addWordBreak(current, mWidth, postBreak, 0.0, 0);
+ }
+ lastBreak = current;
+ lastBreakWidth = mWidth;
}
current = (size_t)mBreakIterator->next();
}
@@ -129,7 +180,7 @@ float LineBreaker::addStyleRun(const MinikinPaint* paint, const FontCollection*
// add a word break (possibly for a hyphenated fragment), and add desperate breaks if
// needed (ie when word exceeds current line width)
void LineBreaker::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak,
- float penalty) {
+ float penalty, uint8_t hyph) {
Candidate cand;
ParaWidth width = mCandidates.back().preBreak;
if (postBreak - width > currentLineWidth()) {
@@ -145,6 +196,7 @@ void LineBreaker::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth post
cand.preBreak = width;
cand.postBreak = width;
cand.penalty = SCORE_DESPERATE;
+ cand.hyphenEdit = 0;
#if VERBOSE_DEBUG
ALOGD("desperate cand: %d %g:%g",
mCandidates.size(), cand.postBreak, cand.preBreak);
@@ -159,12 +211,24 @@ void LineBreaker::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth post
cand.preBreak = preBreak;
cand.postBreak = postBreak;
cand.penalty = penalty;
+ cand.hyphenEdit = hyph;
#if VERBOSE_DEBUG
ALOGD("cand: %d %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak);
#endif
addCandidate(cand);
}
+// TODO: for justified text, refine with shrink/stretch
+float LineBreaker::computeScore(float delta, bool atEnd) {
+ if (delta < 0) {
+ return SCORE_OVERFULL;
+ } else if (atEnd && mStrategy != kBreakStrategy_Balanced) {
+ return 0.0;
+ } else {
+ return delta * delta;
+ }
+}
+
// TODO performance: could avoid populating mCandidates if greedy only
void LineBreaker::addCandidate(Candidate cand) {
size_t candIndex = mCandidates.size();
@@ -174,10 +238,8 @@ void LineBreaker::addCandidate(Candidate cand) {
if (mBestBreak == mLastBreak) {
mBestBreak = candIndex;
}
- mBreaks.push_back(mCandidates[mBestBreak].offset);
- mWidths.push_back(mCandidates[mBestBreak].postBreak - mPreBreak);
- mFlags.push_back(mFirstTabIndex < mBreaks.back());
- mFirstTabIndex = INT_MAX;
+ pushBreak(mCandidates[mBestBreak].offset, mCandidates[mBestBreak].postBreak - mPreBreak,
+ mCandidates[mBestBreak].hyphenEdit);
mBestScore = SCORE_INFTY;
#if VERBOSE_DEBUG
ALOGD("break: %d %g", mBreaks.back(), mWidths.back());
@@ -191,6 +253,15 @@ void LineBreaker::addCandidate(Candidate cand) {
}
}
+void LineBreaker::pushBreak(int offset, float width, uint8_t hyph) {
+ mBreaks.push_back(offset);
+ mWidths.push_back(width);
+ int flags = (mFirstTabIndex < mBreaks.back()) << kTab_Shift;
+ flags |= hyph;
+ mFlags.push_back(flags);
+ mFirstTabIndex = INT_MAX;
+}
+
void LineBreaker::addReplacement(size_t start, size_t end, float width) {
mCharWidths[start] = width;
std::fill(&mCharWidths[start + 1], &mCharWidths[end], 0.0f);
@@ -205,27 +276,87 @@ void LineBreaker::computeBreaksGreedy() {
// All breaks but the last have been added in addCandidate already.
size_t nCand = mCandidates.size();
if (nCand == 1 || mLastBreak != nCand - 1) {
- mBreaks.push_back(mCandidates[nCand - 1].offset);
- mWidths.push_back(mCandidates[nCand - 1].postBreak - mPreBreak);
- mFlags.push_back(mFirstTabIndex < mBreaks.back());
- // don't need to update mFirstTabIndex or mBestScore, because we're done
+ pushBreak(mCandidates[nCand - 1].offset, mCandidates[nCand - 1].postBreak - mPreBreak, 0);
+ // don't need to update mBestScore, because we're done
#if VERBOSE_DEBUG
ALOGD("final break: %d %g", mBreaks.back(), mWidths.back());
#endif
}
}
-void LineBreaker::computeBreaksOpt() {
+// Follow "prev" links in mCandidates array, and copy to result arrays.
+void LineBreaker::finishBreaksOptimal() {
// clear existing greedy break result
mBreaks.clear();
mWidths.clear();
mFlags.clear();
+ size_t nCand = mCandidates.size();
+ size_t prev;
+ for (size_t i = nCand - 1; i > 0; i = prev) {
+ prev = mCandidates[i].prev;
+ mBreaks.push_back(mCandidates[i].offset);
+ mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak);
+ mFlags.push_back(mCandidates[i].hyphenEdit);
+ }
+ std::reverse(mBreaks.begin(), mBreaks.end());
+ std::reverse(mWidths.begin(), mWidths.end());
+ std::reverse(mFlags.begin(), mFlags.end());
+}
+
+void LineBreaker::computeBreaksOptimal() {
+ size_t active = 0;
+ size_t nCand = mCandidates.size();
+ for (size_t i = 1; i < nCand; i++) {
+ bool atEnd = i == nCand - 1;
+ float best = SCORE_INFTY;
+ size_t bestPrev = 0;
+
+ size_t lineNumberLast = mCandidates[active].lineNumber;
+ float width = mLineWidths.getLineWidth(lineNumberLast);
+ ParaWidth leftEdge = mCandidates[i].postBreak - width;
+ float bestHope = 0;
+
+ for (size_t j = active; j < i; j++) {
+ size_t lineNumber = mCandidates[j].lineNumber;
+ if (lineNumber != lineNumberLast) {
+ float widthNew = mLineWidths.getLineWidth(lineNumber);
+ if (widthNew != width) {
+ leftEdge = mCandidates[i].postBreak - width;
+ bestHope = 0;
+ width = widthNew;
+ }
+ lineNumberLast = lineNumber;
+ }
+ float jScore = mCandidates[j].score;
+ if (jScore + bestHope >= best) continue;
+ float delta = mCandidates[j].preBreak - leftEdge;
+
+ float widthScore = computeScore(delta, atEnd);
+ if (delta < 0) {
+ active = j + 1;
+ } else {
+ bestHope = widthScore;
+ }
+
+ float score = jScore + widthScore;
+ if (score <= best) {
+ best = score;
+ bestPrev = j;
+ }
+ }
+ mCandidates[i].score = best + mCandidates[i].penalty;
+ mCandidates[i].prev = bestPrev;
+ mCandidates[i].lineNumber = mCandidates[bestPrev].lineNumber + 1;
+ }
+ finishBreaksOptimal();
+}
+
+void LineBreaker::computeBreaksOptimalRect() {
size_t active = 0;
size_t nCand = mCandidates.size();
float width = mLineWidths.getLineWidth(0);
- // TODO: actually support non-constant width
for (size_t i = 1; i < nCand; i++) {
- bool stretchIsFree = mStrategy != kBreakStrategy_Balanced && i == nCand - 1;
+ bool atEnd = i == nCand - 1;
float best = SCORE_INFTY;
size_t bestPrev = 0;
@@ -235,17 +366,15 @@ void LineBreaker::computeBreaksOpt() {
ParaWidth leftEdge = mCandidates[i].postBreak - width;
for (size_t j = active; j < i; j++) {
+ // TODO performance: can break if bestHope >= best; worth it?
float jScore = mCandidates[j].score;
if (jScore + bestHope >= best) continue;
float delta = mCandidates[j].preBreak - leftEdge;
- // TODO: for justified text, refine with shrink/stretch
- float widthScore;
+ float widthScore = computeScore(delta, atEnd);
if (delta < 0) {
- widthScore = SCORE_OVERFULL;
active = j + 1;
} else {
- widthScore = stretchIsFree ? 0 : delta * delta;
bestHope = widthScore;
}
@@ -258,23 +387,16 @@ void LineBreaker::computeBreaksOpt() {
mCandidates[i].score = best + mCandidates[i].penalty;
mCandidates[i].prev = bestPrev;
}
- size_t prev;
- for (size_t i = nCand - 1; i > 0; i = prev) {
- prev = mCandidates[i].prev;
- mBreaks.push_back(mCandidates[i].offset);
- mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak);
- mFlags.push_back(0);
- }
- std::reverse(mBreaks.begin(), mBreaks.end());
- std::reverse(mWidths.begin(), mWidths.end());
- std::reverse(mFlags.begin(), mFlags.end());
+ finishBreaksOptimal();
}
size_t LineBreaker::computeBreaks() {
if (mStrategy == kBreakStrategy_Greedy) {
computeBreaksGreedy();
+ } else if (mLineWidths.isConstant()) {
+ computeBreaksOptimalRect();
} else {
- computeBreaksOpt();
+ computeBreaksOptimal();
}
return mBreaks.size();
}
@@ -290,6 +412,8 @@ void LineBreaker::finish() {
mTextBuf.shrink_to_fit();
mCharWidths.clear();
mCharWidths.shrink_to_fit();
+ mHyphBuf.clear();
+ mHyphBuf.shrink_to_fit();
mCandidates.shrink_to_fit();
mBreaks.shrink_to_fit();
mWidths.shrink_to_fit();