summaryrefslogtreecommitdiffstats
path: root/include/minikin
diff options
context:
space:
mode:
author Raph Levien <raph@google.com> 2015-03-18 23:04:28 -0700
committer Raph Levien <raph@google.com> 2015-03-30 09:15:53 -0700
commit daf6a6bdbf2ff1f66496d6200cb253e2f50759d5 (patch)
tree 20b32ff4bddb7fd70b72a734fea4d9ac5458dd51 /include/minikin
parent 01f526614431e3a0a6e1a48039e00b8a9b7d6fbf (diff)
downloadandroid_frameworks_minikin-daf6a6bdbf2ff1f66496d6200cb253e2f50759d5.tar.gz
android_frameworks_minikin-daf6a6bdbf2ff1f66496d6200cb253e2f50759d5.tar.bz2
android_frameworks_minikin-daf6a6bdbf2ff1f66496d6200cb253e2f50759d5.zip
Add hyphenation to line breaking
This patch adds hyphenation using the Liang hyphenation algorithm, similar to TeX. It also improves the optimized line breaker so that it works correctly and efficiently even when the line width is not constant (there is a specialization for constant width, which is probably worthwhile, but performance TODOs remain).

Still to be done:
* hyphenator has many shortcuts, only tested with English
* interaction between punctuation and hyphenation is problematic

Change-Id: I2d94a1668ebc536398b7c43fcf486333eeb7c6aa
Diffstat (limited to 'include/minikin')
-rw-r--r--include/minikin/Hyphenator.h62
-rw-r--r--include/minikin/LineBreaker.h45
2 files changed, 94 insertions, 13 deletions
diff --git a/include/minikin/Hyphenator.h b/include/minikin/Hyphenator.h
new file mode 100644
index 0000000..581c657
--- /dev/null
+++ b/include/minikin/Hyphenator.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * An implementation of Liang's hyphenation algorithm.
+ */
+
+#include <memory>
+#include <unordered_map>
+
+#ifndef MINIKIN_HYPHENATOR_H
+#define MINIKIN_HYPHENATOR_H
+
+namespace android {
+
+class Trie { // Recursive trie node; Hyphenator stores one instance as its `root` member.
+public:
+ std::vector<uint8_t> result; // presumably the hyphenation values of a pattern ending at this node -- TODO confirm. NOTE(review): this header includes only <memory> and <unordered_map>, so <vector>/<cstdint> must come from the includer.
+ std::unordered_map<uint16_t, Trie> succ; // child nodes keyed by a 16-bit value (presumably a UTF-16 code unit -- TODO confirm). NOTE(review): Trie is still incomplete here; the standard does not guarantee unordered_map accepts an incomplete mapped type.
+};
+
+class Hyphenator { // Produces per-code-unit "hyphen edits" for a word; see hyphenate().
+public:
+ // Note: this will also require a locale, for proper case folding behavior
+ static Hyphenator* load(const uint16_t* patternData, size_t size); // returns a raw pointer; ownership is not stated here (presumably the caller's -- TODO confirm)
+
+ // Compute the hyphenation of a word, storing the hyphenation in result vector. Each
+ // entry in the vector is a "hyphen edit" to be applied at the corresponding code unit
+ // offset in the word. Currently 0 means no hyphen and 1 means insert hyphen and break,
+ // but this will be expanded to other edits for nonstandard hyphenation.
+ // Example: word is "hyphen", result is [0 0 1 0 0 0], corresponding to "hy-phen" (the edit at offset 2 lands before 'p').
+ void hyphenate(std::vector<uint8_t>* result, const uint16_t* word, size_t len); // result is an out-parameter; len is presumably the word length in code units -- TODO confirm
+
+private:
+ void addPattern(const uint16_t* pattern, size_t size); // presumably inserts one pattern into `root` -- TODO confirm against the .cpp
+
+ void hyphenateSoft(std::vector<uint8_t>* result, const uint16_t* word, size_t len); // same parameter list as hyphenate(); its distinct role is not visible in this header
+
+ // TODO: these should become parameters, as they might vary by locale, screen size, and
+ // possibly explicit user control.
+ static const int MIN_PREFIX = 2; // presumably the minimum code units kept before a hyphen -- TODO confirm
+ static const int MIN_SUFFIX = 3; // presumably the minimum code units kept after a hyphen -- TODO confirm
+
+ Trie root; // top-level trie node; presumably populated via addPattern() -- TODO confirm
+};
+
+} // namespace android
+
+#endif // MINIKIN_HYPHENATOR_H \ No newline at end of file
diff --git a/include/minikin/LineBreaker.h b/include/minikin/LineBreaker.h
index 29afba0..92e72e2 100644
--- a/include/minikin/LineBreaker.h
+++ b/include/minikin/LineBreaker.h
@@ -26,6 +26,7 @@
#include "unicode/locid.h"
#include <cmath>
#include <vector>
+#include "minikin/Hyphenator.h"
namespace android {
@@ -43,6 +44,10 @@ class LineWidths {
mFirstWidthLineCount = firstWidthLineCount;
mRestWidth = restWidth;
}
+ bool isConstant() const { // true when the first-lines width and the rest width are equal
+ // technically mFirstWidthLineCount == 0 would count too, but doesn't actually happen
+ return mRestWidth == mFirstWidth; // exact equality of the two stored widths; mFirstWidthLineCount is deliberately not consulted
+ }
float getLineWidth(int line) const {
return (line < mFirstWidthLineCount) ? mFirstWidth : mRestWidth;
}
@@ -77,6 +82,8 @@ class TabStops {
class LineBreaker {
public:
+ const static int kTab_Shift = 29; // keep synchronized with TAB_MASK in StaticLayout.java
+
~LineBreaker() {
utext_close(&mUText);
delete mBreakIterator;
@@ -88,13 +95,8 @@ class LineBreaker {
// locale has actually changed.
// That logic could be here but it's better for performance that it's upstream because of
// the cost of constructing and comparing the ICU Locale object.
- void setLocale(const icu::Locale& locale) {
- delete mBreakIterator;
- UErrorCode status = U_ZERO_ERROR;
- mBreakIterator = icu::BreakIterator::createLineInstance(locale, status);
- // TODO: check status
- // TODO: load hyphenator from locale
- }
+ // Note: caller is responsible for managing lifetime of hyphenator
+ void setLocale(const icu::Locale& locale, Hyphenator* hyphenator);
void resize(size_t size) {
mTextBuf.resize(size);
@@ -130,8 +132,8 @@ class LineBreaker {
// Minikin to do the shaping of the strings. The main thing that would need to be changed
// is having some kind of callback (or virtual class, or maybe even template), which could
// easily be instantiated with Minikin's Layout. Future work for when needed.
- float addStyleRun(const MinikinPaint* paint, const FontCollection* typeface,
- FontStyle style, size_t start, size_t end, bool isRtl);
+ float addStyleRun(MinikinPaint* paint, const FontCollection* typeface, FontStyle style,
+ size_t start, size_t end, bool isRtl);
void addReplacement(size_t start, size_t end, float width);
@@ -145,7 +147,7 @@ class LineBreaker {
return mWidths.data();
}
- const uint8_t* getFlags() const {
+ const int* getFlags() const {
return mFlags.data();
}
@@ -166,23 +168,40 @@ class LineBreaker {
ParaWidth postBreak;
float penalty; // penalty of this break (for example, hyphen penalty)
float score; // best score found for this break
+ size_t lineNumber; // only updated for non-constant line widths
+ uint8_t hyphenEdit;
};
float currentLineWidth() const;
- void addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, float penalty);
+ // compute shrink/stretch penalty for line
+ float computeScore(float delta, bool atEnd);
+
+ void addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, float penalty,
+ uint8_t hyph);
void addCandidate(Candidate cand);
+ // push an actual break to the output. Takes care of setting flags for tab
+ void pushBreak(int offset, float width, uint8_t hyph);
+
void computeBreaksGreedy();
- void computeBreaksOpt();
+ void computeBreaksOptimal();
+
+ // special case when LineWidth is constant (layout is rectangle)
+ void computeBreaksOptimalRect();
+
+ void finishBreaksOptimal();
icu::BreakIterator* mBreakIterator = nullptr;
UText mUText = UTEXT_INITIALIZER;
std::vector<uint16_t>mTextBuf;
std::vector<float>mCharWidths;
+ Hyphenator* mHyphenator;
+ std::vector<uint8_t> mHyphBuf;
+
// layout parameters
BreakStrategy mStrategy = kBreakStrategy_Greedy;
LineWidths mLineWidths;
@@ -191,7 +210,7 @@ class LineBreaker {
// result of line breaking
std::vector<int> mBreaks;
std::vector<float> mWidths;
- std::vector<uint8_t> mFlags;
+ std::vector<int> mFlags;
ParaWidth mWidth = 0;
std::vector<Candidate> mCandidates;