13 files changed, 1256 insertions, 87 deletions
diff --git a/include/minikin/Hyphenator.h b/include/minikin/Hyphenator.h
new file mode 100644
index 0000000..581c657
--- /dev/null
+++ b/include/minikin/Hyphenator.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * An implementation of Liang's hyphenation algorithm.
+ */
+
+#include <memory>
+#include <unordered_map>
+
+#ifndef MINIKIN_HYPHENATOR_H
+#define MINIKIN_HYPHENATOR_H
+
+namespace android {
+
+class Trie {
+public:
+    std::vector<uint8_t> result;
+    std::unordered_map<uint16_t, Trie> succ;
+};
+
+class Hyphenator {
+public:
+    // Note: this will also require a locale, for proper case folding behavior
+    static Hyphenator* load(const uint16_t* patternData, size_t size);
+
+    // Compute the hyphenation of a word, storing the hyphenation in result vector. Each
+    // entry in the vector is a "hyphen edit" to be applied at the corresponding code unit
+    // offset in the word. Currently 0 means no hyphen and 1 means insert hyphen and break,
+    // but this will be expanded to other edits for nonstandard hyphenation.
+    // Example: word is "hyphen", result is [0 0 1 0 0 0], corresponding to "hy-phen".
+    void hyphenate(std::vector<uint8_t>* result, const uint16_t* word, size_t len);
+
+private:
+    void addPattern(const uint16_t* pattern, size_t size);
+
+    void hyphenateSoft(std::vector<uint8_t>* result, const uint16_t* word, size_t len);
+
+    // TODO: these should become parameters, as they might vary by locale, screen size, and
+    // possibly explicit user control.
+    static const int MIN_PREFIX = 2;
+    static const int MIN_SUFFIX = 3;
+
+    Trie root;
+};
+
+}  // namespace android
+
+#endif   // MINIKIN_HYPHENATOR_H
+\ No newline at end of file
diff --git a/include/minikin/Layout.h b/include/minikin/Layout.h
index 9f87597..cdf4aac 100644
--- a/include/minikin/Layout.h
+++ b/include/minikin/Layout.h
@@ -24,7 +24,7 @@
 #include <minikin/FontCollection.h>
 #include <minikin/MinikinFontFreeType.h>
 
-namespace android {
+namespace minikin {
 
 // The Bitmap class is for debugging. We'll probably move it out
 // of here into a separate lightweight software rendering module
@@ -34,13 +34,17 @@ public:
     Bitmap(int width, int height);
     ~Bitmap();
     void writePnm(std::ofstream& o) const;
-    void drawGlyph(const GlyphBitmap& bitmap, int x, int y);
+    void drawGlyph(const android::GlyphBitmap& bitmap, int x, int y);
 private:
     int width;
     int height;
     uint8_t* buf;
 };
 
+} // namespace minikin
+
+namespace android {
+
 struct LayoutGlyph {
     // index into mFaces and mHbFonts vectors. We could imagine
     // moving this into a run length representation, because it's
@@ -57,6 +61,17 @@ struct LayoutGlyph {
 // Internal state used during layout operation
 class LayoutContext;
 
+enum {
+    kBidi_LTR = 0,
+    kBidi_RTL = 1,
+    kBidi_Default_LTR = 2,
+    kBidi_Default_RTL = 3,
+    kBidi_Force_LTR = 4,
+    kBidi_Force_RTL = 5,
+
+    kBidi_Mask = 0x7
+};
+
 // Lifecycle and threading assumptions for Layout:
 // The object is assumed to be owned by a single thread; multiple threads
 // may not mutate it at the same time.
@@ -78,7 +93,7 @@ public:
     void doLayout(const uint16_t* buf, size_t start, size_t count, size_t bufSize,
         int bidiFlags, const FontStyle &style, const MinikinPaint &paint);
 
-    void draw(Bitmap*, int x0, int y0, float size) const;
+    void draw(minikin::Bitmap*, int x0, int y0, float size) const;
 
     // Deprecated. Nont needed. Remove when callers are removed.
     static void init();
@@ -95,9 +110,13 @@ public:
     float getAdvance() const;
 
     // Get advances, copying into caller-provided buffer. The size of this
-    // buffer must match the length of the string (nchars arg to doLayout).
+    // buffer must match the length of the string (count arg to doLayout).
     void getAdvances(float* advances);
 
+    // The i parameter is an offset within the buf relative to start, it is < count, where
+    // start and count are the parameters to doLayout
+    float getCharAdvance(size_t i) const { return mAdvances[i]; }
+
     void getBounds(MinikinRect* rect);
 
     // Purge all caches, useful in low memory conditions
diff --git a/include/minikin/LineBreaker.h b/include/minikin/LineBreaker.h
new file mode 100644
index 0000000..e031fb3
--- /dev/null
+++ b/include/minikin/LineBreaker.h
@@ -0,0 +1,250 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A module for breaking paragraphs into lines, supporting high quality
+ * hyphenation and justification.
+ */
+
+#ifndef MINIKIN_LINE_BREAKER_H
+#define MINIKIN_LINE_BREAKER_H
+
+#include "unicode/brkiter.h"
+#include "unicode/locid.h"
+#include <cmath>
+#include <vector>
+#include "minikin/Hyphenator.h"
+
+namespace android {
+
+enum BreakStrategy {
+    kBreakStrategy_Greedy = 0,
+    kBreakStrategy_HighQuality = 1,
+    kBreakStrategy_Balanced = 2
+};
+
+enum HyphenationFrequency {
+    kHyphenationFrequency_None = 0,
+    kHyphenationFrequency_Normal = 1,
+    kHyphenationFrequency_Full = 2
+};
+
+// TODO: want to generalize to be able to handle array of line widths
+class LineWidths {
+    public:
+        void setWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
+            mFirstWidth = firstWidth;
+            mFirstWidthLineCount = firstWidthLineCount;
+            mRestWidth = restWidth;
+        }
+        void setIndents(const std::vector<float>& indents) {
+            mIndents = indents;
+        }
+        bool isConstant() const {
+            // technically mFirstWidthLineCount == 0 would count too, but doesn't actually happen
+            return mRestWidth == mFirstWidth && mIndents.empty();
+        }
+        float getLineWidth(int line) const {
+            float width = (line < mFirstWidthLineCount) ? mFirstWidth : mRestWidth;
+            if (!mIndents.empty()) {
+                if ((size_t)line < mIndents.size()) {
+                    width -= mIndents[line];
+                } else {
+                    width -= mIndents.back();
+                }
+            }
+            return width;
+        }
+    private:
+        float mFirstWidth;
+        int mFirstWidthLineCount;
+        float mRestWidth;
+        std::vector<float> mIndents;
+};
+
+class TabStops {
+    public:
+        void set(const int* stops, size_t nStops, int tabWidth) {
+            if (stops != nullptr) {
+                mStops.assign(stops, stops + nStops);
+            } else {
+                mStops.clear();
+            }
+            mTabWidth = tabWidth;
+        }
+        float nextTab(float widthSoFar) const {
+            for (size_t i = 0; i < mStops.size(); i++) {
+                if (mStops[i] > widthSoFar) {
+                    return mStops[i];
+                }
+            }
+            return floor(widthSoFar / mTabWidth + 1) * mTabWidth;
+        }
+    private:
+        std::vector<int> mStops;
+        int mTabWidth;
+};
+
+class LineBreaker {
+    public:
+        const static int kTab_Shift = 29;  // keep synchronized with TAB_MASK in StaticLayout.java
+
+        ~LineBreaker() {
+            utext_close(&mUText);
+            delete mBreakIterator;
+        }
+
+        // Note: Locale persists across multiple invocations (it is not cleaned up by finish()),
+        // explicitly to avoid the cost of creating ICU BreakIterator objects. It should always
+        // be set on the first invocation, but callers are encouraged not to call again unless
+        // locale has actually changed.
+        // That logic could be here but it's better for performance that it's upstream because of
+        // the cost of constructing and comparing the ICU Locale object.
+        // Note: caller is responsible for managing lifetime of hyphenator
+        void setLocale(const icu::Locale& locale, Hyphenator* hyphenator);
+
+        void resize(size_t size) {
+            mTextBuf.resize(size);
+            mCharWidths.resize(size);
+        }
+
+        size_t size() const {
+            return mTextBuf.size();
+        }
+
+        uint16_t* buffer() {
+            return mTextBuf.data();
+        }
+
+        float* charWidths() {
+            return mCharWidths.data();
+        }
+
+        // set text to current contents of buffer
+        void setText();
+
+        void setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth);
+
+        void setIndents(const std::vector<float>& indents);
+
+        void setTabStops(const int* stops, size_t nStops, int tabWidth) {
+            mTabStops.set(stops, nStops, tabWidth);
+        }
+
+        BreakStrategy getStrategy() const { return mStrategy; }
+
+        void setStrategy(BreakStrategy strategy) { mStrategy = strategy; }
+
+        HyphenationFrequency getHyphenationFrequency() const { return mHyphenationFrequency; }
+
+        void setHyphenationFrequency(HyphenationFrequency frequency) {
+            mHyphenationFrequency = frequency;
+        }
+
+        // TODO: this class is actually fairly close to being general and not tied to using
+        // Minikin to do the shaping of the strings. The main thing that would need to be changed
+        // is having some kind of callback (or virtual class, or maybe even template), which could
+        // easily be instantiated with Minikin's Layout. Future work for when needed.
+        float addStyleRun(MinikinPaint* paint, const FontCollection* typeface, FontStyle style,
+                size_t start, size_t end, bool isRtl);
+
+        void addReplacement(size_t start, size_t end, float width);
+
+        size_t computeBreaks();
+
+        const int* getBreaks() const {
+            return mBreaks.data();
+        }
+
+        const float* getWidths() const {
+            return mWidths.data();
+        }
+
+        const int* getFlags() const {
+            return mFlags.data();
+        }
+
+        void finish();
+
+    private:
+        // ParaWidth is used to hold cumulative width from beginning of paragraph. Note that for
+        // very large paragraphs, accuracy could degrade using only 32-bit float. Note however
+        // that float is used extensively on the Java side for this. This is a typedef so that
+        // we can easily change it based on performance/accuracy tradeoff.
+        typedef double ParaWidth;
+
+        // A single candidate break
+        struct Candidate {
+            size_t offset;  // offset to text buffer, in code units
+            size_t prev;  // index to previous break
+            ParaWidth preBreak;
+            ParaWidth postBreak;
+            float penalty;  // penalty of this break (for example, hyphen penalty)
+            float score;  // best score found for this break
+            size_t lineNumber;  // only updated for non-constant line widths
+            uint8_t hyphenEdit;
+        };
+
+        float currentLineWidth() const;
+
+        void addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, float penalty,
+                uint8_t hyph);
+
+        void addCandidate(Candidate cand);
+
+        // push an actual break to the output. Takes care of setting flags for tab
+        void pushBreak(int offset, float width, uint8_t hyph);
+
+        void computeBreaksGreedy();
+
+        void computeBreaksOptimal(bool isRectangular);
+
+        void finishBreaksOptimal();
+
+        icu::BreakIterator* mBreakIterator = nullptr;
+        UText mUText = UTEXT_INITIALIZER;
+        std::vector<uint16_t>mTextBuf;
+        std::vector<float>mCharWidths;
+
+        Hyphenator* mHyphenator;
+        std::vector<uint8_t> mHyphBuf;
+
+        // layout parameters
+        BreakStrategy mStrategy = kBreakStrategy_Greedy;
+        HyphenationFrequency mHyphenationFrequency = kHyphenationFrequency_Normal;
+        LineWidths mLineWidths;
+        TabStops mTabStops;
+
+        // result of line breaking
+        std::vector<int> mBreaks;
+        std::vector<float> mWidths;
+        std::vector<int> mFlags;
+
+        ParaWidth mWidth = 0;
+        std::vector<Candidate> mCandidates;
+        float mLinePenalty = 0.0f;
+
+        // the following are state for greedy breaker (updated while adding style runs)
+        size_t mLastBreak;
+        size_t mBestBreak;
+        float mBestScore;
+        ParaWidth mPreBreak;  // prebreak of last break
+        int mFirstTabIndex;
+};
+
+}  // namespace android
+
+#endif  // MINIKIN_LINE_BREAKER_H
diff --git a/include/minikin/Measurement.h b/include/minikin/Measurement.h
new file mode 100644
index 0000000..fc47fa3
--- /dev/null
+++ b/include/minikin/Measurement.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINIKIN_MEASUREMENT_H
+#define MINIKIN_MEASUREMENT_H
+
+#include <minikin/Layout.h>
+
+namespace android {
+
+float getRunAdvance(Layout& layout, const uint16_t* buf, size_t start, size_t count, size_t offset);
+
+size_t getOffsetForAdvance(Layout& layout, const uint16_t* buf, size_t start, size_t count,
+        float advance);
+
+}
+
+#endif  // MINIKIN_MEASUREMENT_H
diff --git a/include/minikin/MinikinFont.h b/include/minikin/MinikinFont.h
index 3f07589..7f65cd7 100644
--- a/include/minikin/MinikinFont.h
+++ b/include/minikin/MinikinFont.h
@@ -27,6 +27,19 @@
 
 namespace android {
 
+// The hyphen edit represents an edit to the string when a word is
+// hyphenated. The most common hyphen edit is adding a "-" at the end
+// of a syllable, but nonstandard hyphenation allows for more choices.
+class HyphenEdit {
+public:
+    HyphenEdit() : hyphen(0) { }
+    HyphenEdit(uint32_t hyphenInt) : hyphen(hyphenInt) { }
+    bool hasHyphen() const { return hyphen != 0; }
+    bool operator==(const HyphenEdit &other) const { return hyphen == other.hyphen; }
+private:
+    uint32_t hyphen;
+};
+
 class MinikinFont;
 
 // Possibly move into own .h file?
@@ -36,7 +49,7 @@ struct MinikinPaint {
             fakery(), fontFeatureSettings() { }
 
     bool skipCache() const {
-      return !fontFeatureSettings.empty();
+        return !fontFeatureSettings.empty();
     }
 
     MinikinFont *font;
@@ -46,6 +59,7 @@ struct MinikinPaint {
     float letterSpacing;
     uint32_t paintFlags;
     FontFakery fakery;
+    HyphenEdit hyphenEdit;
     std::string fontFeatureSettings;
 };
 
diff --git a/libs/minikin/Android.mk b/libs/minikin/Android.mk
index d9c973d..873f279 100644
--- a/libs/minikin/Android.mk
+++ b/libs/minikin/Android.mk
@@ -22,7 +22,10 @@ LOCAL_SRC_FILES := \
     FontCollection.cpp \
     FontFamily.cpp \
     GraphemeBreak.cpp \
+    Hyphenator.cpp \
     Layout.cpp \
+    LineBreaker.cpp \
+    Measurement.cpp \
     MinikinInternal.cpp \
     MinikinRefCounted.cpp \
     MinikinFontFreeType.cpp \
diff --git a/libs/minikin/FontCollection.cpp b/libs/minikin/FontCollection.cpp
index e3911c5..b4bfe31 100644
--- a/libs/minikin/FontCollection.cpp
+++ b/libs/minikin/FontCollection.cpp
@@ -122,7 +122,7 @@ FontFamily* FontCollection::getFamilyForChar(uint32_t ch,
                 return family;
             }
             int score = lang.match(family->lang()) * 2;
-            if (variant != 0 && variant == family->variant()) {
+            if (family->variant() == 0 || family->variant() == variant) {
                 score++;
             }
             if (score > bestScore) {
@@ -152,10 +152,13 @@ const uint32_t NBSP = 0xa0;
 const uint32_t ZWJ = 0x200c;
 const uint32_t ZWNJ = 0x200d;
 const uint32_t KEYCAP = 0x20e3;
+const uint32_t HYPHEN = 0x2010;
+const uint32_t NB_HYPHEN = 0x2011;
 
 // Characters where we want to continue using existing font run instead of
 // recomputing the best match in the fallback list.
-static const uint32_t stickyWhitelist[] = { '!', ',', '.', ':', ';', '?', NBSP, ZWJ, ZWNJ, KEYCAP };
+static const uint32_t stickyWhitelist[] = { '!', ',', '-', '.', ':', ';', '?', NBSP, ZWJ, ZWNJ,
+        KEYCAP, HYPHEN, NB_HYPHEN };
 
 static bool isStickyWhitelisted(uint32_t c) {
     for (size_t i = 0; i < sizeof(stickyWhitelist) / sizeof(stickyWhitelist[0]); i++) {
diff --git a/libs/minikin/GraphemeBreak.cpp b/libs/minikin/GraphemeBreak.cpp
index 5d8978d..f8f386c 100644
--- a/libs/minikin/GraphemeBreak.cpp
+++ b/libs/minikin/GraphemeBreak.cpp
@@ -41,7 +41,7 @@ bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t co
     uint32_t c2 = 0;
     size_t offset_back = offset;
     U16_PREV(buf, start, offset_back, c1);
-    U16_NEXT(buf, offset, count, c2);
+    U16_NEXT(buf, offset, start + count, c2);
     int32_t p1 = u_getIntPropertyValue(c1, UCHAR_GRAPHEME_CLUSTER_BREAK);
     int32_t p2 = u_getIntPropertyValue(c2, UCHAR_GRAPHEME_CLUSTER_BREAK);
     // Rule GB3, CR x LF
@@ -54,7 +54,7 @@ bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t co
     }
     // Rule GB5, / (Control | CR | LF)
     if (p2 == U_GCB_CONTROL || p2 == U_GCB_CR || p2 == U_GCB_LF) {
-        // exclude zero-width control characters from breaking (tailoring of TR29)
+        // exclude zero-width control characters from breaking (tailoring of UAX #29)
         if (c2 == 0x00ad
                 || (c2 >= 0x200b && c2 <= 0x200f)
                 || (c2 >= 0x2028 && c2 <= 0x202e)
@@ -83,12 +83,12 @@ bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t co
     if (p2 == U_GCB_EXTEND || p2 == U_GCB_SPACING_MARK) {
         if (c2 == 0xe33) {
             // most other implementations break THAI CHARACTER SARA AM
-            // (tailoring of TR29)
+            // (tailoring of UAX #29)
             return true;
         }
         return false;
     }
-    // Cluster indic syllables togeter (tailoring of TR29)
+    // Cluster indic syllables together (tailoring of UAX #29)
     if (u_getIntPropertyValue(c1, UCHAR_CANONICAL_COMBINING_CLASS) == 9  // virama
             && u_getIntPropertyValue(c2, UCHAR_GENERAL_CATEGORY) == U_OTHER_LETTER) {
         return false;
diff --git a/libs/minikin/Hyphenator.cpp b/libs/minikin/Hyphenator.cpp
new file mode 100644
index 0000000..3eb151b
--- /dev/null
+++ b/libs/minikin/Hyphenator.cpp
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vector>
+#include <memory>
+#include <algorithm>
+#include <string>
+#include <unicode/uchar.h>
+
+// HACK: for reading pattern file
+#include <fcntl.h>
+
+#define LOG_TAG "Minikin"
+#include "utils/Log.h"
+
+#include "minikin/Hyphenator.h"
+
+using std::vector;
+
+namespace android {
+
+static const uint16_t CHAR_SOFT_HYPHEN = 0x00AD;
+
+void Hyphenator::addPattern(const uint16_t* pattern, size_t size) {
+    vector<uint16_t> word;
+    vector<uint8_t> result;
+
+    // start by parsing the Liang-format pattern into a word and a result vector, the
+    // vector right-aligned but without leading zeros. Examples:
+    // a1bc2d -> abcd [1, 0, 2, 0]
+    // abc1 -> abc [1]
+    // 1a2b3c4d5 -> abcd [1, 2, 3, 4, 5]
+    bool lastWasLetter = false;
+    bool haveSeenNumber = false;
+    for (size_t i = 0; i < size; i++) {
+        uint16_t c = pattern[i];
+        if (isdigit(c)) {
+            result.push_back(c - '0');
+            lastWasLetter = false;
+            haveSeenNumber = true;
+        } else {
+            word.push_back(c);
+            if (lastWasLetter && haveSeenNumber) {
+                result.push_back(0);
+            }
+            lastWasLetter = true;
+        }
+    }
+    if (lastWasLetter) {
+        result.push_back(0);
+    }
+    Trie* t = &root;
+    for (size_t i = 0; i < word.size(); i++) {
+        t = &t->succ[word[i]];
+    }
+    t->result = result;
+}
+
+// If any soft hyphen is present in the word, use soft hyphens to decide hyphenation,
+// as recommended in UAX #14 (Use of Soft Hyphen)
+void Hyphenator::hyphenateSoft(vector<uint8_t>* result, const uint16_t* word, size_t len) {
+    (*result)[0] = 0;
+    for (size_t i = 1; i < len; i++) {
+        (*result)[i] = word[i - 1] == CHAR_SOFT_HYPHEN;
+    }
+}
+
+void Hyphenator::hyphenate(vector<uint8_t>* result, const uint16_t* word, size_t len) {
+    result->clear();
+    result->resize(len);
+    if (len < MIN_PREFIX + MIN_SUFFIX) return;
+    size_t maxOffset = len - MIN_SUFFIX + 1;
+    for (size_t i = 0; i < len + 1; i++) {
+        const Trie* node = &root;
+        for (size_t j = i; j < len + 2; j++) {
+            uint16_t c;
+            if (j == 0 || j == len + 1) {
+                c = '.';  // word boundary character in pattern data files
+            } else {
+                c = word[j - 1];
+                if (c == CHAR_SOFT_HYPHEN) {
+                    hyphenateSoft(result, word, len);
+                    return;
+                }
+                // TODO: This uses ICU's simple character to character lowercasing, which ignores
+                // the locale, and ignores cases when lowercasing a character results in more than
+                // one character. It should be fixed to consider the locale (in order for it to work
+                // correctly for Turkish and Azerbaijani), as well as support one-to-many, and
+                // many-to-many case conversions (including non-BMP cases).
+                if (c < 0x00C0) { // U+00C0 is the lowest uppercase non-ASCII character
+                    // Convert uppercase ASCII to lowercase ASCII, but keep other characters as-is
+                    if (0x0041 <= c && c <= 0x005A) {
+                        c += 0x0020;
+                    }
+                } else {
+                    c = u_tolower(c);
+                }
+            }
+            auto search = node->succ.find(c);
+            if (search != node->succ.end()) {
+                node = &search->second;
+            } else {
+                break;
+            }
+            if (!node->result.empty()) {
+                int resultLen = node->result.size();
+                int offset = j + 1 - resultLen;
+                int start = std::max(MIN_PREFIX - offset, 0);
+                int end = std::min(resultLen, (int)maxOffset - offset);
+                // TODO performance: this inner loop can profitably be optimized
+                for (int k = start; k < end; k++) {
+                    (*result)[offset + k] = std::max((*result)[offset + k], node->result[k]);
+                }
+#if 0
+                // debug printing of matched patterns
+                std::string dbg;
+                for (size_t k = i; k <= j + 1; k++) {
+                    int off = k - j - 2 + resultLen;
+                    if (off >= 0 && node->result[off] != 0) {
+                        dbg.push_back((char)('0' + node->result[off]));
+                    }
+                    if (k < j + 1) {
+                        uint16_t c = (k == 0 || k == len + 1) ? '.' : word[k - 1];
+                        dbg.push_back((char)c);
+                    }
+                }
+                ALOGD("%d:%d %s", i, j, dbg.c_str());
+#endif
+            }
+        }
+    }
+    // Since the above calculation does not modify values outside
+    // [MIN_PREFIX, len - MIN_SUFFIX], they are left as 0.
+    for (size_t i = MIN_PREFIX; i < maxOffset; i++) {
+        (*result)[i] &= 1;
+    }
+}
+
+Hyphenator* Hyphenator::load(const uint16_t *patternData, size_t size) {
+    Hyphenator* result = new Hyphenator;
+    for (size_t i = 0; i < size; i++) {
+        size_t end = i;
+        while (patternData[end] != '\n') end++;
+        result->addPattern(patternData + i, end - i);
+        i = end;
+    }
+    return result;
+}
+
+}  // namespace android
diff --git a/libs/minikin/Layout.cpp b/libs/minikin/Layout.cpp
index db0667b..be29e3c 100644
--- a/libs/minikin/Layout.cpp
+++ b/libs/minikin/Layout.cpp
@@ -41,20 +41,49 @@
 using std::string;
 using std::vector;
 
-namespace android {
+namespace minikin {
 
-// TODO: these should move into the header file, but for now we don't want
-// to cause namespace collisions with TextLayout.h
-enum {
-    kBidi_LTR = 0,
-    kBidi_RTL = 1,
-    kBidi_Default_LTR = 2,
-    kBidi_Default_RTL = 3,
-    kBidi_Force_LTR = 4,
-    kBidi_Force_RTL = 5,
-
-    kBidi_Mask = 0x7
-};
+Bitmap::Bitmap(int width, int height) : width(width), height(height) {
+    buf = new uint8_t[width * height]();
+}
+
+Bitmap::~Bitmap() {
+    delete[] buf;
+}
+
+void Bitmap::writePnm(std::ofstream &o) const {
+    o << "P5" << std::endl;
+    o << width << " " << height << std::endl;
+    o << "255" << std::endl;
+    o.write((const char *)buf, width * height);
+    o.close();
+}
+
+void Bitmap::drawGlyph(const android::GlyphBitmap& bitmap, int x, int y) {
+    int bmw = bitmap.width;
+    int bmh = bitmap.height;
+    x += bitmap.left;
+    y -= bitmap.top;
+    int x0 = std::max(0, x);
+    int x1 = std::min(width, x + bmw);
+    int y0 = std::max(0, y);
+    int y1 = std::min(height, y + bmh);
+    const unsigned char* src = bitmap.buffer + (y0 - y) * bmw + (x0 - x);
+    uint8_t* dst = buf + y0 * width;
+    for (int yy = y0; yy < y1; yy++) {
+        for (int xx = x0; xx < x1; xx++) {
+            int pixel = (int)dst[xx] + (int)src[xx - x];
+            pixel = pixel > 0xff ? 0xff : pixel;
+            dst[xx] = pixel;
+        }
+        src += bmw;
+        dst += width;
+    }
+}
+
+} // namespace minikin
+
+namespace android {
 
 const int kDirection_Mask = 0x1;
 
@@ -81,7 +110,7 @@ public:
             mStart(start), mCount(count), mId(collection->getId()), mStyle(style),
             mSize(paint.size), mScaleX(paint.scaleX), mSkewX(paint.skewX),
             mLetterSpacing(paint.letterSpacing),
-            mPaintFlags(paint.paintFlags), mIsRtl(dir) {
+            mPaintFlags(paint.paintFlags), mHyphenEdit(paint.hyphenEdit), mIsRtl(dir) {
     }
     bool operator==(const LayoutCacheKey &other) const;
     hash_t hash() const;
@@ -115,6 +144,7 @@ private:
     float mSkewX;
     float mLetterSpacing;
     int32_t mPaintFlags;
+    HyphenEdit mHyphenEdit;
     bool mIsRtl;
     // Note: any fields added to MinikinPaint must also be reflected here.
     // TODO: language matching (possibly integrate into style)
@@ -173,13 +203,24 @@ private:
     static const size_t kMaxEntries = 100;
 };
 
+static unsigned int disabledDecomposeCompatibility(hb_unicode_funcs_t*, hb_codepoint_t,
+                                                   hb_codepoint_t*, void*) {
+    return 0;
+}
+
 class LayoutEngine : public Singleton<LayoutEngine> {
 public:
     LayoutEngine() {
+        unicodeFunctions = hb_unicode_funcs_create(hb_icu_get_unicode_funcs());
+        /* Disable the function used for compatibility decomposition */
+        hb_unicode_funcs_set_decompose_compatibility_func(
+                unicodeFunctions, disabledDecomposeCompatibility, NULL, NULL);
         hbBuffer = hb_buffer_create();
+        hb_buffer_set_unicode_funcs(hbBuffer, unicodeFunctions);
     }
 
     hb_buffer_t* hbBuffer;
+    hb_unicode_funcs_t* unicodeFunctions;
     LayoutCache layoutCache;
     HbFaceCache hbFaceCache;
 };
@@ -196,6 +237,7 @@ bool LayoutCacheKey::operator==(const LayoutCacheKey& other) const {
             && mSkewX == other.mSkewX
             && mLetterSpacing == other.mLetterSpacing
             && mPaintFlags == other.mPaintFlags
+            && mHyphenEdit == other.mHyphenEdit
             && mIsRtl == other.mIsRtl
             && mNchars == other.mNchars
             && !memcmp(mChars, other.mChars, mNchars * sizeof(uint16_t));
@@ -211,6 +253,7 @@ hash_t LayoutCacheKey::hash() const {
     hash = JenkinsHashMix(hash, hash_type(mSkewX));
     hash = JenkinsHashMix(hash, hash_type(mLetterSpacing));
     hash = JenkinsHashMix(hash, hash_type(mPaintFlags));
+    hash = JenkinsHashMix(hash, hash_type(mHyphenEdit.hasHyphen()));
     hash = JenkinsHashMix(hash, hash_type(mIsRtl));
     hash = JenkinsHashMixShorts(hash, mChars, mNchars);
     return JenkinsHashWhiten(hash);
@@ -220,44 +263,6 @@ hash_t hash_type(const LayoutCacheKey& key) {
     return key.hash();
 }
 
-Bitmap::Bitmap(int width, int height) : width(width), height(height) {
-    buf = new uint8_t[width * height]();
-}
-
-Bitmap::~Bitmap() {
-    delete[] buf;
-}
-
-void Bitmap::writePnm(std::ofstream &o) const {
-    o << "P5" << std::endl;
-    o << width << " " << height << std::endl;
-    o << "255" << std::endl;
-    o.write((const char *)buf, width * height);
-    o.close();
-}
-
-void Bitmap::drawGlyph(const GlyphBitmap& bitmap, int x, int y) {
-    int bmw = bitmap.width;
-    int bmh = bitmap.height;
-    x += bitmap.left;
-    y -= bitmap.top;
-    int x0 = std::max(0, x);
-    int x1 = std::min(width, x + bmw);
-    int y0 = std::max(0, y);
-    int y1 = std::min(height, y + bmh);
-    const unsigned char* src = bitmap.buffer + (y0 - y) * bmw + (x0 - x);
-    uint8_t* dst = buf + y0 * width;
-    for (int yy = y0; yy < y1; yy++) {
-        for (int xx = x0; xx < x1; xx++) {
-            int pixel = (int)dst[xx] + (int)src[xx - x];
-            pixel = pixel > 0xff ? 0xff : pixel;
-            dst[xx] = pixel;
-        }
-        src += bmw;
-        dst += width;
-    }
-}
-
 void MinikinRect::join(const MinikinRect& r) {
     if (isEmpty()) {
         set(r);
@@ -312,12 +317,6 @@ static hb_bool_t harfbuzzGetGlyph(hb_font_t* hbFont, void* fontData, hb_codepoin
     MinikinPaint* paint = reinterpret_cast<MinikinPaint*>(fontData);
     MinikinFont* font = paint->font;
     uint32_t glyph_id;
-    /* HarfBuzz replaces broken input codepoints with (unsigned int) -1.
-     * Skia expects valid Unicode.
-     * Replace invalid codepoints with U+FFFD REPLACEMENT CHARACTER.
-     */
-    if (unicode > 0x10FFFF)
-        unicode = 0xFFFD;
     bool ok = font->GetGlyph(unicode, &glyph_id);
     if (ok) {
         *glyph = glyph_id;
@@ -408,7 +407,7 @@ int Layout::findFace(FakedFont face, LayoutContext* ctx) {
 static hb_script_t codePointToScript(hb_codepoint_t codepoint) {
     static hb_unicode_funcs_t* u = 0;
     if (!u) {
-        u = hb_icu_get_unicode_funcs();
+        u = LayoutEngine::getInstance().unicodeFunctions;
     }
     return hb_unicode_script(u, codepoint);
 }
@@ -518,6 +517,29 @@ static size_t getNextWordBreak(const uint16_t* chars, size_t offset, size_t len)
     return len;
 }
 
+/**
+ * Disable certain scripts (mostly those with cursive connection) from having letterspacing
+ * applied. See https://github.com/behdad/harfbuzz/issues/64 for more details.
+ */
+static bool isScriptOkForLetterspacing(hb_script_t script) {
+    return !(
+            script == HB_SCRIPT_ARABIC ||
+            script == HB_SCRIPT_NKO ||
+            script == HB_SCRIPT_PSALTER_PAHLAVI ||
+            script == HB_SCRIPT_MANDAIC ||
+            script == HB_SCRIPT_MONGOLIAN ||
+            script == HB_SCRIPT_PHAGS_PA ||
+            script == HB_SCRIPT_DEVANAGARI ||
+            script == HB_SCRIPT_BENGALI ||
+            script == HB_SCRIPT_GURMUKHI ||
+            script == HB_SCRIPT_MODI ||
+            script == HB_SCRIPT_SHARADA ||
+            script == HB_SCRIPT_SYLOTI_NAGRI ||
+            script == HB_SCRIPT_TIRHUTA ||
+            script == HB_SCRIPT_OGHAM
+            );
+}
+
 void Layout::doLayout(const uint16_t* buf, size_t start, size_t count, size_t bufSize,
         int bidiFlags, const FontStyle &style, const MinikinPaint &paint) {
     AutoMutex _l(gMinikinLock);
@@ -589,12 +611,15 @@ void Layout::doLayout(const uint16_t* buf, size_t start, size_t count, size_t bu
 
 void Layout::doLayoutRunCached(const uint16_t* buf, size_t start, size_t count, size_t bufSize,
         bool isRtl, LayoutContext* ctx, size_t dstStart) {
+    HyphenEdit hyphen = ctx->paint.hyphenEdit;
     if (!isRtl) {
         // left to right
         size_t wordstart = start == bufSize ? start : getPrevWordBreak(buf, start + 1);
         size_t wordend;
         for (size_t iter = start; iter < start + count; iter = wordend) {
             wordend = getNextWordBreak(buf, iter, bufSize);
+            // Only apply hyphen to the last word in the string.
+            ctx->paint.hyphenEdit = wordend >= start + count ? hyphen : HyphenEdit();
             size_t wordcount = std::min(start + count, wordend) - iter;
             doLayoutWord(buf + wordstart, iter - wordstart, wordcount, wordend - wordstart,
                     isRtl, ctx, iter - dstStart);
@@ -607,6 +632,8 @@ void Layout::doLayoutRunCached(const uint16_t* buf, size_t start, size_t count,
         size_t wordend = end == 0 ? 0 : getNextWordBreak(buf, end - 1, bufSize);
         for (size_t iter = end; iter > start; iter = wordstart) {
             wordstart = getPrevWordBreak(buf, iter);
+            // Only apply hyphen to the last (leftmost) word in the string.
+            ctx->paint.hyphenEdit = iter == end ? hyphen : HyphenEdit();
             size_t bufStart = std::max(start, wordstart);
             doLayoutWord(buf + wordstart, bufStart - wordstart, iter - bufStart,
                     wordend - wordstart, isRtl, ctx, bufStart - dstStart);
@@ -677,15 +704,6 @@ void Layout::doLayoutRun(const uint16_t* buf, size_t start, size_t count, size_t
 
     double size = ctx->paint.size;
     double scaleX = ctx->paint.scaleX;
-    double letterSpace = ctx->paint.letterSpacing * size * scaleX;
-    double letterSpaceHalfLeft;
-    if ((ctx->paint.paintFlags & LinearTextFlag) == 0) {
-        letterSpace = round(letterSpace);
-        letterSpaceHalfLeft = floor(letterSpace * 0.5);
-    } else {
-        letterSpaceHalfLeft = letterSpace * 0.5;
-    }
-    double letterSpaceHalfRight = letterSpace - letterSpaceHalfLeft;
 
     float x = mAdvance;
     float y = 0;
@@ -715,7 +733,22 @@ void Layout::doLayoutRun(const uint16_t* buf, size_t start, size_t count, size_t
             srunend = srunstart;
             hb_script_t script = getScriptRun(buf + start, run.end, &srunend);
 
-            hb_buffer_reset(buffer);
+            double letterSpace = 0.0;
+            double letterSpaceHalfLeft = 0.0;
+            double letterSpaceHalfRight = 0.0;
+
+            if (ctx->paint.letterSpacing != 0.0 && isScriptOkForLetterspacing(script)) {
+                letterSpace = ctx->paint.letterSpacing * size * scaleX;
+                if ((ctx->paint.paintFlags & LinearTextFlag) == 0) {
+                    letterSpace = round(letterSpace);
+                    letterSpaceHalfLeft = floor(letterSpace * 0.5);
+                } else {
+                    letterSpaceHalfLeft = letterSpace * 0.5;
+                }
+                letterSpaceHalfRight = letterSpace - letterSpaceHalfLeft;
+            }
+
+            hb_buffer_clear_contents(buffer);
             hb_buffer_set_script(buffer, script);
             hb_buffer_set_direction(buffer, isRtl? HB_DIRECTION_RTL : HB_DIRECTION_LTR);
             FontLanguage language = ctx->style.getLanguage();
@@ -724,6 +757,22 @@ void Layout::doLayoutRun(const uint16_t* buf, size_t start, size_t count, size_t
                 hb_buffer_set_language(buffer, hb_language_from_string(lang.c_str(), -1));
             }
             hb_buffer_add_utf16(buffer, buf, bufSize, srunstart + start, srunend - srunstart);
+            if (ctx->paint.hyphenEdit.hasHyphen() && srunend > srunstart) {
+                // TODO: check whether this is really the desired semantics. It could have the
+                // effect of assigning the hyphen width to a nonspacing mark
+                unsigned int lastCluster = start + srunend - 1;
+
+                hb_codepoint_t hyphenChar = 0x2010; // HYPHEN
+                hb_codepoint_t glyph;
+                // Fallback to ASCII HYPHEN-MINUS if the font didn't have a glyph for HYPHEN. Note
+                // that we intentionally don't do anything special if the font doesn't have a
+                // HYPHEN-MINUS either, so a tofu could be shown, hinting towards something
+                // missing.
+                if (!hb_font_get_glyph(hbFont, hyphenChar, 0, &glyph)) {
+                    hyphenChar = 0x002D; // HYPHEN-MINUS
+                }
+                hb_buffer_add(buffer, hyphenChar, lastCluster);
+            }
             hb_shape(hbFont, buffer, features.empty() ? NULL : &features[0], features.size());
             unsigned int numGlyphs;
             hb_glyph_info_t* info = hb_buffer_get_glyph_infos(buffer, &numGlyphs);
@@ -758,7 +807,12 @@ void Layout::doLayoutRun(const uint16_t* buf, size_t start, size_t count, size_t
                 ctx->paint.font->GetBounds(&glyphBounds, glyph_ix, ctx->paint);
                 glyphBounds.offset(x + xoff, y + yoff);
                 mBounds.join(glyphBounds);
-                mAdvances[info[i].cluster - start] += xAdvance;
+                if (info[i].cluster - start < count) {
+                    mAdvances[info[i].cluster - start] += xAdvance;
+                } else {
+                    ALOGE("cluster %d (start %d) out of bounds of count %d",
+                        info[i].cluster - start, start, count);
+                }
                 x += xAdvance;
             }
             if (numGlyphs)
@@ -806,7 +860,7 @@ void Layout::appendLayout(Layout* src, size_t start) {
     }
 }
 
-void Layout::draw(Bitmap* surface, int x0, int y0, float size) const {
+void Layout::draw(minikin::Bitmap* surface, int x0, int y0, float size) const {
     /*
     TODO: redo as MinikinPaint settings
     if (mProps.hasTag(minikinHinting)) {
diff --git a/libs/minikin/LineBreaker.cpp b/libs/minikin/LineBreaker.cpp
new file mode 100644
index 0000000..a832ca2
--- /dev/null
+++ b/libs/minikin/LineBreaker.cpp
@@ -0,0 +1,450 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define VERBOSE_DEBUG 0
+
+#include <limits>
+
+#define LOG_TAG "Minikin"
+#include <cutils/log.h>
+
+#include <minikin/Layout.h>
+#include <minikin/LineBreaker.h>
+
+using std::vector;
+
+namespace android {
+
+const int CHAR_TAB = 0x0009;
+const uint16_t CHAR_SOFT_HYPHEN = 0x00AD;
+
+// Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these
+// constants are larger than any reasonable actual width score.
+const float SCORE_INFTY = std::numeric_limits<float>::max();
+const float SCORE_OVERFULL = 1e12f;
+const float SCORE_DESPERATE = 1e10f;
+
+// Multiplier for hyphen penalty on last line.
+const float LAST_LINE_PENALTY_MULTIPLIER = 4.0f;
+// Penalty assigned to each line break (to try to minimize number of lines)
+// TODO: when we implement full justification (so spaces can shrink and stretch), this is
+// probably not the most appropriate method.
+const float LINE_PENALTY_MULTIPLIER = 2.0f;
+
+// Very long words trigger O(n^2) behavior in hyphenation, so we disable hyphenation for
+// unreasonably long words. This is somewhat of a heuristic because extremely long words
+// are possible in some languages. This does mean that very long real words can get
+// broken by desperate breaks, with no hyphens.
+const size_t LONGEST_HYPHENATED_WORD = 45;
+
+// When the text buffer is within this limit, capacity of vectors is retained at finish(),
+// to avoid allocation.
+const size_t MAX_TEXT_BUF_RETAIN = 32678;
+
+void LineBreaker::setLocale(const icu::Locale& locale, Hyphenator* hyphenator) {
+    delete mBreakIterator;
+    UErrorCode status = U_ZERO_ERROR;
+    mBreakIterator = icu::BreakIterator::createLineInstance(locale, status);
+    // TODO: check status
+
+    // TODO: load actual resource dependent on locale; letting Minikin do it is a hack
+    mHyphenator = hyphenator;
+}
+
+void LineBreaker::setText() {
+    UErrorCode status = U_ZERO_ERROR;
+    utext_openUChars(&mUText, mTextBuf.data(), mTextBuf.size(), &status);
+    mBreakIterator->setText(&mUText, status);
+    mBreakIterator->first();
+
+    // handle initial break here because addStyleRun may never be called
+    mBreakIterator->next();
+    mCandidates.clear();
+    Candidate cand = {0, 0, 0.0, 0.0, 0.0, 0.0, 0, 0};
+    mCandidates.push_back(cand);
+
+    // reset greedy breaker state
+    mBreaks.clear();
+    mWidths.clear();
+    mFlags.clear();
+    mLastBreak = 0;
+    mBestBreak = 0;
+    mBestScore = SCORE_INFTY;
+    mPreBreak = 0;
+    mFirstTabIndex = INT_MAX;
+}
+
+void LineBreaker::setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
+    mLineWidths.setWidths(firstWidth, firstWidthLineCount, restWidth);
+}
+
+
+void LineBreaker::setIndents(const std::vector<float>& indents) {
+    mLineWidths.setIndents(indents);
+}
+
+// This function determines whether a character is a space that disappears at end of line.
+// It is the Unicode set: [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]],
+// plus '\n'.
+// Note: all such characters are in the BMP, so it's ok to use code units for this.
+static bool isLineEndSpace(uint16_t c) {
+    return c == '\n' || c == ' ' || c == 0x1680 || (0x2000 <= c && c <= 0x200A && c != 0x2007) ||
+            c == 0x205F || c == 0x3000;
+}
+
+// This function determines whether a character is like U+2010 HYPHEN in
+// line breaking and usage: a character immediately after which line breaks
+// are allowed, but words containing it should not be automatically
+// hyphenated. This is a curated set, created by manually inspecting all
+// the characters that have the Unicode line breaking property of BA or HY
+// and seeing which ones are hyphens.
+static bool isLineBreakingHyphen(uint16_t c) {
+    return (c == 0x002D || // HYPHEN-MINUS
+            c == 0x058A || // ARMENIAN HYPHEN
+            c == 0x05BE || // HEBREW PUNCTUATION MAQAF
+            c == 0x1400 || // CANADIAN SYLLABICS HYPHEN
+            c == 0x2010 || // HYPHEN
+            c == 0x2013 || // EN DASH
+            c == 0x2027 || // HYPHENATION POINT
+            c == 0x2E17 || // DOUBLE OBLIQUE HYPHEN
+            c == 0x2E40);  // DOUBLE HYPHEN
+}
+
+// Ordinarily, this method measures the text in the range given. However, when paint
+// is nullptr, it assumes the widths have already been calculated and stored in the
+// width buffer.
+// This method finds the candidate word breaks (using the ICU break iterator) and sends them
+// to addCandidate.
+float LineBreaker::addStyleRun(MinikinPaint* paint, const FontCollection* typeface,
+        FontStyle style, size_t start, size_t end, bool isRtl) {
+    Layout layout;  // performance TODO: move layout to self object to reduce allocation cost?
+    float width = 0.0f;
+    int bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR;
+
+    float hyphenPenalty = 0.0;
+    if (paint != nullptr) {
+        layout.setFontCollection(typeface);
+        layout.doLayout(mTextBuf.data(), start, end - start, mTextBuf.size(), bidiFlags, style,
+                *paint);
+        layout.getAdvances(mCharWidths.data() + start);
+        width = layout.getAdvance();
+
+        // a heuristic that seems to perform well
+        hyphenPenalty = 0.5 * paint->size * paint->scaleX * mLineWidths.getLineWidth(0);
+        if (mHyphenationFrequency == kHyphenationFrequency_Normal) {
+            hyphenPenalty *= 4.0; // TODO: Replace with a better value after some testing
+        }
+
+        mLinePenalty = std::max(mLinePenalty, hyphenPenalty * LINE_PENALTY_MULTIPLIER);
+    }
+
+    size_t current = (size_t)mBreakIterator->current();
+    size_t wordEnd = start;
+    size_t lastBreak = start;
+    ParaWidth lastBreakWidth = mWidth;
+    ParaWidth postBreak = mWidth;
+    bool temporarilySkipHyphenation = false;
+    for (size_t i = start; i < end; i++) {
+        uint16_t c = mTextBuf[i];
+        if (c == CHAR_TAB) {
+            mWidth = mPreBreak + mTabStops.nextTab(mWidth - mPreBreak);
+            if (mFirstTabIndex == INT_MAX) {
+                mFirstTabIndex = (int)i;
+            }
+            // fall back to greedy; other modes don't know how to deal with tabs
+            mStrategy = kBreakStrategy_Greedy;
+        } else {
+            mWidth += mCharWidths[i];
+            if (!isLineEndSpace(c)) {
+                postBreak = mWidth;
+                wordEnd = i + 1;
+            }
+        }
+        if (i + 1 == current) {
+            // Override ICU's treatment of soft hyphen as a break opportunity, because we want it
+            // to be a hyphen break, with penalty and drawing behavior.
+            if (c != CHAR_SOFT_HYPHEN) {
+                // TODO: Add a new type of HyphenEdit for breaks whose hyphen already exists, so
+                // we can pass the whole word down to Hyphenator like the soft hyphen case.
+                bool wordEndsInHyphen = isLineBreakingHyphen(c);
+                if (paint != nullptr && mHyphenator != nullptr &&
+                        mHyphenationFrequency != kHyphenationFrequency_None &&
+                        !wordEndsInHyphen && !temporarilySkipHyphenation &&
+                        wordEnd > lastBreak && wordEnd - lastBreak <= LONGEST_HYPHENATED_WORD) {
+                    mHyphenator->hyphenate(&mHyphBuf, &mTextBuf[lastBreak], wordEnd - lastBreak);
+    #if VERBOSE_DEBUG
+                    std::string hyphenatedString;
+                    for (size_t j = lastBreak; j < wordEnd; j++) {
+                        if (mHyphBuf[j - lastBreak]) hyphenatedString.push_back('-');
+                        // Note: only works with ASCII, should do UTF-8 conversion here
+                        hyphenatedString.push_back(buffer()[j]);
+                    }
+                    ALOGD("hyphenated string: %s", hyphenatedString.c_str());
+    #endif
+
+                    // measure hyphenated substrings
+                    for (size_t j = lastBreak; j < wordEnd; j++) {
+                        uint8_t hyph = mHyphBuf[j - lastBreak];
+                        if (hyph) {
+                            paint->hyphenEdit = hyph;
+                            layout.doLayout(mTextBuf.data(), lastBreak, j - lastBreak,
+                                    mTextBuf.size(), bidiFlags, style, *paint);
+                            ParaWidth hyphPostBreak = lastBreakWidth + layout.getAdvance();
+                            paint->hyphenEdit = 0;
+                            layout.doLayout(mTextBuf.data(), j, wordEnd - j,
+                                    mTextBuf.size(), bidiFlags, style, *paint);
+                            ParaWidth hyphPreBreak = postBreak - layout.getAdvance();
+                            addWordBreak(j, hyphPreBreak, hyphPostBreak, hyphenPenalty, hyph);
+                        }
+                    }
+                }
+                // Skip hyphenating the next word if and only if the present word ends in a hyphen
+                temporarilySkipHyphenation = wordEndsInHyphen;
+
+                // Skip break for zero-width characters inside replacement span
+                if (paint != nullptr || current == end || mCharWidths[current] > 0) {
+                    addWordBreak(current, mWidth, postBreak, 0.0, 0);
+                }
+                lastBreak = current;
+                lastBreakWidth = mWidth;
+            }
+            current = (size_t)mBreakIterator->next();
+        }
+    }
+
+    return width;
+}
+
+// add a word break (possibly for a hyphenated fragment), and add desperate breaks if
+// needed (ie when word exceeds current line width)
+void LineBreaker::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak,
+        float penalty, uint8_t hyph) {
+    Candidate cand;
+    ParaWidth width = mCandidates.back().preBreak;
+    if (postBreak - width > currentLineWidth()) {
+        // Add desperate breaks.
+        // Note: these breaks are based on the shaping of the (non-broken) original text; they
+        // are imprecise especially in the presence of kerning, ligatures, and Arabic shaping.
+        size_t i = mCandidates.back().offset;
+        width += mCharWidths[i++];
+        for (; i < offset; i++) {
+            float w = mCharWidths[i];
+            if (w > 0) {
+                cand.offset = i;
+                cand.preBreak = width;
+                cand.postBreak = width;
+                cand.penalty = SCORE_DESPERATE;
+                cand.hyphenEdit = 0;
+#if VERBOSE_DEBUG
+                ALOGD("desperate cand: %d %g:%g",
+                        mCandidates.size(), cand.postBreak, cand.preBreak);
+#endif
+                addCandidate(cand);
+                width += w;
+            }
+        }
+    }
+
+    cand.offset = offset;
+    cand.preBreak = preBreak;
+    cand.postBreak = postBreak;
+    cand.penalty = penalty;
+    cand.hyphenEdit = hyph;
+#if VERBOSE_DEBUG
+    ALOGD("cand: %d %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak);
+#endif
+    addCandidate(cand);
+}
+
+// TODO performance: could avoid populating mCandidates if greedy only
+void LineBreaker::addCandidate(Candidate cand) {
+    size_t candIndex = mCandidates.size();
+    mCandidates.push_back(cand);
+    if (cand.postBreak - mPreBreak > currentLineWidth()) {
+        // This break would create an overfull line, pick the best break and break there (greedy)
+        if (mBestBreak == mLastBreak) {
+            mBestBreak = candIndex;
+        }
+        pushBreak(mCandidates[mBestBreak].offset, mCandidates[mBestBreak].postBreak - mPreBreak,
+                mCandidates[mBestBreak].hyphenEdit);
+        mBestScore = SCORE_INFTY;
+#if VERBOSE_DEBUG
+        ALOGD("break: %d %g", mBreaks.back(), mWidths.back());
+#endif
+        mLastBreak = mBestBreak;
+        mPreBreak = mCandidates[mBestBreak].preBreak;
+    }
+    if (cand.penalty <= mBestScore) {
+        mBestBreak = candIndex;
+        mBestScore = cand.penalty;
+    }
+}
+
+void LineBreaker::pushBreak(int offset, float width, uint8_t hyph) {
+    mBreaks.push_back(offset);
+    mWidths.push_back(width);
+    int flags = (mFirstTabIndex < mBreaks.back()) << kTab_Shift;
+    flags |= hyph;
+    mFlags.push_back(flags);
+    mFirstTabIndex = INT_MAX;
+}
+
+void LineBreaker::addReplacement(size_t start, size_t end, float width) {
+    mCharWidths[start] = width;
+    std::fill(&mCharWidths[start + 1], &mCharWidths[end], 0.0f);
+    addStyleRun(nullptr, nullptr, FontStyle(), start, end, false);
+}
+
+float LineBreaker::currentLineWidth() const {
+    return mLineWidths.getLineWidth(mBreaks.size());
+}
+
+void LineBreaker::computeBreaksGreedy() {
+    // All breaks but the last have been added in addCandidate already.
+    size_t nCand = mCandidates.size();
+    if (nCand == 1 || mLastBreak != nCand - 1) {
+        pushBreak(mCandidates[nCand - 1].offset, mCandidates[nCand - 1].postBreak - mPreBreak, 0);
+        // don't need to update mBestScore, because we're done
+#if VERBOSE_DEBUG
+        ALOGD("final break: %d %g", mBreaks.back(), mWidths.back());
+#endif
+    }
+}
+
+// Follow "prev" links in mCandidates array, and copy to result arrays.
+void LineBreaker::finishBreaksOptimal() {
+    // clear existing greedy break result
+    mBreaks.clear();
+    mWidths.clear();
+    mFlags.clear();
+    size_t nCand = mCandidates.size();
+    size_t prev;
+    for (size_t i = nCand - 1; i > 0; i = prev) {
+        prev = mCandidates[i].prev;
+        mBreaks.push_back(mCandidates[i].offset);
+        mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak);
+        mFlags.push_back(mCandidates[i].hyphenEdit);
+    }
+    std::reverse(mBreaks.begin(), mBreaks.end());
+    std::reverse(mWidths.begin(), mWidths.end());
+    std::reverse(mFlags.begin(), mFlags.end());
+}
+
+void LineBreaker::computeBreaksOptimal(bool isRectangle) {
+    size_t active = 0;
+    size_t nCand = mCandidates.size();
+    float width = mLineWidths.getLineWidth(0);
+    for (size_t i = 1; i < nCand; i++) {
+        bool atEnd = i == nCand - 1;
+        float best = SCORE_INFTY;
+        size_t bestPrev = 0;
+        size_t lineNumberLast = 0;
+
+        if (!isRectangle) {
+            size_t lineNumberLast = mCandidates[active].lineNumber;
+            width = mLineWidths.getLineWidth(lineNumberLast);
+        }
+        ParaWidth leftEdge = mCandidates[i].postBreak - width;
+        float bestHope = 0;
+
+        for (size_t j = active; j < i; j++) {
+            if (!isRectangle) {
+                size_t lineNumber = mCandidates[j].lineNumber;
+                if (lineNumber != lineNumberLast) {
+                    float widthNew = mLineWidths.getLineWidth(lineNumber);
+                    if (widthNew != width) {
+                        leftEdge = mCandidates[i].postBreak - width;
+                        bestHope = 0;
+                        width = widthNew;
+                    }
+                    lineNumberLast = lineNumber;
+                }
+            }
+            float jScore = mCandidates[j].score;
+            if (jScore + bestHope >= best) continue;
+            float delta = mCandidates[j].preBreak - leftEdge;
+
+            // compute width score for line
+
+            // Note: the "bestHope" optimization makes the assumption that, when delta is
+            // non-negative, widthScore will increase monotonically as successive candidate
+            // breaks are considered.
+            float widthScore = 0.0f;
+            float additionalPenalty = 0.0f;
+            if (delta < 0) {
+                widthScore = SCORE_OVERFULL;
+            } else if (atEnd && mStrategy != kBreakStrategy_Balanced) {
+                // increase penalty for hyphen on last line
+                additionalPenalty = LAST_LINE_PENALTY_MULTIPLIER * mCandidates[j].penalty;
+            } else {
+                widthScore = delta * delta;
+            }
+
+            if (delta < 0) {
+                active = j + 1;
+            } else {
+                bestHope = widthScore;
+            }
+
+            float score = jScore + widthScore + additionalPenalty;
+            if (score <= best) {
+                best = score;
+                bestPrev = j;
+            }
+        }
+        mCandidates[i].score = best + mCandidates[i].penalty + mLinePenalty;
+        mCandidates[i].prev = bestPrev;
+        mCandidates[i].lineNumber = mCandidates[bestPrev].lineNumber + 1;
+#if VERBOSE_DEBUG
+        ALOGD("break %d: score=%g, prev=%d", i, mCandidates[i].score, mCandidates[i].prev);
+#endif
+    }
+    finishBreaksOptimal();
+}
+
+size_t LineBreaker::computeBreaks() {
+    if (mStrategy == kBreakStrategy_Greedy) {
+        computeBreaksGreedy();
+    } else {
+        computeBreaksOptimal(mLineWidths.isConstant());
+    }
+    return mBreaks.size();
+}
+
+void LineBreaker::finish() {
+    mWidth = 0;
+    mCandidates.clear();
+    mBreaks.clear();
+    mWidths.clear();
+    mFlags.clear();
+    if (mTextBuf.size() > MAX_TEXT_BUF_RETAIN) {
+        mTextBuf.clear();
+        mTextBuf.shrink_to_fit();
+        mCharWidths.clear();
+        mCharWidths.shrink_to_fit();
+        mHyphBuf.clear();
+        mHyphBuf.shrink_to_fit();
+        mCandidates.shrink_to_fit();
+        mBreaks.shrink_to_fit();
+        mWidths.shrink_to_fit();
+        mFlags.shrink_to_fit();
+    }
+    mStrategy = kBreakStrategy_Greedy;
+    mHyphenationFrequency = kHyphenationFrequency_Normal;
+    mLinePenalty = 0.0f;
+}
+
+}  // namespace android
diff --git a/libs/minikin/Measurement.cpp b/libs/minikin/Measurement.cpp
new file mode 100644
index 0000000..0b68ac5
--- /dev/null
+++ b/libs/minikin/Measurement.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "Minikin"
+#include <cutils/log.h>
+
+#include <cmath>
+#include <unicode/uchar.h>
+
+#include <minikin/GraphemeBreak.h>
+#include <minikin/Measurement.h>
+
+namespace android {
+
+// These could be considered helper methods of layout, but need only be loosely coupled, so
+// are separate.
+
+static float getRunAdvance(Layout& layout, const uint16_t* buf, size_t layoutStart, size_t start,
+        size_t count, size_t offset) {
+    float advance = 0.0f;
+    size_t lastCluster = start;
+    float clusterWidth = 0.0f;
+    for (size_t i = start; i < offset; i++) {
+        float charAdvance = layout.getCharAdvance(i - layoutStart);
+        if (charAdvance != 0.0f) {
+            advance += charAdvance;
+            lastCluster = i;
+            clusterWidth = charAdvance;
+        }
+    }
+    if (offset < start + count && layout.getCharAdvance(offset - layoutStart) == 0.0f) {
+        // In the middle of a cluster, distribute width of cluster so that each grapheme cluster
+        // gets an equal share.
+        // TODO: get caret information out of font when that's available
+        size_t nextCluster;
+        for (nextCluster = offset + 1; nextCluster < start + count; nextCluster++) {
+            if (layout.getCharAdvance(nextCluster - layoutStart) != 0.0f) break;
+        }
+        int numGraphemeClusters = 0;
+        int numGraphemeClustersAfter = 0;
+        for (size_t i = lastCluster; i < nextCluster; i++) {
+            bool isAfter = i >= offset;
+            if (GraphemeBreak::isGraphemeBreak(buf, start, count, i)) {
+                numGraphemeClusters++;
+                if (isAfter) {
+                    numGraphemeClustersAfter++;
+                }
+            }
+        }
+        if (numGraphemeClusters > 0) {
+            advance -= clusterWidth * numGraphemeClustersAfter / numGraphemeClusters;
+        }
+    }
+    return advance;
+}
+
+float getRunAdvance(Layout& layout, const uint16_t* buf, size_t start, size_t count,
+        size_t offset) {
+    return getRunAdvance(layout, buf, start, start, count, offset);
+}
+
+/**
+ * Essentially the inverse of getRunAdvance. Compute the value of offset for which the
+ * measured caret comes closest to the provided advance param, and which is on a grapheme
+ * cluster boundary.
+ *
+ * The actual implementation fast-forwards through clusters to get "close", then does a finer-grain
+ * search within the cluster and grapheme breaks.
+ */
+size_t getOffsetForAdvance(Layout& layout, const uint16_t* buf, size_t start, size_t count,
+        float advance) {
+    float x = 0.0f, xLastClusterStart = 0.0f, xSearchStart = 0.0f;
+    size_t lastClusterStart = start, searchStart = start;
+    for (size_t i = start; i < start + count; i++) {
+        if (GraphemeBreak::isGraphemeBreak(buf, start, count, i)) {
+            searchStart = lastClusterStart;
+            xSearchStart = xLastClusterStart;
+        }
+        float width = layout.getCharAdvance(i - start);
+        if (width != 0.0f) {
+            lastClusterStart = i;
+            xLastClusterStart = x;
+            x += width;
+            if (x > advance) {
+                break;
+            }
+        }
+    }
+    size_t best = searchStart;
+    float bestDist = FLT_MAX;
+    for (size_t i = searchStart; i <= start + count; i++) {
+        if (GraphemeBreak::isGraphemeBreak(buf, start, count, i)) {
+            // "getRunAdvance(layout, buf, start, count, i) - advance" but more efficient
+            float delta = getRunAdvance(layout, buf, start, searchStart, count - searchStart, i)
+
+                    + xSearchStart - advance;
+            if (std::abs(delta) < bestDist) {
+                bestDist = std::abs(delta);
+                best = i;
+            }
+            if (delta >= 0.0f) {
+                break;
+            }
+        }
+    }
+    return best;
+}
+
+}
diff --git a/sample/example.cpp b/sample/example.cpp
index 487357a..3fbfa79 100644
--- a/sample/example.cpp
+++ b/sample/example.cpp
@@ -28,8 +28,8 @@
 #include <minikin/Layout.h>
 
 using std::vector;
-
-namespace android {
+using namespace android;
+using namespace minikin;
 
 FT_Library library;  // TODO: this should not be a global
 
@@ -99,8 +99,6 @@ int runMinikinTest() {
     return 0;
 }
 
-}
-
 int main(int argc, const char** argv) {
-    return android::runMinikinTest();
+    return runMinikinTest();
 }