From adaf42f0d3444de2b0bb977ccc94801458497f46 Mon Sep 17 00:00:00 2001
From: Raph Levien <raph@google.com>
Date: Thu, 29 Oct 2015 11:39:58 -0700
Subject: Accept variation selector in emoji sequences - DO NOT MERGE

This patch basically ignores variation selectors for the purpose of
itemization into font runs. This allows GSUB to be applied when input
sequences contain variation selectors.

Bug: 25368653
Change-Id: I9c1d325ae0cd322c21b7e850d0ec4d73551b2372
---
 libs/minikin/FontCollection.cpp | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/libs/minikin/FontCollection.cpp b/libs/minikin/FontCollection.cpp
index b4bfe31..2bcbc03 100644
--- a/libs/minikin/FontCollection.cpp
+++ b/libs/minikin/FontCollection.cpp
@@ -167,6 +167,10 @@ static bool isStickyWhitelisted(uint32_t c) {
     return false;
 }
 
+static bool isVariationSelector(uint32_t c) {
+    return (0xFE00 <= c && c <= 0xFE0F) || (0xE0100 <= c && c <= 0xE01EF);
+}
+
 void FontCollection::itemize(const uint16_t *string, size_t string_size, FontStyle style,
         vector<Run>* result) const {
     FontLanguage lang = style.getLanguage();
@@ -184,9 +188,11 @@ void FontCollection::itemize(const uint16_t *string, size_t string_size, FontSty
                 nShorts = 2;
             }
         }
-        // Continue using existing font as long as it has coverage and is whitelisted
+        // Continue using existing font as long as it has coverage and is whitelisted;
+        // also variation sequences continue existing run.
         if (lastFamily == NULL
-                || !(isStickyWhitelisted(ch) && lastFamily->getCoverage()->get(ch))) {
+                || !((isStickyWhitelisted(ch) && lastFamily->getCoverage()->get(ch))
+                      || isVariationSelector(ch))) {
             FontFamily* family = getFamilyForChar(ch, lang, variant);
             if (i == 0 || family != lastFamily) {
                 size_t start = i;
-- 
cgit v1.2.3


From c65e6f1ee0b2f32183766726ac459188b1a37b35 Mon Sep 17 00:00:00 2001
From: Raph Levien <raph@google.com>
Date: Mon, 2 Nov 2015 17:17:24 -0800
Subject: Suppress linebreaks in emoji ZWJ sequences - DO NOT MERGE

Due to the way emoji ZWJ sequences are defined, the ICU line breaking
algorithm determines that there are valid line breaks inside the
sequence. This patch suppresses these line breaks.

Bug: 25433289
Change-Id: I225ebebc0f4186e4b8f48fee399c4a62b3f0218a
---
 libs/minikin/LineBreaker.cpp | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/libs/minikin/LineBreaker.cpp b/libs/minikin/LineBreaker.cpp
index a832ca2..77374fe 100644
--- a/libs/minikin/LineBreaker.cpp
+++ b/libs/minikin/LineBreaker.cpp
@@ -17,6 +17,7 @@
 #define VERBOSE_DEBUG 0
 
 #include <limits>
+#include <unicode/utf16.h>
 
 #define LOG_TAG "Minikin"
 #include <cutils/log.h>
@@ -30,6 +31,7 @@ namespace android {
 
 const int CHAR_TAB = 0x0009;
 const uint16_t CHAR_SOFT_HYPHEN = 0x00AD;
+const uint16_t CHAR_ZWJ = 0x200D;
 
 // Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these
 // constants are larger than any reasonable actual width score.
@@ -123,6 +125,32 @@ static bool isLineBreakingHyphen(uint16_t c) {
             c == 0x2E40);  // DOUBLE HYPHEN
 }
 
+/**
+ * Determine whether a line break at position i within the buffer buf is valid. This
+ * represents customization beyond the ICU behavior, because plain ICU provides some
+ * line break opportunities that we don't want.
+ **/
+static bool isBreakValid(uint16_t codeUnit, const uint16_t* buf, size_t bufEnd, size_t i) {
+    if (codeUnit == CHAR_SOFT_HYPHEN) {
+        return false;
+    }
+    if (codeUnit == CHAR_ZWJ) {
+        // Possible emoji ZWJ sequence
+        uint32_t next_codepoint;
+        U16_NEXT(buf, i, bufEnd, next_codepoint);
+        if (next_codepoint == 0x2764 ||       // HEAVY BLACK HEART
+                next_codepoint == 0x1F466 ||  // BOY
+                next_codepoint == 0x1F467 ||  // GIRL
+                next_codepoint == 0x1F468 ||  // MAN
+                next_codepoint == 0x1F469 ||  // WOMAN
+                next_codepoint == 0x1F48B ||  // KISS MARK
+                next_codepoint == 0x1F5E8) {  // LEFT SPEECH BUBBLE
+            return false;
+        }
+    }
+    return true;
+}
+
 // Ordinarily, this method measures the text in the range given. However, when paint
 // is nullptr, it assumes the widths have already been calculated and stored in the
 // width buffer.
@@ -175,8 +203,9 @@ float LineBreaker::addStyleRun(MinikinPaint* paint, const FontCollection* typefa
         }
         if (i + 1 == current) {
             // Override ICU's treatment of soft hyphen as a break opportunity, because we want it
-            // to be a hyphen break, with penalty and drawing behavior.
-            if (c != CHAR_SOFT_HYPHEN) {
+            // to be a hyphen break, with penalty and drawing behavior. Also, suppress line
+            // breaks within emoji ZWJ sequences.
+            if (isBreakValid(c, mTextBuf.data(), end, i + 1)) {
                 // TODO: Add a new type of HyphenEdit for breaks whose hyphen already exists, so
                 // we can pass the whole word down to Hyphenator like the soft hyphen case.
                 bool wordEndsInHyphen = isLineBreakingHyphen(c);
-- 
cgit v1.2.3


From 6299a6ba13906c695f7a4f6748f7bc5856a110e5 Mon Sep 17 00:00:00 2001
From: Raph Levien <raph@google.com>
Date: Mon, 30 Nov 2015 15:04:59 -0800
Subject: Avoid integer overflows in parsing fonts

A malformed TTF can cause size calculations to overflow. This patch
checks the maximum reasonable value so that the total size fits in 32
bits. It also adds some explicit casting to avoid possible technical
undefined behavior when parsing sized unsigned values.

Bug: 25645298
Change-Id: Id4716132041a6f4f1fbb73ec4e445391cf7d9616
(cherry picked from commit 183c9ec2800baa2ce099ee260c6cbc6121cf1274)
---
 libs/minikin/CmapCoverage.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/libs/minikin/CmapCoverage.cpp b/libs/minikin/CmapCoverage.cpp
index 4156d69..8be45d1 100644
--- a/libs/minikin/CmapCoverage.cpp
+++ b/libs/minikin/CmapCoverage.cpp
@@ -30,11 +30,12 @@ namespace android {
 
 // These could perhaps be optimized to use __builtin_bswap16 and friends.
 static uint32_t readU16(const uint8_t* data, size_t offset) {
-    return data[offset] << 8 | data[offset + 1];
+    return ((uint32_t)data[offset]) << 8 | ((uint32_t)data[offset + 1]);
 }
 
 static uint32_t readU32(const uint8_t* data, size_t offset) {
-    return data[offset] << 24 | data[offset + 1] << 16 | data[offset + 2] << 8 | data[offset + 3];
+    return ((uint32_t)data[offset]) << 24 | ((uint32_t)data[offset + 1]) << 16 |
+        ((uint32_t)data[offset + 2]) << 8 | ((uint32_t)data[offset + 3]);
 }
 
 static void addRange(vector<uint32_t> &coverage, uint32_t start, uint32_t end) {
@@ -101,11 +102,13 @@ static bool getCoverageFormat12(vector<uint32_t>& coverage, const uint8_t* data,
     const size_t kGroupSize = 12;
     const size_t kStartCharCodeOffset = 0;
     const size_t kEndCharCodeOffset = 4;
+    const size_t kMaxNGroups = 0xfffffff0 / kGroupSize;  // protection against overflow
+    // For all values < kMaxNGroups, kFirstGroupOffset + nGroups * kGroupSize fits in 32 bits.
     if (kFirstGroupOffset > size) {
         return false;
     }
     uint32_t nGroups = readU32(data, kNGroupsOffset);
-    if (kFirstGroupOffset + nGroups * kGroupSize > size) {
+    if (nGroups >= kMaxNGroups || kFirstGroupOffset + nGroups * kGroupSize > size) {
         return false;
     }
     for (uint32_t i = 0; i < nGroups; i++) {
-- 
cgit v1.2.3


From ca8ac8acdad662230ae37998c6c4091bb39402b6 Mon Sep 17 00:00:00 2001
From: Raph Levien <raph@google.com>
Date: Wed, 6 Jan 2016 14:31:23 -0800
Subject: Reject fonts with invalid ranges in cmap

A corrupt or malicious font may have a negative size in its cmap
range, which in turn could lead to memory corruption. This patch
detects the case and rejects the font, and also includes an assertion
in the sparse bit set implementation if we missed any such case.

External issue:
https://code.google.com/p/android/issues/detail?id=192618

Bug: 26413177
Change-Id: Icc0c80e4ef389abba0964495b89aa0fae3e9f4b2
---
 libs/minikin/CmapCoverage.cpp | 41 +++++++++++++++++++++++++----------------
 libs/minikin/SparseBitSet.cpp |  2 ++
 2 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/libs/minikin/CmapCoverage.cpp b/libs/minikin/CmapCoverage.cpp
index 8be45d1..4c9643a 100644
--- a/libs/minikin/CmapCoverage.cpp
+++ b/libs/minikin/CmapCoverage.cpp
@@ -50,7 +50,7 @@ static void addRange(vector<uint32_t> &coverage, uint32_t start, uint32_t end) {
     }
 }
 
-// Get the coverage information out of a Format 12 subtable, storing it in the coverage vector
+// Get the coverage information out of a Format 4 subtable, storing it in the coverage vector
 static bool getCoverageFormat4(vector<uint32_t>& coverage, const uint8_t* data, size_t size) {
     const size_t kSegCountOffset = 6;
     const size_t kEndCountOffset = 14;
@@ -64,28 +64,32 @@ static bool getCoverageFormat4(vector<uint32_t>& coverage, const uint8_t* data,
         return false;
     }
     for (size_t i = 0; i < segCount; i++) {
-        int end = readU16(data, kEndCountOffset + 2 * i);
-        int start = readU16(data, kHeaderSize + 2 * (segCount + i));
-        int rangeOffset = readU16(data, kHeaderSize + 2 * (3 * segCount + i));
+        uint32_t end = readU16(data, kEndCountOffset + 2 * i);
+        uint32_t start = readU16(data, kHeaderSize + 2 * (segCount + i));
+        if (end < start) {
+            // invalid segment range: size must be positive
+            return false;
+        }
+        uint32_t rangeOffset = readU16(data, kHeaderSize + 2 * (3 * segCount + i));
         if (rangeOffset == 0) {
-            int delta = readU16(data, kHeaderSize + 2 * (2 * segCount + i));
+            uint32_t delta = readU16(data, kHeaderSize + 2 * (2 * segCount + i));
             if (((end + delta) & 0xffff) > end - start) {
                 addRange(coverage, start, end + 1);
             } else {
-                for (int j = start; j < end + 1; j++) {
+                for (uint32_t j = start; j < end + 1; j++) {
                     if (((j + delta) & 0xffff) != 0) {
                         addRange(coverage, j, j + 1);
                     }
                 }
             }
         } else {
-            for (int j = start; j < end + 1; j++) {
+            for (uint32_t j = start; j < end + 1; j++) {
                 uint32_t actualRangeOffset = kHeaderSize + 6 * segCount + rangeOffset +
                     (i + j - start) * 2;
                 if (actualRangeOffset + 2 > size) {
                     return false;
                 }
-                int glyphId = readU16(data, actualRangeOffset);
+                uint32_t glyphId = readU16(data, actualRangeOffset);
                 if (glyphId != 0) {
                     addRange(coverage, j, j + 1);
                 }
@@ -115,6 +119,10 @@ static bool getCoverageFormat12(vector<uint32_t>& coverage, const uint8_t* data,
         uint32_t groupOffset = kFirstGroupOffset + i * kGroupSize;
         uint32_t start = readU32(data, groupOffset + kStartCharCodeOffset);
         uint32_t end = readU32(data, groupOffset + kEndCharCodeOffset);
+        if (end < start) {
+            // invalid group range: size must be positive
+            return false;
+        }
         addRange(coverage, start, end + 1);  // file is inclusive, vector is exclusive
     }
     return true;
@@ -128,18 +136,19 @@ bool CmapCoverage::getCoverage(SparseBitSet& coverage, const uint8_t* cmap_data,
     const size_t kPlatformIdOffset = 0;
     const size_t kEncodingIdOffset = 2;
     const size_t kOffsetOffset = 4;
-    const int kMicrosoftPlatformId = 3;
-    const int kUnicodeBmpEncodingId = 1;
-    const int kUnicodeUcs4EncodingId = 10;
+    const uint16_t kMicrosoftPlatformId = 3;
+    const uint16_t kUnicodeBmpEncodingId = 1;
+    const uint16_t kUnicodeUcs4EncodingId = 10;
+    const uint32_t kNoTable = UINT32_MAX;
     if (kHeaderSize > cmap_size) {
         return false;
     }
-    int numTables = readU16(cmap_data, kNumTablesOffset);
+    uint32_t numTables = readU16(cmap_data, kNumTablesOffset);
     if (kHeaderSize + numTables * kTableSize > cmap_size) {
         return false;
     }
-    int bestTable = -1;
-    for (int i = 0; i < numTables; i++) {
+    uint32_t bestTable = kNoTable;
+    for (uint32_t i = 0; i < numTables; i++) {
         uint16_t platformId = readU16(cmap_data, kHeaderSize + i * kTableSize + kPlatformIdOffset);
         uint16_t encodingId = readU16(cmap_data, kHeaderSize + i * kTableSize + kEncodingIdOffset);
         if (platformId == kMicrosoftPlatformId && encodingId == kUnicodeUcs4EncodingId) {
@@ -152,11 +161,11 @@ bool CmapCoverage::getCoverage(SparseBitSet& coverage, const uint8_t* cmap_data,
 #ifdef PRINTF_DEBUG
     printf("best table = %d\n", bestTable);
 #endif
-    if (bestTable < 0) {
+    if (bestTable == kNoTable) {
         return false;
     }
     uint32_t offset = readU32(cmap_data, kHeaderSize + bestTable * kTableSize + kOffsetOffset);
-    if (offset + 2 > cmap_size) {
+    if (offset > cmap_size - 2) {
         return false;
     }
     uint16_t format = readU16(cmap_data, offset);
diff --git a/libs/minikin/SparseBitSet.cpp b/libs/minikin/SparseBitSet.cpp
index 7acb7ba..2265ff2 100644
--- a/libs/minikin/SparseBitSet.cpp
+++ b/libs/minikin/SparseBitSet.cpp
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <cutils/log.h>
 #include <stddef.h>
 #include <string.h>
 #include <minikin/SparseBitSet.h>
@@ -71,6 +72,7 @@ void SparseBitSet::initFromRanges(const uint32_t* ranges, size_t nRanges) {
     for (size_t i = 0; i < nRanges; i++) {
         uint32_t start = ranges[i * 2];
         uint32_t end = ranges[i * 2 + 1];
+        LOG_ALWAYS_FATAL_IF(end < start);  // make sure range size is nonnegative
         uint32_t startPage = start >> kLogValuesPerPage;
         uint32_t endPage = (end - 1) >> kLogValuesPerPage;
         if (startPage >= nonzeroPageEnd) {
-- 
cgit v1.2.3


From 2e98eb6be123ec64070f7524ff24df538695d7c8 Mon Sep 17 00:00:00 2001
From: Raph Levien <raph@google.com>
Date: Thu, 29 Oct 2015 14:06:07 -0700
Subject: Tailor grapheme boundaries so sequence emoji are one grapheme - DO
 NOT MERGE

Make it so it's not possible to position the cursor inside an emoji
formed by a sequence including zero-width joiners.

Bug: 25368653
Change-Id: I67ec0874cd1505f3c82ab91492ffc3d39a52fae6
---
 libs/minikin/GraphemeBreak.cpp | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/libs/minikin/GraphemeBreak.cpp b/libs/minikin/GraphemeBreak.cpp
index f8f386c..56d5b23 100644
--- a/libs/minikin/GraphemeBreak.cpp
+++ b/libs/minikin/GraphemeBreak.cpp
@@ -22,6 +22,19 @@
 
 namespace android {
 
+// Returns true if the character appears before or after zwj in a zwj emoji sequence. See
+// http://www.unicode.org/emoji/charts/emoji-zwj-sequences.html
+bool isZwjEmoji(uint32_t c) {
+    return (c == 0x2764       // HEAVY BLACK HEART
+            || c == 0x1F468   // MAN
+            || c == 0x1F469   // WOMAN
+            || c == 0x1F48B   // KISS MARK
+            || c == 0x1F466   // BOY
+            || c == 0x1F467   // GIRL
+            || c == 0x1F441   // EYE
+            || c == 0x1F5E8); // LEFT SPEECH BUBBLE
+}
+
 bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t count,
         size_t offset) {
     // This implementation closely follows Unicode Standard Annex #29 on
@@ -93,6 +106,19 @@ bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t co
             && u_getIntPropertyValue(c2, UCHAR_GENERAL_CATEGORY) == U_OTHER_LETTER) {
         return false;
     }
+    // Tailoring: make emoji sequences with ZWJ a single grapheme cluster
+    if (c1 == 0x200D && isZwjEmoji(c2) && offset_back > start) {
+        // look at character before ZWJ to see that both can participate in an emoji zwj sequence
+        uint32_t c0 = 0;
+        U16_PREV(buf, start, offset_back, c0);
+        if (c0 == 0xFE0F && offset_back > start) {
+            // skip over emoji variation selector
+            U16_PREV(buf, start, offset_back, c0);
+        }
+        if (isZwjEmoji(c0)) {
+            return false;
+        }
+    }
     // Rule GB10, Any / Any
     return true;
 }
-- 
cgit v1.2.3