summaryrefslogtreecommitdiffstats
path: root/include/minikin/Hyphenator.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/minikin/Hyphenator.h')
-rw-r--r--include/minikin/Hyphenator.h39
1 files changed, 30 insertions, 9 deletions
diff --git a/include/minikin/Hyphenator.h b/include/minikin/Hyphenator.h
index 581c657..9605205 100644
--- a/include/minikin/Hyphenator.h
+++ b/include/minikin/Hyphenator.h
@@ -26,11 +26,8 @@
namespace android {
-class Trie {
-public:
- std::vector<uint8_t> result;
- std::unordered_map<uint16_t, Trie> succ;
-};
+// hyb file header; implementation details are in the .cpp file
+struct Header;
class Hyphenator {
public:
@@ -44,19 +41,43 @@ public:
// Example: word is "hyphen", result is [0 0 1 0 0 0], corresponding to "hy-phen".
void hyphenate(std::vector<uint8_t>* result, const uint16_t* word, size_t len);
+ // pattern data is in binary format, as described in doc/hyb_file_format.md. Note:
+ // the caller is responsible for ensuring that the lifetime of the pattern data is
+ // at least as long as the Hyphenator object.
+
+ // Note: nullptr is valid input, in which case the hyphenator only processes soft hyphens
+ static Hyphenator* loadBinary(const uint8_t* patternData);
+
private:
- void addPattern(const uint16_t* pattern, size_t size);
+ // apply soft hyphens only, ignoring patterns
+ void hyphenateSoft(uint8_t* result, const uint16_t* word, size_t len);
- void hyphenateSoft(std::vector<uint8_t>* result, const uint16_t* word, size_t len);
+ // try looking up word in alphabet table, return false if any code units fail to map
+ // Note that this methor writes len+2 entries into alpha_codes (including start and stop)
+ bool alphabetLookup(uint16_t* alpha_codes, const uint16_t* word, size_t len);
+
+ // calculate hyphenation from patterns, assuming alphabet lookup has already been done
+ void hyphenateFromCodes(uint8_t* result, const uint16_t* codes, size_t len);
// TODO: these should become parameters, as they might vary by locale, screen size, and
// possibly explicit user control.
static const int MIN_PREFIX = 2;
static const int MIN_SUFFIX = 3;
- Trie root;
+ // See also LONGEST_HYPHENATED_WORD in LineBreaker.cpp. Here the constant is used so
+ // that temporary buffers can be stack-allocated without waste, which is a slightly
+ // different use case. It measures UTF-16 code units.
+ static const size_t MAX_HYPHENATED_SIZE = 64;
+
+ const uint8_t* patternData;
+
+ // accessors for binary data
+ const Header* getHeader() const {
+ return reinterpret_cast<const Header*>(patternData);
+ }
+
};
} // namespace android
-#endif // MINIKIN_HYPHENATOR_H \ No newline at end of file
+#endif // MINIKIN_HYPHENATOR_H