summaryrefslogtreecommitdiffstats
path: root/include/minikin/WordBreaker.h
diff options
context:
space:
mode:
authorRaph Levien <raph@google.com>2015-09-04 17:23:05 -0700
committerRaph Levien <raph@google.com>2016-02-16 22:05:07 -0800
commit57b6dae9894b9362ef04517ff477fd491f9d433b (patch)
tree0a3487f577cd3449c1789c0ecbc1c8ebbb5d9b8d /include/minikin/WordBreaker.h
parent070633ad657e20344fa9d9e7ab79ebb311365aa9 (diff)
downloadandroid_frameworks_minikin-57b6dae9894b9362ef04517ff477fd491f9d433b.tar.gz
android_frameworks_minikin-57b6dae9894b9362ef04517ff477fd491f9d433b.tar.bz2
android_frameworks_minikin-57b6dae9894b9362ef04517ff477fd491f9d433b.zip
Refine hyphenation around punctuation
Implement a WordBreaker that defines our concept of valid word boundaries, customizing the ICU behavior. Currently, we suppress line breaks at soft hyphens (these are handled specially). Also, the new WordBreaker class has methods that determine the start and end of the word (punctuation stripped) for the purpose of hyphenation. This patch, in its current form, doesn't handle email addresses and URLs specially, but the WordBreaker class is the correct place to do so. Also, special case handling of hyphens and dashes is still done in LineBreaker, but all of that should be moved to WordBreaker. Bug: 20126487 Bug: 20566159 Change-Id: I492cbad963f9b74a2915f010dad46bb91f97b2fe
Diffstat (limited to 'include/minikin/WordBreaker.h')
-rw-r--r--include/minikin/WordBreaker.h67
1 files changed, 67 insertions, 0 deletions
diff --git a/include/minikin/WordBreaker.h b/include/minikin/WordBreaker.h
new file mode 100644
index 0000000..22275bd
--- /dev/null
+++ b/include/minikin/WordBreaker.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A wrapper around ICU's line break iterator, that gives customized line
+ * break opportunities, as well as identifying words for the purpose of
+ * hyphenation.
+ */
+
+#ifndef MINIKIN_WORD_BREAKER_H
+#define MINIKIN_WORD_BREAKER_H
+
+#include "unicode/brkiter.h"
+#include <memory>
+
+namespace android {
+
+class WordBreaker {
+public:
+    // Calls finish() when the object is destroyed.
+    ~WordBreaker() { finish(); }
+
+    void setLocale(const icu::Locale& locale);
+
+    // data points at size 16-bit units (presumably UTF-16 code units; confirm in the .cpp).
+    void setText(const uint16_t* data, size_t size);
+
+    // Advance iterator to next word break. Return offset, or -1 if EOT
+    ssize_t next();
+
+    // Current offset of iterator, equal to 0 at BOT or last return from next()
+    ssize_t current() const;
+
+    // After calling next(), wordStart() and wordEnd() are offsets defining the previous
+    // word. If wordEnd <= wordStart, it's not a word for the purpose of hyphenation.
+    ssize_t wordStart() const;
+    ssize_t wordEnd() const;
+
+    void finish();
+
+private:
+    std::unique_ptr<icu::BreakIterator> mBreakIterator;
+    UText mUText = UTEXT_INITIALIZER;
+    const uint16_t* mText = nullptr;
+    // Given defaults so a default-constructed object never holds indeterminate values.
+    size_t mTextSize = 0;
+    ssize_t mLast = 0;
+    ssize_t mCurrent = 0;
+    bool mIteratorWasReset = false;
+};
+
+} // namespace android
+
+#endif // MINIKIN_WORD_BREAKER_H