summaryrefslogtreecommitdiffstats
path: root/app
diff options
context:
space:
mode:
authorRaph Levien <raph@google.com>2015-08-27 13:50:00 -0700
committerRoozbeh Pournader <roozbeh@google.com>2015-10-14 16:26:37 -0700
commit6e2cccdc518f8d3424c84ae6fbe0e87ae3c3f66a (patch)
treea5ce563e5326639274f08cedb8d64e85a667f091 /app
parent90a09c3f36d98530822392446884b8af68035908 (diff)
downloadandroid_frameworks_minikin-6e2cccdc518f8d3424c84ae6fbe0e87ae3c3f66a.tar.gz
android_frameworks_minikin-6e2cccdc518f8d3424c84ae6fbe0e87ae3c3f66a.tar.bz2
android_frameworks_minikin-6e2cccdc518f8d3424c84ae6fbe0e87ae3c3f66a.zip
Binary format for hyphenation patterns
In the current state, hyphenation in all languages other than Sanskrit seems to work (case-folding edge cases). Thus, we just disable Sanskrit. Packed tries are implemented, but not the finite state machine (space/speed tradeoff). This commit contains a throw-away test app, which runs on the host. I think I want to replace it with unit tests, but I'm including it in the CL because it's useful during development. Bug: 21562869 Bug: 21826930 Bug: 23317038 Bug: 23317904 Bug: 24570591 Change-Id: I7479a565a4a062fa319651c2c14c0fa18c5ceaea (cherry picked from commit f0be43de02a1e07308d3d95408349c3c7f973430)
Diffstat (limited to 'app')
-rw-r--r--app/Android.mk36
-rw-r--r--app/HyphTool.cpp62
2 files changed, 98 insertions, 0 deletions
diff --git a/app/Android.mk b/app/Android.mk
new file mode 100644
index 0000000..2038683
--- /dev/null
+++ b/app/Android.mk
@@ -0,0 +1,36 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# see how_to_run.txt for instructions on running these tests
+
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := hyphtool
+LOCAL_MODULE_TAGS := optional
+
+LOCAL_STATIC_LIBRARIES := libminikin_host
+
+# Shared libraries which are dependencies of minikin; these are not automatically
+# pulled in by the build system (and thus sadly must be repeated).
+
+LOCAL_SHARED_LIBRARIES := \
+ liblog \
+ libicuuc-host
+
+LOCAL_SRC_FILES += \
+ HyphTool.cpp
+
+include $(BUILD_HOST_EXECUTABLE)
diff --git a/app/HyphTool.cpp b/app/HyphTool.cpp
new file mode 100644
index 0000000..730abad
--- /dev/null
+++ b/app/HyphTool.cpp
@@ -0,0 +1,62 @@
+#include <stdio.h>
+#include <sys/stat.h>
+#include <string.h>
+
+#include "utils/Log.h"
+
+#include <vector>
+#include <minikin/Hyphenator.h>
+
+using android::Hyphenator;
+
+// Reads the entire file named by fn into a freshly allocated byte buffer and passes
+// that buffer to Hyphenator::loadBinary().
+//
+// fn: path of the binary hyphenation pattern file to load.
+//
+// Returns whatever Hyphenator::loadBinary() returns, or nullptr (after printing a
+// message to stderr) if the file cannot be stat'ed, opened, or read in full.
+//
+// The buffer is intentionally not freed on the success path because it has been
+// handed to loadBinary().
+// NOTE(review): presumably the returned Hyphenator keeps referring to the buffer --
+// confirm against minikin/Hyphenator.h before adding a delete[] here.
+Hyphenator* loadHybFile(const char* fn) {
+    struct stat statbuf;
+    int status = stat(fn, &statbuf);
+    if (status < 0) {
+        fprintf(stderr, "error opening %s\n", fn);
+        return nullptr;
+    }
+    size_t size = statbuf.st_size;
+    FILE* f = fopen(fn, "rb");
+    if (f == NULL) {
+        fprintf(stderr, "error opening %s\n", fn);
+        return nullptr;
+    }
+    uint8_t* buf = new uint8_t[size];
+    size_t read_size = fread(buf, 1, size, f);
+    // The stream is not needed past this point on any path; closing it here keeps
+    // both the success and the short-read return from leaking the FILE handle.
+    fclose(f);
+    if (read_size < size) {
+        fprintf(stderr, "error reading %s\n", fn);
+        delete[] buf;
+        return nullptr;
+    }
+    return Hyphenator::loadBinary(buf);
+}
+
+// Command-line entry point: hyphenates the single word given as argv[1] using the
+// patterns in /tmp/en.hyb and prints the word with a '-' before every position for
+// which the hyphenator reported a non-zero result.
+//
+// Returns 0 on success, 1 if no word was supplied or the pattern file failed to load.
+int main(int argc, char** argv) {
+    // Validate the arguments before touching the pattern file, so a usage error is
+    // reported on its own rather than after an unrelated load attempt.
+    if (argc < 2) {
+        fprintf(stderr, "usage: hyphtool word\n");
+        return 1;
+    }
+    Hyphenator* hyph = loadHybFile("/tmp/en.hyb");  // should also be configurable
+    if (hyph == nullptr) {
+        // loadHybFile() has already printed the reason; bail out instead of
+        // dereferencing a null pointer below.
+        return 1;
+    }
+    std::vector<uint8_t> result;
+    std::vector<uint16_t> word;
+    char* asciiword = argv[1];
+    size_t len = strlen(asciiword);
+    for (size_t i = 0; i < len; i++) {
+        // Go through unsigned char: where plain char is signed, a byte >= 0x80 would
+        // otherwise sign-extend and end up as 0xFFxx in the 16-bit word.
+        uint32_t c = static_cast<unsigned char>(asciiword[i]);
+        if (c == '-') {
+            // A '-' typed on the command line stands in for U+00AD.
+            c = 0x00AD;
+        }
+        // ASCII (or possibly ISO Latin 1), but kinda painful to do utf conversion :(
+        word.push_back(c);
+    }
+    hyph->hyphenate(&result, word.data(), word.size());
+    for (size_t i = 0; i < len; i++) {
+        // The size check guards the index in case hyphenate() produced fewer entries
+        // than there are input characters.
+        if (i < result.size() && result[i] != 0) {
+            printf("-");
+        }
+        printf("%c", word[i]);
+    }
+    printf("\n");
+    return 0;
+}