diff options
author | Raph Levien <raph@google.com> | 2015-08-27 13:50:00 -0700 |
---|---|---|
committer | Roozbeh Pournader <roozbeh@google.com> | 2015-10-14 16:26:37 -0700 |
commit | 6e2cccdc518f8d3424c84ae6fbe0e87ae3c3f66a (patch) | |
tree | a5ce563e5326639274f08cedb8d64e85a667f091 /app | |
parent | 90a09c3f36d98530822392446884b8af68035908 (diff) | |
download | android_frameworks_minikin-6e2cccdc518f8d3424c84ae6fbe0e87ae3c3f66a.tar.gz android_frameworks_minikin-6e2cccdc518f8d3424c84ae6fbe0e87ae3c3f66a.tar.bz2 android_frameworks_minikin-6e2cccdc518f8d3424c84ae6fbe0e87ae3c3f66a.zip |
Binary format for hyphenation patterns
In the current state, hyphenation in all languages other than Sanskrit seems
to work (case-folding edge cases). Thus, we just disable Sanskrit.
Packed tries are implemented, but not the finite state machine
(space/speed tradeoff).
This commit contains a throw-away test app, which runs on the host.
I think I want to replace it with unit tests, but I'm including it in
the CL because it's useful during development.
Bug: 21562869
Bug: 21826930
Bug: 23317038
Bug: 23317904
Bug: 24570591
Change-Id: I7479a565a4a062fa319651c2c14c0fa18c5ceaea
(cherry picked from commit f0be43de02a1e07308d3d95408349c3c7f973430)
Diffstat (limited to 'app')
-rw-r--r-- | app/Android.mk | 36 | ||||
-rw-r--r-- | app/HyphTool.cpp | 62 |
2 files changed, 98 insertions, 0 deletions
diff --git a/app/Android.mk b/app/Android.mk
new file mode 100644
index 0000000..2038683
--- /dev/null
+++ b/app/Android.mk
@@ -0,0 +1,36 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# see how_to_run.txt for instructions on running these tests
+
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := hyphtool
+LOCAL_MODULE_TAGS := optional
+
+LOCAL_STATIC_LIBRARIES := libminikin_host
+
+# Shared libraries which are dependencies of minikin; these are not automatically
+# pulled in by the build system (and thus sadly must be repeated).
+
+LOCAL_SHARED_LIBRARIES := \
+    liblog \
+    libicuuc-host
+
+LOCAL_SRC_FILES += \
+    HyphTool.cpp
+
+include $(BUILD_HOST_EXECUTABLE)
diff --git a/app/HyphTool.cpp b/app/HyphTool.cpp
new file mode 100644
index 0000000..730abad
--- /dev/null
+++ b/app/HyphTool.cpp
@@ -0,0 +1,81 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <string.h>
+
+#include "utils/Log.h"
+
+#include <vector>
+#include <minikin/Hyphenator.h>
+
+using android::Hyphenator;
+
+// Reads the whole file `fn` into a heap buffer and hands that buffer to
+// Hyphenator::loadBinary(). Returns nullptr (after printing a message to
+// stderr) if the file cannot be stat'ed, opened, or read in full.
+//
+// NOTE(review): on success the buffer is deliberately not freed here; this
+// assumes the returned Hyphenator keeps referring to it -- confirm against
+// Hyphenator::loadBinary().
+Hyphenator* loadHybFile(const char* fn) {
+    struct stat statbuf;
+    int status = stat(fn, &statbuf);
+    if (status < 0) {
+        fprintf(stderr, "error opening %s\n", fn);
+        return nullptr;
+    }
+    size_t size = statbuf.st_size;
+    FILE* f = fopen(fn, "rb");
+    if (f == NULL) {
+        fprintf(stderr, "error opening %s\n", fn);
+        return nullptr;
+    }
+    uint8_t* buf = new uint8_t[size];
+    size_t read_size = fread(buf, 1, size, f);
+    // The stream is no longer needed whether or not the read succeeded.
+    fclose(f);
+    if (read_size < size) {
+        fprintf(stderr, "error reading %s\n", fn);
+        delete[] buf;
+        return nullptr;
+    }
+    return Hyphenator::loadBinary(buf);
+}
+
+// Usage: hyphtool word
+// Prints `word` with a '-' in front of every position the hyphenator flags.
+int main(int argc, char** argv) {
+    // Validate the command line before touching the pattern file.
+    if (argc < 2) {
+        fprintf(stderr, "usage: hyphtool word\n");
+        return 1;
+    }
+    Hyphenator* hyph = loadHybFile("/tmp/en.hyb");  // should also be configurable
+    if (hyph == nullptr) {
+        // loadHybFile() has already reported the reason on stderr.
+        return 1;
+    }
+    std::vector<uint8_t> result;
+    std::vector<uint16_t> word;
+    char* asciiword = argv[1];
+    size_t len = strlen(asciiword);
+    for (size_t i = 0; i < len; i++) {
+        // Go through unsigned char so bytes >= 0x80 are not sign-extended.
+        uint32_t c = static_cast<unsigned char>(asciiword[i]);
+        if (c == '-') {
+            c = 0x00AD;
+        }
+        // ASCII (or possibly ISO Latin 1), but kinda painful to do utf conversion :(
+        word.push_back(c);
+    }
+    hyph->hyphenate(&result, word.data(), word.size());
+    for (size_t i = 0; i < len; i++) {
+        // Guard the index in case fewer than len entries were produced.
+        if (i < result.size() && result[i] != 0) {
+            printf("-");
+        }
+        printf("%c", word[i]);
+    }
+    printf("\n");
+    return 0;
+}