summaryrefslogtreecommitdiffstats
path: root/native
diff options
context:
space:
mode:
authorAmith Yamasani <yamasani@google.com>2010-03-09 12:46:57 -0800
committerAmith Yamasani <yamasani@google.com>2010-03-09 15:01:09 -0800
commit07b1603a3f9611f6d15dd7fcedf883d6ef8e5817 (patch)
tree3a88daaee27b886909a5af8a646b41dfb794a9f8 /native
parent81c52293f84ce475ac6b1661f4a4b92703405247 (diff)
downloadandroid_packages_inputmethods_LatinIME-07b1603a3f9611f6d15dd7fcedf883d6ef8e5817.tar.gz
android_packages_inputmethods_LatinIME-07b1603a3f9611f6d15dd7fcedf883d6ef8e5817.tar.bz2
android_packages_inputmethods_LatinIME-07b1603a3f9611f6d15dd7fcedf883d6ef8e5817.zip
Don't let the native code target be included twice when unbundling.
Move java code to a different directory so that the unbundled version doesn't try to compile the native code again. Change-Id: I05cf9e643824ddc448821f69805ccb0240c5b986
Diffstat (limited to 'native')
-rw-r--r--native/Android.mk28
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp222
-rw-r--r--native/src/basechars.h172
-rw-r--r--native/src/dictionary.cpp317
-rw-r--r--native/src/dictionary.h83
5 files changed, 822 insertions, 0 deletions
diff --git a/native/Android.mk b/native/Android.mk
new file mode 100644
index 000000000..9ba9f75ec
--- /dev/null
+++ b/native/Android.mk
@@ -0,0 +1,28 @@
+# Build rules for the libjni_latinime shared library: the BinaryDictionary JNI glue
+# plus the native dictionary implementation.
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+
+# Make the headers under src/ (dictionary.h, basechars.h) visible to the JNI source.
+LOCAL_C_INCLUDES += $(LOCAL_PATH)/src
+
+# Sources compiled into the library.
+LOCAL_SRC_FILES := \
+ jni/com_android_inputmethod_latin_BinaryDictionary.cpp \
+ src/dictionary.cpp
+
+# ICU common headers (dictionary.cpp includes <unicode/uchar.h>) and the JNI headers.
+LOCAL_C_INCLUDES += \
+ external/icu4c/common \
+ $(JNI_H_INCLUDE)
+
+LOCAL_LDLIBS := -lm
+
+LOCAL_PRELINK_MODULE := false
+
+# Shared libraries this module links against.
+LOCAL_SHARED_LIBRARIES := \
+ libandroid_runtime \
+ libcutils \
+ libutils \
+ libicuuc
+
+# Name of the resulting shared library module.
+LOCAL_MODULE := libjni_latinime
+
+LOCAL_MODULE_TAGS := user
+
+include $(BUILD_SHARED_LIBRARY)
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
new file mode 100644
index 000000000..d068f3faf
--- /dev/null
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -0,0 +1,222 @@
+/*
+**
+** Copyright 2009, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+** http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+#define LOG_TAG "BinaryDictionary"
+#include "utils/Log.h"
+
+#include <stdio.h>
+#include <assert.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include <nativehelper/jni.h>
+#include "utils/AssetManager.h"
+#include "utils/Asset.h"
+
+#include "dictionary.h"
+
+// ----------------------------------------------------------------------------
+
+using namespace latinime;
+
+using namespace android;
+
+static jfieldID sDescriptorField;
+static jfieldID sAssetManagerNativeField;
+static jmethodID sAddWordMethod;
+static jfieldID sDictLength;
+
+//
+// helper function to throw an exception
+//
+// Throws a Java exception of class `ex` (a JNI class path such as
+// "java/lang/IllegalStateException"). The message is `fmt` formatted with the single
+// integer argument `data`. Nothing is thrown by this helper if the class cannot be found.
+static void throwException(JNIEnv *env, const char* ex, const char* fmt, int data)
+{
+    if (jclass cls = env->FindClass(ex)) {
+        char msg[1000];
+        // Bounded formatting: an over-long format string is truncated instead of
+        // overrunning the stack buffer.
+        snprintf(msg, sizeof(msg), fmt, data);
+        env->ThrowNew(cls, msg);
+        env->DeleteLocalRef(cls);
+    }
+}
+
+// Native implementation of BinaryDictionary.openNative().
+//
+// Opens the dictionary resource named by `resourceString` through the native peer of
+// `assetManager`, wraps its in-memory buffer in a Dictionary, stores the asset length in
+// the Java object's mDictLength field, and returns the Dictionary pointer as an int
+// handle. Returns 0 on any failure.
+static jint latinime_BinaryDictionary_open
+        (JNIEnv *env, jobject object, jobject assetManager, jstring resourceString,
+         jint typedLetterMultiplier, jint fullWordMultiplier)
+{
+    // Get the native AssetManager peer stored in the Java AssetManager object
+    AssetManager *am = (AssetManager*) env->GetIntField(assetManager, sAssetManagerNativeField);
+    if (!am) {
+        LOGE("DICT: Couldn't get AssetManager native peer\n");
+        return 0;
+    }
+    const char *resourcePath = env->GetStringUTFChars(resourceString, NULL);
+    if (resourcePath == NULL) {
+        // GetStringUTFChars failed; there is no path to open.
+        LOGE("DICT: Couldn't get resource path\n");
+        return 0;
+    }
+
+    Asset *dictAsset = am->openNonAsset(resourcePath, Asset::ACCESS_BUFFER);
+    if (dictAsset == NULL) {
+        LOGE("DICT: Couldn't get asset %s\n", resourcePath);
+        env->ReleaseStringUTFChars(resourceString, resourcePath);
+        return 0;
+    }
+
+    void *dict = (void*) dictAsset->getBuffer(false);
+    if (dict == NULL) {
+        LOGE("DICT: Dictionary buffer is null\n");
+        // The asset was opened successfully but is unusable; release it so it
+        // does not leak.
+        delete dictAsset;
+        env->ReleaseStringUTFChars(resourceString, resourcePath);
+        return 0;
+    }
+    Dictionary *dictionary = new Dictionary(dict, typedLetterMultiplier, fullWordMultiplier);
+    // The Dictionary keeps the asset so that closeNative() can release it later.
+    dictionary->setAsset(dictAsset);
+    env->SetIntField(object, sDictLength, (jint) dictAsset->getLength());
+
+    env->ReleaseStringUTFChars(resourceString, resourcePath);
+    return (jint) dictionary;
+}
+
+// Native implementation of BinaryDictionary.getSuggestionsNative().
+//
+// Pins the Java arrays, forwards the query to Dictionary::getSuggestions() and releases
+// the arrays again. The input codes are released with JNI_ABORT (no copy-back); the
+// output words, frequencies and next-letter counts are released with mode 0 so that the
+// results are visible to Java. Returns the suggestion count, or 0 for a null handle.
+static int latinime_BinaryDictionary_getSuggestions(
+        JNIEnv *env, jobject object, jint dict, jintArray inputArray, jint arraySize,
+        jcharArray outputArray, jintArray frequencyArray, jint maxWordLength, jint maxWords,
+        jint maxAlternatives, jint skipPos, jintArray nextLettersArray, jint nextLettersSize)
+{
+    Dictionary *dictionary = (Dictionary*) dict;
+    if (!dictionary) {
+        return 0;
+    }
+
+    int *freqs = env->GetIntArrayElements(frequencyArray, NULL);
+    int *codes = env->GetIntArrayElements(inputArray, NULL);
+    jchar *words = env->GetCharArrayElements(outputArray, NULL);
+
+    // The next-letters array is optional.
+    int *nextLetters = NULL;
+    if (nextLettersArray != NULL) {
+        nextLetters = env->GetIntArrayElements(nextLettersArray, NULL);
+    }
+
+    const int count = dictionary->getSuggestions(codes, arraySize, (unsigned short*) words,
+            freqs, maxWordLength, maxWords, maxAlternatives, skipPos, nextLetters,
+            nextLettersSize);
+
+    env->ReleaseIntArrayElements(frequencyArray, freqs, 0);
+    env->ReleaseIntArrayElements(inputArray, codes, JNI_ABORT);
+    env->ReleaseCharArrayElements(outputArray, words, 0);
+    if (nextLetters != NULL) {
+        env->ReleaseIntArrayElements(nextLettersArray, nextLetters, 0);
+    }
+
+    return count;
+}
+
+// Native implementation of BinaryDictionary.isValidWordNative().
+//
+// Returns whether the first `wordLength` chars of `wordArray` form a word stored in the
+// dictionary identified by `dict`; a null handle yields false.
+static jboolean latinime_BinaryDictionary_isValidWord
+        (JNIEnv *env, jobject object, jint dict, jcharArray wordArray, jint wordLength)
+{
+    Dictionary *dictionary = (Dictionary*) dict;
+    if (!dictionary) {
+        return (jboolean) false;
+    }
+
+    jchar *chars = env->GetCharArrayElements(wordArray, NULL);
+    const jboolean found = dictionary->isValidWord((unsigned short*) chars, wordLength);
+    // The word is only read, so skip the copy-back.
+    env->ReleaseCharArrayElements(wordArray, chars, JNI_ABORT);
+
+    return found;
+}
+
+// Native implementation of BinaryDictionary.closeNative().
+//
+// Closes the asset backing the dictionary and destroys the Dictionary object behind the
+// `dict` handle. A zero handle (which openNative() returns on failure) is ignored.
+static void latinime_BinaryDictionary_close
+        (JNIEnv *env, jobject object, jint dict)
+{
+    Dictionary *dictionary = (Dictionary*) dict;
+    if (dictionary == NULL) {
+        return;
+    }
+    Asset *asset = (Asset*) dictionary->getAsset();
+    if (asset != NULL) {
+        asset->close();
+    }
+    delete dictionary;
+}
+
+// ----------------------------------------------------------------------------
+
+// Table binding the Java native method names (with their JNI type signatures) to the
+// C++ functions in this file. It is passed to RegisterNatives() by registerNatives().
+static JNINativeMethod gMethods[] = {
+ // int openNative(AssetManager, String, int, int)
+ {"openNative", "(Landroid/content/res/AssetManager;Ljava/lang/String;II)I",
+ (void*)latinime_BinaryDictionary_open},
+ // void closeNative(int)
+ {"closeNative", "(I)V", (void*)latinime_BinaryDictionary_close},
+ // int getSuggestionsNative(int, int[], int, char[], int[], int, int, int, int, int[], int)
+ {"getSuggestionsNative", "(I[II[C[IIIII[II)I", (void*)latinime_BinaryDictionary_getSuggestions},
+ // boolean isValidWordNative(int, char[], int)
+ {"isValidWordNative", "(I[CI)Z", (void*)latinime_BinaryDictionary_isValidWord}
+};
+
+// Looks up `className` and registers the `numMethods` entries of `gMethods` on it.
+// Returns JNI_TRUE on success; on failure prints a diagnostic to stderr and returns
+// JNI_FALSE.
+static int registerNativeMethods(JNIEnv* env, const char* className,
+        JNINativeMethod* gMethods, int numMethods)
+{
+    jclass target = env->FindClass(className);
+    if (!target) {
+        fprintf(stderr,
+                "Native registration unable to find class '%s'\n", className);
+        return JNI_FALSE;
+    }
+
+    const bool registered = env->RegisterNatives(target, gMethods, numMethods) >= 0;
+    if (!registered) {
+        fprintf(stderr, "RegisterNatives failed for '%s'\n", className);
+        return JNI_FALSE;
+    }
+
+    return JNI_TRUE;
+}
+
+// Caches the field IDs used by the native methods and registers the native method table
+// on the BinaryDictionary class.
+//
+// Returns JNI_TRUE on success and JNI_FALSE on failure. The caller tests the result with
+// `!registerNatives(env)`, so failure must be reported as zero; a -1 return would be
+// mistaken for success.
+static int registerNatives(JNIEnv *env)
+{
+    const char* const kClassPathName = "com/android/inputmethod/latin/BinaryDictionary";
+    const char* const kFileDescriptorClass = "java/io/FileDescriptor";
+    const char* const kAssetManagerClass = "android/content/res/AssetManager";
+    jclass clazz;
+
+    clazz = env->FindClass(kFileDescriptorClass);
+    if (clazz == NULL) {
+        LOGE("Can't find %s", kFileDescriptorClass);
+        return JNI_FALSE;
+    }
+    sDescriptorField = env->GetFieldID(clazz, "descriptor", "I");
+
+    clazz = env->FindClass(kAssetManagerClass);
+    if (clazz == NULL) {
+        // Report the class that is actually missing.
+        LOGE("Can't find %s", kAssetManagerClass);
+        return JNI_FALSE;
+    }
+    // Field holding the native AssetManager peer, read by openNative().
+    sAssetManagerNativeField = env->GetFieldID(clazz, "mObject", "I");
+
+    // Get the field pointer for the dictionary length
+    clazz = env->FindClass(kClassPathName);
+    if (clazz == NULL) {
+        LOGE("Can't find %s", kClassPathName);
+        return JNI_FALSE;
+    }
+    sDictLength = env->GetFieldID(clazz, "mDictLength", "I");
+
+    return registerNativeMethods(env,
+            kClassPathName, gMethods, sizeof(gMethods) / sizeof(gMethods[0]));
+}
+
+/*
+ * Returns the JNI version on success, -1 on failure.
+ */
+jint JNI_OnLoad(JavaVM* vm, void* reserved)
+{
+    JNIEnv* env = NULL;
+
+    // Obtain the JNIEnv for the loading thread.
+    if (vm->GetEnv((void**) &env, JNI_VERSION_1_4) != JNI_OK) {
+        fprintf(stderr, "ERROR: GetEnv failed\n");
+        return -1;
+    }
+    assert(env != NULL);
+
+    // Hook the native methods up to the Java class.
+    if (!registerNatives(env)) {
+        fprintf(stderr, "ERROR: BinaryDictionary native registration failed\n");
+        return -1;
+    }
+
+    /* success -- return valid version number */
+    return JNI_VERSION_1_4;
+}
diff --git a/native/src/basechars.h b/native/src/basechars.h
new file mode 100644
index 000000000..5a4406606
--- /dev/null
+++ b/native/src/basechars.h
@@ -0,0 +1,172 @@
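+/**
+ * Table mapping most combined Latin, Greek, and Cyrillic characters
+ * to their base characters. For c in the range 0x0000-0x04ff,
+ * BASE_CHARS[c] == c if c is not a combined character; otherwise
+ * BASE_CHARS[c] is its base character. A few entries were adjusted
+ * by hand after generation; they are marked with inline comments.
+ */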
+static unsigned short BASE_CHARS[] = {
+ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
+ 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
+ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
+ 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
+ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
+ 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
+ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+ 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
+ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
+ 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
+ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
+ 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
+ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
+ 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
+ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
+ 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
+ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
+ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
+ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
+ 0x0020, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
+ 0x0020, 0x00a9, 0x0061, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0020,
+ 0x00b0, 0x00b1, 0x0032, 0x0033, 0x0020, 0x03bc, 0x00b6, 0x00b7,
+ 0x0020, 0x0031, 0x006f, 0x00bb, 0x0031, 0x0031, 0x0033, 0x00bf,
+ 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00c6, 0x0043,
+ 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049,
+ 0x00d0, 0x004e, 0x004f, 0x004f, 0x004f, 0x004f, 0x004f, 0x00d7,
+ 0x004f, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00de, 0x0073, // Manually changed d8 to 4f
+ // Manually changed df to 73
+ 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00e6, 0x0063,
+ 0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069,
+ 0x00f0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00f7,
+ 0x006f, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00fe, 0x0079, // Manually changed f8 to 6f
+ 0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063,
+ 0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064,
+ 0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065,
+ 0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067,
+ 0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127,
+ 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069,
+ 0x0049, 0x0131, 0x0049, 0x0069, 0x004a, 0x006a, 0x004b, 0x006b,
+ 0x0138, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c,
+ 0x006c, 0x0141, 0x0142, 0x004e, 0x006e, 0x004e, 0x006e, 0x004e,
+ 0x006e, 0x02bc, 0x014a, 0x014b, 0x004f, 0x006f, 0x004f, 0x006f,
+ 0x004f, 0x006f, 0x0152, 0x0153, 0x0052, 0x0072, 0x0052, 0x0072,
+ 0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073,
+ 0x0053, 0x0073, 0x0054, 0x0074, 0x0054, 0x0074, 0x0166, 0x0167,
+ 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075,
+ 0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079,
+ 0x0059, 0x005a, 0x007a, 0x005a, 0x007a, 0x005a, 0x007a, 0x0073,
+ 0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187,
+ 0x0188, 0x0189, 0x018a, 0x018b, 0x018c, 0x018d, 0x018e, 0x018f,
+ 0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197,
+ 0x0198, 0x0199, 0x019a, 0x019b, 0x019c, 0x019d, 0x019e, 0x019f,
+ 0x004f, 0x006f, 0x01a2, 0x01a3, 0x01a4, 0x01a5, 0x01a6, 0x01a7,
+ 0x01a8, 0x01a9, 0x01aa, 0x01ab, 0x01ac, 0x01ad, 0x01ae, 0x0055,
+ 0x0075, 0x01b1, 0x01b2, 0x01b3, 0x01b4, 0x01b5, 0x01b6, 0x01b7,
+ 0x01b8, 0x01b9, 0x01ba, 0x01bb, 0x01bc, 0x01bd, 0x01be, 0x01bf,
+ 0x01c0, 0x01c1, 0x01c2, 0x01c3, 0x0044, 0x0044, 0x0064, 0x004c,
+ 0x004c, 0x006c, 0x004e, 0x004e, 0x006e, 0x0041, 0x0061, 0x0049,
+ 0x0069, 0x004f, 0x006f, 0x0055, 0x0075, 0x00dc, 0x00fc, 0x00dc,
+ 0x00fc, 0x00dc, 0x00fc, 0x00dc, 0x00fc, 0x01dd, 0x00c4, 0x00e4,
+ 0x0226, 0x0227, 0x00c6, 0x00e6, 0x01e4, 0x01e5, 0x0047, 0x0067,
+ 0x004b, 0x006b, 0x004f, 0x006f, 0x01ea, 0x01eb, 0x01b7, 0x0292,
+ 0x006a, 0x0044, 0x0044, 0x0064, 0x0047, 0x0067, 0x01f6, 0x01f7,
+ 0x004e, 0x006e, 0x00c5, 0x00e5, 0x00c6, 0x00e6, 0x00d8, 0x00f8,
+ 0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065,
+ 0x0049, 0x0069, 0x0049, 0x0069, 0x004f, 0x006f, 0x004f, 0x006f,
+ 0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075,
+ 0x0053, 0x0073, 0x0054, 0x0074, 0x021c, 0x021d, 0x0048, 0x0068,
+ 0x0220, 0x0221, 0x0222, 0x0223, 0x0224, 0x0225, 0x0041, 0x0061,
+ 0x0045, 0x0065, 0x00d6, 0x00f6, 0x00d5, 0x00f5, 0x004f, 0x006f,
+ 0x022e, 0x022f, 0x0059, 0x0079, 0x0234, 0x0235, 0x0236, 0x0237,
+ 0x0238, 0x0239, 0x023a, 0x023b, 0x023c, 0x023d, 0x023e, 0x023f,
+ 0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247,
+ 0x0248, 0x0249, 0x024a, 0x024b, 0x024c, 0x024d, 0x024e, 0x024f,
+ 0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257,
+ 0x0258, 0x0259, 0x025a, 0x025b, 0x025c, 0x025d, 0x025e, 0x025f,
+ 0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267,
+ 0x0268, 0x0269, 0x026a, 0x026b, 0x026c, 0x026d, 0x026e, 0x026f,
+ 0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277,
+ 0x0278, 0x0279, 0x027a, 0x027b, 0x027c, 0x027d, 0x027e, 0x027f,
+ 0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287,
+ 0x0288, 0x0289, 0x028a, 0x028b, 0x028c, 0x028d, 0x028e, 0x028f,
+ 0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297,
+ 0x0298, 0x0299, 0x029a, 0x029b, 0x029c, 0x029d, 0x029e, 0x029f,
+ 0x02a0, 0x02a1, 0x02a2, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7,
+ 0x02a8, 0x02a9, 0x02aa, 0x02ab, 0x02ac, 0x02ad, 0x02ae, 0x02af,
+ 0x0068, 0x0266, 0x006a, 0x0072, 0x0279, 0x027b, 0x0281, 0x0077,
+ 0x0079, 0x02b9, 0x02ba, 0x02bb, 0x02bc, 0x02bd, 0x02be, 0x02bf,
+ 0x02c0, 0x02c1, 0x02c2, 0x02c3, 0x02c4, 0x02c5, 0x02c6, 0x02c7,
+ 0x02c8, 0x02c9, 0x02ca, 0x02cb, 0x02cc, 0x02cd, 0x02ce, 0x02cf,
+ 0x02d0, 0x02d1, 0x02d2, 0x02d3, 0x02d4, 0x02d5, 0x02d6, 0x02d7,
+ 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x02de, 0x02df,
+ 0x0263, 0x006c, 0x0073, 0x0078, 0x0295, 0x02e5, 0x02e6, 0x02e7,
+ 0x02e8, 0x02e9, 0x02ea, 0x02eb, 0x02ec, 0x02ed, 0x02ee, 0x02ef,
+ 0x02f0, 0x02f1, 0x02f2, 0x02f3, 0x02f4, 0x02f5, 0x02f6, 0x02f7,
+ 0x02f8, 0x02f9, 0x02fa, 0x02fb, 0x02fc, 0x02fd, 0x02fe, 0x02ff,
+ 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307,
+ 0x0308, 0x0309, 0x030a, 0x030b, 0x030c, 0x030d, 0x030e, 0x030f,
+ 0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317,
+ 0x0318, 0x0319, 0x031a, 0x031b, 0x031c, 0x031d, 0x031e, 0x031f,
+ 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327,
+ 0x0328, 0x0329, 0x032a, 0x032b, 0x032c, 0x032d, 0x032e, 0x032f,
+ 0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337,
+ 0x0338, 0x0339, 0x033a, 0x033b, 0x033c, 0x033d, 0x033e, 0x033f,
+ 0x0300, 0x0301, 0x0342, 0x0313, 0x0308, 0x0345, 0x0346, 0x0347,
+ 0x0348, 0x0349, 0x034a, 0x034b, 0x034c, 0x034d, 0x034e, 0x034f,
+ 0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357,
+ 0x0358, 0x0359, 0x035a, 0x035b, 0x035c, 0x035d, 0x035e, 0x035f,
+ 0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367,
+ 0x0368, 0x0369, 0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f,
+ 0x0370, 0x0371, 0x0372, 0x0373, 0x02b9, 0x0375, 0x0376, 0x0377,
+ 0x0378, 0x0379, 0x0020, 0x037b, 0x037c, 0x037d, 0x003b, 0x037f,
+ 0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x00a8, 0x0391, 0x00b7,
+ 0x0395, 0x0397, 0x0399, 0x038b, 0x039f, 0x038d, 0x03a5, 0x03a9,
+ 0x03ca, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
+ 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
+ 0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
+ 0x03a8, 0x03a9, 0x0399, 0x03a5, 0x03b1, 0x03b5, 0x03b7, 0x03b9,
+ 0x03cb, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
+ 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
+ 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
+ 0x03c8, 0x03c9, 0x03b9, 0x03c5, 0x03bf, 0x03c5, 0x03c9, 0x03cf,
+ 0x03b2, 0x03b8, 0x03a5, 0x03d2, 0x03d2, 0x03c6, 0x03c0, 0x03d7,
+ 0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03dd, 0x03de, 0x03df,
+ 0x03e0, 0x03e1, 0x03e2, 0x03e3, 0x03e4, 0x03e5, 0x03e6, 0x03e7,
+ 0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03ed, 0x03ee, 0x03ef,
+ 0x03ba, 0x03c1, 0x03c2, 0x03f3, 0x0398, 0x03b5, 0x03f6, 0x03f7,
+ 0x03f8, 0x03a3, 0x03fa, 0x03fb, 0x03fc, 0x03fd, 0x03fe, 0x03ff,
+ 0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406,
+ 0x0408, 0x0409, 0x040a, 0x040b, 0x041a, 0x0418, 0x0423, 0x040f,
+ 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
+ 0x0418, 0x0418, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
+ 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
+ 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
+ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
+ 0x0438, 0x0438, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
+ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
+ 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
+ 0x0435, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456,
+ 0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x0438, 0x0443, 0x045f,
+ 0x0460, 0x0461, 0x0462, 0x0463, 0x0464, 0x0465, 0x0466, 0x0467,
+ 0x0468, 0x0469, 0x046a, 0x046b, 0x046c, 0x046d, 0x046e, 0x046f,
+ 0x0470, 0x0471, 0x0472, 0x0473, 0x0474, 0x0475, 0x0474, 0x0475,
+ 0x0478, 0x0479, 0x047a, 0x047b, 0x047c, 0x047d, 0x047e, 0x047f,
+ 0x0480, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487,
+ 0x0488, 0x0489, 0x048a, 0x048b, 0x048c, 0x048d, 0x048e, 0x048f,
+ 0x0490, 0x0491, 0x0492, 0x0493, 0x0494, 0x0495, 0x0496, 0x0497,
+ 0x0498, 0x0499, 0x049a, 0x049b, 0x049c, 0x049d, 0x049e, 0x049f,
+ 0x04a0, 0x04a1, 0x04a2, 0x04a3, 0x04a4, 0x04a5, 0x04a6, 0x04a7,
+ 0x04a8, 0x04a9, 0x04aa, 0x04ab, 0x04ac, 0x04ad, 0x04ae, 0x04af,
+ 0x04b0, 0x04b1, 0x04b2, 0x04b3, 0x04b4, 0x04b5, 0x04b6, 0x04b7,
+ 0x04b8, 0x04b9, 0x04ba, 0x04bb, 0x04bc, 0x04bd, 0x04be, 0x04bf,
+ 0x04c0, 0x0416, 0x0436, 0x04c3, 0x04c4, 0x04c5, 0x04c6, 0x04c7,
+ 0x04c8, 0x04c9, 0x04ca, 0x04cb, 0x04cc, 0x04cd, 0x04ce, 0x04cf,
+ 0x0410, 0x0430, 0x0410, 0x0430, 0x04d4, 0x04d5, 0x0415, 0x0435,
+ 0x04d8, 0x04d9, 0x04d8, 0x04d9, 0x0416, 0x0436, 0x0417, 0x0437,
+ 0x04e0, 0x04e1, 0x0418, 0x0438, 0x0418, 0x0438, 0x041e, 0x043e,
+ 0x04e8, 0x04e9, 0x04e8, 0x04e9, 0x042d, 0x044d, 0x0423, 0x0443,
+ 0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04f6, 0x04f7,
+ 0x042b, 0x044b, 0x04fa, 0x04fb, 0x04fc, 0x04fd, 0x04fe, 0x04ff,
+};
+
+// generated with:
+// cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }'
diff --git a/native/src/dictionary.cpp b/native/src/dictionary.cpp
new file mode 100644
index 000000000..6e6f44182
--- /dev/null
+++ b/native/src/dictionary.cpp
@@ -0,0 +1,317 @@
+/*
+**
+** Copyright 2009, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+** http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <cutils/log.h>
+
+#include <unicode/uchar.h>
+
+//#define USE_ASSET_MANAGER
+
+#ifdef USE_ASSET_MANAGER
+#include <utils/AssetManager.h>
+#include <utils/Asset.h>
+#endif
+
+#include "dictionary.h"
+#include "basechars.h"
+
+#define DEBUG_DICT 0
+
+namespace latinime {
+
+// Creates a dictionary over the binary dictionary image at `dict`. The image is not
+// copied and nothing in this class frees it. The two multipliers are stored and applied
+// later when suggestion frequencies are computed.
+Dictionary::Dictionary(void *dict, int typedLetterMultiplier, int fullWordMultiplier)
+{
+    mDict = (unsigned char*) dict;
+    mTypedLetterMultiplier = typedLetterMultiplier;
+    mFullWordMultiplier = fullWordMultiplier;
+
+    // No asset is attached until setAsset() is called; make getAsset() return NULL
+    // rather than an indeterminate pointer in the meantime.
+    mAsset = NULL;
+
+    // Per-query state is filled in by getSuggestions(); start from a defined state.
+    mFrequencies = NULL;
+    mOutputChars = NULL;
+    mInputCodes = NULL;
+    mNextLettersFrequencies = NULL;
+    mNextLettersSize = 0;
+}
+
+// Nothing to release here: the destructor frees neither the dictionary buffer nor the
+// attached asset.
+Dictionary::~Dictionary()
+{
+}
+
+// Collects suggestions for the typed input into the caller's buffers.
+//
+// codes / codesSize    input codes, stored in groups of `maxAlternatives` per typed position
+// outWords             output buffer, one slot of `maxWordLength` chars per suggestion
+// frequencies          output frequencies, one per suggestion slot
+// skipPos              depth at which one dictionary letter is skipped (see getWordsRec)
+// nextLetters / size   optional per-character counters updated via registerNextLetter()
+//
+// Returns the number of leading slots in `frequencies` that hold a positive value,
+// capped at `maxWords`.
+int Dictionary::getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
+        int maxWordLength, int maxWords, int maxAlternatives, int skipPos,
+        int *nextLetters, int nextLettersSize)
+{
+    // Stash the query parameters where the recursive search can reach them.
+    mInputCodes = codes;
+    mInputLength = codesSize;
+    mOutputChars = outWords;
+    mFrequencies = frequencies;
+    mMaxWordLength = maxWordLength;
+    mMaxWords = maxWords;
+    mMaxAlternatives = maxAlternatives;
+    mSkipPos = skipPos;
+    mNextLettersFrequencies = nextLetters;
+    mNextLettersSize = nextLettersSize;
+    mMaxEditDistance = (mInputLength >= 5) ? mInputLength / 2 : 2;
+
+    // Walk the dictionary from its root; words may be up to three times the input length.
+    getWordsRec(0, 0, mInputLength * 3, false, 1, 0, 0);
+
+    // Get the word count
+    int found = 0;
+    for (; found < mMaxWords; found++) {
+        if (mFrequencies[found] <= 0) {
+            break;
+        }
+    }
+    if (DEBUG_DICT) LOGI("Returning %d words", found);
+
+    if (DEBUG_DICT) {
+        LOGI("Next letters: ");
+        for (int letter = 0; letter < nextLettersSize; letter++) {
+            if (mNextLettersFrequencies[letter] > 0) {
+                LOGI("%c = %d,", letter, mNextLettersFrequencies[letter]);
+            }
+        }
+        LOGI("\n");
+    }
+    return found;
+}
+
+// Increments the counter for character `c` in the caller-supplied next-letters array.
+// Characters at or beyond mNextLettersSize are ignored, and so is every call when the
+// caller supplied no array (the array is optional in getSuggestions()).
+void
+Dictionary::registerNextLetter(unsigned short c)
+{
+    if (mNextLettersFrequencies == NULL) {
+        return;
+    }
+    if (c < mNextLettersSize) {
+        mNextLettersFrequencies[c]++;
+    }
+}
+
+// Reads the character stored at *pos and advances *pos past it. A character is a single
+// byte, unless that byte is 0xFF, in which case the following two bytes hold the 16-bit
+// code in big-endian order.
+unsigned short
+Dictionary::getChar(int *pos)
+{
+    const unsigned char first = mDict[*pos];
+    *pos += 1;
+    if (first != 0xFF) {
+        return (unsigned short) first;
+    }
+
+    // Escape byte: the real code follows as two bytes, high byte first.
+    const int high = mDict[*pos] & 0xFF;
+    const int low = mDict[*pos + 1] & 0xFF;
+    *pos += 2;
+    return (unsigned short) ((high << 8) | low);
+}
+
+// Reads the children address stored at *pos and advances *pos past it.
+//
+// If FLAG_ADDRESS_MASK is clear in the byte at *pos there is no address: one byte is
+// consumed and 0 is returned. Otherwise the address is assembled from the low bits of
+// that byte (those within ADDRESS_MASK) and the two bytes after it, and three bytes are
+// consumed.
+int
+Dictionary::getAddress(int *pos)
+{
+    const unsigned char *node = mDict + *pos;
+
+    if ((node[0] & FLAG_ADDRESS_MASK) == 0) {
+        *pos += 1;
+        return 0;
+    }
+
+    const int high = (node[0] & (ADDRESS_MASK >> 16)) << 16;
+    const int mid = (node[1] & 0xFF) << 8;
+    const int low = node[2] & 0xFF;
+    *pos += 3;
+    return high + mid + low;
+}
+
+// Returns the number of 16-bit characters before the terminating zero in `str`,
+// or 0 when `str` is NULL.
+int
+Dictionary::wideStrLen(unsigned short *str)
+{
+    if (str == NULL) {
+        return 0;
+    }
+    int len = 0;
+    while (str[len] != 0) {
+        len++;
+    }
+    return len;
+}
+
+// Inserts `word` (`length` chars) with the given frequency into the output arrays, which
+// are kept sorted by descending frequency; among equal frequencies the shorter word
+// comes first. Existing entries from the insertion point on are shifted down by one
+// slot and the last one is dropped.
+//
+// Note that `word[length]` is overwritten with a terminator, so the caller's buffer
+// must have room for length + 1 chars.
+//
+// Returns true if the word was stored. Returns false if it ranked below all mMaxWords
+// current entries, or if it does not fit in an output slot.
+bool
+Dictionary::addWord(unsigned short *word, int length, int frequency)
+{
+    word[length] = 0;
+    if (DEBUG_DICT) {
+        char s[length + 1];
+        for (int i = 0; i <= length; i++) s[i] = word[i];
+        LOGI("Found word = %s, freq = %d : \n", s, frequency);
+    }
+
+    // Each output slot holds mMaxWordLength chars including the terminator. A longer
+    // word would spill into the next slot (or past the end of the buffer for the last
+    // slot), so it is rejected.
+    if (length >= mMaxWordLength) {
+        return false;
+    }
+
+    // Find the right insertion point
+    int insertAt = 0;
+    while (insertAt < mMaxWords) {
+        if (frequency > mFrequencies[insertAt]
+                || (mFrequencies[insertAt] == frequency
+                        && length < wideStrLen(mOutputChars + insertAt * mMaxWordLength))) {
+            break;
+        }
+        insertAt++;
+    }
+    if (insertAt < mMaxWords) {
+        // Shift the lower-ranked frequencies and words down by one slot.
+        memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]),
+                (char*) mFrequencies + insertAt * sizeof(mFrequencies[0]),
+                (mMaxWords - insertAt - 1) * sizeof(mFrequencies[0]));
+        mFrequencies[insertAt] = frequency;
+        memmove((char*) mOutputChars + (insertAt + 1) * mMaxWordLength * sizeof(short),
+                (char*) mOutputChars + (insertAt    ) * mMaxWordLength * sizeof(short),
+                (mMaxWords - insertAt - 1) * sizeof(short) * mMaxWordLength);
+        unsigned short *dest = mOutputChars + (insertAt    ) * mMaxWordLength;
+        while (length--) {
+            *dest++ = *word++;
+        }
+        *dest = 0; // NULL terminate
+        if (DEBUG_DICT) LOGI("Added word at %d\n", insertAt);
+        return true;
+    }
+    return false;
+}
+
+// Maps `c` to its base character via BASE_CHARS when the table covers it, then
+// lower-cases the result: ASCII 'A'-'Z' by setting bit 5, anything above 127 through
+// ICU's u_tolower().
+unsigned short
+Dictionary::toLowerCase(unsigned short c) {
+    const size_t kTableSize = sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0]);
+    unsigned short base = (c < kTableSize) ? BASE_CHARS[c] : c;
+
+    if (base >= 'A' && base <= 'Z') {
+        base |= 32;
+        return base;
+    }
+    if (base > 127) {
+        base = u_tolower(base);
+    }
+    return base;
+}
+
+// Returns true when `word` has exactly mInputLength chars and each one equals the first
+// code of the corresponding input position (input codes are laid out with a stride of
+// mMaxAlternatives per position).
+bool
+Dictionary::sameAsTyped(unsigned short *word, int length)
+{
+    if (length != mInputLength) {
+        return false;
+    }
+    for (int i = 0; i < length; i++) {
+        const unsigned int typed = (unsigned int) mInputCodes[i * mMaxAlternatives];
+        if (typed != (unsigned int) word[i]) {
+            return false;
+        }
+    }
+    return true;
+}
+
+static char QUOTE = '\'';
+
+// Recursively walks the dictionary node group at `pos`, building candidate words in
+// mWord and handing matching terminals to addWord().
+//
+// pos         offset of the node group in mDict
+// depth       index in mWord of the character being chosen at this level
+// maxDepth    recursion stops once depth exceeds this
+// completion  true once all typed input is consumed; every descendant is then accepted
+// snr         multiplier accumulated along the path and applied to word frequencies
+// inputIndex  index of the typed position being matched
+// diffs       number of times an alternative other than the first code was used so far;
+//             the walk stops once it exceeds mMaxEditDistance
+void
+Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr, int inputIndex,
+        int diffs)
+{
+    // Optimization: Prune out words that are too long compared to how much was typed.
+    if (depth > maxDepth) {
+        return;
+    }
+    if (diffs > mMaxEditDistance) {
+        return;
+    }
+    // mWord[depth] is written below and addWord() terminates the word at
+    // mWord[depth + 1], so both indices must stay inside the scratch buffer no matter
+    // how large maxDepth is.
+    const int kWordCapacity = (int) (sizeof(mWord) / sizeof(mWord[0]));
+    if (depth >= kWordCapacity - 1) {
+        return;
+    }
+    int count = getCount(&pos);
+    int *currentChars = NULL;
+    if (mInputLength <= inputIndex) {
+        completion = true;
+    } else {
+        // Alternatives for the typed position being matched.
+        currentChars = mInputCodes + (inputIndex * mMaxAlternatives);
+    }
+
+    for (int i = 0; i < count; i++) {
+        // Decode one node: character, terminal flag, children address, and (for
+        // terminals only) the frequency byte.
+        unsigned short c = getChar(&pos);
+        unsigned short lowerC = toLowerCase(c);
+        bool terminal = getTerminal(&pos);
+        int childrenAddress = getAddress(&pos);
+        int freq = 1;
+        if (terminal) freq = getFreq(&pos);
+        // If we are only doing completions, no need to look at the typed characters.
+        if (completion) {
+            mWord[depth] = c;
+            if (terminal) {
+                addWord(mWord, depth + 1, freq * snr);
+                if (depth >= mInputLength && mSkipPos < 0) {
+                    registerNextLetter(mWord[mInputLength]);
+                }
+            }
+            if (childrenAddress != 0) {
+                getWordsRec(childrenAddress, depth + 1, maxDepth,
+                        completion, snr, inputIndex, diffs);
+            }
+        } else if ((c == QUOTE && currentChars[0] != QUOTE) || mSkipPos == depth) {
+            // Skip the ' or other letter and continue deeper
+            mWord[depth] = c;
+            if (childrenAddress != 0) {
+                getWordsRec(childrenAddress, depth + 1, maxDepth, false, snr, inputIndex, diffs);
+            }
+        } else {
+            // Try each alternative for this position; the list ends at the first
+            // non-positive code.
+            int j = 0;
+            while (currentChars[j] > 0) {
+                if (currentChars[j] == lowerC || currentChars[j] == c) {
+                    // A match on the first code gets the typed-letter bonus.
+                    int addedWeight = j == 0 ? mTypedLetterMultiplier : 1;
+                    mWord[depth] = c;
+                    if (mInputLength == inputIndex + 1) {
+                        // This was the last typed position.
+                        if (terminal) {
+                            if (//INCLUDE_TYPED_WORD_IF_VALID ||
+                                    !sameAsTyped(mWord, depth + 1)) {
+                                int finalFreq = freq * snr * addedWeight;
+                                if (mSkipPos < 0) finalFreq *= mFullWordMultiplier;
+                                addWord(mWord, depth + 1, finalFreq);
+                            }
+                        }
+                        if (childrenAddress != 0) {
+                            getWordsRec(childrenAddress, depth + 1,
+                                    maxDepth, true, snr * addedWeight, inputIndex + 1,
+                                    diffs + (j > 0));
+                        }
+                    } else if (childrenAddress != 0) {
+                        getWordsRec(childrenAddress, depth + 1, maxDepth,
+                                false, snr * addedWeight, inputIndex + 1, diffs + (j > 0));
+                    }
+                }
+                j++;
+                // When a skip position is active only the first code is considered.
+                if (mSkipPos >= 0) break;
+            }
+        }
+    }
+}
+
+// Returns true if the first `length` chars of `word` form a word stored in the
+// dictionary (matched exactly, character by character). A NULL or empty word is never
+// valid.
+bool
+Dictionary::isValidWord(unsigned short *word, int length)
+{
+    // With length <= 0 the recursive search would never reach its "last character"
+    // test and would read past the end of the word.
+    if (word == NULL || length <= 0) {
+        return false;
+    }
+    return isValidWordRec(0, word, 0, length);
+}
+
+// Searches the node group at `pos` for word[offset]. On the last character the matching
+// node must be a terminal; otherwise the search continues in the node's children with
+// the next character. Returns true as soon as a complete match is found.
+bool
+Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) {
+    int count = getCount(&pos);
+    const unsigned short wanted = (unsigned short) word[offset];
+    const bool lastChar = (offset == length - 1);
+
+    for (int j = 0; j < count; j++) {
+        const unsigned short c = getChar(&pos);
+        const bool terminal = getTerminal(&pos);
+        const int childPos = getAddress(&pos);
+
+        if (c == wanted) {
+            if (lastChar) {
+                if (terminal) {
+                    return true;
+                }
+            } else if (childPos != 0
+                    && isValidWordRec(childPos, word, offset + 1, length)) {
+                return true;
+            }
+        }
+        // Terminal nodes carry a frequency byte; step over it to reach the next node.
+        if (terminal) {
+            getFreq(&pos);
+        }
+        // There could be two instances of each alphabet - upper and lower case. So continue
+        // looking ...
+    }
+    return false;
+}
+
+
+} // namespace latinime
diff --git a/native/src/dictionary.h b/native/src/dictionary.h
new file mode 100644
index 000000000..3749f3d88
--- /dev/null
+++ b/native/src/dictionary.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICTIONARY_H
+#define LATINIME_DICTIONARY_H
+
+namespace latinime {
+
+// 22-bit address = ~4MB dictionary size limit, which on average would be about 200k-300k words
+#define ADDRESS_MASK 0x3FFFFF
+
+// The bit that decides if an address follows in the next 22 bits
+#define FLAG_ADDRESS_MASK 0x40
+// The bit that decides if this is a terminal node for a word. The node could still have children,
+// if the word has other endings.
+#define FLAG_TERMINAL_MASK 0x80
+
+// Read-only view over a binary dictionary image held in memory. Provides suggestion
+// lookup (getSuggestions) and exact word validation (isValidWord).
+class Dictionary {
+public:
+ // Wraps the dictionary image at `dict`; the two multipliers are applied when
+ // suggestion frequencies are computed.
+ Dictionary(void *dict, int typedLetterMultipler, int fullWordMultiplier);
+ // Fills `outWords` / `frequencies` with suggestions for the input `codes` and returns
+ // how many were found. `nextLetters` may be NULL.
+ int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
+ int maxWordLength, int maxWords, int maxAlternatives, int skipPos,
+ int *nextLetters, int nextLettersSize);
+ // Returns true if `word` (of `length` chars) is stored in the dictionary.
+ bool isValidWord(unsigned short *word, int length);
+ // Opaque handle stored on behalf of the caller; this class only stores and returns it.
+ void setAsset(void *asset) { mAsset = asset; }
+ void *getAsset() { return mAsset; }
+ ~Dictionary();
+
+private:
+
+ // Node decoding helpers. Each reads from mDict at *pos; all but getTerminal() also
+ // advance *pos past what they read.
+ int getAddress(int *pos);
+ // Tests the terminal flag of the byte at *pos without consuming it.
+ bool getTerminal(int *pos) { return (mDict[*pos] & FLAG_TERMINAL_MASK) > 0; }
+ // Each reads one byte and advances *pos by one.
+ int getFreq(int *pos) { return mDict[(*pos)++] & 0xFF; }
+ int getCount(int *pos) { return mDict[(*pos)++] & 0xFF; }
+ unsigned short getChar(int *pos);
+ // Length of a zero-terminated 16-bit string (0 for NULL).
+ int wideStrLen(unsigned short *str);
+
+ bool sameAsTyped(unsigned short *word, int length);
+ bool addWord(unsigned short *word, int length, int frequency);
+ unsigned short toLowerCase(unsigned short c);
+ void getWordsRec(int pos, int depth, int maxDepth, bool completion, int frequency,
+ int inputIndex, int diffs);
+ bool isValidWordRec(int pos, unsigned short *word, int offset, int length);
+ void registerNextLetter(unsigned short c);
+
+ // Dictionary image and the opaque asset handle set through setAsset().
+ unsigned char *mDict;
+ void *mAsset;
+
+ // State of the current getSuggestions() call; the pointers refer to caller-owned
+ // buffers.
+ int *mFrequencies;
+ int mMaxWords;
+ int mMaxWordLength;
+ unsigned short *mOutputChars;
+ int *mInputCodes;
+ int mInputLength;
+ int mMaxAlternatives;
+ // Scratch buffer holding the word being built during the recursive search.
+ unsigned short mWord[128];
+ int mSkipPos;
+ int mMaxEditDistance;
+
+ int mFullWordMultiplier;
+ int mTypedLetterMultiplier;
+ int *mNextLettersFrequencies;
+ int mNextLettersSize;
+};
+
+// ----------------------------------------------------------------------------
+
+}; // namespace latinime
+
+#endif // LATINIME_DICTIONARY_H