summaryrefslogtreecommitdiffstats
path: root/native
diff options
context:
space:
mode:
authorKeisuke Kuroyanagi <ksk@google.com>2014-11-11 21:21:03 +0000
committerAndroid (Google) Code Review <android-gerrit@google.com>2014-11-11 21:21:04 +0000
commitda99cfc29d71f7aa417b4646234d532ed5dcb7de (patch)
treeee7e78c20d6e2c8b3434654f30659c65cf20e42c /native
parent7a3e6242103b7637171c6430bb9cca916583bfc0 (diff)
parentcd105409739db85f5f325e7cca322dadb12cded7 (diff)
downloadandroid_packages_inputmethods_LatinIME-da99cfc29d71f7aa417b4646234d532ed5dcb7de.tar.gz
android_packages_inputmethods_LatinIME-da99cfc29d71f7aa417b4646234d532ed5dcb7de.tar.bz2
android_packages_inputmethods_LatinIME-da99cfc29d71f7aa417b4646234d532ed5dcb7de.zip
Merge "Introduce OffdeviceIntermediateDict for dicttolkit."
Diffstat (limited to 'native')
-rw-r--r--native/dicttoolkit/NativeFileList.mk7
-rw-r--r--native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp126
-rw-r--r--native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h48
-rw-r--r--native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h79
-rw-r--r--native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h48
-rw-r--r--native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp84
6 files changed, 391 insertions, 1 deletions
diff --git a/native/dicttoolkit/NativeFileList.mk b/native/dicttoolkit/NativeFileList.mk
index 925ec458b..b39a24890 100644
--- a/native/dicttoolkit/NativeFileList.mk
+++ b/native/dicttoolkit/NativeFileList.mk
@@ -22,8 +22,13 @@ LATIN_IME_DICT_TOOLKIT_SRC_FILES := \
help_executor.cpp \
info_executor.cpp \
makedict_executor.cpp) \
+ $(addprefix offdevice_intermediate_dict/, \
+ offdevice_intermediate_dict.cpp) \
utils/command_utils.cpp
LATIN_IME_DICT_TOOLKIT_TEST_FILES := \
dict_toolkit_defines_test.cpp \
- utils/command_utils_test.cpp
+ $(addprefix offdevice_intermediate_dict/, \
+ offdevice_intermediate_dict_test.cpp) \
+ $(addprefix utils/, \
+ command_utils_test.cpp)
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp
new file mode 100644
index 000000000..af28131cf
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict.h"
+
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+bool OffdeviceIntermediateDict::addWord(const WordProperty &wordProperty) {
+ const CodePointArrayView codePoints = wordProperty.getCodePoints();
+ if (codePoints.empty() || codePoints.size() > MAX_WORD_LENGTH) {
+ return false;
+ }
+ return addWordInner(codePoints, wordProperty, mRootPtNodeArray);
+}
+
+bool OffdeviceIntermediateDict::addWordInner(const CodePointArrayView codePoints,
+ const WordProperty &wordProperty, OffdeviceIntermediateDictPtNodeArray &ptNodeArray) {
+ auto ptNodeList = ptNodeArray.getMutablePtNodeList();
+ auto ptNodeIt = ptNodeList->begin();
+ for (; ptNodeIt != ptNodeList->end(); ++ptNodeIt) {
+ const auto &ptNode = *ptNodeIt;
+ const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints();
+ if (codePoints[0] < ptNodeCodePoints[0]) {
+ continue;
+ }
+ if (codePoints[0] > ptNodeCodePoints[0]) {
+ break;
+ }
+ size_t i = 1;
+ for (; i < codePoints.size(); ++i) {
+ if (i >= ptNodeCodePoints.size()) {
+ // Add new child.
+ return addWordInner(codePoints.skip(i), wordProperty,
+ ptNode->getChildrenPtNodeArray());
+ }
+ if (codePoints[i] != ptNodeCodePoints[i]) {
+ break;
+ }
+ }
+ if (codePoints.size() == i && codePoints.size() == ptNodeCodePoints.size()) {
+ // All code points matched.
+ if (ptNode->getWordProperty()) {
+ // Adding the same word multiple times is not supported.
+ return false;
+ }
+ ptNodeList->insert(ptNodeIt,
+ std::make_shared<OffdeviceIntermediateDictPtNode>(wordProperty, *ptNode));
+ ptNodeList->erase(ptNodeIt);
+ return true;
+ }
+ // The (i+1)-th elements are different.
+ // Create and Add new parent ptNode for the common part.
+ auto newPtNode = codePoints.size() == i
+ ? std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty)
+ : std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints.limit(i));
+ ptNodeList->insert(ptNodeIt, newPtNode);
+ OffdeviceIntermediateDictPtNodeArray &childrenPtNodeArray =
+ newPtNode->getChildrenPtNodeArray();
+ // Add new child for the existing ptNode.
+ childrenPtNodeArray.getMutablePtNodeList()->push_back(
+ std::make_shared<OffdeviceIntermediateDictPtNode>(
+ ptNodeCodePoints.skip(i), *ptNode));
+ ptNodeList->erase(ptNodeIt);
+ if (codePoints.size() != i) {
+ // Add a child for the new word.
+ return addWordInner(codePoints.skip(i), wordProperty, childrenPtNodeArray);
+ }
+ return true;
+ }
+ ptNodeList->insert(ptNodeIt,
+ std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty));
+ return true;
+}
+
+const WordProperty *OffdeviceIntermediateDict::getWordProperty(
+ const CodePointArrayView codePoints) const {
+ const OffdeviceIntermediateDictPtNodeArray *ptNodeArray = &mRootPtNodeArray;
+ for (size_t i = 0; i < codePoints.size();) {
+ bool foundNext = false;
+ for (const auto ptNode : ptNodeArray->getPtNodeList()) {
+ const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints();
+ if (codePoints[i] < ptNodeCodePoints[0]) {
+ continue;
+ }
+ if (codePoints[i] > ptNodeCodePoints[0]
+ || codePoints.size() < ptNodeCodePoints.size()) {
+ return nullptr;
+ }
+ for (size_t j = 1; j < ptNodeCodePoints.size(); ++j) {
+ if (codePoints[i + j] != ptNodeCodePoints[j]) {
+ return nullptr;
+ }
+ }
+ i += ptNodeCodePoints.size();
+ if (i == codePoints.size()) {
+ return ptNode->getWordProperty();
+ }
+ ptNodeArray = &ptNode->getChildrenPtNodeArray();
+ foundNext = true;
+ break;
+ }
+ if (!foundNext) {
+ break;
+ }
+ }
+ return nullptr;
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h
new file mode 100644
index 000000000..dfdb6a897
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H
+#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H
+
+#include "dict_toolkit_defines.h"
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h"
+#include "suggest/core/dictionary/property/word_property.h"
+#include "utils/int_array_view.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+/**
+ * On memory patricia trie to represent a dictionary.
+ */
+class OffdeviceIntermediateDict final {
+ public:
+ bool addWord(const WordProperty &wordProperty);
+ // The returned value will be invalid after modifying the dictionary. e.g. calling addWord().
+ const WordProperty *getWordProperty(const CodePointArrayView codePoints) const;
+
+ private:
+ DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDict);
+
+ OffdeviceIntermediateDictPtNodeArray mRootPtNodeArray;
+
+ bool addWordInner(const CodePointArrayView codePoints, const WordProperty &wordProperty,
+ OffdeviceIntermediateDictPtNodeArray &ptNodeArray);
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h
new file mode 100644
index 000000000..721ccd778
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H
+#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H
+
+#include <memory>
+
+#include "dict_toolkit_defines.h"
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h"
+#include "suggest/core/dictionary/property/word_property.h"
+#include "utils/int_array_view.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+class OffdeviceIntermediateDictPtNode final {
+ public:
+ // Non-terminal
+ OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints)
+ : mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(),
+ mWortProperty(nullptr) {}
+
+ // Terminal
+ OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints,
+ const WordProperty &wordProperty)
+ : mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(),
+ mWortProperty(new WordProperty(wordProperty)) {}
+
+ // Replacing PtNodeCodePoints.
+ OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints,
+ const OffdeviceIntermediateDictPtNode &ptNode)
+ : mPtNodeCodePoints(ptNodeCodePoints.toVector()),
+ mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray),
+ mWortProperty(new WordProperty(*ptNode.mWortProperty)) {}
+
+ // Replacing WordProperty.
+ OffdeviceIntermediateDictPtNode(const WordProperty &wordProperty,
+ const OffdeviceIntermediateDictPtNode &ptNode)
+ : mPtNodeCodePoints(ptNode.mPtNodeCodePoints),
+ mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray),
+ mWortProperty(new WordProperty(wordProperty)) {}
+
+ const WordProperty *getWordProperty() const {
+ return mWortProperty.get();
+ }
+
+ const CodePointArrayView getPtNodeCodePoints() const {
+ return CodePointArrayView(mPtNodeCodePoints);
+ }
+
+ OffdeviceIntermediateDictPtNodeArray &getChildrenPtNodeArray() {
+ return mChildrenPtNodeArray;
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(OffdeviceIntermediateDictPtNode);
+
+ const std::vector<int> mPtNodeCodePoints;
+ OffdeviceIntermediateDictPtNodeArray mChildrenPtNodeArray;
+ const std::unique_ptr<WordProperty> mWortProperty;
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h
new file mode 100644
index 000000000..f87456ce0
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H
+#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H
+
+#include <list>
+#include <memory>
+
+#include "dict_toolkit_defines.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+class OffdeviceIntermediateDictPtNode;
+
+class OffdeviceIntermediateDictPtNodeArray final {
+ public:
+ const std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> &getPtNodeList() const {
+ return mPtNodes;
+ }
+
+ std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> *getMutablePtNodeList() {
+ return &mPtNodes;
+ }
+
+ private:
+ DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDictPtNodeArray);
+
+ std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> mPtNodes;
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H
diff --git a/native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp b/native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp
new file mode 100644
index 000000000..3bcb89e46
--- /dev/null
+++ b/native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict.h"
+
+#include <gtest/gtest.h>
+
+#include <vector>
+
+#include "suggest/core/dictionary/property/word_property.h"
+#include "utils/int_array_view.h"
+
+namespace latinime {
+namespace dicttoolkit {
+namespace {
+
+const std::vector<int> getCodePointVector(const char *str) {
+ std::vector<int> codePoints;
+ while (*str) {
+ codePoints.push_back(*str);
+ ++str;
+ }
+ return codePoints;
+}
+
+const WordProperty getDummpWordProperty(const std::vector<int> &&codePoints) {
+ return WordProperty(std::move(codePoints), UnigramProperty(), std::vector<NgramProperty>());
+}
+
+TEST(OffdeviceIntermediateDictTest, TestAddWordProperties) {
+ OffdeviceIntermediateDict dict;
+ EXPECT_EQ(nullptr, dict.getWordProperty(CodePointArrayView()));
+
+ const WordProperty wordProperty0 = getDummpWordProperty(getCodePointVector("abcd"));
+ EXPECT_TRUE(dict.addWord(wordProperty0));
+ EXPECT_NE(nullptr, dict.getWordProperty(wordProperty0.getCodePoints()));
+
+ const WordProperty wordProperty1 = getDummpWordProperty(getCodePointVector("efgh"));
+ EXPECT_TRUE(dict.addWord(wordProperty1));
+ EXPECT_NE(nullptr, dict.getWordProperty(wordProperty1.getCodePoints()));
+
+ const WordProperty wordProperty2 = getDummpWordProperty(getCodePointVector("ab"));
+ EXPECT_TRUE(dict.addWord(wordProperty2));
+ EXPECT_NE(nullptr, dict.getWordProperty(wordProperty2.getCodePoints()));
+
+ const WordProperty wordProperty3 = getDummpWordProperty(getCodePointVector("abcdefg"));
+ EXPECT_TRUE(dict.addWord(wordProperty3));
+ EXPECT_NE(nullptr, dict.getWordProperty(wordProperty3.getCodePoints()));
+
+ const WordProperty wordProperty4 = getDummpWordProperty(getCodePointVector("efef"));
+ EXPECT_TRUE(dict.addWord(wordProperty4));
+ EXPECT_NE(nullptr, dict.getWordProperty(wordProperty4.getCodePoints()));
+
+ const WordProperty wordProperty5 = getDummpWordProperty(getCodePointVector("ef"));
+ EXPECT_TRUE(dict.addWord(wordProperty5));
+ EXPECT_NE(nullptr, dict.getWordProperty(wordProperty5.getCodePoints()));
+
+ const WordProperty wordProperty6 = getDummpWordProperty(getCodePointVector("abcd"));
+ EXPECT_FALSE(dict.addWord(wordProperty6)) << "Adding the same word multiple times should fail.";
+
+ EXPECT_NE(nullptr, dict.getWordProperty(wordProperty0.getCodePoints()));
+ EXPECT_NE(nullptr, dict.getWordProperty(wordProperty1.getCodePoints()));
+ EXPECT_NE(nullptr, dict.getWordProperty(wordProperty2.getCodePoints()));
+ EXPECT_NE(nullptr, dict.getWordProperty(wordProperty3.getCodePoints()));
+ EXPECT_NE(nullptr, dict.getWordProperty(wordProperty4.getCodePoints()));
+ EXPECT_NE(nullptr, dict.getWordProperty(wordProperty5.getCodePoints()));
+}
+
+} // namespace
+} // namespace dicttoolkit
+} // namespace latinime