summaryrefslogtreecommitdiffstats
path: root/tests/src/com/android/inputmethod/latin
diff options
context:
space:
mode:
Diffstat (limited to 'tests/src/com/android/inputmethod/latin')
-rw-r--r--tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java126
-rw-r--r--tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java81
-rw-r--r--tests/src/com/android/inputmethod/latin/makedict/FusionDictionary.java105
-rw-r--r--tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java2
4 files changed, 282 insertions, 32 deletions
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
index e96c934cb..b3aed820a 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
@@ -156,7 +156,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
addUnigramWord(binaryDictionary, validLongWord, probability);
addUnigramWord(binaryDictionary, invalidLongWord, probability);
// Too long short cut.
- binaryDictionary.addUnigramEntry("a", probability, false /* isBeginningOfSentence */,
+ binaryDictionary.addUnigramEntry("a", probability, invalidLongWord,
+ 10 /* shortcutProbability */, false /* isBeginningOfSentence */,
false /* isNotAWord */, false /* isPossiblyOffensive */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
addUnigramWord(binaryDictionary, "abc", probability);
@@ -174,7 +175,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
private static void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
final int probability) {
- binaryDictionary.addUnigramEntry(word, probability,
+ binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */,
+ Dictionary.NOT_A_PROBABILITY /* shortcutProbability */,
false /* isBeginningOfSentence */, false /* isNotAWord */,
false /* isPossiblyOffensive */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
@@ -710,6 +712,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final boolean isPossiblyOffensive = random.nextBoolean();
// TODO: Add tests for historical info.
binaryDictionary.addUnigramEntry(word, unigramProbability,
+ null /* shortcutTarget */, Dictionary.NOT_A_PROBABILITY,
false /* isBeginningOfSentence */, isNotAWord, isPossiblyOffensive,
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
@@ -724,7 +727,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertEquals(isNotAWord, wordProperty.mIsNotAWord);
assertEquals(isPossiblyOffensive, wordProperty.mIsPossiblyOffensive);
assertEquals(false, wordProperty.mHasNgrams);
+ assertEquals(false, wordProperty.mHasShortcuts);
assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability);
+ assertTrue(wordProperty.mShortcutTargets.isEmpty());
}
for (int i = 0; i < BIGRAM_COUNT; i++) {
@@ -852,10 +857,125 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertTrue(bigramSet.isEmpty());
}
+ // Checks that shortcut targets given to addUnigramEntry() can be read back through
+ // getWordProperty(): a single target, the same target re-added with another probability,
+ // a second target, and finally both targets again after flushWithGC().
+ public void testAddShortcuts() {
+ final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
+
+ final int unigramProbability = 100;
+ final int shortcutProbability = 10;
+ // Add "aaa" with a single shortcut target "zzz".
+ binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
+ shortcutProbability, false /* isBeginningOfSentence */,
+ false /* isNotAWord */, false /* isPossiblyOffensive */, 0 /* timestamp */);
+ WordProperty wordProperty = binaryDictionary.getWordProperty("aaa",
+ false /* isBeginningOfSentence */);
+ assertEquals(1, wordProperty.mShortcutTargets.size());
+ assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
+ assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability());
+ // Adding the same target again is expected to replace its probability, not to add a
+ // second entry.
+ final int updatedShortcutProbability = 2;
+ binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
+ updatedShortcutProbability, false /* isBeginningOfSentence */,
+ false /* isNotAWord */, false /* isPossiblyOffensive */, 0 /* timestamp */);
+ wordProperty = binaryDictionary.getWordProperty("aaa",
+ false /* isBeginningOfSentence */);
+ assertEquals(1, wordProperty.mShortcutTargets.size());
+ assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
+ assertEquals(updatedShortcutProbability,
+ wordProperty.mShortcutTargets.get(0).getProbability());
+ // Adding a different target is expected to leave "aaa" with two shortcut targets.
+ binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy",
+ shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
+ false /* isPossiblyOffensive */, 0 /* timestamp */);
+ final HashMap<String, Integer> shortcutTargets = new HashMap<>();
+ shortcutTargets.put("zzz", updatedShortcutProbability);
+ shortcutTargets.put("yyy", shortcutProbability);
+ wordProperty = binaryDictionary.getWordProperty("aaa",
+ false /* isBeginningOfSentence */);
+ assertEquals(2, wordProperty.mShortcutTargets.size());
+ // Every returned target must match an expected entry; matched entries are removed so
+ // that the same target cannot be matched twice.
+ for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
+ assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
+ assertEquals((int)shortcutTargets.get(shortcutTarget.mWord),
+ shortcutTarget.getProbability());
+ shortcutTargets.remove(shortcutTarget.mWord);
+ }
+ // Refill the expectations and run the same check after flushing with GC.
+ shortcutTargets.put("zzz", updatedShortcutProbability);
+ shortcutTargets.put("yyy", shortcutProbability);
+ binaryDictionary.flushWithGC();
+ wordProperty = binaryDictionary.getWordProperty("aaa",
+ false /* isBeginningOfSentence */);
+ assertEquals(2, wordProperty.mShortcutTargets.size());
+ for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
+ assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
+ assertEquals((int)shortcutTargets.get(shortcutTarget.mWord),
+ shortcutTarget.getProbability());
+ shortcutTargets.remove(shortcutTarget.mWord);
+ }
+ }
+
+ // Adds UNIGRAM_COUNT random words, attaches SHORTCUT_COUNT random shortcut targets to
+ // randomly chosen words, and checks that every word reports the expected unigram
+ // probability and shortcut targets. The random seed is part of every assertion message so
+ // that a failing run can be reproduced.
+ public void testAddManyShortcuts() {
+ final long seed = System.currentTimeMillis();
+ final String seedMessage = "seed: " + seed;
+ final Random random = new Random(seed);
+ final int UNIGRAM_COUNT = 1000;
+ final int SHORTCUT_COUNT = 10000;
+ final int codePointSetSize = 20;
+ final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
+
+ final ArrayList<String> words = new ArrayList<>();
+ final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
+ // Expected shortcut targets (target -> probability) for each word that has any.
+ final HashMap<String, HashMap<String, Integer>> shortcutTargets = new HashMap<>();
+ final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
+
+ for (int i = 0; i < UNIGRAM_COUNT; i++) {
+ final String word = CodePointUtils.generateWord(random, codePointSet);
+ final int unigramProbability = random.nextInt(0xFF);
+ addUnigramWord(binaryDictionary, word, unigramProbability);
+ words.add(word);
+ unigramProbabilities.put(word, unigramProbability);
+ if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
+ binaryDictionary.flushWithGC();
+ }
+ }
+ for (int i = 0; i < SHORTCUT_COUNT; i++) {
+ final String shortcutTarget = CodePointUtils.generateWord(random, codePointSet);
+ final int shortcutProbability = random.nextInt(0xF);
+ final String word = words.get(random.nextInt(words.size()));
+ final int unigramProbability = unigramProbabilities.get(word);
+ binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget,
+ shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
+ false /* isPossiblyOffensive */, 0 /* timestamp */);
+ // Record the expectation; re-adding an existing target overwrites its probability.
+ HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word);
+ if (shortcutTargetsOfWord == null) {
+ shortcutTargetsOfWord = new HashMap<>();
+ shortcutTargets.put(word, shortcutTargetsOfWord);
+ }
+ shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
+ if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
+ binaryDictionary.flushWithGC();
+ }
+ }
+
+ for (final String word : words) {
+ final WordProperty wordProperty = binaryDictionary.getWordProperty(word,
+ false /* isBeginningOfSentence */);
+ assertEquals(seedMessage, (int)unigramProbabilities.get(word),
+ wordProperty.mProbabilityInfo.mProbability);
+ final HashMap<String, Integer> expectedTargets = shortcutTargets.get(word);
+ if (expectedTargets == null) {
+ // The word does not have shortcut targets.
+ continue;
+ }
+ assertEquals(seedMessage, expectedTargets.size(), wordProperty.mShortcutTargets.size());
+ for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
+ final String targetCodePoints = shortcutTarget.mWord;
+ // Check presence first: unboxing a missing (null) entry would throw a
+ // NullPointerException instead of reporting an assertion failure.
+ assertTrue(seedMessage, expectedTargets.containsKey(targetCodePoints));
+ assertEquals(seedMessage, (int)expectedTargets.get(targetCodePoints),
+ shortcutTarget.getProbability());
+ }
+ }
+ }
+
public void testPossiblyOffensiveAttributeMaintained() {
final BinaryDictionary binaryDictionary =
getEmptyBinaryDictionary(FormatSpec.VERSION403);
- binaryDictionary.addUnigramEntry("ddd", 100, false, true, true, 0);
+ binaryDictionary.addUnigramEntry("ddd", 100, null, Dictionary.NOT_A_PROBABILITY,
+ false, true, true, 0);
WordProperty wordProperty = binaryDictionary.getWordProperty("ddd", false);
assertEquals(true, wordProperty.mIsPossiblyOffensive);
}
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
index bd5136583..ce905c499 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
@@ -16,7 +16,9 @@
package com.android.inputmethod.latin.makedict;
+import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
+import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
@@ -90,6 +92,38 @@ public class BinaryDictEncoderUtils {
}
/**
+ * Compute the size of a shortcut in bytes.
+ *
+ * The size is the attribute flags size, plus the size of every code point of the shortcut
+ * target as reported by CharEncoding.getCharSize, plus the terminator size.
+ *
+ * @param shortcut the shortcut whose target word is measured.
+ * @param codePointToOneByteCodeMap passed through to CharEncoding.getCharSize for each
+ * code point.
+ * @return the size of the shortcut, in bytes.
+ */
+ private static int getShortcutSize(final WeightedString shortcut,
+ final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
+ int size = FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE;
+ final String word = shortcut.mWord;
+ final int length = word.length();
+ // Advance by whole code points so that a surrogate pair is measured once.
+ for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
+ final int codePoint = word.codePointAt(i);
+ size += CharEncoding.getCharSize(codePoint, codePointToOneByteCodeMap);
+ }
+ size += FormatSpec.PTNODE_TERMINATOR_SIZE;
+ return size;
+ }
+
+ /**
+ * Compute the size of a shortcut list in bytes.
+ *
+ * This is known in advance and does not change according to position in the file
+ * like address lists do.
+ *
+ * @param shortcutList the shortcuts to measure; may be null.
+ * @param codePointToOneByteCodeMap passed through to getShortcutSize for each shortcut.
+ * @return 0 if the list is null or empty; otherwise the size of the list size field plus
+ * the size of every shortcut in the list.
+ */
+ static int getShortcutListSize(final ArrayList<WeightedString> shortcutList,
+ final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
+ // No list at all is written for a null or empty list, not even the size field.
+ if (null == shortcutList || shortcutList.isEmpty()) return 0;
+ int size = FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE;
+ for (final WeightedString shortcut : shortcutList) {
+ size += getShortcutSize(shortcut, codePointToOneByteCodeMap);
+ }
+ return size;
+ }
+
+ /**
* Compute the maximum size of a PtNode, assuming 3-byte addresses for everything.
*
* @param ptNode the PtNode to compute the size of.
@@ -103,6 +137,8 @@ public class BinaryDictEncoderUtils {
size += FormatSpec.PTNODE_FREQUENCY_SIZE;
}
size += FormatSpec.PTNODE_MAX_ADDRESS_SIZE; // For children address
+ // TODO: Use codePointToOneByteCodeMap for shortcuts.
+ size += getShortcutListSize(ptNode.mShortcutTargets, null /* codePointToOneByteCodeMap */);
if (null != ptNode.mBigrams) {
size += (FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE
+ FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE)
@@ -205,6 +241,27 @@ public class BinaryDictEncoderUtils {
}
}
+ /**
+ * Writes the low-order {@code size} bytes of {@code value} to the buffer, most significant
+ * byte first.
+ *
+ * @param dictBuffer the buffer to write to.
+ * @param value the value to write.
+ * @param size the number of bytes to write, from 1 to 4. Any other size writes nothing.
+ */
+ @UsedForTesting
+ static void writeUIntToDictBuffer(final DictBuffer dictBuffer, final int value,
+ final int size) {
+ // Each case writes one byte and deliberately falls through to the next smaller size, so
+ // entering at case N writes exactly N bytes.
+ switch(size) {
+ case 4:
+ dictBuffer.put((byte) ((value >> 24) & 0xFF));
+ /* fall through */
+ case 3:
+ dictBuffer.put((byte) ((value >> 16) & 0xFF));
+ /* fall through */
+ case 2:
+ dictBuffer.put((byte) ((value >> 8) & 0xFF));
+ /* fall through */
+ case 1:
+ dictBuffer.put((byte) (value & 0xFF));
+ break;
+ default:
+ /* nop */
+ }
+ }
+
// End utility methods
// This method is responsible for finding a nice ordering of the nodes that favors run-time
@@ -334,6 +391,9 @@ public class BinaryDictEncoderUtils {
nodeSize += getByteSize(getOffsetToTargetNodeArrayDuringUpdate(ptNodeArray,
nodeSize + size, ptNode.mChildren));
}
+ // TODO: Use codePointToOneByteCodeMap for shortcuts.
+ nodeSize += getShortcutListSize(ptNode.mShortcutTargets,
+ null /* codePointToOneByteCodeMap */);
if (null != ptNode.mBigrams) {
for (WeightedString bigram : ptNode.mBigrams) {
final int offset = getOffsetToTargetPtNodeDuringUpdate(ptNodeArray,
@@ -508,13 +568,14 @@ public class BinaryDictEncoderUtils {
* @param hasMultipleChars whether the PtNode has multiple chars.
* @param isTerminal whether the PtNode is terminal.
* @param childrenAddressSize the size of a children address.
+ * @param hasShortcuts whether the PtNode has shortcuts.
* @param hasBigrams whether the PtNode has bigrams.
* @param isNotAWord whether the PtNode is not a word.
* @param isPossiblyOffensive whether the PtNode is a possibly offensive entry.
* @return the flags
*/
static int makePtNodeFlags(final boolean hasMultipleChars, final boolean isTerminal,
- final int childrenAddressSize, final boolean hasBigrams,
+ final int childrenAddressSize, final boolean hasShortcuts, final boolean hasBigrams,
final boolean isNotAWord, final boolean isPossiblyOffensive) {
byte flags = 0;
if (hasMultipleChars) flags |= FormatSpec.FLAG_HAS_MULTIPLE_CHARS;
@@ -535,6 +596,7 @@ public class BinaryDictEncoderUtils {
default:
throw new RuntimeException("Node with a strange address");
}
+ if (hasShortcuts) flags |= FormatSpec.FLAG_HAS_SHORTCUT_TARGETS;
if (hasBigrams) flags |= FormatSpec.FLAG_HAS_BIGRAMS;
if (isNotAWord) flags |= FormatSpec.FLAG_IS_NOT_A_WORD;
if (isPossiblyOffensive) flags |= FormatSpec.FLAG_IS_POSSIBLY_OFFENSIVE;
@@ -544,6 +606,7 @@ public class BinaryDictEncoderUtils {
/* package */ static byte makePtNodeFlags(final PtNode node, final int childrenOffset) {
return (byte) makePtNodeFlags(node.mChars.length > 1, node.isTerminal(),
getByteSize(childrenOffset),
+ node.mShortcutTargets != null && !node.mShortcutTargets.isEmpty(),
node.mBigrams != null && !node.mBigrams.isEmpty(),
node.mIsNotAWord, node.mIsPossiblyOffensive);
}
@@ -558,7 +621,7 @@ public class BinaryDictEncoderUtils {
* @param word the second bigram, for debugging purposes
* @return the flags
*/
- /* package */ static int makeBigramFlags(final boolean more, final int offset,
+ /* package */ static final int makeBigramFlags(final boolean more, final int offset,
final int bigramFrequency, final int unigramFrequency, final String word) {
int bigramFlags = (more ? FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT : 0)
+ (offset < 0 ? FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE : 0);
@@ -627,7 +690,19 @@ public class BinaryDictEncoderUtils {
return discretizedFrequency > 0 ? discretizedFrequency : 0;
}
- /* package */ static int getChildrenPosition(final PtNode ptNode,
+ /**
+ * Makes the flag value for a shortcut.
+ *
+ * @param more whether there are more attributes after this one.
+ * @param frequency the frequency of the attribute, 0..15
+ * @return the flags
+ */
+ static final int makeShortcutFlags(final boolean more, final int frequency) {
+ // Only the bits of frequency covered by FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY are kept;
+ // the "has next" flag is added on top when more attributes follow.
+ return (more ? FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT : 0)
+ + (frequency & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY);
+ }
+
+ /* package */ static final int getChildrenPosition(final PtNode ptNode,
final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
int positionOfChildrenPosField = ptNode.mCachedAddressAfterUpdate
+ getNodeHeaderSize(ptNode, codePointToOneByteCodeMap);
diff --git a/tests/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/tests/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
index 6ba1ef966..a42f0a93d 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
@@ -82,6 +82,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
public static final class PtNode {
private static final int NOT_A_TERMINAL = -1;
final int mChars[];
+ ArrayList<WeightedString> mShortcutTargets;
ArrayList<WeightedString> mBigrams;
// null == mProbabilityInfo indicates this is not a terminal.
ProbabilityInfo mProbabilityInfo;
@@ -99,23 +100,26 @@ public final class FusionDictionary implements Iterable<WordProperty> {
int mCachedAddressBeforeUpdate; // The address of this PtNode (before update)
int mCachedAddressAfterUpdate; // The address of this PtNode (after update)
- public PtNode(final int[] chars, final ArrayList<WeightedString> bigrams,
- final ProbabilityInfo probabilityInfo, final boolean isNotAWord,
- final boolean isPossiblyOffensive) {
+ public PtNode(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
+ final ArrayList<WeightedString> bigrams, final ProbabilityInfo probabilityInfo,
+ final boolean isNotAWord, final boolean isPossiblyOffensive) {
mChars = chars;
mProbabilityInfo = probabilityInfo;
mTerminalId = probabilityInfo == null ? NOT_A_TERMINAL : probabilityInfo.mProbability;
+ mShortcutTargets = shortcutTargets;
mBigrams = bigrams;
mChildren = null;
mIsNotAWord = isNotAWord;
mIsPossiblyOffensive = isPossiblyOffensive;
}
- public PtNode(final int[] chars, final ArrayList<WeightedString> bigrams,
- final ProbabilityInfo probabilityInfo, final boolean isNotAWord,
- final boolean isPossiblyOffensive, final PtNodeArray children) {
+ public PtNode(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
+ final ArrayList<WeightedString> bigrams, final ProbabilityInfo probabilityInfo,
+ final boolean isNotAWord, final boolean isPossiblyOffensive,
+ final PtNodeArray children) {
mChars = chars;
mProbabilityInfo = probabilityInfo;
+ mShortcutTargets = shortcutTargets;
mBigrams = bigrams;
mChildren = children;
mIsNotAWord = isNotAWord;
@@ -149,6 +153,14 @@ public final class FusionDictionary implements Iterable<WordProperty> {
return mIsPossiblyOffensive;
}
+ /**
+ * Returns the shortcut targets of this PtNode, or null if none are set.
+ *
+ * The returned list is a new ArrayList, so adding or removing elements does not affect
+ * this PtNode. The copy is shallow: the WeightedString elements themselves are shared
+ * with this PtNode.
+ */
+ public ArrayList<WeightedString> getShortcutTargets() {
+ // We don't want write permission to escape outside the package, so we return a copy
+ if (null == mShortcutTargets) return null;
+ final ArrayList<WeightedString> copyOfShortcutTargets =
+ new ArrayList<>(mShortcutTargets);
+ return copyOfShortcutTargets;
+ }
+
public ArrayList<WeightedString> getBigrams() {
// We don't want write permission to escape outside the package, so we return a copy
if (null == mBigrams) return null;
@@ -179,6 +191,24 @@ public final class FusionDictionary implements Iterable<WordProperty> {
}
/**
+ * Gets the shortcut target for the given word. Returns null if the word is not in the
+ * shortcut list.
+ *
+ * @param word the shortcut target to look for, compared with String.equals against the
+ * mWord of each entry.
+ * @return the first matching entry of the shortcut list itself (not a copy), or null if
+ * there is no shortcut list or no entry matches.
+ */
+ public WeightedString getShortcut(final String word) {
+ // TODO: Don't do a linear search
+ if (mShortcutTargets != null) {
+ final int size = mShortcutTargets.size();
+ for (int i = 0; i < size; ++i) {
+ WeightedString shortcut = mShortcutTargets.get(i);
+ if (shortcut.mWord.equals(word)) {
+ return shortcut;
+ }
+ }
+ }
+ return null;
+ }
+
+ /**
* Gets the bigram for the given word.
* Returns null if the word is not in the bigrams list.
*/
@@ -202,9 +232,27 @@ public final class FusionDictionary implements Iterable<WordProperty> {
* updated if they are higher than the existing ones.
*/
void update(final ProbabilityInfo probabilityInfo,
+ final ArrayList<WeightedString> shortcutTargets,
final ArrayList<WeightedString> bigrams,
final boolean isNotAWord, final boolean isPossiblyOffensive) {
mProbabilityInfo = ProbabilityInfo.max(mProbabilityInfo, probabilityInfo);
+ if (shortcutTargets != null) {
+ if (mShortcutTargets == null) {
+ mShortcutTargets = shortcutTargets;
+ } else {
+ final int size = shortcutTargets.size();
+ for (int i = 0; i < size; ++i) {
+ final WeightedString shortcut = shortcutTargets.get(i);
+ final WeightedString existingShortcut = getShortcut(shortcut.mWord);
+ if (existingShortcut == null) {
+ mShortcutTargets.add(shortcut);
+ } else {
+ existingShortcut.mProbabilityInfo = ProbabilityInfo.max(
+ existingShortcut.mProbabilityInfo, shortcut.mProbabilityInfo);
+ }
+ }
+ }
+ }
if (bigrams != null) {
if (mBigrams == null) {
mBigrams = bigrams;
@@ -264,16 +312,19 @@ public final class FusionDictionary implements Iterable<WordProperty> {
* Helper method to add a word as a string.
*
* This method adds a word to the dictionary with the given frequency. Optional
- * lists of bigrams can be passed here. For each word inside,
+ * lists of bigrams and shortcuts can be passed here. For each word inside,
* they will be added to the dictionary as necessary.
- * @param word the word to add.
+ *
+ * @param word the word to add.
* @param probabilityInfo probability information of the word.
+ * @param shortcutTargets a list of shortcut targets for this word, or null.
* @param isNotAWord true if this should not be considered a word (e.g. shortcut only)
* @param isPossiblyOffensive true if this word is possibly offensive
*/
public void add(final String word, final ProbabilityInfo probabilityInfo,
- final boolean isNotAWord, final boolean isPossiblyOffensive) {
- add(getCodePoints(word), probabilityInfo, isNotAWord, isPossiblyOffensive);
+ final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord,
+ final boolean isPossiblyOffensive) {
+ add(getCodePoints(word), probabilityInfo, shortcutTargets, isNotAWord, isPossiblyOffensive);
}
/**
@@ -307,7 +358,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
if (ptNode0 != null) {
final PtNode ptNode1 = findWordInTree(mRootNodeArray, word1);
if (ptNode1 == null) {
- add(getCodePoints(word1), new ProbabilityInfo(0), false /* isNotAWord */,
+ add(getCodePoints(word1), new ProbabilityInfo(0), null, false /* isNotAWord */,
false /* isPossiblyOffensive */);
// The PtNode for the first word may have moved by the above insertion,
// if word1 and word2 share a common stem that happens not to have been
@@ -325,12 +376,15 @@ public final class FusionDictionary implements Iterable<WordProperty> {
*
* The shortcuts, if any, have to be in the dictionary already. If they aren't,
* an exception is thrown.
- * @param word the word, as an int array.
+ *
+ * @param word the word, as an int array.
* @param probabilityInfo the probability information of the word.
- * @param isNotAWord true if this is not a word for spellchecking purposes (shortcut only or so)
+ * @param shortcutTargets an optional list of shortcut targets for this word (null if none).
+ * @param isNotAWord true if this is not a word for spellchecking purposes (shortcut only or so)
* @param isPossiblyOffensive true if this word is possibly offensive
*/
private void add(final int[] word, final ProbabilityInfo probabilityInfo,
+ final ArrayList<WeightedString> shortcutTargets,
final boolean isNotAWord, final boolean isPossiblyOffensive) {
assert(probabilityInfo.mProbability <= FormatSpec.MAX_TERMINAL_FREQUENCY);
if (word.length >= DecoderSpecificConstants.DICTIONARY_MAX_WORD_LENGTH) {
@@ -360,7 +414,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
// No node at this point to accept the word. Create one.
final int insertionIndex = findInsertionIndex(currentNodeArray, word[charIndex]);
final PtNode newPtNode = new PtNode(Arrays.copyOfRange(word, charIndex, word.length),
- null /* bigrams */, probabilityInfo, isNotAWord,
+ shortcutTargets, null /* bigrams */, probabilityInfo, isNotAWord,
isPossiblyOffensive);
currentNodeArray.mData.add(insertionIndex, newPtNode);
if (DBG) checkStack(currentNodeArray);
@@ -371,14 +425,14 @@ public final class FusionDictionary implements Iterable<WordProperty> {
// The new word is a prefix of an existing word, but the node on which it
// should end already exists as is. Since the old PtNode was not a terminal,
// make it one by filling in its frequency and other attributes
- currentPtNode.update(probabilityInfo, null, isNotAWord,
+ currentPtNode.update(probabilityInfo, shortcutTargets, null, isNotAWord,
isPossiblyOffensive);
} else {
// The new word matches the full old word and extends past it.
// We only have to create a new node and add it to the end of this.
final PtNode newNode = new PtNode(
Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
- null /* bigrams */, probabilityInfo,
+ shortcutTargets, null /* bigrams */, probabilityInfo,
isNotAWord, isPossiblyOffensive);
currentPtNode.mChildren = new PtNodeArray();
currentPtNode.mChildren.mData.add(newNode);
@@ -387,7 +441,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
if (0 == differentCharIndex) {
// Exact same word. Update the frequency if higher. This will also add the
// new shortcuts to the existing shortcut list if it already exists.
- currentPtNode.update(probabilityInfo, null,
+ currentPtNode.update(probabilityInfo, shortcutTargets, null,
currentPtNode.mIsNotAWord && isNotAWord,
currentPtNode.mIsPossiblyOffensive || isPossiblyOffensive);
} else {
@@ -396,7 +450,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
PtNodeArray newChildren = new PtNodeArray();
final PtNode newOldWord = new PtNode(
Arrays.copyOfRange(currentPtNode.mChars, differentCharIndex,
- currentPtNode.mChars.length),
+ currentPtNode.mChars.length), currentPtNode.mShortcutTargets,
currentPtNode.mBigrams, currentPtNode.mProbabilityInfo,
currentPtNode.mIsNotAWord, currentPtNode.mIsPossiblyOffensive,
currentPtNode.mChildren);
@@ -406,17 +460,17 @@ public final class FusionDictionary implements Iterable<WordProperty> {
if (charIndex + differentCharIndex >= word.length) {
newParent = new PtNode(
Arrays.copyOfRange(currentPtNode.mChars, 0, differentCharIndex),
- null /* bigrams */, probabilityInfo,
+ shortcutTargets, null /* bigrams */, probabilityInfo,
isNotAWord, isPossiblyOffensive, newChildren);
} else {
newParent = new PtNode(
Arrays.copyOfRange(currentPtNode.mChars, 0, differentCharIndex),
- null /* bigrams */, null /* probabilityInfo */,
- false /* isNotAWord */, false /* isPossiblyOffensive */,
- newChildren);
+ null /* shortcutTargets */, null /* bigrams */,
+ null /* probabilityInfo */, false /* isNotAWord */,
+ false /* isPossiblyOffensive */, newChildren);
final PtNode newWord = new PtNode(Arrays.copyOfRange(word,
charIndex + differentCharIndex, word.length),
- null /* bigrams */, probabilityInfo,
+ shortcutTargets, null /* bigrams */, probabilityInfo,
isNotAWord, isPossiblyOffensive);
final int addIndex = word[charIndex + differentCharIndex]
> currentPtNode.mChars[differentCharIndex] ? 1 : 0;
@@ -478,7 +532,7 @@ public final class FusionDictionary implements Iterable<WordProperty> {
private static int findInsertionIndex(final PtNodeArray nodeArray, int character) {
final ArrayList<PtNode> data = nodeArray.mData;
final PtNode reference = new PtNode(new int[] { character },
- null /* bigrams */, null /* probabilityInfo */,
+ null /* shortcutTargets */, null /* bigrams */, null /* probabilityInfo */,
false /* isNotAWord */, false /* isPossiblyOffensive */);
int result = Collections.binarySearch(data, reference, PTNODE_COMPARATOR);
return result >= 0 ? result : -result - 1;
@@ -615,7 +669,8 @@ public final class FusionDictionary implements Iterable<WordProperty> {
}
if (currentPtNode.isTerminal()) {
return new WordProperty(mCurrentString.toString(),
- currentPtNode.mProbabilityInfo, currentPtNode.mBigrams,
+ currentPtNode.mProbabilityInfo,
+ currentPtNode.mShortcutTargets, currentPtNode.mBigrams,
currentPtNode.mIsNotAWord, currentPtNode.mIsPossiblyOffensive);
}
} else {
diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
index 746431dfa..63ea89c1d 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
@@ -84,7 +84,7 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
// Insert unigrams into the fusion dictionary.
for (final WordProperty wordProperty : wordProperties) {
fusionDict.add(wordProperty.mWord, wordProperty.mProbabilityInfo,
- wordProperty.mIsNotAWord,
+ wordProperty.mShortcutTargets, wordProperty.mIsNotAWord,
wordProperty.mIsPossiblyOffensive);
}
// Insert bigrams into the fusion dictionary.