diff options
Diffstat (limited to 'tests/src/com/android/inputmethod/latin')
4 files changed, 282 insertions, 32 deletions
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index e96c934cb..b3aed820a 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -156,7 +156,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { addUnigramWord(binaryDictionary, validLongWord, probability); addUnigramWord(binaryDictionary, invalidLongWord, probability); // Too long short cut. - binaryDictionary.addUnigramEntry("a", probability, false /* isBeginningOfSentence */, + binaryDictionary.addUnigramEntry("a", probability, invalidLongWord, + 10 /* shortcutProbability */, false /* isBeginningOfSentence */, false /* isNotAWord */, false /* isPossiblyOffensive */, BinaryDictionary.NOT_A_VALID_TIMESTAMP); addUnigramWord(binaryDictionary, "abc", probability); @@ -174,7 +175,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { private static void addUnigramWord(final BinaryDictionary binaryDictionary, final String word, final int probability) { - binaryDictionary.addUnigramEntry(word, probability, + binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */, + Dictionary.NOT_A_PROBABILITY /* shortcutProbability */, false /* isBeginningOfSentence */, false /* isNotAWord */, false /* isPossiblyOffensive */, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); @@ -710,6 +712,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { final boolean isPossiblyOffensive = random.nextBoolean(); // TODO: Add tests for historical info. binaryDictionary.addUnigramEntry(word, unigramProbability, + null /* shortcutTarget */, Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */, isNotAWord, isPossiblyOffensive, BinaryDictionary.NOT_A_VALID_TIMESTAMP); if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { @@ -724,7 +727,9 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertEquals(isNotAWord, wordProperty.mIsNotAWord); assertEquals(isPossiblyOffensive, wordProperty.mIsPossiblyOffensive); assertEquals(false, wordProperty.mHasNgrams); + assertEquals(false, wordProperty.mHasShortcuts); assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability); + assertTrue(wordProperty.mShortcutTargets.isEmpty()); } for (int i = 0; i < BIGRAM_COUNT; i++) { @@ -852,10 +857,125 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertTrue(bigramSet.isEmpty()); } + public void testAddShortcuts() { + final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); + + final int unigramProbability = 100; + final int shortcutProbability = 10; + binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz", + shortcutProbability, false /* isBeginningOfSentence */, + false /* isNotAWord */, false /* isPossiblyOffensive */, 0 /* timestamp */); + WordProperty wordProperty = binaryDictionary.getWordProperty("aaa", + false /* isBeginningOfSentence */); + assertEquals(1, wordProperty.mShortcutTargets.size()); + assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); + assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability()); + final int updatedShortcutProbability = 2; + binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz", + updatedShortcutProbability, false /* isBeginningOfSentence */, + false /* isNotAWord */, false /* isPossiblyOffensive */, 0 /* timestamp */); + wordProperty = binaryDictionary.getWordProperty("aaa", + false /* isBeginningOfSentence */); + assertEquals(1, wordProperty.mShortcutTargets.size()); + assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); + assertEquals(updatedShortcutProbability, + wordProperty.mShortcutTargets.get(0).getProbability()); + binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy", + shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */, + false /* isPossiblyOffensive */, 0 /* timestamp */); + final HashMap<String, Integer> shortcutTargets = new HashMap<>(); + shortcutTargets.put("zzz", updatedShortcutProbability); + shortcutTargets.put("yyy", shortcutProbability); + wordProperty = binaryDictionary.getWordProperty("aaa", + false /* isBeginningOfSentence */); + assertEquals(2, wordProperty.mShortcutTargets.size()); + for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) { + assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord)); + assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), + shortcutTarget.getProbability()); + shortcutTargets.remove(shortcutTarget.mWord); + } + shortcutTargets.put("zzz", updatedShortcutProbability); + shortcutTargets.put("yyy", shortcutProbability); + binaryDictionary.flushWithGC(); + wordProperty = binaryDictionary.getWordProperty("aaa", + false /* isBeginningOfSentence */); + assertEquals(2, wordProperty.mShortcutTargets.size()); + for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) { + assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord)); + assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), + shortcutTarget.getProbability()); + shortcutTargets.remove(shortcutTarget.mWord); + } + } + + public void testAddManyShortcuts() { + final long seed = System.currentTimeMillis(); + final Random random = new Random(seed); + final int UNIGRAM_COUNT = 1000; + final int SHORTCUT_COUNT = 10000; + final int codePointSetSize = 20; + final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); + + final ArrayList<String> words = new ArrayList<>(); + final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); + final HashMap<String, HashMap<String, Integer>> shortcutTargets = new HashMap<>(); + final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); + + for (int i = 0; i < UNIGRAM_COUNT; i++) { + final String word = CodePointUtils.generateWord(random, codePointSet); + final int unigramProbability = random.nextInt(0xFF); + addUnigramWord(binaryDictionary, word, unigramProbability); + words.add(word); + unigramProbabilities.put(word, unigramProbability); + if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + } + for (int i = 0; i < SHORTCUT_COUNT; i++) { + final String shortcutTarget = CodePointUtils.generateWord(random, codePointSet); + final int shortcutProbability = random.nextInt(0xF); + final String word = words.get(random.nextInt(words.size())); + final int unigramProbability = unigramProbabilities.get(word); + binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget, + shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */, + false /* isPossiblyOffensive */, 0 /* timestamp */); + if (shortcutTargets.containsKey(word)) { + final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word); + shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability); + } else { + final HashMap<String, Integer> shortcutTargetsOfWord = new HashMap<>(); + shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability); + shortcutTargets.put(word, shortcutTargetsOfWord); + } + if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + } + + for (final String word : words) { + final WordProperty wordProperty = binaryDictionary.getWordProperty(word, + false /* isBeginningOfSentence */); + assertEquals((int)unigramProbabilities.get(word), + wordProperty.mProbabilityInfo.mProbability); + if (!shortcutTargets.containsKey(word)) { + // The word does not have shortcut targets. + continue; + } + assertEquals(shortcutTargets.get(word).size(), wordProperty.mShortcutTargets.size()); + for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) { + final String targetCodePonts = shortcutTarget.mWord; + assertEquals((int)shortcutTargets.get(word).get(targetCodePonts), + shortcutTarget.getProbability()); + } + } + } + public void testPossiblyOffensiveAttributeMaintained() { final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); - binaryDictionary.addUnigramEntry("ddd", 100, false, true, true, 0); + binaryDictionary.addUnigramEntry("ddd", 100, null, Dictionary.NOT_A_PROBABILITY, + false, true, true, 0); WordProperty wordProperty = binaryDictionary.getWordProperty("ddd", false); assertEquals(true, wordProperty.mIsPossiblyOffensive); } diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java index bd5136583..ce905c499 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java @@ -16,7 +16,9 @@ package com.android.inputmethod.latin.makedict; +import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; @@ -90,6 +92,38 @@ public class BinaryDictEncoderUtils { } /** + * Compute the size of a shortcut in bytes. + */ + private static int getShortcutSize(final WeightedString shortcut, + final HashMap<Integer, Integer> codePointToOneByteCodeMap) { + int size = FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE; + final String word = shortcut.mWord; + final int length = word.length(); + for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) { + final int codePoint = word.codePointAt(i); + size += CharEncoding.getCharSize(codePoint, codePointToOneByteCodeMap); + } + size += FormatSpec.PTNODE_TERMINATOR_SIZE; + return size; + } + + /** + * Compute the size of a shortcut list in bytes. + * + * This is known in advance and does not change according to position in the file + * like address lists do. + */ + static int getShortcutListSize(final ArrayList<WeightedString> shortcutList, + final HashMap<Integer, Integer> codePointToOneByteCodeMap) { + if (null == shortcutList || shortcutList.isEmpty()) return 0; + int size = FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE; + for (final WeightedString shortcut : shortcutList) { + size += getShortcutSize(shortcut, codePointToOneByteCodeMap); + } + return size; + } + + /** * Compute the maximum size of a PtNode, assuming 3-byte addresses for everything. * * @param ptNode the PtNode to compute the size of. @@ -103,6 +137,8 @@ public class BinaryDictEncoderUtils { size += FormatSpec.PTNODE_FREQUENCY_SIZE; } size += FormatSpec.PTNODE_MAX_ADDRESS_SIZE; // For children address + // TODO: Use codePointToOneByteCodeMap for shortcuts. + size += getShortcutListSize(ptNode.mShortcutTargets, null /* codePointToOneByteCodeMap */); if (null != ptNode.mBigrams) { size += (FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE + FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE) @@ -205,6 +241,27 @@ public class BinaryDictEncoderUtils { } } + @UsedForTesting + static void writeUIntToDictBuffer(final DictBuffer dictBuffer, final int value, + final int size) { + switch(size) { + case 4: + dictBuffer.put((byte) ((value >> 24) & 0xFF)); + /* fall through */ + case 3: + dictBuffer.put((byte) ((value >> 16) & 0xFF)); + /* fall through */ + case 2: + dictBuffer.put((byte) ((value >> 8) & 0xFF)); + /* fall through */ + case 1: + dictBuffer.put((byte) (value & 0xFF)); + break; + default: + /* nop */ + } + } + // End utility methods // This method is responsible for finding a nice ordering of the nodes that favors run-time @@ -334,6 +391,9 @@ public class BinaryDictEncoderUtils { nodeSize += getByteSize(getOffsetToTargetNodeArrayDuringUpdate(ptNodeArray, nodeSize + size, ptNode.mChildren)); } + // TODO: Use codePointToOneByteCodeMap for shortcuts. + nodeSize += getShortcutListSize(ptNode.mShortcutTargets, + null /* codePointToOneByteCodeMap */); if (null != ptNode.mBigrams) { for (WeightedString bigram : ptNode.mBigrams) { final int offset = getOffsetToTargetPtNodeDuringUpdate(ptNodeArray, @@ -508,13 +568,14 @@ public class BinaryDictEncoderUtils { * @param hasMultipleChars whether the PtNode has multiple chars. * @param isTerminal whether the PtNode is terminal. * @param childrenAddressSize the size of a children address. + * @param hasShortcuts whether the PtNode has shortcuts. * @param hasBigrams whether the PtNode has bigrams. * @param isNotAWord whether the PtNode is not a word. * @param isPossiblyOffensive whether the PtNode is a possibly offensive entry. * @return the flags */ static int makePtNodeFlags(final boolean hasMultipleChars, final boolean isTerminal, - final int childrenAddressSize, final boolean hasBigrams, + final int childrenAddressSize, final boolean hasShortcuts, final boolean hasBigrams, final boolean isNotAWord, final boolean isPossiblyOffensive) { byte flags = 0; if (hasMultipleChars) flags |= FormatSpec.FLAG_HAS_MULTIPLE_CHARS; @@ -535,6 +596,7 @@ public class BinaryDictEncoderUtils { default: throw new RuntimeException("Node with a strange address"); } + if (hasShortcuts) flags |= FormatSpec.FLAG_HAS_SHORTCUT_TARGETS; if (hasBigrams) flags |= FormatSpec.FLAG_HAS_BIGRAMS; if (isNotAWord) flags |= FormatSpec.FLAG_IS_NOT_A_WORD; if (isPossiblyOffensive) flags |= FormatSpec.FLAG_IS_POSSIBLY_OFFENSIVE; @@ -544,6 +606,7 @@ public class BinaryDictEncoderUtils { /* package */ static byte makePtNodeFlags(final PtNode node, final int childrenOffset) { return (byte) makePtNodeFlags(node.mChars.length > 1, node.isTerminal(), getByteSize(childrenOffset), + node.mShortcutTargets != null && !node.mShortcutTargets.isEmpty(), node.mBigrams != null && !node.mBigrams.isEmpty(), node.mIsNotAWord, node.mIsPossiblyOffensive); } @@ -558,7 +621,7 @@ public class BinaryDictEncoderUtils { * @param word the second bigram, for debugging purposes * @return the flags */ - /* package */ static int makeBigramFlags(final boolean more, final int offset, + /* package */ static final int makeBigramFlags(final boolean more, final int offset, final int bigramFrequency, final int unigramFrequency, final String word) { int bigramFlags = (more ? FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT : 0) + (offset < 0 ? FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE : 0); @@ -627,7 +690,19 @@ public class BinaryDictEncoderUtils { return discretizedFrequency > 0 ? discretizedFrequency : 0; } - /* package */ static int getChildrenPosition(final PtNode ptNode, + /** + * Makes the flag value for a shortcut. + * + * @param more whether there are more attributes after this one. + * @param frequency the frequency of the attribute, 0..15 + * @return the flags + */ + static final int makeShortcutFlags(final boolean more, final int frequency) { + return (more ? FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT : 0) + + (frequency & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY); + } + + /* package */ static final int getChildrenPosition(final PtNode ptNode, final HashMap<Integer, Integer> codePointToOneByteCodeMap) { int positionOfChildrenPosField = ptNode.mCachedAddressAfterUpdate + getNodeHeaderSize(ptNode, codePointToOneByteCodeMap); diff --git a/tests/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/tests/src/com/android/inputmethod/latin/makedict/FusionDictionary.java index 6ba1ef966..a42f0a93d 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/FusionDictionary.java +++ b/tests/src/com/android/inputmethod/latin/makedict/FusionDictionary.java @@ -82,6 +82,7 @@ public final class FusionDictionary implements Iterable<WordProperty> { public static final class PtNode { private static final int NOT_A_TERMINAL = -1; final int mChars[]; + ArrayList<WeightedString> mShortcutTargets; ArrayList<WeightedString> mBigrams; // null == mProbabilityInfo indicates this is not a terminal. ProbabilityInfo mProbabilityInfo; @@ -99,23 +100,26 @@ public final class FusionDictionary implements Iterable<WordProperty> { int mCachedAddressBeforeUpdate; // The address of this PtNode (before update) int mCachedAddressAfterUpdate; // The address of this PtNode (after update) - public PtNode(final int[] chars, final ArrayList<WeightedString> bigrams, - final ProbabilityInfo probabilityInfo, final boolean isNotAWord, - final boolean isPossiblyOffensive) { + public PtNode(final int[] chars, final ArrayList<WeightedString> shortcutTargets, + final ArrayList<WeightedString> bigrams, final ProbabilityInfo probabilityInfo, + final boolean isNotAWord, final boolean isPossiblyOffensive) { mChars = chars; mProbabilityInfo = probabilityInfo; mTerminalId = probabilityInfo == null ? NOT_A_TERMINAL : probabilityInfo.mProbability; + mShortcutTargets = shortcutTargets; mBigrams = bigrams; mChildren = null; mIsNotAWord = isNotAWord; mIsPossiblyOffensive = isPossiblyOffensive; } - public PtNode(final int[] chars, final ArrayList<WeightedString> bigrams, - final ProbabilityInfo probabilityInfo, final boolean isNotAWord, - final boolean isPossiblyOffensive, final PtNodeArray children) { + public PtNode(final int[] chars, final ArrayList<WeightedString> shortcutTargets, + final ArrayList<WeightedString> bigrams, final ProbabilityInfo probabilityInfo, + final boolean isNotAWord, final boolean isPossiblyOffensive, + final PtNodeArray children) { mChars = chars; mProbabilityInfo = probabilityInfo; + mShortcutTargets = shortcutTargets; mBigrams = bigrams; mChildren = children; mIsNotAWord = isNotAWord; @@ -149,6 +153,14 @@ public final class FusionDictionary implements Iterable<WordProperty> { return mIsPossiblyOffensive; } + public ArrayList<WeightedString> getShortcutTargets() { + // We don't want write permission to escape outside the package, so we return a copy + if (null == mShortcutTargets) return null; + final ArrayList<WeightedString> copyOfShortcutTargets = + new ArrayList<>(mShortcutTargets); + return copyOfShortcutTargets; + } + public ArrayList<WeightedString> getBigrams() { // We don't want write permission to escape outside the package, so we return a copy if (null == mBigrams) return null; @@ -179,6 +191,24 @@ public final class FusionDictionary implements Iterable<WordProperty> { } /** + * Gets the shortcut target for the given word. Returns null if the word is not in the + * shortcut list. + */ + public WeightedString getShortcut(final String word) { + // TODO: Don't do a linear search + if (mShortcutTargets != null) { + final int size = mShortcutTargets.size(); + for (int i = 0; i < size; ++i) { + WeightedString shortcut = mShortcutTargets.get(i); + if (shortcut.mWord.equals(word)) { + return shortcut; + } + } + } + return null; + } + + /** * Gets the bigram for the given word. * Returns null if the word is not in the bigrams list. */ @@ -202,9 +232,27 @@ public final class FusionDictionary implements Iterable<WordProperty> { * updated if they are higher than the existing ones. */ void update(final ProbabilityInfo probabilityInfo, + final ArrayList<WeightedString> shortcutTargets, final ArrayList<WeightedString> bigrams, final boolean isNotAWord, final boolean isPossiblyOffensive) { mProbabilityInfo = ProbabilityInfo.max(mProbabilityInfo, probabilityInfo); + if (shortcutTargets != null) { + if (mShortcutTargets == null) { + mShortcutTargets = shortcutTargets; + } else { + final int size = shortcutTargets.size(); + for (int i = 0; i < size; ++i) { + final WeightedString shortcut = shortcutTargets.get(i); + final WeightedString existingShortcut = getShortcut(shortcut.mWord); + if (existingShortcut == null) { + mShortcutTargets.add(shortcut); + } else { + existingShortcut.mProbabilityInfo = ProbabilityInfo.max( + existingShortcut.mProbabilityInfo, shortcut.mProbabilityInfo); + } + } + } + } if (bigrams != null) { if (mBigrams == null) { mBigrams = bigrams; @@ -264,16 +312,19 @@ public final class FusionDictionary implements Iterable<WordProperty> { * Helper method to add a word as a string. * * This method adds a word to the dictionary with the given frequency. Optional - * lists of bigrams can be passed here. For each word inside, + * lists of bigrams and shortcuts can be passed here. For each word inside, * they will be added to the dictionary as necessary. - * @param word the word to add. + * + * @param word the word to add. * @param probabilityInfo probability information of the word. + * @param shortcutTargets a list of shortcut targets for this word, or null. * @param isNotAWord true if this should not be considered a word (e.g. shortcut only) * @param isPossiblyOffensive true if this word is possibly offensive */ public void add(final String word, final ProbabilityInfo probabilityInfo, - final boolean isNotAWord, final boolean isPossiblyOffensive) { - add(getCodePoints(word), probabilityInfo, isNotAWord, isPossiblyOffensive); + final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord, + final boolean isPossiblyOffensive) { + add(getCodePoints(word), probabilityInfo, shortcutTargets, isNotAWord, isPossiblyOffensive); } /** @@ -307,7 +358,7 @@ public final class FusionDictionary implements Iterable<WordProperty> { if (ptNode0 != null) { final PtNode ptNode1 = findWordInTree(mRootNodeArray, word1); if (ptNode1 == null) { - add(getCodePoints(word1), new ProbabilityInfo(0), false /* isNotAWord */, + add(getCodePoints(word1), new ProbabilityInfo(0), null, false /* isNotAWord */, false /* isPossiblyOffensive */); // The PtNode for the first word may have moved by the above insertion, // if word1 and word2 share a common stem that happens not to have been @@ -325,12 +376,15 @@ public final class FusionDictionary implements Iterable<WordProperty> { * * The shortcuts, if any, have to be in the dictionary already. If they aren't, * an exception is thrown. - * @param word the word, as an int array. + * + * @param word the word, as an int array. * @param probabilityInfo the probability information of the word. - * @param isNotAWord true if this is not a word for spellchecking purposes (shortcut only or so) + * @param shortcutTargets an optional list of shortcut targets for this word (null if none). + * @param isNotAWord true if this is not a word for spellcheking purposes (shortcut only or so) * @param isPossiblyOffensive true if this word is possibly offensive */ private void add(final int[] word, final ProbabilityInfo probabilityInfo, + final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord, final boolean isPossiblyOffensive) { assert(probabilityInfo.mProbability <= FormatSpec.MAX_TERMINAL_FREQUENCY); if (word.length >= DecoderSpecificConstants.DICTIONARY_MAX_WORD_LENGTH) { @@ -360,7 +414,7 @@ public final class FusionDictionary implements Iterable<WordProperty> { // No node at this point to accept the word. Create one. final int insertionIndex = findInsertionIndex(currentNodeArray, word[charIndex]); final PtNode newPtNode = new PtNode(Arrays.copyOfRange(word, charIndex, word.length), - null /* bigrams */, probabilityInfo, isNotAWord, + shortcutTargets, null /* bigrams */, probabilityInfo, isNotAWord, isPossiblyOffensive); currentNodeArray.mData.add(insertionIndex, newPtNode); if (DBG) checkStack(currentNodeArray); @@ -371,14 +425,14 @@ public final class FusionDictionary implements Iterable<WordProperty> { // The new word is a prefix of an existing word, but the node on which it // should end already exists as is. Since the old PtNode was not a terminal, // make it one by filling in its frequency and other attributes - currentPtNode.update(probabilityInfo, null, isNotAWord, + currentPtNode.update(probabilityInfo, shortcutTargets, null, isNotAWord, isPossiblyOffensive); } else { // The new word matches the full old word and extends past it. // We only have to create a new node and add it to the end of this. final PtNode newNode = new PtNode( Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), - null /* bigrams */, probabilityInfo, + shortcutTargets, null /* bigrams */, probabilityInfo, isNotAWord, isPossiblyOffensive); currentPtNode.mChildren = new PtNodeArray(); currentPtNode.mChildren.mData.add(newNode); @@ -387,7 +441,7 @@ public final class FusionDictionary implements Iterable<WordProperty> { if (0 == differentCharIndex) { // Exact same word. Update the frequency if higher. This will also add the // new shortcuts to the existing shortcut list if it already exists. - currentPtNode.update(probabilityInfo, null, + currentPtNode.update(probabilityInfo, shortcutTargets, null, currentPtNode.mIsNotAWord && isNotAWord, currentPtNode.mIsPossiblyOffensive || isPossiblyOffensive); } else { @@ -396,7 +450,7 @@ public final class FusionDictionary implements Iterable<WordProperty> { PtNodeArray newChildren = new PtNodeArray(); final PtNode newOldWord = new PtNode( Arrays.copyOfRange(currentPtNode.mChars, differentCharIndex, - currentPtNode.mChars.length), + currentPtNode.mChars.length), currentPtNode.mShortcutTargets, currentPtNode.mBigrams, currentPtNode.mProbabilityInfo, currentPtNode.mIsNotAWord, currentPtNode.mIsPossiblyOffensive, currentPtNode.mChildren); @@ -406,17 +460,17 @@ public final class FusionDictionary implements Iterable<WordProperty> { if (charIndex + differentCharIndex >= word.length) { newParent = new PtNode( Arrays.copyOfRange(currentPtNode.mChars, 0, differentCharIndex), - null /* bigrams */, probabilityInfo, + shortcutTargets, null /* bigrams */, probabilityInfo, isNotAWord, isPossiblyOffensive, newChildren); } else { newParent = new PtNode( Arrays.copyOfRange(currentPtNode.mChars, 0, differentCharIndex), - null /* bigrams */, null /* probabilityInfo */, - false /* isNotAWord */, false /* isPossiblyOffensive */, - newChildren); + null /* shortcutTargets */, null /* bigrams */, + null /* probabilityInfo */, false /* isNotAWord */, + false /* isPossiblyOffensive */, newChildren); final PtNode newWord = new PtNode(Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), - null /* bigrams */, probabilityInfo, + shortcutTargets, null /* bigrams */, probabilityInfo, isNotAWord, isPossiblyOffensive); final int addIndex = word[charIndex + differentCharIndex] > currentPtNode.mChars[differentCharIndex] ? 1 : 0; @@ -478,7 +532,7 @@ public final class FusionDictionary implements Iterable<WordProperty> { private static int findInsertionIndex(final PtNodeArray nodeArray, int character) { final ArrayList<PtNode> data = nodeArray.mData; final PtNode reference = new PtNode(new int[] { character }, - null /* bigrams */, null /* probabilityInfo */, + null /* shortcutTargets */, null /* bigrams */, null /* probabilityInfo */, false /* isNotAWord */, false /* isPossiblyOffensive */); int result = Collections.binarySearch(data, reference, PTNODE_COMPARATOR); return result >= 0 ? result : -result - 1; @@ -615,7 +669,8 @@ public final class FusionDictionary implements Iterable<WordProperty> { } if (currentPtNode.isTerminal()) { return new WordProperty(mCurrentString.toString(), - currentPtNode.mProbabilityInfo, currentPtNode.mBigrams, + currentPtNode.mProbabilityInfo, + currentPtNode.mShortcutTargets, currentPtNode.mBigrams, currentPtNode.mIsNotAWord, currentPtNode.mIsPossiblyOffensive); } } else { diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java index 746431dfa..63ea89c1d 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java +++ b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java @@ -84,7 +84,7 @@ public class Ver4DictDecoder extends AbstractDictDecoder { // Insert unigrams into the fusion dictionary. for (final WordProperty wordProperty : wordProperties) { fusionDict.add(wordProperty.mWord, wordProperty.mProbabilityInfo, - wordProperty.mIsNotAWord, + wordProperty.mShortcutTargets, wordProperty.mIsNotAWord, wordProperty.mIsPossiblyOffensive); } // Insert bigrams into the fusion dictionary. |