summary | refs | log | tree | commit | diff | stats
path: root/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
diff options
context:
space:
mode:
Diffstat (limited to 'tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java')
-rw-r--r-- tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java 81
1 files changed, 78 insertions, 3 deletions
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
index bd5136583..ce905c499 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
@@ -16,7 +16,9 @@
package com.android.inputmethod.latin.makedict;
+import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
+import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
@@ -90,6 +92,38 @@ public class BinaryDictEncoderUtils {
}
/**
+ * Compute the size of a shortcut in bytes.
+ */
+    private static int getShortcutSize(final WeightedString shortcut,
+            final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
+        final String target = shortcut.mWord;
+        // Fixed part: the attribute flags plus the terminator; the code points are added below.
+        int bytes = FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE + FormatSpec.PTNODE_TERMINATOR_SIZE;
+        for (int index = 0; index < target.length(); ) {
+            final int codePoint = target.codePointAt(index);
+            bytes += CharEncoding.getCharSize(codePoint, codePointToOneByteCodeMap);
+            index += Character.charCount(codePoint); // step over a whole code point
+        }
+        return bytes;
+    }
+
+ /**
+ * Compute the size of a shortcut list in bytes.
+ *
+ * This is known in advance and does not change according to position in the file
+ * like address lists do.
+ */
+    static int getShortcutListSize(final ArrayList<WeightedString> shortcutList,
+            final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
+        if (shortcutList == null || shortcutList.size() == 0) return 0; // no list, no bytes
+        int totalBytes = FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE;
+        for (int i = 0; i < shortcutList.size(); ++i) {
+            totalBytes += getShortcutSize(shortcutList.get(i), codePointToOneByteCodeMap);
+        }
+        return totalBytes;
+    }
+
+ /**
* Compute the maximum size of a PtNode, assuming 3-byte addresses for everything.
*
* @param ptNode the PtNode to compute the size of.
@@ -103,6 +137,8 @@ public class BinaryDictEncoderUtils {
size += FormatSpec.PTNODE_FREQUENCY_SIZE;
}
size += FormatSpec.PTNODE_MAX_ADDRESS_SIZE; // For children address
+ // TODO: Use codePointToOneByteCodeMap for shortcuts.
+ size += getShortcutListSize(ptNode.mShortcutTargets, null /* codePointToOneByteCodeMap */);
if (null != ptNode.mBigrams) {
size += (FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE
+ FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE)
@@ -205,6 +241,27 @@ public class BinaryDictEncoderUtils {
}
}
+    /**
+     * Writes the low-order bytes of an integer to a DictBuffer, most significant byte first.
+     *
+     * Sizes other than 1, 2, 3 or 4 are ignored: nothing is written for them.
+     *
+     * @param dictBuffer the buffer to write to.
+     * @param value the value whose low-order bytes are written.
+     * @param size the number of bytes to write, from 1 to 4.
+     */
+    @UsedForTesting
+    static void writeUIntToDictBuffer(final DictBuffer dictBuffer, final int value,
+            final int size) {
+        // Unsupported widths are deliberately a no-op.
+        if (size < 1 || size > 4) {
+            return;
+        }
+        for (int shift = 8 * (size - 1); shift >= 0; shift -= 8) {
+            dictBuffer.put((byte) ((value >> shift) & 0xFF));
+        }
+    }
+
// End utility methods
// This method is responsible for finding a nice ordering of the nodes that favors run-time
@@ -334,6 +391,9 @@ public class BinaryDictEncoderUtils {
nodeSize += getByteSize(getOffsetToTargetNodeArrayDuringUpdate(ptNodeArray,
nodeSize + size, ptNode.mChildren));
}
+ // TODO: Use codePointToOneByteCodeMap for shortcuts.
+ nodeSize += getShortcutListSize(ptNode.mShortcutTargets,
+ null /* codePointToOneByteCodeMap */);
if (null != ptNode.mBigrams) {
for (WeightedString bigram : ptNode.mBigrams) {
final int offset = getOffsetToTargetPtNodeDuringUpdate(ptNodeArray,
@@ -508,13 +568,14 @@ public class BinaryDictEncoderUtils {
* @param hasMultipleChars whether the PtNode has multiple chars.
* @param isTerminal whether the PtNode is terminal.
* @param childrenAddressSize the size of a children address.
+ * @param hasShortcuts whether the PtNode has shortcuts.
* @param hasBigrams whether the PtNode has bigrams.
* @param isNotAWord whether the PtNode is not a word.
* @param isPossiblyOffensive whether the PtNode is a possibly offensive entry.
* @return the flags
*/
static int makePtNodeFlags(final boolean hasMultipleChars, final boolean isTerminal,
- final int childrenAddressSize, final boolean hasBigrams,
+ final int childrenAddressSize, final boolean hasShortcuts, final boolean hasBigrams,
final boolean isNotAWord, final boolean isPossiblyOffensive) {
byte flags = 0;
if (hasMultipleChars) flags |= FormatSpec.FLAG_HAS_MULTIPLE_CHARS;
@@ -535,6 +596,7 @@ public class BinaryDictEncoderUtils {
default:
throw new RuntimeException("Node with a strange address");
}
+ if (hasShortcuts) flags |= FormatSpec.FLAG_HAS_SHORTCUT_TARGETS;
if (hasBigrams) flags |= FormatSpec.FLAG_HAS_BIGRAMS;
if (isNotAWord) flags |= FormatSpec.FLAG_IS_NOT_A_WORD;
if (isPossiblyOffensive) flags |= FormatSpec.FLAG_IS_POSSIBLY_OFFENSIVE;
@@ -544,6 +606,7 @@ public class BinaryDictEncoderUtils {
/* package */ static byte makePtNodeFlags(final PtNode node, final int childrenOffset) {
return (byte) makePtNodeFlags(node.mChars.length > 1, node.isTerminal(),
getByteSize(childrenOffset),
+ node.mShortcutTargets != null && !node.mShortcutTargets.isEmpty(), // hasShortcuts
node.mBigrams != null && !node.mBigrams.isEmpty(),
node.mIsNotAWord, node.mIsPossiblyOffensive);
}
@@ -558,7 +621,7 @@ public class BinaryDictEncoderUtils {
* @param word the second bigram, for debugging purposes
* @return the flags
*/
- /* package */ static int makeBigramFlags(final boolean more, final int offset,
+ /* package */ static final int makeBigramFlags(final boolean more, final int offset,
final int bigramFrequency, final int unigramFrequency, final String word) {
int bigramFlags = (more ? FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT : 0)
+ (offset < 0 ? FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE : 0);
@@ -627,7 +690,19 @@ public class BinaryDictEncoderUtils {
return discretizedFrequency > 0 ? discretizedFrequency : 0;
}
- /* package */ static int getChildrenPosition(final PtNode ptNode,
+    /**
+     * Builds the flags value for one shortcut attribute.
+     *
+     * @param more whether another attribute follows this one.
+     * @param frequency the frequency of the attribute, 0..15; it is masked to the frequency bits.
+     * @return the flags
+     */
+    static final int makeShortcutFlags(final boolean more, final int frequency) {
+        final int hasNextFlag = more ? FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT : 0;
+        return hasNextFlag + (frequency & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY);
+    }
+
+ /* package */ static final int getChildrenPosition(final PtNode ptNode,
final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
int positionOfChildrenPosField = ptNode.mCachedAddressAfterUpdate
+ getNodeHeaderSize(ptNode, codePointToOneByteCodeMap);