summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorJean Chalard <jchalard@google.com>2014-10-21 17:31:00 +0900
committerJean Chalard <jchalard@google.com>2014-11-06 13:17:08 +0900
commitf6b0e32df38da4e2130bdbfc8875ea2d19054caf (patch)
treed1de790f9a9aa95be6898a236e8357d990719276 /tools
parent5564317f837164c67749423fa78f917ed2ae9e14 (diff)
downloadandroid_packages_inputmethods_LatinIME-f6b0e32df38da4e2130bdbfc8875ea2d19054caf.tar.gz
android_packages_inputmethods_LatinIME-f6b0e32df38da4e2130bdbfc8875ea2d19054caf.tar.bz2
android_packages_inputmethods_LatinIME-f6b0e32df38da4e2130bdbfc8875ea2d19054caf.zip
Add a *FAST* dictionary header reader.
It's still unused as of this change, but the next change will use it. As a reference point, generating the metadata for Bayo takes 3'02" on my machine with the info command; it's down to 16" if made to use this instead. The gain obviously increases with the number of dictionaries. Change-Id: I0eeea2d8f81bb74b0d1570af658e91b56f7c2b79
Diffstat (limited to 'tools')
-rw-r--r--tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java53
1 files changed, 53 insertions, 0 deletions
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
index 49a6e8e14..7894e17c4 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
@@ -19,6 +19,10 @@ package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
import com.android.inputmethod.latin.makedict.DictDecoder;
+import com.android.inputmethod.latin.makedict.DictionaryHeader;
+import com.android.inputmethod.latin.makedict.FormatSpec;
+import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions;
+import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
@@ -34,6 +38,8 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.HashMap;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
@@ -142,6 +148,53 @@ public final class BinaryDictOffdeviceUtils {
}
}
+    /**
+     * An {@link InputProcessor} that decodes only the header of a binary dictionary.
+     *
+     * It reads the 12-byte fixed part of the header (magic number, version, total header
+     * size), validates it, then reads the remaining header bytes and decodes them as the
+     * attribute map. Nothing past the header is read from the stream.
+     */
+    public static class HeaderReaderProcessor implements InputProcessor<DictionaryHeader> {
+        // Arbitrarily limit the header length to 32k. Sounds like it would never be larger
+        // than this. Revisit this if needed later.
+        private static final int MAX_HEADER_LENGTH = 32 * 1024;
+        // Size of the fixed part read up front; the offsets below index into it.
+        // See FormatSpec.java as well as byte_array_utils.h and
+        // BinaryDictEncoderUtils#writeDictionaryHeader().
+        private static final int FIXED_HEADER_LENGTH = 12;
+        private static final int MAGIC_NUMBER_START_OFFSET = 0;
+        private static final int VERSION_START_OFFSET = 4;
+        private static final int HEADER_SIZE_OFFSET = 8;
+
+        /**
+         * Reads and decodes the dictionary header found at the start of the stream.
+         *
+         * @param input the stream positioned at the first byte of the dictionary.
+         * @return the decoded header.
+         * @throws IOException if reading from the stream fails.
+         * @throws UnsupportedFormatException if the stream is too short, the magic number or
+         *         version is not a supported one, or the declared header size is out of range.
+         */
+        @Override @Nonnull
+        public DictionaryHeader process(final InputStream input) throws IOException,
+                UnsupportedFormatException {
+            // Do everything as curtly and ad-hoc as possible for performance.
+            final byte[] tmpBuffer = new byte[FIXED_HEADER_LENGTH];
+            if (!readFully(input, tmpBuffer)) {
+                throw new UnsupportedFormatException("File too short, not a dictionary");
+            }
+            // Ad-hoc check for the magic number.
+            final int magicNumber = readBigEndianInt(tmpBuffer, MAGIC_NUMBER_START_OFFSET);
+            if (magicNumber != FormatSpec.MAGIC_NUMBER) {
+                throw new UnsupportedFormatException("Wrong magic number");
+            }
+            final int version = ((tmpBuffer[VERSION_START_OFFSET] & 0xFF) << 8)
+                    + (tmpBuffer[VERSION_START_OFFSET + 1] & 0xFF);
+            if (version != FormatSpec.VERSION2 && version != FormatSpec.VERSION201) {
+                throw new UnsupportedFormatException("Only versions 2 and 201 are supported");
+            }
+            // The size is a big-endian 32-bit value, decoded the same way as the magic number.
+            final int totalHeaderSize = readBigEndianInt(tmpBuffer, HEADER_SIZE_OFFSET);
+            if (totalHeaderSize > MAX_HEADER_LENGTH) {
+                throw new UnsupportedFormatException("Header too large");
+            }
+            // The total size includes the fixed part already read. A smaller (or negative)
+            // value is corrupt and would otherwise yield a negative array length below.
+            if (totalHeaderSize < tmpBuffer.length) {
+                throw new UnsupportedFormatException("Header too small");
+            }
+            final byte[] headerBuffer = new byte[totalHeaderSize - tmpBuffer.length];
+            if (!readFully(input, headerBuffer)) {
+                throw new UnsupportedFormatException("File shorter than specified in the header");
+            }
+            final HashMap<String, String> attributes =
+                    BinaryDictDecoderUtils.decodeHeaderAttributes(headerBuffer);
+            return new DictionaryHeader(totalHeaderSize, new DictionaryOptions(attributes),
+                    new FormatOptions(version, false /* hasTimestamp */));
+        }
+
+        // Decodes the four bytes starting at offset as a big-endian 32-bit integer.
+        private static int readBigEndianInt(final byte[] buffer, final int offset) {
+            return ((buffer[offset] & 0xFF) << 24)
+                    + ((buffer[offset + 1] & 0xFF) << 16)
+                    + ((buffer[offset + 2] & 0xFF) << 8)
+                    + (buffer[offset + 3] & 0xFF);
+        }
+
+        // Fills buffer completely from input; returns false if the stream ends first.
+        // A single read() call is allowed to return fewer bytes than requested, hence the loop.
+        private static boolean readFully(final InputStream input, final byte[] buffer)
+                throws IOException {
+            int offset = 0;
+            while (offset < buffer.length) {
+                final int readBytes = input.read(buffer, offset, buffer.length - offset);
+                if (readBytes < 0) {
+                    return false;
+                }
+                offset += readBytes;
+            }
+            return true;
+        }
+    }
+
public static void copy(final InputStream input, final OutputStream output) throws IOException {
final byte[] buffer = new byte[COPY_BUFFER_SIZE];
for (int readBytes = input.read(buffer); readBytes >= 0; readBytes = input.read(buffer)) {