summaryrefslogtreecommitdiffstats
path: root/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java
blob: 94d1ae8bb15d48a0ec1eb617e67377b31c4cff0a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
/**
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.android.inputmethod.latin.dicttool;

import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.WeightedString;
import com.android.inputmethod.latin.makedict.WordProperty;

import java.util.Arrays;
import java.util.ArrayList;
import java.util.HashMap;

/**
 * The "diff" command of dicttool: prints the differences between two dictionaries to stdout.
 *
 * <p>Both dictionaries are read with {@code BinaryDictOffdeviceUtils.getDictionary}. Without the
 * {@code -p} (plumbing) option the header attributes are compared first, and the word-level
 * comparison is skipped when the language parts of the two "locale" attributes differ. With
 * {@code -p} only the word-level comparison is printed.
 */
public class Diff extends Dicttool.Command {
    public static final String COMMAND = "diff";

    public Diff() {
    }

    /**
     * @return the usage text for this command.
     */
    @Override
    public String getHelp() {
        return COMMAND + " [-p] <dict> <dict> : shows differences between two dictionaries.\n"
                + "  If -p (plumbing) option is given, produce output suitable for a script";
    }

    /**
     * Runs the command on {@code mArgs}, which must be {@code [-p] <dict> <dict>}.
     *
     * @throws RuntimeException if there are fewer than two arguments, if -p is not followed by
     *         exactly two arguments, or if either dictionary cannot be read.
     */
    @Override
    public void run() {
        if (mArgs.length < 2) {
            throw new RuntimeException("Not enough arguments for command " + COMMAND);
        }
        final boolean plumbing = "-p".equals(mArgs[0]);
        if (plumbing) {
            // Drop the flag so the two dictionary names end up at indices 0 and 1.
            mArgs = Arrays.copyOfRange(mArgs, 1, mArgs.length);
            if (mArgs.length != 2) { // There should be only 2 arguments left
                throw new RuntimeException("Wrong number of arguments for command " + COMMAND);
            }
        }
        final FusionDictionary dict0 =
                BinaryDictOffdeviceUtils.getDictionary(mArgs[0], false /* report */);
        if (null == dict0) {
            throw new RuntimeException("Can't read dictionary " + mArgs[0]);
        }
        final FusionDictionary dict1 =
                BinaryDictOffdeviceUtils.getDictionary(mArgs[1], false /* report */);
        if (null == dict1) {
            throw new RuntimeException("Can't read dictionary " + mArgs[1]);
        }
        if (!plumbing) {
            System.out.println("Header :");
            diffHeaders(dict0, dict1);
            if (languageDiffers(dict0, dict1)) {
                // We only check for the language here. The rationale is that one may meaningfully
                // diff a en_US with a en_GB dictionary, but someone who diffs a de dict with a
                // pt_BR dict is almost certainly only interested in header-level diff, and the word
                // diff would be very large, meaningless, and annoying.
                return;
            }
            System.out.println("Body :");
        }
        diffWords(dict0, dict1);
    }

    /**
     * Tells whether the two dictionaries declare different languages.
     *
     * <p>The language is the part of the "locale" header attribute that precedes the first
     * underscore.
     *
     * @return true if both dictionaries have a "locale" attribute and the language parts are not
     *         equal; false otherwise, including when either dictionary has no locale.
     */
    private static boolean languageDiffers(final FusionDictionary dict0,
            final FusionDictionary dict1) {
        final String locale0 = dict0.mOptions.mAttributes.get("locale");
        final String locale1 = dict1.mOptions.mAttributes.get("locale");
        // If either of the dictionaries have no locale, assume it's okay
        if (null == locale0 || null == locale1) {
            return false;
        }
        final String language0 = locale0.split("_", 3)[0];
        final String language1 = locale1.split("_", 3)[0];
        return !language0.equals(language1);
    }

    /**
     * Prints the header attributes whose values are not the same in both dictionaries, or
     * "No differences" when every attribute matches.
     *
     * <p>Attributes that only exist in dict1 are printed with "null" as the dict0 value.
     */
    private static void diffHeaders(final FusionDictionary dict0, final FusionDictionary dict1) {
        boolean hasDifferences = false;
        // Keys of dict1 that have not been seen in dict0 yet; whatever is left after the first
        // loop only exists in dict1.
        final HashMap<String, String> options1 = new HashMap<>(dict1.mOptions.mAttributes);
        for (final String optionKey : dict0.mOptions.mAttributes.keySet()) {
            final String value0 = dict0.mOptions.mAttributes.get(optionKey);
            final String value1 = dict1.mOptions.mAttributes.get(optionKey);
            // Null-safe comparison: a null value must not make the whole diff crash.
            final boolean sameValue = (null == value0) ? (null == value1) : value0.equals(value1);
            if (!sameValue) {
                System.out.println("  " + optionKey + " : " + value0 + " <=> " + value1);
                hasDifferences = true;
            }
            options1.remove(optionKey);
        }
        for (final String optionKey : options1.keySet()) {
            System.out.println("  " + optionKey + " : null <=> " + options1.get(optionKey));
            hasDifferences = true;
        }
        if (!hasDifferences) {
            System.out.println("  No differences");
        }
    }

    /**
     * Prints the word-level differences between the two dictionaries.
     *
     * <p>Every word of dict0 is looked up in dict1: a missing word is reported as "Deleted",
     * otherwise probability, not-a-word flag, blacklist flag, bigrams and shortcuts are compared.
     * Then every word of dict1 that cannot be found in dict0 is reported as "Added". If nothing
     * differs, "No differences" is printed.
     */
    private static void diffWords(final FusionDictionary dict0, final FusionDictionary dict1) {
        boolean hasDifferences = false;
        for (final WordProperty word0Property : dict0) {
            final String word = word0Property.mWord;
            final PtNode word1PtNode = FusionDictionary.findWordInTree(dict1.mRootNodeArray, word);
            if (null == word1PtNode) {
                // This word is not in dict1
                System.out.println("Deleted: " + word + " " + word0Property.getProbability());
                hasDifferences = true;
                continue;
            }
            // We found the word. Compare frequencies, shortcuts, bigrams
            if (word0Property.getProbability() != word1PtNode.getProbability()) {
                System.out.println("Probability changed: " + word + " "
                        + word0Property.getProbability() + " -> "
                        + word1PtNode.getProbability());
                hasDifferences = true;
            }
            if (word0Property.mIsNotAWord != word1PtNode.getIsNotAWord()) {
                System.out.println("Not a word: " + word + " "
                        + word0Property.mIsNotAWord + " -> " + word1PtNode.getIsNotAWord());
                hasDifferences = true;
            }
            if (word0Property.mIsBlacklistEntry != word1PtNode.getIsBlacklistEntry()) {
                System.out.println("Blacklist: " + word + " "
                        + word0Property.mIsBlacklistEntry + " -> "
                        + word1PtNode.getIsBlacklistEntry());
                hasDifferences = true;
            }
            // Non-short-circuit |= on purpose: both comparisons must always run so that their
            // output is printed even when a difference has already been found.
            hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word,
                    "Bigram", word0Property.mBigrams, word1PtNode.getBigrams());
            hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word,
                    "Shortcut", word0Property.mShortcutTargets,
                    word1PtNode.getShortcutTargets());
        }
        for (final WordProperty word1Property : dict1) {
            final PtNode word0PtNode = FusionDictionary.findWordInTree(dict0.mRootNodeArray,
                    word1Property.mWord);
            if (null == word0PtNode) {
                // This word is not in dict0
                System.out.println("Added: " + word1Property.mWord + " "
                        + word1Property.getProbability());
                hasDifferences = true;
            }
        }
        if (!hasDifferences) {
            System.out.println("  No differences");
        }
    }

    /**
     * Compares two attribute lists (bigrams or shortcuts) of a word and prints one line for each
     * attribute that was removed, added, or whose frequency changed.
     *
     * <p>A null list is treated like an empty list. Neither argument list is modified.
     *
     * @param word the word the attributes belong to; only used in the output.
     * @param type the label used as a prefix in the output, e.g. "Bigram" or "Shortcut".
     * @param list0 the attributes in the first dictionary, may be null.
     * @param list1 the attributes in the second dictionary, may be null.
     * @return true if at least one difference was printed, false otherwise.
     */
    private static boolean hasAttributesDifferencesAndPrintThemIfAny(final String word,
            final String type, final ArrayList<WeightedString> list0,
            final ArrayList<WeightedString> list1) {
        // Work on a private copy: matched entries get removed from it below, and the caller's
        // list must stay intact.
        final ArrayList<WeightedString> unmatched1 = new ArrayList<>();
        if (null != list1) {
            unmatched1.addAll(list1);
        }
        boolean hasDifferences = false;
        if (null != list0) {
            for (final WeightedString attribute0 : list0) {
                // The following tests with #equals(). The WeightedString#equals() method returns
                // true if both the string and the frequency are the same.
                if (unmatched1.remove(attribute0)) {
                    // Identical attribute on both sides, nothing to report.
                    continue;
                }
                hasDifferences = true;
                // Search for a word with the same string but a different frequency
                WeightedString sameString = null;
                for (final WeightedString attribute1 : unmatched1) {
                    if (attribute0.mWord.equals(attribute1.mWord)) {
                        sameString = attribute1;
                        break;
                    }
                }
                if (null != sameString) {
                    System.out.println(type + " freq changed: " + word + " "
                            + attribute0.mWord + " " + attribute0.getProbability() + " -> "
                            + sameString.getProbability());
                    unmatched1.remove(sameString);
                } else {
                    // We come here if we haven't found any matching string.
                    System.out.println(type + " removed: " + word + " " + attribute0.mWord + " "
                            + attribute0.getProbability());
                }
            }
        }
        // We removed any matching word that we found, so now unmatched1 only contains words that
        // are not included in list0.
        for (final WeightedString attribute1 : unmatched1) {
            hasDifferences = true;
            System.out.println(type + " added: " + word + " " + attribute1.mWord + " "
                    + attribute1.getProbability());
        }
        return hasDifferences;
    }
}