aboutsummaryrefslogtreecommitdiffstats
path: root/tools/java/org/unicode/cldr/test/CheckWidths.java
blob: fd3687cae967547efe5541295d9634e8d0a47b5a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
package org.unicode.cldr.test;

import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype;
import org.unicode.cldr.util.ApproximateWidth;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.Level;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.RegexLookup;
import org.unicode.cldr.util.SupplementalDataInfo;

public class CheckWidths extends CheckCLDR {
    // remember to add this class to the list in CheckCLDR.getCheckAll
    // to run just this test, on just locales starting with 'nl', use CheckCLDR with -fnl.* -t.*CheckWidths.*
    private static CoverageLevel2 coverageLevel;
    private Level requiredLevel;

    SupplementalDataInfo supplementalData;

    private static final double EM = ApproximateWidth.getWidth("月");

    private static final boolean DEBUG = true;

    private enum Measure {
        CODE_POINTS, DISPLAY_WIDTH
    }

    private enum LimitType {
        MINIMUM, MAXIMUM
    }

    private enum Special {
        NONE, QUOTES, PLACEHOLDERS, NUMBERSYMBOLS, NUMBERFORMAT
    }

    private static final Pattern PLACEHOLDER_PATTERN = PatternCache.get("\\{\\d\\}");

    private static class Limit {
        final double warningReference;
        final double errorReference;
        final LimitType limit;
        final Measure measure;
        final Special special;
        final String message;
        final Subtype subtype;
        final boolean debug;

        public Limit(double warningReference, double errorReference, Measure measure, LimitType limit, Special special, boolean debug) {
            this.debug = debug;
            this.warningReference = warningReference;
            this.errorReference = errorReference;
            this.limit = limit;
            this.measure = measure;
            this.special = special;
            switch (limit) {
            case MINIMUM:
                this.message = measure == Measure.CODE_POINTS
                    ? "Expected no fewer than {0} character(s), but was {1}."
                    : "Too narrow by about {2}% (with common fonts).";
                this.subtype = Subtype.valueTooNarrow;
                break;
            case MAXIMUM:
                this.message = measure == Measure.CODE_POINTS
                    ? "Expected no more than {0} character(s), but was {1}."
                    : "Too wide by about {2}% (with common fonts).";
                this.subtype = Subtype.valueTooWide;
                break;
            default:
                throw new IllegalArgumentException();
            }
        }

        public Limit(double d, double e, Measure displayWidth, LimitType maximum, Special placeholders) {
            this(d, e, displayWidth, maximum, placeholders, false);
        }

        boolean hasProblem(String value, List<CheckStatus> result, CheckCLDR cause, Boolean aliasedAndComprenehsive) {
            switch (special) {
            case NUMBERFORMAT:
                String[] values = value.split(";", 2);
                // If it's a number format with positive and negative subpatterns, just check the longer one.
                value = (values.length == 2 && values[1].length() > values[0].length()) ? values[1] : values[0];
                value = value.replace("'", "");
                break;
            case QUOTES:
                value = value.replace("'", "");
                break;
            case PLACEHOLDERS:
                value = PLACEHOLDER_PATTERN.matcher(value).replaceAll("");
                break;
            case NUMBERSYMBOLS:
                value = value.replaceAll("[\u200E\u200F\u061C]", ""); // don't include LRM/RLM/ALM when checking length of number symbols
                break;
            default:
            }
            double valueMeasure = measure == Measure.CODE_POINTS ? value.codePointCount(0, value.length()) : ApproximateWidth.getWidth(value);
            CheckStatus.Type errorType = CheckStatus.warningType;
            switch (limit) {
            case MINIMUM:
                if (valueMeasure >= warningReference) {
                    return false;
                }
                if (valueMeasure < errorReference && cause.getPhase() != Phase.BUILD && !aliasedAndComprenehsive) {
                    errorType = CheckStatus.errorType;
                }
                break;
            case MAXIMUM:
                if (valueMeasure <= warningReference) {
                    return false;
                }
                if (valueMeasure > errorReference && cause.getPhase() != Phase.BUILD && !aliasedAndComprenehsive) {
                    errorType = CheckStatus.errorType;
                }
                break;
            }
            // the 115 is so that we don't show small percentages
            // the /10 ...*10 is to round to multiples of 10% percent
            double percent = (int) (Math.abs(115 * valueMeasure / warningReference - 100.0d) / 10 + 0.49999d) * 10;
            result.add(new CheckStatus().setCause(cause)
                .setMainType(errorType)
                .setSubtype(subtype)
                .setMessage(message, warningReference, valueMeasure, percent));
            return true;
        }
    }

    // WARNING: errors must occur before warnings!!
    // we allow unusual units and English units to be a little longer
    static final String ALLOW_LONGER = "(area-acre" +
        "|area-square-foot" +
        "|area-square-mile" +
        "|length-foot" +
        "|length-inch" +
        "|length-mile" +
        "|length-light-year" +
        "|length-yard" +
        "|mass-ounce" +
        "|mass-pound" +
        "|power-horsepower" +
        "|pressure-inch-hg" +
        "|pressure-millimeter-of-mercury" +
        "|speed-mile-per-hour" +
        "|temperature-fahrenheit" +
        "|volume-cubic-mile" +
        "|acceleration-g-force" +
        "|speed-kilometer-per-hour" +
        "|speed-meter-per-second" +
        "|pressure-pound-per-square-inch" +
        ")";

    static final String ALLOW_LONGEST = "consumption-liter-per-100kilometers";

    static RegexLookup<Limit[]> lookup = new RegexLookup<Limit[]>()
        .setPatternTransform(RegexLookup.RegexFinderTransformPath)
        .addVariable("%A", "\"[^\"]+\"")
        .addVariable("%P", "\"[ap]m\"")
        .addVariable("%Q", "[^ap].*|[ap][^m].*") // Anything but am or pm
        .add("//ldml/delimiters/(quotation|alternateQuotation)", new Limit[] {
            new Limit(1, 1, Measure.CODE_POINTS, LimitType.MAXIMUM, Special.NONE)
        })

        // Numeric items should be no more than a single character

        .add("//ldml/numbers/symbols[@numberSystem=%A]/(decimal|group|minus|percent|perMille|plus)", new Limit[] {
            new Limit(1, 1, Measure.CODE_POINTS, LimitType.MAXIMUM, Special.NUMBERSYMBOLS)
        })

        // Now widths
        // The following are rough measures, just to check strange cases

        .add("//ldml/characters/ellipsis[@type=\"(final|initial|medial)\"]", new Limit[] {
            new Limit(2 * EM, 5 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
        })

        .add("//ldml/localeDisplayNames/localeDisplayPattern/", new Limit[] { // {0}: {1}, {0} ({1}), ,
            new Limit(2 * EM, 3 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
        })

        .add("//ldml/listPatterns/listPattern/listPatternPart[@type=%A]", new Limit[] { // {0} and {1}
            new Limit(5 * EM, 10 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
        })

        .add("//ldml/dates/timeZoneNames/fallbackFormat", new Limit[] { // {1} ({0})
            new Limit(2 * EM, 3 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
        })

        .add("//ldml/dates/timeZoneNames/(regionFormat|hourFormat)", new Limit[] { // {0} Time,
            // +HH:mm;-HH:mm
            new Limit(10 * EM, 20 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
        })

        .add("//ldml/dates/timeZoneNames/(gmtFormat|gmtZeroFormat)", new Limit[] { // GMT{0}, GMT
            new Limit(5 * EM, 10 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
        })

        // Era Abbreviations

        // Allow longer for Japanese calendar eras
        .add("//ldml/dates/calendars/calendar[@type=\"japanese\"]/.*/eraAbbr/era[@type=%A]", new Limit[] {
            new Limit(12 * EM, 16 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
        })
        // Allow longer for ROC calendar eras
        .add("//ldml/dates/calendars/calendar[@type=\"roc\"]/.*/eraAbbr/era[@type=%A]", new Limit[] {
            new Limit(4 * EM, 8 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
        })
        .add("//ldml/dates/calendars/calendar.*/eraAbbr/era[@type=%A]", new Limit[] {
            new Limit(3 * EM, 6 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
        })

        // am/pm abbreviated
        .add("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=%P]", new Limit[] {
            new Limit(4 * EM, 6 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
        })
        // other day periods abbreviated
        .add("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=%Q]", new Limit[] {
            new Limit(8 * EM, 12 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
        })
        // am/pm wide
        .add("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=%P]", new Limit[] {
            new Limit(5 * EM, 10 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
        })
        // other day periods wide
        .add("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=%Q]", new Limit[] {
            new Limit(10 * EM, 20 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
        })

        // Narrow items

        .add("//ldml/dates/calendars/calendar.*[@type=\"narrow\"](?!/cyclic|/dayPeriod|/monthPattern)", new Limit[] {
            new Limit(1.5 * EM, 2.25 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
        })
        // \"(?!am|pm)[^\"]+\"\\

        // Compact number formats

        .add("//ldml/numbers/decimalFormats[@numberSystem=%A]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=%A]/pattern[@type=\"1",
            new Limit[] {
                new Limit(4 * EM, 5 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NUMBERFORMAT)
            })
        // Catch -future/past Narrow units  and allow much wider values
        .add("//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"[^\"]+-(future|past)\"]/unitPattern", new Limit[] {
            new Limit(10 * EM, 15 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
        })
        // Catch widest units and allow a bit wider
        .add("//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"" + ALLOW_LONGEST + "\"]/unitPattern", new Limit[] {
            new Limit(5 * EM, 6 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
        })
        // Catch special units and allow a bit wider
        .add("//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"" + ALLOW_LONGER + "\"]/unitPattern", new Limit[] {
            new Limit(4 * EM, 5 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
        })
        // Narrow units
        .add("//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=%A]/unitPattern", new Limit[] {
            new Limit(3 * EM, 4 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
        })
        // Short units
        .add("//ldml/units/unitLength[@type=\"short\"]/unit[@type=%A]/unitPattern", new Limit[] {
            new Limit(5 * EM, 10 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
        })

        // Currency Symbols
        .add("//ldml/numbers/currencies/currency[@type=%A]/symbol", new Limit[] {
            new Limit(3 * EM, 5 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
        })

        // "grinning cat face with smiling eyes" should be normal max ~= 160 em
        // emoji names (not keywords)
        .add("//ldml/annotations/annotation[@cp=%A][@type=%A]", new Limit[] {
            new Limit(20 * EM, 100 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
        });

    static {
        System.out.println("EMs: " + ApproximateWidth.getWidth("grinning cat face with smiling eyes"));
    }

    Set<Limit> found = new LinkedHashSet<Limit>();

    @Override
    public CheckCLDR handleCheck(String path, String fullPath, String value, Options options, List<CheckStatus> result) {
        if (value == null) {
            return this; // skip
        }
        //        String testPrefix = "//ldml/units/unitLength[@type=\"narrow\"]";
        //        if (path.startsWith(testPrefix)) {
        //            int i = 0;
        //        }
        // Limits item0 =
        // lookup.get("//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000000000\"][@count=\"other\"]");
        // item0.check("123456789", result, this);

        Limit[] items = lookup.get(path);
        CLDRFile.Status status = new CLDRFile.Status();
        this.getCldrFileToCheck().getSourceLocaleID(path, status);
        // This was put in specifically to deal with the fact that we added a bunch of new units in CLDR 26
        // and didn't put the narrow forms of them into modern coverage.  If/when the narrow forms of all units
        // are modern coverage, then we can safely remove the aliasedAndComprehensive check.  Right now if an
        // item is aliased and coverage is comprehensive, then it can't generate anything worse than a warning.
        Boolean aliasedAndComprenehsive = (coverageLevel.getLevel(path).compareTo(Level.COMPREHENSIVE) == 0)
            && (status.pathWhereFound.compareTo(path) != 0);
        if (items != null) {
            for (Limit item : items) {
                if (item.hasProblem(value, result, this, aliasedAndComprenehsive)) {
                    if (DEBUG && !found.contains(item)) {
                        found.add(item);
                    }
                    break; // only one error per item
                }
            }
        }
        return this;
    }

    public CheckCLDR setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options,
        List<CheckStatus> possibleErrors) {
        final String localeID = cldrFileToCheck.getLocaleID();
        supplementalData = SupplementalDataInfo.getInstance(cldrFileToCheck.getSupplementalDirectory());
        coverageLevel = CoverageLevel2.getInstance(supplementalData, localeID);

        super.setCldrFileToCheck(cldrFileToCheck, options, possibleErrors);
        return this;
    }
}