diff options
author | Fredrik Roubert <roubert@google.com> | 2018-05-07 13:50:38 +0200 |
---|---|---|
committer | Fredrik Roubert <roubert@google.com> | 2018-05-09 12:16:14 +0200 |
commit | 333a3d868716b770fa7dbada1f70e6cc2edaaa2c (patch) | |
tree | 38320dc4aada964ecf2fa2d16813b9db8cabab2f /tools/java/org/unicode/cldr/util/BNF.java | |
parent | 6ccd41d8c8152bf1fcb39b3e98ec5fbc096b911c (diff) | |
download | android_external_cldr-333a3d868716b770fa7dbada1f70e6cc2edaaa2c.tar.gz android_external_cldr-333a3d868716b770fa7dbada1f70e6cc2edaaa2c.tar.bz2 android_external_cldr-333a3d868716b770fa7dbada1f70e6cc2edaaa2c.zip |
Copy CLDR 32.0.1 from unicode.org to aosp/cldr-release-32-0-1.
These files were exported from the CLDR Subversion repository by running
the following commands:
svn export --force https://unicode.org/repos/cldr/tags/release-32-0-1 .
git clean -dfX
git add -A .
Bug: 79438058
Change-Id: I0b4a10f3a3f49b52ffe4bdb3c60cb1cc65de8cc1
Diffstat (limited to 'tools/java/org/unicode/cldr/util/BNF.java')
-rw-r--r-- | tools/java/org/unicode/cldr/util/BNF.java | 329 |
1 files changed, 329 insertions, 0 deletions
diff --git a/tools/java/org/unicode/cldr/util/BNF.java b/tools/java/org/unicode/cldr/util/BNF.java new file mode 100644 index 0000000..bbb2bf0 --- /dev/null +++ b/tools/java/org/unicode/cldr/util/BNF.java @@ -0,0 +1,329 @@ +/* + ******************************************************************************* + * Copyright (C) 2002-2012, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package org.unicode.cldr.util; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Random; +import java.util.Set; + +import com.ibm.icu.text.UnicodeSet; + +public class BNF { + private Map map = new HashMap(); + private Set variables = new HashSet(); + private Pick pick = null; + private Pick.Target target = null; + private Tokenizer t; + private Quoter quoter; + private Random random; + + public String next() { + return target.next(); + } + + public String getInternal() { + return pick.getInternal(0, new HashSet()); + } + + /* + + "weight = integer '%';" + + "range = '{' integer (',' integer?)? '}' weight*;" + + "quote = '@';" + + "star = '*' weight*;" + + "plus = '+' weight*;" + + "maybe = '?' weight?;" + + "quantifier = range | star | maybe | plus;" + + "core = string | unicodeSet | '(' alternation ')';" + + "sequence = (core quantifier*)+;" + + "alternation = sequence (weight? ('|' sequence weight?)+)?;" + + "rule = string '=' alternation;"; + + + * Match 0 or more times + + Match 1 or more times + ? Match 1 or 0 times + {n} Match exactly n times + {n,} Match at least n times + {n,m} Match at least n but not more than m times + + + + */ + + public BNF(Random random, Quoter quoter) { + this.random = random; + this.quoter = quoter; + t = new Tokenizer(); + } + + public BNF addRules(String rules) { + t.setSource(rules); + while (addRule()) { + } + return this; // for chaining + } + + public BNF complete() { + // check that the rules match the variables, except for $root in rules + Set ruleSet = map.keySet(); + // add also + variables.add("$root"); + variables.addAll(t.getLookedUpItems()); + if (!ruleSet.equals(variables)) { + String msg = showDiff(variables, ruleSet); + if (msg.length() != 0) msg = "Error: Missing definitions for: " + msg; + String temp = showDiff(ruleSet, variables); + if (temp.length() != 0) temp = "Warning: Defined but not used: " + temp; + if (msg.length() == 0) msg = temp; + else if (temp.length() != 0) { + msg = msg + "; " + temp; + } + error(msg); + } + + if (!ruleSet.equals(variables)) { + String msg = showDiff(variables, ruleSet); + if (msg.length() != 0) msg = "Missing definitions for: " + msg; + String temp = showDiff(ruleSet, variables); + if (temp.length() != 0) temp = "Defined but not used: " + temp; + if (msg.length() == 0) msg = temp; + else if (temp.length() != 0) { + msg = msg + "; " + temp; + } + error(msg); + } + + // replace variables by definitions + Iterator it = ruleSet.iterator(); + while (it.hasNext()) { + String key = (String) it.next(); + Pick expression = (Pick) map.get(key); + Iterator it2 = ruleSet.iterator(); + if (false && key.equals("$crlf")) { + System.out.println("debug") ; + } + while (it2.hasNext()) { + Object key2 = it2.next(); + if (key.equals(key2)) continue; + Pick expression2 = (Pick) map.get(key2); + expression2.replace(key, expression); + } + } + pick = (Pick) map.get("$root"); + target = Pick.Target.make(pick, random, quoter); + // TODO remove temp collections + return this; + } + + String showDiff(Set a, Set b) { + Set temp = new HashSet(); + temp.addAll(a); + temp.removeAll(b); + if (temp.size() == 0) return ""; + StringBuffer buffer = new StringBuffer(); + Iterator it = temp.iterator(); + while (it.hasNext()) { + if (buffer.length() != 0) buffer.append(", "); + buffer.append(it.next().toString()); + } + return buffer.toString(); + } + + void error(String msg) { + throw new IllegalArgumentException(msg + + "\r\n" + t.toString()); + } + + private boolean addRule() { + int type = t.next(); + if (type == Tokenizer.DONE) return false; + if (type != Tokenizer.STRING) error("missing weight"); + String s = t.getString(); + if (s.length() == 0 || s.charAt(0) != '$') error("missing $ in variable"); + if (t.next() != '=') error("missing ="); + int startBody = t.index; + Pick rule = getAlternation(); + if (rule == null) error("missing expression"); + t.addSymbol(s, t.getSource(), startBody, t.index); + if (t.next() != ';') error("missing ;"); + return addPick(s, rule); + } + + protected boolean addPick(String s, Pick rule) { + Object temp = map.get(s); + if (temp != null) error("duplicate variable"); + if (rule.name == null) rule.name(s); + map.put(s, rule); + return true; + } + + public BNF addSet(String variable, UnicodeSet set) { + if (set != null) { + String body = set.toString(); + t.addSymbol(variable, body, 0, body.length()); + addPick(variable, Pick.codePoint(set)); + } + return this; + } + + int maxRepeat = 99; + + Pick qualify(Pick item) { + int[] weights; + int type = t.next(); + switch(type) { + case '@': + return new Pick.Quote(item); + case '~': + return new Pick.Morph(item); + case '?': + int weight = getWeight(); + if (weight == NO_WEIGHT) weight = 50; + weights = new int[] {100-weight, weight}; + return Pick.repeat(0, 1, weights, item); + case '*': + weights = getWeights(); + return Pick.repeat(1, maxRepeat, weights, item); + case '+': + weights = getWeights(); + return Pick.repeat(1, maxRepeat, weights, item); + case '{': + if (t.next() != Tokenizer.NUMBER) error("missing number"); + int start = (int) t.getNumber(); + int end = start; + type = t.next(); + if (type == ',') { + end = maxRepeat; + type = t.next(); + if (type == Tokenizer.NUMBER) { + end = (int)t.getNumber(); + type = t.next(); + } + } + if (type != '}') error("missing }"); + weights = getWeights(); + return Pick.repeat(start, end, weights, item); + } + t.backup(); + return item; + } + + Pick getCore() { + int token = t.next(); + if (token == Tokenizer.STRING) { + String s = t.getString(); + if (s.charAt(0) == '$') variables.add(s); + return Pick.string(s); + } + if (token == Tokenizer.UNICODESET) { + return Pick.codePoint(t.getUnicodeSet()); + } + if (token != '(') { + t.backup(); + return null; + } + Pick temp = getAlternation(); + token = t.next(); + if (token != ')') error("missing )"); + return temp; + } + + Pick getSequence() { + Pick.Sequence result = null; + Pick last = null; + while (true) { + Pick item = getCore(); + if (item == null) { + if (result != null) return result; + if (last != null) return last; + error("missing item"); + } + // qualify it as many times as possible + Pick oldItem; + do { + oldItem = item; + item = qualify(item); + } while (item != oldItem); + // add it in + if (last == null) { + last = item; + } else { + if (result == null) result = Pick.makeSequence().and2(last); + result = result.and2(item); + } + } + } + + // for simplicity, we just use recursive descent + Pick getAlternation() { + Pick.Alternation result = null; + Pick last = null; + int lastWeight = NO_WEIGHT; + while (true) { + Pick temp = getSequence(); + if (temp == null) error("empty alternation"); + int weight = getWeight(); + if (weight == NO_WEIGHT) weight = 1; + if (last == null) { + last = temp; + lastWeight = weight; + } else { + if (result == null) result = Pick.makeAlternation().or2(lastWeight, last); + result = result.or2(weight, temp); + } + int token = t.next(); + if (token != '|') { + t.backup(); + if (result != null) return result; + if (last != null) return last; + } + } + } + + private static final int NO_WEIGHT = Integer.MIN_VALUE; + + int getWeight() { + int weight; + int token = t.next(); + if (token != Tokenizer.NUMBER) { + t.backup(); + return NO_WEIGHT; + } + weight = (int)t.getNumber(); + token = t.next(); + if (token != '%') error("missing %"); + return weight; + } + + int[] getWeights() { + ArrayList list = new ArrayList(); + while (true) { + int weight = getWeight(); + if (weight == NO_WEIGHT) break; + list.add(new Integer(weight)); + } + if (list.size() == 0) return null; + int[] result = new int[list.size()]; + for (int i = 0; i < list.size(); ++i) { + result[i] = ((Integer)list.get(i)).intValue(); + } + return result; + } + + public int getMaxRepeat() { + return maxRepeat; + } + + public BNF setMaxRepeat(int maxRepeat) { + this.maxRepeat = maxRepeat; + return this; + } +} |