summary | refs | log | tree | commit | diff | stats
path: root/common
diff options
context:
space:
mode:
author Shimeng (Simon) Wang <swang@google.com> 2011-07-18 17:20:09 -0700
committer Shimeng (Simon) Wang <swang@google.com> 2011-07-18 17:20:09 -0700
commit 829b84ec6b4f5fac7eea8da1de3378ce47033838 (patch)
tree 59ef92576312f6303bcf0941070ec9055d990a59 /common
parent e8adc2935e128eec6cae87ebfb3959b5bc6ec219 (diff)
download android_frameworks_ex-829b84ec6b4f5fac7eea8da1de3378ce47033838.tar.gz
android_frameworks_ex-829b84ec6b4f5fac7eea8da1de3378ce47033838.tar.bz2
android_frameworks_ex-829b84ec6b4f5fac7eea8da1de3378ce47033838.zip
Enhance the URL Regex script to handle i18n TLDs.
Decode punycode and also put the Unicode scripts into the Regex matcher; this will match internationalized characters in top-level domains.

issue: 4384739
Change-Id: Ic1aac6e05509f00ef3a2c19a06cacda6e9cd8b42
Diffstat (limited to 'common')
-rwxr-xr-x common/tools/make-iana-tld-pattern.py 14
1 files changed, 10 insertions, 4 deletions
diff --git a/common/tools/make-iana-tld-pattern.py b/common/tools/make-iana-tld-pattern.py
index de81c58..d7cca93 100755
--- a/common/tools/make-iana-tld-pattern.py
+++ b/common/tools/make-iana-tld-pattern.py
@@ -5,9 +5,9 @@ from urllib2 import urlopen
TLD_PREFIX = r"""
/**
* Regular expression to match all IANA top-level domains.
- * List accurate as of 2010/02/05. List taken from:
+ * List accurate as of 2011/07/18. List taken from:
* http://data.iana.org/TLD/tlds-alpha-by-domain.txt
- * This pattern is auto-generated by frameworks/base/common/tools/make-iana-tld-pattern.py
+ * This pattern is auto-generated by frameworks/ex/common/tools/make-iana-tld-pattern.py
*/
public static final String TOP_LEVEL_DOMAIN_STR =
"""
@@ -16,9 +16,9 @@ TLD_SUFFIX = '";'
URL_PREFIX = r"""
/**
* Regular expression to match all IANA top-level domains for WEB_URL.
- * List accurate as of 2010/02/05. List taken from:
+ * List accurate as of 2011/07/18. List taken from:
* http://data.iana.org/TLD/tlds-alpha-by-domain.txt
- * This pattern is auto-generated by frameworks/base/common/tools/make-iana-tld-pattern.py
+ * This pattern is auto-generated by frameworks/ex/common/tools/make-iana-tld-pattern.py
*/
public static final String TOP_LEVEL_DOMAIN_STR_FOR_WEB_URL =
"(?:"
@@ -140,5 +140,11 @@ if __name__ == "__main__":
if len(domain) > 0:
getBucket(buckets, domain[0]).add(domain.strip())
+ if domain.startswith('xn--'):
+ puny = domain.strip()[4:]
+ result = puny.decode('punycode')
+ result = repr(result)
+ getBucket(buckets, 'xn--').add(result[2:-1])
+
makePattern(TLD_PREFIX, TLD_SUFFIX, buckets, isWebUrl=False)
makePattern(URL_PREFIX, URL_SUFFIX, buckets, isWebUrl=True)