diff options
author | Shimeng (Simon) Wang <swang@google.com> | 2011-07-18 17:20:09 -0700 |
---|---|---|
committer | Shimeng (Simon) Wang <swang@google.com> | 2011-07-18 17:20:09 -0700 |
commit | 829b84ec6b4f5fac7eea8da1de3378ce47033838 (patch) | |
tree | 59ef92576312f6303bcf0941070ec9055d990a59 /common | |
parent | e8adc2935e128eec6cae87ebfb3959b5bc6ec219 (diff) | |
download | android_frameworks_ex-829b84ec6b4f5fac7eea8da1de3378ce47033838.tar.gz android_frameworks_ex-829b84ec6b4f5fac7eea8da1de3378ce47033838.tar.bz2 android_frameworks_ex-829b84ec6b4f5fac7eea8da1de3378ce47033838.zip |
Enhance the URL Regex script to handle i18n TLDs.
Decode the punycode TLDs and also add their decoded Unicode forms to the
regex matcher, so that internationalized characters in top-level domains are matched.
issue: 4384739
Change-Id: Ic1aac6e05509f00ef3a2c19a06cacda6e9cd8b42
Diffstat (limited to 'common')
-rwxr-xr-x | common/tools/make-iana-tld-pattern.py | 14 |
1 files changed, 10 insertions, 4 deletions
diff --git a/common/tools/make-iana-tld-pattern.py b/common/tools/make-iana-tld-pattern.py
index de81c58..d7cca93 100755
--- a/common/tools/make-iana-tld-pattern.py
+++ b/common/tools/make-iana-tld-pattern.py
@@ -5,9 +5,9 @@ from urllib2 import urlopen
 TLD_PREFIX = r"""
     /**
      * Regular expression to match all IANA top-level domains.
-     * List accurate as of 2010/02/05. List taken from:
+     * List accurate as of 2011/07/18. List taken from:
      * http://data.iana.org/TLD/tlds-alpha-by-domain.txt
-     * This pattern is auto-generated by frameworks/base/common/tools/make-iana-tld-pattern.py
+     * This pattern is auto-generated by frameworks/ex/common/tools/make-iana-tld-pattern.py
      */
     public static final String TOP_LEVEL_DOMAIN_STR =
 """
@@ -16,9 +16,9 @@ TLD_SUFFIX = '";'
 URL_PREFIX = r"""
     /**
      * Regular expression to match all IANA top-level domains for WEB_URL.
-     * List accurate as of 2010/02/05. List taken from:
+     * List accurate as of 2011/07/18. List taken from:
      * http://data.iana.org/TLD/tlds-alpha-by-domain.txt
-     * This pattern is auto-generated by frameworks/base/common/tools/make-iana-tld-pattern.py
+     * This pattern is auto-generated by frameworks/ex/common/tools/make-iana-tld-pattern.py
      */
     public static final String TOP_LEVEL_DOMAIN_STR_FOR_WEB_URL =
         "(?:"
@@ -140,5 +140,11 @@ if __name__ == "__main__":
         if len(domain) > 0:
             getBucket(buckets, domain[0]).add(domain.strip())
 
+        if domain.startswith('xn--'):
+            puny = domain.strip()[4:]
+            result = puny.decode('punycode')
+            result = repr(result)
+            getBucket(buckets, 'xn--').add(result[2:-1])
+
     makePattern(TLD_PREFIX, TLD_SUFFIX, buckets, isWebUrl=False)
     makePattern(URL_PREFIX, URL_SUFFIX, buckets, isWebUrl=True)