diff options
Diffstat (limited to 'src/com/android/dialer/lookup/whitepages/WhitePagesApi.java')
-rw-r--r-- | src/com/android/dialer/lookup/whitepages/WhitePagesApi.java | 353 |
1 files changed, 353 insertions, 0 deletions
/*
 * Copyright (C) 2014 Xiao-Long Chen <chenxiaolong@cxl.epac.to>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.dialer.lookup.whitepages;

import android.content.Context;
import android.net.Uri;
import android.text.TextUtils;
import android.util.Log;

import com.android.dialer.lookup.LookupSettings;
import com.android.dialer.lookup.LookupUtils;

import org.apache.http.client.methods.HttpGet;

import java.io.IOException;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Static helpers that query the WhitePages web sites (US and Canada) and
 * scrape contact details out of the returned HTML.
 *
 * <p>Two entry points are offered: {@link #peopleLookup} (search by name) and
 * {@link #reverseLookup} (search by phone number). Both perform blocking HTTP
 * requests and therefore must not be called on the UI thread.
 *
 * <p>The class keeps one piece of shared state: the anti-bot cookie value
 * most recently seen in a response, which is replayed on later requests.
 */
public class WhitePagesApi {
    private static final String TAG = WhitePagesApi.class.getSimpleName();

    public static final int UNITED_STATES = 0;
    public static final int CANADA = 1;

    private static final String NEARBY_URL_UNITED_STATES =
            "http://www.whitepages.com/search/ReversePhone?full_phone=";
    private static final String NEARBY_URL_CANADA =
            "http://www.whitepages.ca/search/ReversePhone?full_phone=";

    private static final String PEOPLE_URL_UNITED_STATES =
            "http://whitepages.com/search/FindPerson";

    private static final String USER_AGENT =
            "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0";
    private static final String COOKIE_REGEX = "distil_RID=([A-Za-z0-9\\-]+)";
    private static final String COOKIE = "D_UID";

    /** Upper bound on reloads of a page that keeps answering with the captcha refresh. */
    private static final int MAX_REFRESH_RETRIES = 5;

    /** Budget of tag-name occurrences {@link #extractXmlTag} scans before giving up. */
    private static final int MAX_TAG_SCAN = 30;

    // Patterns that never change are compiled once instead of on every call.
    private static final Pattern COOKIE_PATTERN =
            Pattern.compile(COOKIE_REGEX, Pattern.DOTALL);
    private static final Pattern META_REFRESH_PATTERN =
            Pattern.compile("<meta[^>]+http-equiv=\"refresh\"", Pattern.DOTALL);
    private static final Pattern PERSON_ITEM_PATTERN = Pattern.compile(
            "<li\\s[^>]+?http:\\/\\/schema\\.org\\/Person", Pattern.DOTALL);
    private static final Pattern HREF_PATTERN = Pattern.compile("href=\"(.+?)\"");

    /** Cookie value captured from the most recent response that carried one. */
    private static String sCookie;

    private WhitePagesApi() {
    }

    /**
     * Searches WhitePages for people matching {@code name}.
     *
     * @param context    used to read the configured people lookup provider
     * @param name       the name to search for
     * @param maxResults stop once this many contacts have been collected
     * @return the contacts found (possibly empty), or {@code null} when the
     *         configured provider is not WhitePages
     * @throws IOException if an HTTP request fails
     */
    public static ContactInfo[] peopleLookup(Context context, String name,
            int maxResults) throws IOException {
        String provider = LookupSettings.getPeopleLookupProvider(context);

        if (!LookupSettings.PLP_WHITEPAGES.equals(provider)) {
            // Not our provider: nothing to do.
            return null;
        }

        String lookupUrl = Uri.parse(PEOPLE_URL_UNITED_STATES)
                .buildUpon()
                .appendQueryParameter("who", name)
                .build()
                .toString();
        return parseOutputUnitedStates(httpGet(lookupUrl), maxResults);
    }

    /**
     * Parses a US people-search result page into contacts. For every listed
     * person the profile page is fetched as well, because the phone number is
     * only available there.
     *
     * @param output     HTML of the search result page
     * @param maxResults stop once this many contacts have been collected
     * @return the contacts for which a name, a profile link and a phone
     *         number could all be extracted
     * @throws IOException if fetching a profile page fails
     */
    private static ContactInfo[] parseOutputUnitedStates(String output,
            int maxResults) throws IOException {
        ArrayList<ContactInfo> people = new ArrayList<ContactInfo>();
        Matcher m = PERSON_ITEM_PATTERN.matcher(output);

        while (m.find()) {
            if (people.size() == maxResults) {
                break;
            }

            // Find section of HTML with contact information
            String section = extractXmlTag(output, m.start(), m.end(), "li");
            if (section == null) {
                // Unbalanced markup; extractXmlTag already logged the failure.
                continue;
            }

            // Skip entries with no phone number
            if (section.contains("has-no-phone-icon")) {
                continue;
            }

            String name = htmlToText(extractXmlRegex(section,
                    "<span[^>]+?itemprop=\"name\">", "span"));
            if (name == null) {
                continue;
            }

            // Phone number is on profile page, so skip if we can't get the
            // website
            Matcher link = HREF_PATTERN.matcher(section);
            if (!link.find()) {
                continue;
            }
            String website = "http://www.whitepages.com" + link.group(1);

            String profile = httpGet(website);
            String phoneNumber = htmlToText(extractXmlRegex(profile,
                    "<li[^>]+?class=\"no-overflow tel\">", "li"));
            if (phoneNumber == null) {
                Log.e(TAG, "Phone number is null. Either cookie is bad or regex is broken");
                continue;
            }

            ContactInfo info = new ContactInfo();
            info.name = name;
            info.city = parseCityUnitedStates(section);
            info.address = parseAddressUnitedStates(profile);
            info.formattedNumber = phoneNumber;
            info.website = website;

            people.add(info);
        }

        return people.toArray(new ContactInfo[people.size()]);
    }

    /**
     * Builds a "city, state, country" line from the schema.org address spans
     * of one search result entry, leaving out the parts that are missing.
     */
    private static String parseCityUnitedStates(String section) {
        String addrCountry = htmlToText(extractXmlRegex(section,
                "<span[^>]+?itemprop=\"addressCountry\">", "span"));
        String addrState = htmlToText(extractXmlRegex(section,
                "<span[^>]+?itemprop=\"addressRegion\">", "span"));
        String addrCity = htmlToText(extractXmlRegex(section,
                "<span[^>]+?itemprop=\"addressLocality\">", "span"));

        StringBuilder sb = new StringBuilder();
        appendPart(sb, addrCity);
        appendPart(sb, addrState);
        appendPart(sb, addrCountry);
        return sb.toString();
    }

    /**
     * Appends {@code part} to {@code sb}, preceded by {@code ", "} only when
     * something has already been written. Null or empty parts are ignored.
     */
    private static void appendPart(StringBuilder sb, String part) {
        if (TextUtils.isEmpty(part)) {
            return;
        }
        if (sb.length() > 0) {
            sb.append(", ");
        }
        sb.append(part);
    }

    /**
     * Null-tolerant wrapper around {@code LookupUtils.fromHtml}.
     * NOTE(review): fromHtml presumably converts markup to plain text and may
     * already accept null - confirm against LookupUtils.
     */
    private static String htmlToText(String html) {
        return html == null ? null : LookupUtils.fromHtml(html);
    }

    /**
     * Finds the first match of {@code regex} (expected to match an opening
     * tag) and returns the whole element it opens.
     *
     * @return the element including its opening and closing tags, or
     *         {@code null} if the regex does not match or the element could
     *         not be delimited
     */
    private static String extractXmlRegex(String str, String regex, String tag) {
        Matcher m = Pattern.compile(regex, Pattern.DOTALL).matcher(str);
        if (!m.find()) {
            return null;
        }
        return extractXmlTag(str, m.start(), m.end(), tag);
    }

    /**
     * Extracts one element, including nested elements of the same name, by
     * counting opening and closing occurrences of {@code tag}.
     *
     * <p>Occurrences are found by plain substring search, so a longer tag
     * name that starts with {@code tag} directly after {@code <} is counted
     * as an opening tag too.
     *
     * @param str       the markup to scan
     * @param realBegin index where the returned substring starts
     * @param begin     index from which to search for the matching end tag
     *                  (callers pass the end of the opening tag)
     * @param tag       tag name, without angle brackets
     * @return the element text, or {@code null} when no balancing closing tag
     *         is found within the scan budget
     */
    private static String extractXmlTag(String str, int realBegin, int begin,
            String tag) {
        int end = begin;
        int tags = 1;
        int maxLoop = MAX_TAG_SCAN;

        while (tags > 0) {
            end = str.indexOf(tag, end + 1);
            if (end < 0 || maxLoop < 0) {
                break;
            }

            // Both neighbours are bounds-checked: the tag name may sit at the
            // very start or very end of the input.
            char before = end > 0 ? str.charAt(end - 1) : '\0';
            int after = end + tag.length();

            if (before == '/' && after < str.length() && str.charAt(after) == '>') {
                tags--;
            } else if (before == '<') {
                tags++;
            }

            maxLoop--;
        }

        if (tags != 0) {
            Log.e(TAG, "Failed to extract tag <" + tag + "> from XML/HTML");
            return null;
        }

        // tags reached zero, so "end" points at the name inside the closing
        // tag and a '>' is guaranteed to follow it.
        int realEnd = str.indexOf(">", end) + 1;
        return str.substring(realBegin, realEnd);
    }

    /**
     * Looks up the owner of a phone number on the WhitePages site selected by
     * the configured reverse lookup provider (US or Canada).
     *
     * @param context used to read the configured reverse lookup provider
     * @param number  the phone number, appended verbatim to the lookup URL
     * @return the scraped contact; {@code name} and {@code address} are
     *         {@code null} when they could not be found on the page
     * @throws IOException if the HTTP request fails, or if the configured
     *         provider is not one of the WhitePages providers
     */
    public static ContactInfo reverseLookup(Context context, String number)
            throws IOException {
        String provider = LookupSettings.getReverseLookupProvider(context);
        boolean unitedStates = LookupSettings.RLP_WHITEPAGES.equals(provider);
        boolean canada = LookupSettings.RLP_WHITEPAGES_CA.equals(provider);

        String lookupUrl;
        if (unitedStates) {
            lookupUrl = NEARBY_URL_UNITED_STATES;
        } else if (canada) {
            lookupUrl = NEARBY_URL_CANADA;
        } else {
            // Fail fast rather than requesting the bogus URL "null<number>".
            throw new IOException("Unsupported reverse lookup provider: " + provider);
        }

        String output = httpGet(lookupUrl + number);

        String name;
        String phoneNumber = null;
        String address;

        if (unitedStates) {
            name = parseNameUnitedStates(output);
            phoneNumber = parseNumberUnitedStates(output);
            address = parseAddressUnitedStates(output);
        } else {
            name = parseNameCanada(output);
            // Canada's WhitePages does not provide a formatted number
            address = parseAddressCanada(output);
        }

        ContactInfo info = new ContactInfo();
        info.name = name;
        info.address = address;
        info.formattedNumber = phoneNumber != null ? phoneNumber : number;
        info.website = lookupUrl + info.formattedNumber;

        return info;
    }

    /**
     * Fetches {@code url}, sending the stored cookie if there is one and
     * storing any new cookie value found in the response body.
     *
     * <p>A response that contains a {@code <meta>} refresh together with the
     * captcha error URL is requested again, at most
     * {@link #MAX_REFRESH_RETRIES} times, so that a permanently blocked
     * client cannot loop forever.
     *
     * @return the response body
     * @throws IOException if the request fails or the site keeps answering
     *         with the captcha redirect
     */
    private static String httpGet(String url) throws IOException {
        int retriesLeft = MAX_REFRESH_RETRIES;

        while (true) {
            HttpGet get = new HttpGet(url);
            if (sCookie != null) {
                get.setHeader("Cookie", COOKIE + "=" + sCookie);
            }

            String output = LookupUtils.httpGet(get);

            // If we can find a new cookie, use it
            Matcher cookie = COOKIE_PATTERN.matcher(output);
            if (cookie.find()) {
                sCookie = cookie.group(1).trim();
                Log.v(TAG, "Got new cookie");
            }

            boolean captchaRefresh = META_REFRESH_PATTERN.matcher(output).find()
                    && output.contains("distil_r_captcha.html");
            if (!captchaRefresh) {
                return output;
            }

            if (retriesLeft-- <= 0) {
                throw new IOException("WhitePages kept redirecting to the captcha page");
            }
            Log.w(TAG, "Got <meta> refresh. Reloading...");
        }
    }

    /**
     * Extracts the contact name from a US reverse lookup page, falling back
     * to the subtitle span when the "Send ... details" heading is absent.
     *
     * @return the name with {@code &amp;} entities decoded, or {@code null}
     */
    private static String parseNameUnitedStates(String output) {
        String name = LookupUtils.firstRegexResult(output,
                "<h2.*?>Send (.*?)'s details to phone</h2>", true);

        // Use summary if name doesn't exist
        if (name == null) {
            name = LookupUtils.firstRegexResult(output,
                    "<span\\s*class=\"subtitle.*?>\\s*\n?(.*?)\n?\\s*</span>", true);
        }

        if (name != null) {
            // Decode the entity; the previous replaceAll("&", "&") was a no-op.
            name = name.replace("&amp;", "&");
        }

        return name;
    }

    /**
     * Extracts the contact name from a Canadian reverse lookup page.
     *
     * @return the text of the {@code listing_info} list item, or {@code null}
     */
    private static String parseNameCanada(String output) {
        String name = LookupUtils.firstRegexResult(output,
                "(<li\\s+class=\"listing_info\">.*?</li>)", true);
        return htmlToText(name);
    }

    /**
     * Extracts the formatted phone number from a US reverse lookup page.
     *
     * @return the number as shown after "Full Number:", or {@code null}
     */
    private static String parseNumberUnitedStates(String output) {
        return LookupUtils.firstRegexResult(output,
                "Full Number:</span>([0-9\\-\\+\\(\\)]+)</li>", true);
    }

    /**
     * Joins the primary, secondary and location address spans of a US page
     * with {@code ", "}, skipping the parts that are missing.
     *
     * @return the combined address, or {@code null} if none of the parts
     *         was found
     */
    private static String parseAddressUnitedStates(String output) {
        String regexBase = "<span\\s+class=\"%s[^\"]+\"\\s*>([^<]*)</span>";

        String addressPrimary = LookupUtils.firstRegexResult(output,
                String.format(regexBase, "address-primary"), true);
        String addressSecondary = LookupUtils.firstRegexResult(output,
                String.format(regexBase, "address-secondary"), true);
        String addressLocation = LookupUtils.firstRegexResult(output,
                String.format(regexBase, "address-location"), true);

        // appendPart only inserts a separator between parts, so a missing
        // primary address no longer yields a leading ", ".
        StringBuilder sb = new StringBuilder();
        appendPart(sb, addressPrimary);
        appendPart(sb, addressSecondary);
        appendPart(sb, addressLocation);

        return sb.length() == 0 ? null : sb.toString();
    }

    /**
     * Extracts the address from a Canadian reverse lookup page, turning the
     * line breaks of the {@code col_location} item into {@code ", "}.
     *
     * @return the address, or {@code null} if the item was not found
     */
    private static String parseAddressCanada(String output) {
        String address = LookupUtils.firstRegexResult(output,
                "<ol class=\"result people_result\">.*?(<li\\s+class=\"col_location\">.*?</li>)"
                + ".*?</ol>", true);

        if (address != null) {
            address = LookupUtils.fromHtml(address).replace("\n", ", ");
        }

        return address;
    }

    /** Plain holder for the details scraped for a single contact. */
    public static class ContactInfo {
        /** Display name; may be null after a reverse lookup. */
        String name;
        /** "City, state, country" line; only filled in by people lookups. */
        String city;
        /** Street address, or null if none was found. */
        String address;
        /** Phone number as formatted by the site, or the queried number. */
        String formattedNumber;
        /** URL of the page the details were taken from. */
        String website;
    }
}