path: root/src/com/android/dialer/lookup/whitepages/
diff options
Diffstat (limited to 'src/com/android/dialer/lookup/whitepages/')
1 files changed, 353 insertions, 0 deletions
diff --git a/src/com/android/dialer/lookup/whitepages/WhitePagesApi.java b/src/com/android/dialer/lookup/whitepages/WhitePagesApi.java
new file mode 100644
index 000000000..5b266bf9c
--- /dev/null
+++ b/src/com/android/dialer/lookup/whitepages/WhitePagesApi.java
@@ -0,0 +1,353 @@
+/*
+ * Copyright (C) 2014 Xiao-Long Chen
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import android.content.Context;
+import android.net.Uri;
+import android.text.TextUtils;
+import android.util.Log;
+import org.apache.http.client.methods.HttpGet;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+public class WhitePagesApi {
+ /** Log tag: the simple name of this class. */
+ private static final String TAG = WhitePagesApi.class.getSimpleName();
+ // Integer country identifiers. They are not referenced by the code visible in
+ // this file; presumably they are for callers -- TODO confirm.
+ public static final int UNITED_STATES = 0;
+ public static final int CANADA = 1;
+ // NOTE(review): the three URL literals below are empty strings in this copy of
+ // the file. The lookups build their request URLs from them, so confirm the
+ // intended endpoints before relying on this code.
+ // Presumably the United States counterpart of NEARBY_URL_CANADA -- TODO confirm.
+ private static final String NEARBY_URL_UNITED_STATES =
+ "";
+ // Prefix to which reverseLookup() appends the phone number for the Canadian provider.
+ private static final String NEARBY_URL_CANADA =
+ "";
+ // Base URL that peopleLookup() parses and extends with the "who" query parameter.
+ private static final String PEOPLE_URL_UNITED_STATES =
+ "";
+ // Browser-style user agent string; not referenced by the code visible in this file.
+ private static final String USER_AGENT =
+ "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0";
+ // Regex that httpGet() runs against each response body to find a cookie value.
+ private static final String COOKIE_REGEX = "distil_RID=([A-Za-z0-9\\-]+)";
+ // Name of the cookie that httpGet() sends in the "Cookie" request header.
+ private static final String COOKIE = "D_UID";
+ // Cookie value sent with requests; null until httpGet() stores one. Static, so
+ // shared by every caller of this class.
+ private static String mCookie;
+ // Private so the class cannot be instantiated from outside; the methods
+ // visible in this file are all static.
+ private WhitePagesApi() {
+ }
+ /**
+  * Searches WhitePages for people matching a name.
+  *
+  * <p>The search only runs when the configured people-lookup provider equals
+  * {@code LookupSettings.PLP_WHITEPAGES}; for any other provider nothing is
+  * fetched.
+  *
+  * @param context used to read the people-lookup provider setting
+  * @param name value sent as the {@code who} query parameter
+  * @param maxResults maximum number of contacts to return
+  * @return the contacts parsed from the results page, or {@code null} when the
+  *         configured provider is not WhitePages
+  * @throws IOException if fetching a page fails
+  */
+ public static ContactInfo[] peopleLookup(Context context, String name,
+         int maxResults) throws IOException {
+     String provider = LookupSettings.getPeopleLookupProvider(context);
+     if (!LookupSettings.PLP_WHITEPAGES.equals(provider)) {
+         // no-op
+         return null;
+     }
+
+     // appendQueryParameter() takes care of encoding the name for the URL.
+     Uri.Builder builder = Uri.parse(PEOPLE_URL_UNITED_STATES).buildUpon();
+     builder.appendQueryParameter("who", name);
+     String lookupUrl = builder.build().toString();
+
+     String output = httpGet(lookupUrl);
+     return parseOutputUnitedStates(output, maxResults);
+ }
+ /**
+  * Builds contacts from the HTML of a United States people-search results page.
+  *
+  * <p>Every {@code <li>} element carrying the schema.org Person marker is one
+  * candidate. A candidate is skipped when its element cannot be extracted, when
+  * it contains {@code has-no-phone-icon}, when it has no name or no
+  * {@code href}, or when its profile page yields no phone number. The phone
+  * number and street address come from the profile page, so each accepted
+  * candidate costs one extra HTTP request.
+  *
+  * @param output HTML of the results page
+  * @param maxResults parsing stops once this many contacts were collected
+  * @return the collected contacts; possibly empty, never {@code null}
+  * @throws IOException if fetching a profile page fails
+  */
+ private static ContactInfo[] parseOutputUnitedStates(String output,
+         int maxResults) throws IOException {
+     ArrayList<ContactInfo> people = new ArrayList<ContactInfo>();
+
+     Pattern personPattern = Pattern.compile(
+             "<li\\s[^>]+?http:\\/\\/schema\\.org\\/Person", Pattern.DOTALL);
+     // Compiled once here instead of once per result entry.
+     Pattern hrefPattern = Pattern.compile("href=\"(.+?)\"");
+
+     Matcher m = personPattern.matcher(output);
+     while (m.find()) {
+         if (people.size() == maxResults) {
+             break;
+         }
+
+         // Find section of HTML with contact information
+         String section = extractXmlTag(output, m.start(), m.end(), "li");
+         if (section == null) {
+             // extractXmlTag() could not find the matching closing tag; skip
+             // this entry rather than fail on a null section below.
+             continue;
+         }
+
+         // Skip entries with no phone number
+         if (section.contains("has-no-phone-icon")) {
+             continue;
+         }
+
+         String name = LookupUtils.fromHtml(extractXmlRegex(section,
+                 "<span[^>]+?itemprop=\"name\">", "span"));
+         if (name == null) {
+             continue;
+         }
+
+         // Address
+         String addrCountry = LookupUtils.fromHtml(extractXmlRegex(section,
+                 "<span[^>]+?itemprop=\"addressCountry\">", "span"));
+         String addrState = LookupUtils.fromHtml(extractXmlRegex(section,
+                 "<span[^>]+?itemprop=\"addressRegion\">", "span"));
+         String addrCity = LookupUtils.fromHtml(extractXmlRegex(section,
+                 "<span[^>]+?itemprop=\"addressLocality\">", "span"));
+
+         // "City, State, Country", leaving out whichever parts are missing.
+         StringBuilder sb = new StringBuilder();
+         for (String part : new String[] {addrCity, addrState, addrCountry}) {
+             if (part == null) {
+                 continue;
+             }
+             if (sb.length() > 0) {
+                 sb.append(", ");
+             }
+             sb.append(part);
+         }
+
+         // Website: the first href inside the entry.
+         // NOTE(review): the prefix literal is empty in this copy of the file;
+         // confirm the site base URL that should precede the href.
+         Matcher m2 = hrefPattern.matcher(section);
+         String website = null;
+         if (m2.find()) {
+             website = "" + m2.group(1);
+         }
+
+         // Phone number is on profile page, so skip if we can't get the
+         // website
+         if (website == null) {
+             continue;
+         }
+
+         String profile = httpGet(website);
+         String phoneNumber = LookupUtils.fromHtml(extractXmlRegex(profile,
+                 "<li[^>]+?class=\"no-overflow tel\">", "li"));
+         String address = parseAddressUnitedStates(profile);
+
+         if (phoneNumber == null) {
+             Log.e(TAG, "Phone number is null. Either cookie is bad or regex is broken");
+             continue;
+         }
+
+         ContactInfo info = new ContactInfo();
+         info.name = name;
+         info.city = sb.toString();
+         info.address = address;
+         info.formattedNumber = phoneNumber;
+         info.website = website;
+         people.add(info);
+     }
+
+     return people.toArray(new ContactInfo[people.size()]);
+ }
+ /**
+  * Finds the first opening tag matched by {@code regex} and returns the whole
+  * element it starts, as extracted by {@code extractXmlTag}.
+  *
+  * @param str markup to search
+  * @param regex pattern describing the opening tag (compiled with DOTALL)
+  * @param tag name of the tag the pattern opens, e.g. {@code "span"}
+  * @return the element text, or {@code null} when the pattern does not match
+  *         or the element cannot be extracted
+  */
+ private static String extractXmlRegex(String str, String regex, String tag) {
+     Matcher opening = Pattern.compile(regex, Pattern.DOTALL).matcher(str);
+     return opening.find()
+             ? extractXmlTag(str, opening.start(), opening.end(), tag)
+             : null;
+ }
+ /**
+  * Extracts one element, nested same-name elements included, from markup.
+  *
+  * <p>Starting after {@code begin}, every later occurrence of the tag name is
+  * inspected: {@code </tag>} closes one nesting level, {@code <tag} followed by
+  * whitespace, {@code >} or {@code /} opens one. The element ends where the
+  * nesting level reaches zero. At most about 30 occurrences of the tag name
+  * are examined before giving up.
+  *
+  * @param str markup to search
+  * @param realBegin index where the returned substring starts (start of the
+  *        opening tag)
+  * @param begin index from which the search for further tags starts (end of
+  *        the opening tag)
+  * @param tag tag name, without angle brackets
+  * @return the text from {@code realBegin} through the {@code >} of the
+  *         matching closing tag, or {@code null} if no matching closing tag
+  *         was found
+  */
+ private static String extractXmlTag(String str, int realBegin, int begin,
+         String tag) {
+     int end = begin;
+     int tags = 1;
+     int maxLoop = 30;
+
+     while (tags > 0) {
+         end = str.indexOf(tag, end + 1);
+         if (end < 0 || maxLoop < 0) {
+             break;
+         }
+
+         int after = end + tag.length();
+         if (after >= str.length()) {
+             // The tag name is the last thing in the input, so no '>' can
+             // follow it. Stop here instead of indexing past the end.
+             break;
+         }
+
+         char before = str.charAt(end - 1);
+         char next = str.charAt(after);
+         if (before == '/' && next == '>') {
+             tags--;
+         } else if (before == '<'
+                 && (next == '>' || next == '/' || Character.isWhitespace(next))) {
+             // Require the tag name to end here so that, for example, "<link"
+             // is not counted as an opening "<li".
+             tags++;
+         }
+
+         maxLoop--;
+     }
+
+     if (tags != 0) {
+         Log.e(TAG, "Failed to extract tag <" + tag + "> from XML/HTML");
+         return null;
+     }
+
+     // tags reached zero, so "end" points at the name inside the closing tag.
+     int realEnd = str.indexOf(">", end) + 1;
+     return str.substring(realBegin, realEnd);
+ }
+ public static ContactInfo reverseLookup(Context context, String number)
+ throws IOException {
+ String provider = LookupSettings.getReverseLookupProvider(context);
+ String lookupUrl = null;
+ if (LookupSettings.RLP_WHITEPAGES.equals(provider)) {
+ } else if (LookupSettings.RLP_WHITEPAGES_CA.equals(provider)) {
+ lookupUrl = NEARBY_URL_CANADA;
+ }
+ String newLookupUrl = lookupUrl + number;
+ String output = httpGet(newLookupUrl);
+ //
+ String name = null;
+ String phoneNumber = null;
+ String address = null;
+ if (LookupSettings.RLP_WHITEPAGES.equals(provider)) {
+ name = parseNameUnitedStates(output);
+ phoneNumber = parseNumberUnitedStates(output);
+ address = parseAddressUnitedStates(output);
+ } else if (LookupSettings.RLP_WHITEPAGES_CA.equals(provider)) {
+ name = parseNameCanada(output);
+ // Canada's WhitePages does not provide a formatted number
+ address = parseAddressCanada(output);
+ }
+ ContactInfo info = new ContactInfo();
+ = name;
+ info.address = address;
+ info.formattedNumber = phoneNumber != null ? phoneNumber : number;
+ = lookupUrl + info.formattedNumber;
+ return info;
+ }
+ private static String httpGet(String url) throws IOException {
+ HttpGet get = new HttpGet(url);
+ if (mCookie != null) {
+ get.setHeader("Cookie", COOKIE + "=" + mCookie);
+ }
+ String output = LookupUtils.httpGet(get);
+ // If we can find a new cookie, use it
+ Pattern p = Pattern.compile(COOKIE_REGEX, Pattern.DOTALL);
+ Matcher m = p.matcher(output);
+ if (m.find()) {
+ mCookie =;
+ Log.v(TAG, "Got new cookie");
+ }
+ // If we hit a page with a <meta> refresh and the error URL, reload. If
+ // this results in infinite recursion, then whatever. The thread is
+ // killed after 10 seconds.
+ p = Pattern.compile("<meta[^>]+http-equiv=\"refresh\"", Pattern.DOTALL);
+ m = p.matcher(output);
+ if (m.find() && output.contains("distil_r_captcha.html")) {
+ Log.w(TAG, "Got <meta> refresh. Reloading...");
+ return httpGet(url);
+ }
+ return output;
+ }
+ /**
+  * Reads the contact name from a United States result page.
+  *
+  * <p>The "Send ...'s details to phone" heading is tried first and the
+  * subtitle span second. {@code &amp;} entities in the result are replaced
+  * by {@code &}.
+  *
+  * @param output HTML of the page
+  * @return the name, or {@code null} if neither pattern matched
+  */
+ private static String parseNameUnitedStates(String output) {
+     String result = LookupUtils.firstRegexResult(output,
+             "<h2.*?>Send (.*?)&#39;s details to phone</h2>", true);
+
+     if (result == null) {
+         // Use summary if name doesn't exist
+         result = LookupUtils.firstRegexResult(output,
+                 "<span\\s*class=\"subtitle.*?>\\s*\n?(.*?)\n?\\s*</span>", true);
+     }
+
+     return result == null ? null : result.replaceAll("&amp;", "&");
+ }
+ /**
+  * Reads the contact name from a Canadian result page: the first
+  * {@code listing_info} list item, passed through
+  * {@code LookupUtils.fromHtml}.
+  *
+  * @param output HTML of the page
+  * @return whatever {@code LookupUtils.fromHtml} returns for the matched item
+  */
+ private static String parseNameCanada(String output) {
+     return LookupUtils.fromHtml(LookupUtils.firstRegexResult(output,
+             "(<li\\s+class=\"listing_info\">.*?</li>)", true));
+ }
+ /**
+  * Reads the value that follows the "Full Number:" label on a United States
+  * result page. The value may consist of digits, {@code -}, {@code +} and
+  * parentheses.
+  *
+  * @param output HTML of the page
+  * @return the first match as reported by {@code LookupUtils.firstRegexResult}
+  */
+ private static String parseNumberUnitedStates(String output) {
+     String fullNumber = LookupUtils.firstRegexResult(output,
+             "Full Number:</span>([0-9\\-\\+\\(\\)]+)</li>", true);
+     return fullNumber;
+ }
+ /**
+  * Assembles a one-line address from a United States page.
+  *
+  * <p>The text of the spans whose class starts with {@code address-primary},
+  * {@code address-secondary} and {@code address-location} is joined, in that
+  * order, with {@code ", "}. Parts that are missing or empty are left out,
+  * and the separator is only written between two parts, so the result never
+  * starts with {@code ", "}.
+  *
+  * @param output HTML of the page
+  * @return the joined address, or {@code null} if no part was found
+  */
+ private static String parseAddressUnitedStates(String output) {
+     String regexBase = "<span\\s+class=\"%s[^\"]+\"\\s*>([^<]*)</span>";
+     String[] classPrefixes = {
+         "address-primary", "address-secondary", "address-location"
+     };
+
+     StringBuilder sb = new StringBuilder();
+     for (String classPrefix : classPrefixes) {
+         String part = LookupUtils.firstRegexResult(output,
+                 String.format(regexBase, classPrefix), true);
+         if (TextUtils.isEmpty(part)) {
+             continue;
+         }
+         // Separate from the previous part only when there is one.
+         if (sb.length() > 0) {
+             sb.append(", ");
+         }
+         sb.append(part);
+     }
+
+     return sb.length() == 0 ? null : sb.toString();
+ }
+ /**
+  * Reads the location from a Canadian result page: the {@code col_location}
+  * list item inside the people-result list, passed through
+  * {@code LookupUtils.fromHtml}, with line breaks turned into {@code ", "}.
+  *
+  * @param output HTML of the page
+  * @return the location text, or {@code null} if the pattern did not match
+  */
+ private static String parseAddressCanada(String output) {
+     String location = LookupUtils.firstRegexResult(output,
+             "<ol class=\"result people_result\">.*?(<li\\s+class=\"col_location\">.*?</li>)" +
+             ".*?</ol>", true);
+     if (location == null) {
+         return null;
+     }
+     return LookupUtils.fromHtml(location).replace("\n", ", ");
+ }
+ /**
+  * Plain holder for one lookup result. The fields have no access modifier,
+  * so they are package-private, and are filled in by the lookup methods of
+  * the enclosing class.
+  */
+ public static class ContactInfo {
+ // Name of the person or listing; may be null when it could not be parsed.
+ String name;
+ // Presumably the "City, State, Country" line built by the people search -- TODO confirm.
+ String city;
+ // Address text produced by one of the parseAddress* methods; may be null.
+ String address;
+ // Phone number parsed from the page; reverseLookup() falls back to the
+ // number it was asked about when none was parsed.
+ String formattedNumber;
+ // Presumably the URL of the page this result came from -- TODO confirm.
+ String website;
+ }