summaryrefslogtreecommitdiffstats
path: root/src/com/android/dialer/lookup/whitepages/WhitePagesApi.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/com/android/dialer/lookup/whitepages/WhitePagesApi.java')
-rw-r--r--src/com/android/dialer/lookup/whitepages/WhitePagesApi.java353
1 files changed, 353 insertions, 0 deletions
diff --git a/src/com/android/dialer/lookup/whitepages/WhitePagesApi.java b/src/com/android/dialer/lookup/whitepages/WhitePagesApi.java
new file mode 100644
index 000000000..5b266bf9c
--- /dev/null
+++ b/src/com/android/dialer/lookup/whitepages/WhitePagesApi.java
@@ -0,0 +1,353 @@
+/*
+ * Copyright (C) 2014 Xiao-Long Chen <chenxiaolong@cxl.epac.to>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.dialer.lookup.whitepages;
+
+import android.content.Context;
+import android.net.Uri;
+import android.text.TextUtils;
+import android.util.Log;
+
+import com.android.dialer.lookup.LookupSettings;
+import com.android.dialer.lookup.LookupUtils;
+
+import org.apache.http.client.methods.HttpGet;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Static helpers that scrape the WhitePages web sites (United States and
+ * Canada) for people lookups and reverse phone number lookups.
+ *
+ * <p>All methods perform blocking network I/O and work by matching regular
+ * expressions against the returned HTML, so they silently yield {@code null}
+ * fields when the page layout does not match.
+ */
+public class WhitePagesApi {
+    private static final String TAG = WhitePagesApi.class.getSimpleName();
+
+    public static final int UNITED_STATES = 0;
+    public static final int CANADA = 1;
+
+    private static final String NEARBY_URL_UNITED_STATES =
+            "http://www.whitepages.com/search/ReversePhone?full_phone=";
+    private static final String NEARBY_URL_CANADA =
+            "http://www.whitepages.ca/search/ReversePhone?full_phone=";
+
+    private static final String PEOPLE_URL_UNITED_STATES =
+            "http://whitepages.com/search/FindPerson";
+
+    private static final String USER_AGENT =
+            "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0";
+    private static final String COOKIE_REGEX = "distil_RID=([A-Za-z0-9\\-]+)";
+    private static final String COOKIE = "D_UID";
+
+    /** Upper bound on how often a captcha/refresh page is reloaded per request. */
+    private static final int MAX_REFRESH_RELOADS = 5;
+
+    /** Separator placed between the parts of a city or street address. */
+    private static final String PART_SEPARATOR = ", ";
+
+    // Patterns are immutable and thread-safe, so compile them once instead of
+    // on every request / loop iteration.
+    private static final Pattern COOKIE_PATTERN =
+            Pattern.compile(COOKIE_REGEX, Pattern.DOTALL);
+    private static final Pattern META_REFRESH_PATTERN =
+            Pattern.compile("<meta[^>]+http-equiv=\"refresh\"", Pattern.DOTALL);
+    private static final Pattern PERSON_ITEM_PATTERN = Pattern.compile(
+            "<li\\s[^>]+?http:\\/\\/schema\\.org\\/Person", Pattern.DOTALL);
+    private static final Pattern HREF_PATTERN = Pattern.compile("href=\"(.+?)\"");
+
+    /** Last cookie value found in a response; sent back on later requests. */
+    private static String sCookie;
+
+    private WhitePagesApi() {
+    }
+
+    /**
+     * Searches WhitePages for people matching {@code name}.
+     *
+     * @param context used to read the configured people lookup provider
+     * @param name the name to search for
+     * @param maxResults stop after this many contacts have been collected
+     * @return the contacts found (possibly empty), or {@code null} when the
+     *         configured provider is not WhitePages
+     * @throws IOException if a request fails
+     */
+    public static ContactInfo[] peopleLookup(Context context, String name,
+            int maxResults) throws IOException {
+        String provider = LookupSettings.getPeopleLookupProvider(context);
+
+        if (!LookupSettings.PLP_WHITEPAGES.equals(provider)) {
+            // Not our provider: nothing to do.
+            return null;
+        }
+
+        String lookupUrl = Uri.parse(PEOPLE_URL_UNITED_STATES)
+                .buildUpon()
+                .appendQueryParameter("who", name)
+                .build()
+                .toString();
+        return parseOutputUnitedStates(httpGet(lookupUrl), maxResults);
+    }
+
+    /**
+     * Parses a US people search result page. For every listed person that has
+     * a phone number, the linked profile page is fetched to obtain the number
+     * and street address.
+     *
+     * @param output HTML of the search result page
+     * @param maxResults stop after this many contacts have been collected
+     * @return the contacts that could be fully resolved
+     * @throws IOException if fetching a profile page fails
+     */
+    private static ContactInfo[] parseOutputUnitedStates(String output,
+            int maxResults) throws IOException {
+        ArrayList<ContactInfo> people = new ArrayList<ContactInfo>();
+        Matcher m = PERSON_ITEM_PATTERN.matcher(output);
+
+        while (people.size() != maxResults && m.find()) {
+            // Find section of HTML with contact information
+            String section = extractXmlTag(output, m.start(), m.end(), "li");
+
+            // Skip entries whose <li> could not be extracted, and entries
+            // with no phone number
+            if (section == null || section.contains("has-no-phone-icon")) {
+                continue;
+            }
+
+            String name = extractItemprop(section, "name");
+            if (name == null) {
+                continue;
+            }
+
+            String city = joinNonEmpty(
+                    extractItemprop(section, "addressLocality"),
+                    extractItemprop(section, "addressRegion"),
+                    extractItemprop(section, "addressCountry"));
+
+            // Phone number is on profile page, so skip if we can't get the
+            // website
+            Matcher hrefMatcher = HREF_PATTERN.matcher(section);
+            if (!hrefMatcher.find()) {
+                continue;
+            }
+            String website = "http://www.whitepages.com" + hrefMatcher.group(1);
+
+            String profile = httpGet(website);
+            String phoneNumber = LookupUtils.fromHtml(extractXmlRegex(profile,
+                    "<li[^>]+?class=\"no-overflow tel\">", "li"));
+
+            if (phoneNumber == null) {
+                Log.e(TAG, "Phone number is null. Either cookie is bad or regex is broken");
+                continue;
+            }
+
+            ContactInfo info = new ContactInfo();
+            info.name = name;
+            info.city = city;
+            info.address = parseAddressUnitedStates(profile);
+            info.formattedNumber = phoneNumber;
+            info.website = website;
+
+            people.add(info);
+        }
+
+        return people.toArray(new ContactInfo[people.size()]);
+    }
+
+    /**
+     * Extracts the {@code <span>} carrying the given {@code itemprop} value
+     * from {@code section} and passes it through {@link LookupUtils#fromHtml}.
+     *
+     * @return the converted span, or {@code null} if no such span was found
+     */
+    private static String extractItemprop(String section, String itemprop) {
+        return LookupUtils.fromHtml(extractXmlRegex(section,
+                "<span[^>]+?itemprop=\"" + itemprop + "\">", "span"));
+    }
+
+    /**
+     * Joins all non-empty parts with {@code ", "}, never emitting a leading,
+     * trailing or doubled separator.
+     *
+     * @return the joined string; empty if every part is null or empty
+     */
+    private static String joinNonEmpty(String... parts) {
+        StringBuilder sb = new StringBuilder();
+        for (String part : parts) {
+            if (TextUtils.isEmpty(part)) {
+                continue;
+            }
+            if (sb.length() > 0) {
+                sb.append(PART_SEPARATOR);
+            }
+            sb.append(part);
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Finds the first match of {@code regex} (expected to match an opening
+     * tag) and returns the whole element, from the start of the match up to
+     * and including the balancing closing tag.
+     *
+     * @return the element's markup, or {@code null} if the regex does not
+     *         match or the element is not closed
+     */
+    private static String extractXmlRegex(String str, String regex, String tag) {
+        Matcher m = Pattern.compile(regex, Pattern.DOTALL).matcher(str);
+        if (!m.find()) {
+            return null;
+        }
+        return extractXmlTag(str, m.start(), m.end(), tag);
+    }
+
+    /**
+     * Returns the substring of {@code str} that starts at {@code realBegin}
+     * and ends with the {@code </tag>} that balances an already opened
+     * {@code <tag>}. Nested elements of the same name are accounted for.
+     *
+     * @param str the markup to search
+     * @param realBegin index where the returned substring starts
+     * @param begin index at which to start looking for nested/closing tags
+     *              (normally the end of the opening tag match)
+     * @param tag the element name, e.g. {@code "li"}
+     * @return the element's markup, or {@code null} if no balancing closing
+     *         tag exists
+     */
+    private static String extractXmlTag(String str, int realBegin, int begin,
+            String tag) {
+        String openPrefix = "<" + tag;
+        String closeTag = "</" + tag + ">";
+        int depth = 1;
+
+        // Only positions holding '<' can start a tag, so walk those instead of
+        // every textual occurrence of the tag name.
+        int lt = str.indexOf('<', begin);
+        while (lt >= 0) {
+            if (str.startsWith(closeTag, lt)) {
+                depth--;
+                if (depth == 0) {
+                    return str.substring(realBegin, lt + closeTag.length());
+                }
+            } else if (str.startsWith(openPrefix, lt)
+                    && isTagNameEnd(str, lt + openPrefix.length())) {
+                depth++;
+            }
+            lt = str.indexOf('<', lt + 1);
+        }
+
+        Log.e(TAG, "Failed to extract tag <" + tag + "> from XML/HTML");
+        return null;
+    }
+
+    /**
+     * Tells whether the character at {@code index} terminates a tag name, so
+     * that e.g. {@code <link>} is not mistaken for {@code <li>}.
+     */
+    private static boolean isTagNameEnd(String str, int index) {
+        if (index >= str.length()) {
+            return false;
+        }
+        char c = str.charAt(index);
+        return c == '>' || c == '/' || Character.isWhitespace(c);
+    }
+
+    /**
+     * Looks up the owner of a phone number on the US or Canadian WhitePages
+     * site, depending on the configured reverse lookup provider.
+     *
+     * @param context used to read the configured reverse lookup provider
+     * @param number the phone number to look up
+     * @return the scraped contact; {@code name} and {@code address} are
+     *         {@code null} when they could not be parsed
+     * @throws IOException if the request fails, or if the configured provider
+     *         is not one of the WhitePages providers
+     */
+    public static ContactInfo reverseLookup(Context context, String number)
+            throws IOException {
+        String provider = LookupSettings.getReverseLookupProvider(context);
+        boolean unitedStates = LookupSettings.RLP_WHITEPAGES.equals(provider);
+        boolean canada = LookupSettings.RLP_WHITEPAGES_CA.equals(provider);
+
+        String lookupUrl;
+        if (unitedStates) {
+            lookupUrl = NEARBY_URL_UNITED_STATES;
+        } else if (canada) {
+            lookupUrl = NEARBY_URL_CANADA;
+        } else {
+            // Without this guard the request would go to "null<number>".
+            throw new IOException("Unsupported reverse lookup provider: " + provider);
+        }
+
+        // The number ends up in a query string, where a raw '+' means space.
+        String output = httpGet(lookupUrl + Uri.encode(number));
+
+        String name;
+        String phoneNumber = null;
+        String address;
+
+        if (unitedStates) {
+            name = parseNameUnitedStates(output);
+            phoneNumber = parseNumberUnitedStates(output);
+            address = parseAddressUnitedStates(output);
+        } else {
+            name = parseNameCanada(output);
+            // Canada's WhitePages does not provide a formatted number
+            address = parseAddressCanada(output);
+        }
+
+        ContactInfo info = new ContactInfo();
+        info.name = name;
+        info.address = address;
+        info.formattedNumber = phoneNumber != null ? phoneNumber : number;
+        info.website = lookupUrl + Uri.encode(info.formattedNumber);
+
+        return info;
+    }
+
+    /**
+     * Fetches {@code url}, sending the remembered cookie (if any) and
+     * remembering any new cookie value found in the response body. A response
+     * that is a {@code <meta>} refresh pointing at the captcha page is
+     * reloaded, at most {@link #MAX_REFRESH_RELOADS} times.
+     *
+     * @return the response body
+     * @throws IOException if the request fails or the site keeps answering
+     *         with the refresh page
+     */
+    private static String httpGet(String url) throws IOException {
+        for (int reloads = 0; ; reloads++) {
+            HttpGet get = new HttpGet(url);
+
+            if (sCookie != null) {
+                get.setHeader("Cookie", COOKIE + "=" + sCookie);
+            }
+
+            String output = LookupUtils.httpGet(get);
+
+            // If we can find a new cookie, use it
+            Matcher cookieMatcher = COOKIE_PATTERN.matcher(output);
+            if (cookieMatcher.find()) {
+                sCookie = cookieMatcher.group(1).trim();
+                Log.v(TAG, "Got new cookie");
+            }
+
+            boolean refreshPage = META_REFRESH_PATTERN.matcher(output).find()
+                    && output.contains("distil_r_captcha.html");
+            if (!refreshPage) {
+                return output;
+            }
+
+            // Bounded on purpose: retrying forever would hammer the server
+            // and never produce a usable page.
+            if (reloads >= MAX_REFRESH_RELOADS) {
+                throw new IOException("Still got <meta> refresh after "
+                        + MAX_REFRESH_RELOADS + " reloads of " + url);
+            }
+            Log.w(TAG, "Got <meta> refresh. Reloading...");
+        }
+    }
+
+    /**
+     * Extracts the listing name from a US reverse lookup page, falling back
+     * to the subtitle text when the primary heading is absent.
+     *
+     * @return the name with {@code &amp;} unescaped, or {@code null}
+     */
+    private static String parseNameUnitedStates(String output) {
+        String name = LookupUtils.firstRegexResult(output,
+                "<h2.*?>Send (.*?)&#39;s details to phone</h2>", true);
+
+        // Use summary if name doesn't exist
+        if (name == null) {
+            name = LookupUtils.firstRegexResult(output,
+                    "<span\\s*class=\"subtitle.*?>\\s*\n?(.*?)\n?\\s*</span>", true);
+        }
+
+        if (name == null) {
+            return null;
+        }
+        return name.replace("&amp;", "&");
+    }
+
+    /**
+     * Extracts the {@code listing_info} list item from a Canadian reverse
+     * lookup page and converts it with {@link LookupUtils#fromHtml}.
+     */
+    private static String parseNameCanada(String output) {
+        String name = LookupUtils.firstRegexResult(output,
+                "(<li\\s+class=\"listing_info\">.*?</li>)", true);
+        return LookupUtils.fromHtml(name);
+    }
+
+    /** Extracts the text following "Full Number:" from a US page. */
+    private static String parseNumberUnitedStates(String output) {
+        return LookupUtils.firstRegexResult(output,
+                "Full Number:</span>([0-9\\-\\+\\(\\)]+)</li>", true);
+    }
+
+    /**
+     * Builds a one-line address from the {@code address-primary},
+     * {@code address-secondary} and {@code address-location} spans of a US
+     * page, joining whichever parts are present with {@code ", "}.
+     *
+     * @return the address, or {@code null} if none of the parts was found
+     */
+    private static String parseAddressUnitedStates(String output) {
+        String regexBase = "<span\\s+class=\"%s[^\"]+\"\\s*>([^<]*)</span>";
+
+        String addressPrimary = LookupUtils.firstRegexResult(output,
+                String.format(regexBase, "address-primary"), true);
+        String addressSecondary = LookupUtils.firstRegexResult(output,
+                String.format(regexBase, "address-secondary"), true);
+        String addressLocation = LookupUtils.firstRegexResult(output,
+                String.format(regexBase, "address-location"), true);
+
+        String address = joinNonEmpty(addressPrimary, addressSecondary,
+                addressLocation);
+        return address.length() == 0 ? null : address;
+    }
+
+    /**
+     * Extracts the {@code col_location} list item of the first people result
+     * on a Canadian page and flattens its lines into a comma separated
+     * string.
+     *
+     * @return the address, or {@code null} if it could not be found
+     */
+    private static String parseAddressCanada(String output) {
+        String address = LookupUtils.firstRegexResult(output,
+                "<ol class=\"result people_result\">.*?(<li\\s+class=\"col_location\">.*?</li>)" +
+                ".*?</ol>", true);
+
+        if (address == null) {
+            return null;
+        }
+
+        String text = LookupUtils.fromHtml(address);
+        return text == null ? null : text.replace("\n", PART_SEPARATOR);
+    }
+
+    /** Plain holder for the data scraped about one contact. */
+    public static class ContactInfo {
+        String name;
+        String city;
+        String address;
+        String formattedNumber;
+        String website;
+    }
+}