summaryrefslogtreecommitdiffstats
path: root/java/com/android/contacts/common/util/SearchUtil.java
blob: 314d565b28abceb226543ff1d80a491ef855b08c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.contacts.common.util;

import android.support.annotation.VisibleForTesting;

/** Methods related to search. */
public class SearchUtil {

  /**
   * Given a string with lines delimited with '\n', finds the matching line to the given substring.
   *
   * @param contents The string to search.
   * @param substring The substring to search for.
   * @return A MatchedLine object containing the matching line and the startIndex of the substring
   *     match within that line.
   */
  public static MatchedLine findMatchingLine(String contents, String substring) {
    final MatchedLine matched = new MatchedLine();

    // Snippet may contain multiple lines separated by "\n".
    // Locate the lines of the content that contain the substring.
    final int index = SearchUtil.contains(contents, substring);
    if (index != -1) {
      // Match found.  Find the corresponding line.
      int start = index - 1;
      while (start > -1) {
        if (contents.charAt(start) == '\n') {
          break;
        }
        start--;
      }
      int end = index + 1;
      while (end < contents.length()) {
        if (contents.charAt(end) == '\n') {
          break;
        }
        end++;
      }
      matched.line = contents.substring(start + 1, end);
      matched.startIndex = index - (start + 1);
    }
    return matched;
  }

  /**
   * Similar to String.contains() with two main differences:
   *
   * <p>1) Only searches token prefixes. A token is defined as any combination of letters or
   * numbers.
   *
   * <p>2) Returns the starting index where the substring is found.
   *
   * @param value The string to search.
   * @param substring The substring to look for.
   * @return The starting index where the substring is found. {@literal -1} if substring is not
   *     found in value.
   */
  @VisibleForTesting
  static int contains(String value, String substring) {
    if (value.length() < substring.length()) {
      return -1;
    }

    // i18n support
    // Generate the code points for the substring once.
    // There will be a maximum of substring.length code points.  But may be fewer.
    // Since the array length is not an accurate size, we need to keep a separate variable.
    final int[] substringCodePoints = new int[substring.length()];
    int substringLength = 0; // may not equal substring.length()!!
    for (int i = 0; i < substring.length(); ) {
      final int codePoint = Character.codePointAt(substring, i);
      substringCodePoints[substringLength] = codePoint;
      substringLength++;
      i += Character.charCount(codePoint);
    }

    for (int i = 0; i < value.length(); i = findNextTokenStart(value, i)) {
      int numMatch = 0;
      for (int j = i; j < value.length() && numMatch < substringLength; ++numMatch) {
        int valueCp = Character.toLowerCase(value.codePointAt(j));
        int substringCp = substringCodePoints[numMatch];
        if (valueCp != substringCp) {
          break;
        }
        j += Character.charCount(valueCp);
      }
      if (numMatch == substringLength) {
        return i;
      }
    }
    return -1;
  }

  /**
   * Find the start of the next token. A token is composed of letters and numbers. Any other
   * character are considered delimiters.
   *
   * @param line The string to search for the next token.
   * @param startIndex The index to start searching. 0 based indexing.
   * @return The index for the start of the next token. line.length() if next token not found.
   */
  @VisibleForTesting
  static int findNextTokenStart(String line, int startIndex) {
    int index = startIndex;

    // If already in token, eat remainder of token.
    while (index <= line.length()) {
      if (index == line.length()) {
        // No more tokens.
        return index;
      }
      final int codePoint = line.codePointAt(index);
      if (!Character.isLetterOrDigit(codePoint)) {
        break;
      }
      index += Character.charCount(codePoint);
    }

    // Out of token, eat all consecutive delimiters.
    while (index <= line.length()) {
      if (index == line.length()) {
        return index;
      }
      final int codePoint = line.codePointAt(index);
      if (Character.isLetterOrDigit(codePoint)) {
        break;
      }
      index += Character.charCount(codePoint);
    }

    return index;
  }

  /**
   * Anything other than letter and numbers are considered delimiters. Remove start and end
   * delimiters since they are not relevant to search.
   *
   * @param query The query string to clean.
   * @return The cleaned query. Empty string if all characters are cleaned out.
   */
  public static String cleanStartAndEndOfSearchQuery(String query) {
    int start = 0;
    while (start < query.length()) {
      int codePoint = query.codePointAt(start);
      if (Character.isLetterOrDigit(codePoint)) {
        break;
      }
      start += Character.charCount(codePoint);
    }

    if (start == query.length()) {
      // All characters are delimiters.
      return "";
    }

    int end = query.length() - 1;
    while (end > -1) {
      if (Character.isLowSurrogate(query.charAt(end))) {
        // Assume valid i18n string.  There should be a matching high surrogate before it.
        end--;
      }
      int codePoint = query.codePointAt(end);
      if (Character.isLetterOrDigit(codePoint)) {
        break;
      }
      end--;
    }

    // end is a letter or digit.
    return query.substring(start, end + 1);
  }

  public static class MatchedLine {

    public int startIndex = -1;
    public String line;

    @Override
    public String toString() {
      return "MatchedLine{" + "line='" + line + '\'' + ", startIndex=" + startIndex + '}';
    }
  }
}