diff options
Diffstat (limited to 'guava-gwt/src-super/com/google/common/base/super/com/google/common/base/CharMatcher.java')
-rw-r--r-- | guava-gwt/src-super/com/google/common/base/super/com/google/common/base/CharMatcher.java | 1244 |
1 files changed, 0 insertions, 1244 deletions
diff --git a/guava-gwt/src-super/com/google/common/base/super/com/google/common/base/CharMatcher.java b/guava-gwt/src-super/com/google/common/base/super/com/google/common/base/CharMatcher.java deleted file mode 100644 index ed30766..0000000 --- a/guava-gwt/src-super/com/google/common/base/super/com/google/common/base/CharMatcher.java +++ /dev/null @@ -1,1244 +0,0 @@ -/* - * Copyright (C) 2008 The Guava Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.google.common.base; - -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkNotNull; - -import com.google.common.annotations.Beta; -import com.google.common.annotations.GwtCompatible; - -import java.util.Arrays; - -import javax.annotation.CheckReturnValue; - -/** - * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does - * for any {@link Object}. Also offers basic text processing methods based on this function. - * Implementations are strongly encouraged to be side-effect-free and immutable. - * - * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean - * "any character {@code c} for which {@code this.matches(c)} returns {@code true}". - * - * <p><b>Note:</b> This class deals only with {@code char} values; it does not understand - * supplementary Unicode code points in the range {@code 0x10000} to {@code 0x10FFFF}. Such logical - * characters are encoded into a {@code String} using surrogate pairs, and a {@code CharMatcher} - * treats these just as two separate characters. - * - * <p>Example usages: <pre> - * String trimmed = {@link #WHITESPACE WHITESPACE}.{@link #trimFrom trimFrom}(userInput); - * if ({@link #ASCII ASCII}.{@link #matchesAllOf matchesAllOf}(s)) { ... }</pre> - * - * <p>See the Guava User Guide article on <a href= - * "http://code.google.com/p/guava-libraries/wiki/StringsExplained#CharMatcher"> - * {@code CharMatcher}</a>. - * - * @author Kevin Bourrillion - * @since 1.0 - */ -@Beta // Possibly change from chars to code points; decide constants vs. methods -@GwtCompatible(emulated = true) -public abstract class CharMatcher implements Predicate<Character> { - // Constants - /** - * Determines whether a character is a breaking whitespace (that is, a whitespace which can be - * interpreted as a break between words for formatting purposes). See {@link #WHITESPACE} for a - * discussion of that term. - * - * @since 2.0 - */ - public static final CharMatcher BREAKING_WHITESPACE = new CharMatcher() { - @Override - public boolean matches(char c) { - switch (c) { - case '\t': - case '\n': - case '\013': - case '\f': - case '\r': - case ' ': - case '\u0085': - case '\u1680': - case '\u2028': - case '\u2029': - case '\u205f': - case '\u3000': - return true; - case '\u2007': - return false; - default: - return c >= '\u2000' && c <= '\u200a'; - } - } - - @Override - public String toString() { - return "CharMatcher.BREAKING_WHITESPACE"; - } - }; - - /** - * Determines whether a character is ASCII, meaning that its code point is less than 128. - */ - public static final CharMatcher ASCII = inRange('\0', '\u007f', "CharMatcher.ASCII"); - - private static class RangesMatcher extends CharMatcher { - private final char[] rangeStarts; - private final char[] rangeEnds; - - RangesMatcher(String description, char[] rangeStarts, char[] rangeEnds) { - super(description); - this.rangeStarts = rangeStarts; - this.rangeEnds = rangeEnds; - checkArgument(rangeStarts.length == rangeEnds.length); - for (int i = 0; i < rangeStarts.length; i++) { - checkArgument(rangeStarts[i] <= rangeEnds[i]); - if (i + 1 < rangeStarts.length) { - checkArgument(rangeEnds[i] < rangeStarts[i + 1]); - } - } - } - - @Override - public boolean matches(char c) { - int index = Arrays.binarySearch(rangeStarts, c); - if (index >= 0) { - return true; - } else { - index = ~index - 1; - return index >= 0 && c <= rangeEnds[index]; - } - } - } - - // Must be in ascending order. - private static final String ZEROES = "0\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6" - + "\u0c66\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946\u19d0\u1b50\u1bb0" - + "\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10"; - - private static final String NINES; - static { - StringBuilder builder = new StringBuilder(ZEROES.length()); - for (int i = 0; i < ZEROES.length(); i++) { - builder.append((char) (ZEROES.charAt(i) + 9)); - } - NINES = builder.toString(); - } - - /** - * Determines whether a character is a digit according to - * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>. - */ - public static final CharMatcher DIGIT = new RangesMatcher( - "CharMatcher.DIGIT", ZEROES.toCharArray(), NINES.toCharArray()); - - /** - * Determines whether a character is a digit according to {@link Character#isDigit(char) Java's - * definition}. If you only care to match ASCII digits, you can use {@code inRange('0', '9')}. - */ - public static final CharMatcher JAVA_DIGIT = new CharMatcher("CharMatcher.JAVA_DIGIT") { - @Override public boolean matches(char c) { - return Character.isDigit(c); - } - }; - - /** - * Determines whether a character is a letter according to {@link Character#isLetter(char) Java's - * definition}. If you only care to match letters of the Latin alphabet, you can use {@code - * inRange('a', 'z').or(inRange('A', 'Z'))}. - */ - public static final CharMatcher JAVA_LETTER = new CharMatcher("CharMatcher.JAVA_LETTER") { - @Override public boolean matches(char c) { - return Character.isLetter(c); - } - }; - - /** - * Determines whether a character is a letter or digit according to {@link - * Character#isLetterOrDigit(char) Java's definition}. - */ - public static final CharMatcher JAVA_LETTER_OR_DIGIT = - new CharMatcher("CharMatcher.JAVA_LETTER_OR_DIGIT") { - @Override public boolean matches(char c) { - return Character.isLetterOrDigit(c); - } - }; - - /** - * Determines whether a character is upper case according to {@link Character#isUpperCase(char) - * Java's definition}. - */ - public static final CharMatcher JAVA_UPPER_CASE = - new CharMatcher("CharMatcher.JAVA_UPPER_CASE") { - @Override public boolean matches(char c) { - return Character.isUpperCase(c); - } - }; - - /** - * Determines whether a character is lower case according to {@link Character#isLowerCase(char) - * Java's definition}. - */ - public static final CharMatcher JAVA_LOWER_CASE = - new CharMatcher("CharMatcher.JAVA_LOWER_CASE") { - @Override public boolean matches(char c) { - return Character.isLowerCase(c); - } - }; - - /** - * Determines whether a character is an ISO control character as specified by {@link - * Character#isISOControl(char)}. - */ - public static final CharMatcher JAVA_ISO_CONTROL = - inRange('\u0000', '\u001f') - .or(inRange('\u007f', '\u009f')) - .withToString("CharMatcher.JAVA_ISO_CONTROL"); - - /** - * Determines whether a character is invisible; that is, if its Unicode category is any of - * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and - * PRIVATE_USE according to ICU4J. - */ - public static final CharMatcher INVISIBLE = new RangesMatcher("CharMatcher.INVISIBLE", ( - "\u0000\u007f\u00ad\u0600\u06dd\u070f\u1680\u180e\u2000\u2028\u205f\u206a\u3000\ud800\ufeff" - + "\ufff9\ufffa").toCharArray(), ( - "\u0020\u00a0\u00ad\u0604\u06dd\u070f\u1680\u180e\u200f\u202f\u2064\u206f\u3000\uf8ff\ufeff" - + "\ufff9\ufffb").toCharArray()); - - private static String showCharacter(char c) { - String hex = "0123456789ABCDEF"; - char[] tmp = {'\\', 'u', '\0', '\0', '\0', '\0'}; - for (int i = 0; i < 4; i++) { - tmp[5 - i] = hex.charAt(c & 0xF); - c >>= 4; - } - return String.copyValueOf(tmp); - - } - - /** - * Determines whether a character is single-width (not double-width). When in doubt, this matcher - * errs on the side of returning {@code false} (that is, it tends to assume a character is - * double-width). - * - * <p><b>Note:</b> as the reference file evolves, we will modify this constant to keep it up to - * date. - */ - public static final CharMatcher SINGLE_WIDTH = new RangesMatcher("CharMatcher.SINGLE_WIDTH", - "\u0000\u05be\u05d0\u05f3\u0600\u0750\u0e00\u1e00\u2100\ufb50\ufe70\uff61".toCharArray(), - "\u04f9\u05be\u05ea\u05f4\u06ff\u077f\u0e7f\u20af\u213a\ufdff\ufeff\uffdc".toCharArray()); - - /** Matches any character. */ - public static final CharMatcher ANY = - new FastMatcher("CharMatcher.ANY") { - @Override public boolean matches(char c) { - return true; - } - - @Override public int indexIn(CharSequence sequence) { - return (sequence.length() == 0) ? -1 : 0; - } - - @Override public int indexIn(CharSequence sequence, int start) { - int length = sequence.length(); - Preconditions.checkPositionIndex(start, length); - return (start == length) ? -1 : start; - } - - @Override public int lastIndexIn(CharSequence sequence) { - return sequence.length() - 1; - } - - @Override public boolean matchesAllOf(CharSequence sequence) { - checkNotNull(sequence); - return true; - } - - @Override public boolean matchesNoneOf(CharSequence sequence) { - return sequence.length() == 0; - } - - @Override public String removeFrom(CharSequence sequence) { - checkNotNull(sequence); - return ""; - } - - @Override public String replaceFrom(CharSequence sequence, char replacement) { - char[] array = new char[sequence.length()]; - Arrays.fill(array, replacement); - return new String(array); - } - - @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) { - StringBuilder retval = new StringBuilder(sequence.length() * replacement.length()); - for (int i = 0; i < sequence.length(); i++) { - retval.append(replacement); - } - return retval.toString(); - } - - @Override public String collapseFrom(CharSequence sequence, char replacement) { - return (sequence.length() == 0) ? "" : String.valueOf(replacement); - } - - @Override public String trimFrom(CharSequence sequence) { - checkNotNull(sequence); - return ""; - } - - @Override public int countIn(CharSequence sequence) { - return sequence.length(); - } - - @Override public CharMatcher and(CharMatcher other) { - return checkNotNull(other); - } - - @Override public CharMatcher or(CharMatcher other) { - checkNotNull(other); - return this; - } - - @Override public CharMatcher negate() { - return NONE; - } - }; - - /** Matches no characters. */ - public static final CharMatcher NONE = - new FastMatcher("CharMatcher.NONE") { - @Override public boolean matches(char c) { - return false; - } - - @Override public int indexIn(CharSequence sequence) { - checkNotNull(sequence); - return -1; - } - - @Override public int indexIn(CharSequence sequence, int start) { - int length = sequence.length(); - Preconditions.checkPositionIndex(start, length); - return -1; - } - - @Override public int lastIndexIn(CharSequence sequence) { - checkNotNull(sequence); - return -1; - } - - @Override public boolean matchesAllOf(CharSequence sequence) { - return sequence.length() == 0; - } - - @Override public boolean matchesNoneOf(CharSequence sequence) { - checkNotNull(sequence); - return true; - } - - @Override public String removeFrom(CharSequence sequence) { - return sequence.toString(); - } - - @Override public String replaceFrom(CharSequence sequence, char replacement) { - return sequence.toString(); - } - - @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) { - checkNotNull(replacement); - return sequence.toString(); - } - - @Override public String collapseFrom(CharSequence sequence, char replacement) { - return sequence.toString(); - } - - @Override public String trimFrom(CharSequence sequence) { - return sequence.toString(); - } - - @Override - public String trimLeadingFrom(CharSequence sequence) { - return sequence.toString(); - } - - @Override - public String trimTrailingFrom(CharSequence sequence) { - return sequence.toString(); - } - - @Override public int countIn(CharSequence sequence) { - checkNotNull(sequence); - return 0; - } - - @Override public CharMatcher and(CharMatcher other) { - checkNotNull(other); - return this; - } - - @Override public CharMatcher or(CharMatcher other) { - return checkNotNull(other); - } - - @Override public CharMatcher negate() { - return ANY; - } - }; - - // Static factories - - /** - * Returns a {@code char} matcher that matches only one specified character. - */ - public static CharMatcher is(final char match) { - String description = "CharMatcher.is('" + showCharacter(match) + "')"; - return new FastMatcher(description) { - @Override public boolean matches(char c) { - return c == match; - } - - @Override public String replaceFrom(CharSequence sequence, char replacement) { - return sequence.toString().replace(match, replacement); - } - - @Override public CharMatcher and(CharMatcher other) { - return other.matches(match) ? this : NONE; - } - - @Override public CharMatcher or(CharMatcher other) { - return other.matches(match) ? other : super.or(other); - } - - @Override public CharMatcher negate() { - return isNot(match); - } - }; - } - - /** - * Returns a {@code char} matcher that matches any character except the one specified. - * - * <p>To negate another {@code CharMatcher}, use {@link #negate()}. - */ - public static CharMatcher isNot(final char match) { - String description = "CharMatcher.isNot(" + Integer.toHexString(match) + ")"; - return new FastMatcher(description) { - @Override public boolean matches(char c) { - return c != match; - } - - @Override public CharMatcher and(CharMatcher other) { - return other.matches(match) ? super.and(other) : other; - } - - @Override public CharMatcher or(CharMatcher other) { - return other.matches(match) ? ANY : this; - } - - @Override public CharMatcher negate() { - return is(match); - } - }; - } - - /** - * Returns a {@code char} matcher that matches any character present in the given character - * sequence. - */ - public static CharMatcher anyOf(final CharSequence sequence) { - switch (sequence.length()) { - case 0: - return NONE; - case 1: - return is(sequence.charAt(0)); - case 2: - return isEither(sequence.charAt(0), sequence.charAt(1)); - default: - // continue below to handle the general case - } - // TODO(user): is it potentially worth just going ahead and building a precomputed matcher? - final char[] chars = sequence.toString().toCharArray(); - Arrays.sort(chars); - StringBuilder description = new StringBuilder("CharMatcher.anyOf(\""); - for (char c : chars) { - description.append(showCharacter(c)); - } - description.append("\")"); - return new CharMatcher(description.toString()) { - @Override public boolean matches(char c) { - return Arrays.binarySearch(chars, c) >= 0; - } - }; - } - - private static CharMatcher isEither( - final char match1, - final char match2) { - String description = "CharMatcher.anyOf(\"" + - showCharacter(match1) + showCharacter(match2) + "\")"; - return new FastMatcher(description) { - @Override public boolean matches(char c) { - return c == match1 || c == match2; - } - }; - } - - /** - * Returns a {@code char} matcher that matches any character not present in the given character - * sequence. - */ - public static CharMatcher noneOf(CharSequence sequence) { - return anyOf(sequence).negate(); - } - - /** - * Returns a {@code char} matcher that matches any character in a given range (both endpoints are - * inclusive). For example, to match any lowercase letter of the English alphabet, use {@code - * CharMatcher.inRange('a', 'z')}. - * - * @throws IllegalArgumentException if {@code endInclusive < startInclusive} - */ - public static CharMatcher inRange(final char startInclusive, final char endInclusive) { - checkArgument(endInclusive >= startInclusive); - String description = "CharMatcher.inRange('" + - showCharacter(startInclusive) + "', '" + - showCharacter(endInclusive) + "')"; - return inRange(startInclusive, endInclusive, description); - } - - static CharMatcher inRange(final char startInclusive, final char endInclusive, - String description) { - return new FastMatcher(description) { - @Override public boolean matches(char c) { - return startInclusive <= c && c <= endInclusive; - } - }; - } - - /** - * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but - * which operates on primitive {@code char} instances instead. - */ - public static CharMatcher forPredicate(final Predicate<? super Character> predicate) { - checkNotNull(predicate); - if (predicate instanceof CharMatcher) { - return (CharMatcher) predicate; - } - String description = "CharMatcher.forPredicate(" + predicate + ")"; - return new CharMatcher(description) { - @Override public boolean matches(char c) { - return predicate.apply(c); - } - - @Override public boolean apply(Character character) { - return predicate.apply(checkNotNull(character)); - } - }; - } - - // State - final String description; - - // Constructors - - /** - * Sets the {@code toString()} from the given description. - */ - CharMatcher(String description) { - this.description = description; - } - - /** - * Constructor for use by subclasses. When subclassing, you may want to override - * {@code toString()} to provide a useful description. - */ - protected CharMatcher() { - description = super.toString(); - } - - // Abstract methods - - /** Determines a true or false value for the given character. */ - public abstract boolean matches(char c); - - // Non-static factories - - /** - * Returns a matcher that matches any character not matched by this matcher. - */ - public CharMatcher negate() { - return new NegatedMatcher(this); - } - - private static class NegatedMatcher extends CharMatcher { - final CharMatcher original; - - NegatedMatcher(String toString, CharMatcher original) { - super(toString); - this.original = original; - } - - NegatedMatcher(CharMatcher original) { - this(original + ".negate()", original); - } - - @Override public boolean matches(char c) { - return !original.matches(c); - } - - @Override public boolean matchesAllOf(CharSequence sequence) { - return original.matchesNoneOf(sequence); - } - - @Override public boolean matchesNoneOf(CharSequence sequence) { - return original.matchesAllOf(sequence); - } - - @Override public int countIn(CharSequence sequence) { - return sequence.length() - original.countIn(sequence); - } - - @Override public CharMatcher negate() { - return original; - } - - @Override - CharMatcher withToString(String description) { - return new NegatedMatcher(description, original); - } - } - - /** - * Returns a matcher that matches any character matched by both this matcher and {@code other}. - */ - public CharMatcher and(CharMatcher other) { - return new And(this, checkNotNull(other)); - } - - private static class And extends CharMatcher { - final CharMatcher first; - final CharMatcher second; - - And(CharMatcher a, CharMatcher b) { - this(a, b, "CharMatcher.and(" + a + ", " + b + ")"); - } - - And(CharMatcher a, CharMatcher b, String description) { - super(description); - first = checkNotNull(a); - second = checkNotNull(b); - } - - @Override - public boolean matches(char c) { - return first.matches(c) && second.matches(c); - } - - @Override - CharMatcher withToString(String description) { - return new And(first, second, description); - } - } - - /** - * Returns a matcher that matches any character matched by either this matcher or {@code other}. - */ - public CharMatcher or(CharMatcher other) { - return new Or(this, checkNotNull(other)); - } - - private static class Or extends CharMatcher { - final CharMatcher first; - final CharMatcher second; - - Or(CharMatcher a, CharMatcher b, String description) { - super(description); - first = checkNotNull(a); - second = checkNotNull(b); - } - - Or(CharMatcher a, CharMatcher b) { - this(a, b, "CharMatcher.or(" + a + ", " + b + ")"); - } - - @Override - public boolean matches(char c) { - return first.matches(c) || second.matches(c); - } - - @Override - CharMatcher withToString(String description) { - return new Or(first, second, description); - } - } - - /** - * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to - * query than the original; your mileage may vary. Precomputation takes time and is likely to be - * worthwhile only if the precomputed matcher is queried many thousands of times. - * - * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a - * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a - * worthwhile tradeoff in a browser. - */ - public CharMatcher precomputed() { - return Platform.precomputeCharMatcher(this); - } - - /** - * Subclasses should provide a new CharMatcher with the same characteristics as {@code this}, - * but with their {@code toString} method overridden with the new description. - * - * <p>This is unsupported by default. - */ - CharMatcher withToString(String description) { - throw new UnsupportedOperationException(); - } - - private static final int DISTINCT_CHARS = Character.MAX_VALUE - Character.MIN_VALUE + 1; - - /** - * A matcher for which precomputation will not yield any significant benefit. - */ - abstract static class FastMatcher extends CharMatcher { - FastMatcher() { - super(); - } - - FastMatcher(String description) { - super(description); - } - - @Override - public final CharMatcher precomputed() { - return this; - } - - @Override - public CharMatcher negate() { - return new NegatedFastMatcher(this); - } - } - - static final class NegatedFastMatcher extends NegatedMatcher { - NegatedFastMatcher(CharMatcher original) { - super(original); - } - - NegatedFastMatcher(String toString, CharMatcher original) { - super(toString, original); - } - - @Override - public final CharMatcher precomputed() { - return this; - } - - @Override - CharMatcher withToString(String description) { - return new NegatedFastMatcher(description, original); - } - } - - private static boolean isSmall(int totalCharacters, int tableLength) { - return totalCharacters <= SmallCharMatcher.MAX_SIZE - && tableLength > (totalCharacters * Character.SIZE); - } - - // Text processing routines - - /** - * Returns {@code true} if a character sequence contains at least one matching character. - * Equivalent to {@code !matchesNoneOf(sequence)}. - * - * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each - * character, until this returns {@code true} or the end is reached. - * - * @param sequence the character sequence to examine, possibly empty - * @return {@code true} if this matcher matches at least one character in the sequence - * @since 8.0 - */ - public boolean matchesAnyOf(CharSequence sequence) { - return !matchesNoneOf(sequence); - } - - /** - * Returns {@code true} if a character sequence contains only matching characters. - * - * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each - * character, until this returns {@code false} or the end is reached. - * - * @param sequence the character sequence to examine, possibly empty - * @return {@code true} if this matcher matches every character in the sequence, including when - * the sequence is empty - */ - public boolean matchesAllOf(CharSequence sequence) { - for (int i = sequence.length() - 1; i >= 0; i--) { - if (!matches(sequence.charAt(i))) { - return false; - } - } - return true; - } - - /** - * Returns {@code true} if a character sequence contains no matching characters. Equivalent to - * {@code !matchesAnyOf(sequence)}. - * - * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each - * character, until this returns {@code false} or the end is reached. - * - * @param sequence the character sequence to examine, possibly empty - * @return {@code true} if this matcher matches every character in the sequence, including when - * the sequence is empty - */ - public boolean matchesNoneOf(CharSequence sequence) { - return indexIn(sequence) == -1; - } - - /** - * Returns the index of the first matching character in a character sequence, or {@code -1} if no - * matching character is present. - * - * <p>The default implementation iterates over the sequence in forward order calling {@link - * #matches} for each character. - * - * @param sequence the character sequence to examine from the beginning - * @return an index, or {@code -1} if no character matches - */ - public int indexIn(CharSequence sequence) { - int length = sequence.length(); - for (int i = 0; i < length; i++) { - if (matches(sequence.charAt(i))) { - return i; - } - } - return -1; - } - - /** - * Returns the index of the first matching character in a character sequence, starting from a - * given position, or {@code -1} if no character matches after that position. - * - * <p>The default implementation iterates over the sequence in forward order, beginning at {@code - * start}, calling {@link #matches} for each character. - * - * @param sequence the character sequence to examine - * @param start the first index to examine; must be nonnegative and no greater than {@code - * sequence.length()} - * @return the index of the first matching character, guaranteed to be no less than {@code start}, - * or {@code -1} if no character matches - * @throws IndexOutOfBoundsException if start is negative or greater than {@code - * sequence.length()} - */ - public int indexIn(CharSequence sequence, int start) { - int length = sequence.length(); - Preconditions.checkPositionIndex(start, length); - for (int i = start; i < length; i++) { - if (matches(sequence.charAt(i))) { - return i; - } - } - return -1; - } - - /** - * Returns the index of the last matching character in a character sequence, or {@code -1} if no - * matching character is present. - * - * <p>The default implementation iterates over the sequence in reverse order calling {@link - * #matches} for each character. - * - * @param sequence the character sequence to examine from the end - * @return an index, or {@code -1} if no character matches - */ - public int lastIndexIn(CharSequence sequence) { - for (int i = sequence.length() - 1; i >= 0; i--) { - if (matches(sequence.charAt(i))) { - return i; - } - } - return -1; - } - - /** - * Returns the number of matching characters found in a character sequence. - */ - public int countIn(CharSequence sequence) { - int count = 0; - for (int i = 0; i < sequence.length(); i++) { - if (matches(sequence.charAt(i))) { - count++; - } - } - return count; - } - - /** - * Returns a string containing all non-matching characters of a character sequence, in order. For - * example: <pre> {@code - * - * CharMatcher.is('a').removeFrom("bazaar")}</pre> - * - * ... returns {@code "bzr"}. - */ - @CheckReturnValue - public String removeFrom(CharSequence sequence) { - String string = sequence.toString(); - int pos = indexIn(string); - if (pos == -1) { - return string; - } - - char[] chars = string.toCharArray(); - int spread = 1; - - // This unusual loop comes from extensive benchmarking - OUT: while (true) { - pos++; - while (true) { - if (pos == chars.length) { - break OUT; - } - if (matches(chars[pos])) { - break; - } - chars[pos - spread] = chars[pos]; - pos++; - } - spread++; - } - return new String(chars, 0, pos - spread); - } - - /** - * Returns a string containing all matching characters of a character sequence, in order. For - * example: <pre> {@code - * - * CharMatcher.is('a').retainFrom("bazaar")}</pre> - * - * ... returns {@code "aaa"}. - */ - @CheckReturnValue - public String retainFrom(CharSequence sequence) { - return negate().removeFrom(sequence); - } - - /** - * Returns a string copy of the input character sequence, with each character that matches this - * matcher replaced by a given replacement character. For example: <pre> {@code - * - * CharMatcher.is('a').replaceFrom("radar", 'o')}</pre> - * - * ... returns {@code "rodor"}. - * - * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching - * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each - * character. - * - * @param sequence the character sequence to replace matching characters in - * @param replacement the character to append to the result string in place of each matching - * character in {@code sequence} - * @return the new string - */ - @CheckReturnValue - public String replaceFrom(CharSequence sequence, char replacement) { - String string = sequence.toString(); - int pos = indexIn(string); - if (pos == -1) { - return string; - } - char[] chars = string.toCharArray(); - chars[pos] = replacement; - for (int i = pos + 1; i < chars.length; i++) { - if (matches(chars[i])) { - chars[i] = replacement; - } - } - return new String(chars); - } - - /** - * Returns a string copy of the input character sequence, with each character that matches this - * matcher replaced by a given replacement sequence. For example: <pre> {@code - * - * CharMatcher.is('a').replaceFrom("yaha", "oo")}</pre> - * - * ... returns {@code "yoohoo"}. - * - * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better - * off calling {@link #replaceFrom(CharSequence, char)} directly. - * - * @param sequence the character sequence to replace matching characters in - * @param replacement the characters to append to the result string in place of each matching - * character in {@code sequence} - * @return the new string - */ - @CheckReturnValue - public String replaceFrom(CharSequence sequence, CharSequence replacement) { - int replacementLen = replacement.length(); - if (replacementLen == 0) { - return removeFrom(sequence); - } - if (replacementLen == 1) { - return replaceFrom(sequence, replacement.charAt(0)); - } - - String string = sequence.toString(); - int pos = indexIn(string); - if (pos == -1) { - return string; - } - - int len = string.length(); - StringBuilder buf = new StringBuilder((len * 3 / 2) + 16); - - int oldpos = 0; - do { - buf.append(string, oldpos, pos); - buf.append(replacement); - oldpos = pos + 1; - pos = indexIn(string, oldpos); - } while (pos != -1); - - buf.append(string, oldpos, len); - return buf.toString(); - } - - /** - * Returns a substring of the input character sequence that omits all characters this matcher - * matches from the beginning and from the end of the string. For example: <pre> {@code - * - * CharMatcher.anyOf("ab").trimFrom("abacatbab")}</pre> - * - * ... returns {@code "cat"}. - * - * <p>Note that: <pre> {@code - * - * CharMatcher.inRange('\0', ' ').trimFrom(str)}</pre> - * - * ... is equivalent to {@link String#trim()}. - */ - @CheckReturnValue - public String trimFrom(CharSequence sequence) { - int len = sequence.length(); - int first; - int last; - - for (first = 0; first < len; first++) { - if (!matches(sequence.charAt(first))) { - break; - } - } - for (last = len - 1; last > first; last--) { - if (!matches(sequence.charAt(last))) { - break; - } - } - - return sequence.subSequence(first, last + 1).toString(); - } - - /** - * Returns a substring of the input character sequence that omits all characters this matcher - * matches from the beginning of the string. For example: <pre> {@code - * - * CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}</pre> - * - * ... returns {@code "catbab"}. - */ - @CheckReturnValue - public String trimLeadingFrom(CharSequence sequence) { - int len = sequence.length(); - for (int first = 0; first < len; first++) { - if (!matches(sequence.charAt(first))) { - return sequence.subSequence(first, len).toString(); - } - } - return ""; - } - - /** - * Returns a substring of the input character sequence that omits all characters this matcher - * matches from the end of the string. For example: <pre> {@code - * - * CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}</pre> - * - * ... returns {@code "abacat"}. - */ - @CheckReturnValue - public String trimTrailingFrom(CharSequence sequence) { - int len = sequence.length(); - for (int last = len - 1; last >= 0; last--) { - if (!matches(sequence.charAt(last))) { - return sequence.subSequence(0, last + 1).toString(); - } - } - return ""; - } - - /** - * Returns a string copy of the input character sequence, with each group of consecutive - * characters that match this matcher replaced by a single replacement character. For example: - * <pre> {@code - * - * CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}</pre> - * - * ... returns {@code "b-p-r"}. - * - * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching - * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each - * character. - * - * @param sequence the character sequence to replace matching groups of characters in - * @param replacement the character to append to the result string in place of each group of - * matching characters in {@code sequence} - * @return the new string - */ - @CheckReturnValue - public String collapseFrom(CharSequence sequence, char replacement) { - // This implementation avoids unnecessary allocation. - int len = sequence.length(); - for (int i = 0; i < len; i++) { - char c = sequence.charAt(i); - if (matches(c)) { - if (c == replacement - && (i == len - 1 || !matches(sequence.charAt(i + 1)))) { - // a no-op replacement - i++; - } else { - StringBuilder builder = new StringBuilder(len) - .append(sequence.subSequence(0, i)) - .append(replacement); - return finishCollapseFrom(sequence, i + 1, len, replacement, builder, true); - } - } - } - // no replacement needed - return sequence.toString(); - } - - /** - * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that - * groups of matching characters at the start or end of the sequence are removed without - * replacement. - */ - @CheckReturnValue - public String trimAndCollapseFrom(CharSequence sequence, char replacement) { - // This implementation avoids unnecessary allocation. - int len = sequence.length(); - int first; - int last; - - for (first = 0; first < len && matches(sequence.charAt(first)); first++) {} - for (last = len - 1; last > first && matches(sequence.charAt(last)); last--) {} - - return (first == 0 && last == len - 1) - ? collapseFrom(sequence, replacement) - : finishCollapseFrom( - sequence, first, last + 1, replacement, - new StringBuilder(last + 1 - first), - false); - } - - private String finishCollapseFrom( - CharSequence sequence, int start, int end, char replacement, - StringBuilder builder, boolean inMatchingGroup) { - for (int i = start; i < end; i++) { - char c = sequence.charAt(i); - if (matches(c)) { - if (!inMatchingGroup) { - builder.append(replacement); - inMatchingGroup = true; - } - } else { - builder.append(c); - inMatchingGroup = false; - } - } - return builder.toString(); - } - - // Predicate interface - - /** - * Equivalent to {@link #matches}; provided only to satisfy the {@link Predicate} interface. When - * using a reference of type {@code CharMatcher}, invoke {@link #matches} directly instead. - */ - @Override public boolean apply(Character character) { - return matches(character); - } - - /** - * Returns a string representation of this {@code CharMatcher}, such as - * {@code CharMatcher.or(WHITESPACE, JAVA_DIGIT)}. - */ - @Override - public String toString() { - return description; - } - - /** - * A special-case CharMatcher for Unicode whitespace characters that is extremely - * efficient both in space required and in time to check for matches. - * - * Implementation details. - * It turns out that all current (early 2012) Unicode characters are unique modulo 79: - * so we can construct a lookup table of exactly 79 entries, and just check the character code - * mod 79, and see if that character is in the table. - * - * There is a 1 at the beginning of the table so that the null character is not listed - * as whitespace. - * - * Other things we tried that did not prove to be beneficial, mostly due to speed concerns: - * - * * Binary search into the sorted list of characters, i.e., what - * CharMatcher.anyOf() does</li> - * * Perfect hash function into a table of size 26 (using an offset table and a special - * Jenkins hash function)</li> - * * Perfect-ish hash function that required two lookups into a single table of size 26.</li> - * * Using a power-of-2 sized hash table (size 64) with linear probing.</li> - * - * --Christopher Swenson, February 2012. - */ - private static final String WHITESPACE_TABLE = "\u0001\u0000\u00a0\u0000\u0000\u0000\u0000\u0000" - + "\u0000\u0009\n\u000b\u000c\r\u0000\u0000\u2028\u2029\u0000\u0000\u0000\u0000\u0000\u202f" - + "\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0020\u0000\u0000\u0000\u0000\u0000" - + "\u0000\u0000\u0000\u0000\u0000\u3000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000" - + "\u0000\u0000\u0085\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a" - + "\u0000\u0000\u0000\u0000\u0000\u205f\u1680\u0000\u0000\u180e\u0000\u0000\u0000"; - - /** - * Determines whether a character is whitespace according to the latest Unicode standard, as - * illustrated - * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>. - * This is not the same definition used by other Java APIs. (See a - * <a href="http://spreadsheets.google.com/pub?key=pd8dAQyHbdewRsnE5x5GzKQ">comparison of several - * definitions of "whitespace"</a>.) - * - * <p><b>Note:</b> as the Unicode definition evolves, we will modify this constant to keep it up - * to date. - */ - public static final CharMatcher WHITESPACE = new FastMatcher("CharMatcher.WHITESPACE") { - - @Override public boolean matches(char c) { - return WHITESPACE_TABLE.charAt(c % 79) == c; - } - }; -} |