package edu.stanford.nlp.util; import java.io.Serializable; import java.util.Arrays; import java.util.HashSet; import java.util.Set; import java.util.function.Predicate; /** * Filters Strings based on whether they exactly match any string in * the array it is initially constructed with. Saves some time over * using regexes if the array of strings is small enough. No specific * experiments exist for how long the array can be before performance * is worse than a regex, but the English dependencies code was helped * by replacing disjunction regexes of 6 words or fewer with this. * * @author John Bauer */ public class ArrayStringFilter implements Predicate<String>, Serializable { private final String[] words; private final int length; private final Mode mode; public enum Mode { EXACT, PREFIX, CASE_INSENSITIVE } public ArrayStringFilter(Mode mode, String ... words) { if (mode == null) { throw new NullPointerException("Cannot handle null mode"); } this.mode = mode; this.words = new String[words.length]; System.arraycopy(words, 0, this.words, 0, words.length); this.length = words.length; } @Override public boolean test(String input) { switch (mode) { case EXACT: for (int i = 0; i < length; ++i) { if (words[i].equals(input)) { return true; } } return false; case PREFIX: if (input == null) { return false; } for (int i = 0; i < length; ++i) { if (input.startsWith(words[i])) { return true; } } return false; case CASE_INSENSITIVE: for (int i = 0; i < length; ++i) { if (words[i].equalsIgnoreCase(input)) { return true; } } return false; default: throw new IllegalArgumentException("Unknown mode " + mode); } } @Override public String toString() { return mode.toString() + ':' + StringUtils.join(words, ","); } @Override public int hashCode() { int result = 1; for (String word : words) { result += word.hashCode(); } return result; } @Override public boolean equals(Object other) { if (other == this) { return true; } if (!(other instanceof ArrayStringFilter)) { return false; } ArrayStringFilter filter = (ArrayStringFilter) other; if (filter.mode != this.mode || filter.length != this.length) { return false; } Set<String> myWords = new HashSet<>(Arrays.asList(this.words)); Set<String> otherWords = new HashSet<>(Arrays.asList(filter.words)); return myWords.equals(otherWords); } private static final long serialVersionUID = 1; }