/** * Copyright (C) 2009-2013 FoundationDB, LLC * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package com.foundationdb.server.types.texpressions; import com.foundationdb.server.error.InvalidParameterValueException; import java.util.Arrays; import java.util.BitSet; import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; public final class Matchers { public static Matcher getMatcher(String pattern, char escape, boolean ignoreCase) { if(pattern.isEmpty()) { return new EmptyMatcher(); } TokenSet ts = buildTokenSet(pattern, escape, ignoreCase); if((ts.startsWith != null) && (ts.startsWith == ts.endsWith)) { assert ts.contains.length == 0; return new EqualsMatcher(pattern, escape, ignoreCase, ts.startsWith); } return new GenericMatcher(pattern, escape, ignoreCase, ts); } private static final class Token { final char[] chunk; final Map<Character, Integer> rightIndex; final BitSet wildIndex; private Token(char[] chunk, Map<Character, Integer> rightIndex, BitSet wildIndex) { this.rightIndex = rightIndex; this.chunk = chunk; this.wildIndex = wildIndex; } /** Get right most index that would match {@code c}. */ public Integer getRightIndex(char c) { Integer r = rightIndex.get(c); // If no match, find the right-most wildcard as it matches anything. if(r == null) { r = wildIndex.length() - 1; if(r == -1) { r = null; } } return r; } } private static final class TokenSet { private final Token startsWith; private final Token[] contains; private final Token endsWith; public TokenSet(Token startsWith, Token[] contains, Token endsWith) { this.startsWith = startsWith; this.contains = contains; this.endsWith = endsWith; } } private static void checkEndEscape(String pattern, char escape) { int len = pattern.length(); if(pattern.charAt(len - 1) == escape) { if(((len - 2) < 0) || (pattern.charAt(len - 2) != escape)) { throw new InvalidParameterValueException("Illegal escape sequence"); } } } private static boolean isExactStart(String pattern, char escape) { return (pattern.charAt(0) != '%') && (escape != '%'); } private static boolean isExactEnd(String pattern, char escape) { int len = pattern.length(); if(pattern.charAt(len - 1) != '%') { return true; } // Still exact if percent is escaped but not if the escape is escaped boolean hasN2 = (len - 2) >= 0; boolean hasN3 = (len - 3) >= 0; return hasN2 && (pattern.charAt(len - 2) == escape) && (hasN3 && pattern.charAt(len - 3) != escape); } /** Split the pattern into (unescaped) % delimited Tokens. */ private static TokenSet buildTokenSet(String pattern, char escape, boolean doLowerCase) { assert !pattern.isEmpty(); checkEndEscape(pattern, escape); final int patLength = pattern.length(); List<Token> tokens = new LinkedList<>(); for(int n = 0; n < patLength; /*none*/) { char[] chunk = new char[patLength - n]; int chunkLen = 0; Map<Character, Integer> rightIndex = new HashMap<>(); BitSet wildIndex = new BitSet(); for(; n < patLength; ++n) { char ch = pattern.charAt(n); if(ch == escape) { assert (n + 1) < patLength : pattern; ch = pattern.charAt(++n); } else if(ch == '%') { ++n; // Split break; } else if(ch == '_') { wildIndex.set(chunkLen); } if(doLowerCase) { ch = Character.toLowerCase(ch); } chunk[chunkLen] = ch; rightIndex.put(ch, chunkLen++); } if(chunkLen > 0) { if(chunk.length != chunkLen) { chunk = Arrays.copyOf(chunk, chunkLen); } if(wildIndex.size() < chunkLen) { wildIndex.set(chunkLen, false); } tokens.add(new Token(chunk, rightIndex, wildIndex)); } } Token startsWith = null; if(!tokens.isEmpty() && isExactStart(pattern, escape)) { startsWith = tokens.remove(0); } Token endsWith = null; if(isExactEnd(pattern, escape)) { if(tokens.isEmpty()) { endsWith = startsWith; } else { endsWith = tokens.remove(tokens.size() - 1); } } return new TokenSet(startsWith, tokens.toArray(new Token[tokens.size()]), endsWith); } /** Find the first location of the token and return the following index, -1 if not found. */ private static int findToken(Token token, String str, int startIndex, boolean doLowerCase) { final int tokMax = token.chunk.length - 1; final int strLength = str.length(); int left = startIndex; outer: while(left < strLength) { int tail = left + tokMax; if(tail >= strLength) { // Text is shorter than pattern return -1; } // If mismatch does NOT occur at the end then keep moving leftward from the tail char ch = str.charAt(tail); if(doLowerCase) { ch = Character.toLowerCase(ch); } int right = tokMax; if((ch == token.chunk[right]) || token.wildIndex.get(right)) { int nextStart = tail + 1; while((--tail >= left) && (--right >= 0)) { ch = str.charAt(tail); if(doLowerCase) { ch = Character.toLowerCase(ch); } if((ch != token.chunk[right]) && !token.wildIndex.get(right)) { Integer d = token.getRightIndex(ch); if((d != null) && (d < right)) { // Mismatch is in pattern and rightmost is within how much of pattern has been used left += right - d; } else { // Shift pattern right by 1 iff it has no such char left += 1; } continue outer; } } // Would have skipped a mismatch return nextStart; } else { // Mismatch occurs at the end; Integer d = token.getRightIndex(ch); left += (d == null) ? token.chunk.length : right - d; } } // Would have already returned true if there was a match return -1; } /** Check if {@code tokens} matches {@code str} at {@code startIndex}. */ private static boolean tokensMatch(Token[] tokens, String str, int startIndex, boolean doLowerCase) { int nextStart = startIndex; boolean matched = true; for(int i = 0; matched && (i < tokens.length); ++i) { nextStart = findToken(tokens[i], str, nextStart, doLowerCase); matched = (nextStart >= 0); } return matched; } /** Check if a string ends with a given pattern */ private static boolean isExactMatch(Token token, String str, int startIndex, boolean doLowerCase) { if((startIndex < 0) || (startIndex + token.chunk.length) > str.length()) { return false; } for(int ti = 0, si = startIndex; ti < token.chunk.length; ++ti, ++si) { char tch = token.chunk[ti]; char sch = str.charAt(si); if(doLowerCase) { sch = Character.toLowerCase(sch); } if((tch != sch) && !token.wildIndex.get(ti)) { return false; } } return true; } private static class EmptyMatcher implements Matcher { @Override public boolean matches(String str) { return str.isEmpty(); } @Override public boolean sameState(String pattern, char escape) { return pattern.isEmpty(); } } private static abstract class AbstractMatcher implements Matcher { protected final String pattern; protected final char escape; protected final boolean ignoreCase; private AbstractMatcher(String pattern, char escape, boolean ignoreCase) { this.pattern = pattern; this.escape = escape; this.ignoreCase = ignoreCase; } @Override public boolean sameState(String pattern, char escape) { return this.pattern.equals(pattern) && (this.escape == escape); } } private static class EqualsMatcher extends AbstractMatcher { private final Token token; private EqualsMatcher(String pattern, char escape, boolean ignoreCase, Token token) { super(pattern, escape, ignoreCase); this.token = token; } @Override public boolean matches(String str) { return (str.length() == token.chunk.length) && isExactMatch(token, str, 0, ignoreCase); } } private static class GenericMatcher extends AbstractMatcher { private final TokenSet tokenSet; public GenericMatcher(String pattern, char escape, boolean ignoreCase, TokenSet tokenSet) { super(pattern, escape, ignoreCase); this.tokenSet = tokenSet; } @Override public boolean matches(String str) { int startIndex = (tokenSet.startsWith != null) ? tokenSet.startsWith.chunk.length : 0; return matchesStartsWith(str) && tokensMatch(tokenSet.contains, str, startIndex, ignoreCase) && matchesEndsWith(str); } private boolean matchesStartsWith(String str) { return (tokenSet.startsWith == null) || isExactMatch(tokenSet.startsWith, str, 0, ignoreCase); } private boolean matchesEndsWith(String str) { return (tokenSet.endsWith == null) || isExactMatch(tokenSet.endsWith, str, str.length() - tokenSet.endsWith.chunk.length, ignoreCase); } } /** Matches against an pattern a number of times. Pattern does not use escape, _ or %. */ public static class IndexMatcher implements Matcher { private final String pattern; private final Token token; public IndexMatcher(String pattern) { this.pattern = pattern; Map<Character, Integer> rightPos = new HashMap<>(); for(int i = 0; i < pattern.length(); ++i) { rightPos.put(pattern.charAt(i), i); } this.token = new Token(pattern.toCharArray(), rightPos, new BitSet(pattern.length())); } @Override public boolean matches(String str) { throw new UnsupportedOperationException(); } @Override public boolean sameState(String pattern, char escape) { return this.pattern.equals(pattern); } /** Returns the index at which {@code str} matched {@code count} times. */ public int matchesAt(String str, int count) { int nextStart = 0; for(int i = 0; i < count; ++i) { nextStart = findToken(token, str, nextStart, false); if(nextStart < 0) { return -1; } } // Index where count pattern starts return nextStart - token.chunk.length; } } }