/* * Copyright (C) 2008 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package android.os; import android.util.Log; import java.util.Arrays; /** * A simple pattern matcher, which is safe to use on untrusted data: it does * not provide full reg-exp support, only simple globbing that can not be * used maliciously. */ public class PatternMatcher implements Parcelable { /** * Pattern type: the given pattern must exactly match the string it is * tested against. */ public static final int PATTERN_LITERAL = 0; /** * Pattern type: the given pattern must match the * beginning of the string it is tested against. */ public static final int PATTERN_PREFIX = 1; /** * Pattern type: the given pattern is interpreted with a * simple glob syntax for matching against the string it is tested against. * In this syntax, you can use the '*' character to match against zero or * more occurrences of the character immediately before. If the * character before it is '.' it will match any character. The character * '\' can be used as an escape. This essentially provides only the '*' * wildcard part of a normal regexp. */ public static final int PATTERN_SIMPLE_GLOB = 2; /** * Pattern type: the given pattern is interpreted with a regular * expression-like syntax for matching against the string it is tested * against. Supported tokens include dot ({@code .}) and sets ({@code [...]}) * with full support for character ranges and the not ({@code ^}) modifier. * Supported modifiers include star ({@code *}) for zero-or-more, plus ({@code +}) * for one-or-more and full range ({@code {...}}) support. This is a simple * evaulation implementation in which matching is done against the pattern in * realtime with no backtracking support. * * {@hide} Pending approval for public API */ public static final int PATTERN_ADVANCED_GLOB = 3; // token types for advanced matching private static final int TOKEN_TYPE_LITERAL = 0; private static final int TOKEN_TYPE_ANY = 1; private static final int TOKEN_TYPE_SET = 2; private static final int TOKEN_TYPE_INVERSE_SET = 3; // Return for no match private static final int NO_MATCH = -1; private static final String TAG = "PatternMatcher"; // Parsed placeholders for advanced patterns private static final int PARSED_TOKEN_CHAR_SET_START = -1; private static final int PARSED_TOKEN_CHAR_SET_INVERSE_START = -2; private static final int PARSED_TOKEN_CHAR_SET_STOP = -3; private static final int PARSED_TOKEN_CHAR_ANY = -4; private static final int PARSED_MODIFIER_RANGE_START = -5; private static final int PARSED_MODIFIER_RANGE_STOP = -6; private static final int PARSED_MODIFIER_ZERO_OR_MORE = -7; private static final int PARSED_MODIFIER_ONE_OR_MORE = -8; private final String mPattern; private final int mType; private final int[] mParsedPattern; private static final int MAX_PATTERN_STORAGE = 2048; // workspace to use for building a parsed advanced pattern; private static final int[] sParsedPatternScratch = new int[MAX_PATTERN_STORAGE]; public PatternMatcher(String pattern, int type) { mPattern = pattern; mType = type; if (mType == PATTERN_ADVANCED_GLOB) { mParsedPattern = parseAndVerifyAdvancedPattern(pattern); } else { mParsedPattern = null; } } public final String getPath() { return mPattern; } public final int getType() { return mType; } public boolean match(String str) { return matchPattern(str, mPattern, mParsedPattern, mType); } public String toString() { String type = "? "; switch (mType) { case PATTERN_LITERAL: type = "LITERAL: "; break; case PATTERN_PREFIX: type = "PREFIX: "; break; case PATTERN_SIMPLE_GLOB: type = "GLOB: "; break; case PATTERN_ADVANCED_GLOB: type = "ADVANCED: "; break; } return "PatternMatcher{" + type + mPattern + "}"; } public int describeContents() { return 0; } public void writeToParcel(Parcel dest, int flags) { dest.writeString(mPattern); dest.writeInt(mType); dest.writeIntArray(mParsedPattern); } public PatternMatcher(Parcel src) { mPattern = src.readString(); mType = src.readInt(); mParsedPattern = src.createIntArray(); } public static final Parcelable.Creator<PatternMatcher> CREATOR = new Parcelable.Creator<PatternMatcher>() { public PatternMatcher createFromParcel(Parcel source) { return new PatternMatcher(source); } public PatternMatcher[] newArray(int size) { return new PatternMatcher[size]; } }; static boolean matchPattern(String match, String pattern, int[] parsedPattern, int type) { if (match == null) return false; if (type == PATTERN_LITERAL) { return pattern.equals(match); } if (type == PATTERN_PREFIX) { return match.startsWith(pattern); } else if (type == PATTERN_SIMPLE_GLOB) { return matchGlobPattern(pattern, match); } else if (type == PATTERN_ADVANCED_GLOB) { return matchAdvancedPattern(parsedPattern, match); } return false; } static boolean matchGlobPattern(String pattern, String match) { final int NP = pattern.length(); if (NP <= 0) { return match.length() <= 0; } final int NM = match.length(); int ip = 0, im = 0; char nextChar = pattern.charAt(0); while ((ip<NP) && (im<NM)) { char c = nextChar; ip++; nextChar = ip < NP ? pattern.charAt(ip) : 0; final boolean escaped = (c == '\\'); if (escaped) { c = nextChar; ip++; nextChar = ip < NP ? pattern.charAt(ip) : 0; } if (nextChar == '*') { if (!escaped && c == '.') { if (ip >= (NP-1)) { // at the end with a pattern match, so // all is good without checking! return true; } ip++; nextChar = pattern.charAt(ip); // Consume everything until the next character in the // pattern is found. if (nextChar == '\\') { ip++; nextChar = ip < NP ? pattern.charAt(ip) : 0; } do { if (match.charAt(im) == nextChar) { break; } im++; } while (im < NM); if (im == NM) { // Whoops, the next character in the pattern didn't // exist in the match. return false; } ip++; nextChar = ip < NP ? pattern.charAt(ip) : 0; im++; } else { // Consume only characters matching the one before '*'. do { if (match.charAt(im) != c) { break; } im++; } while (im < NM); ip++; nextChar = ip < NP ? pattern.charAt(ip) : 0; } } else { if (c != '.' && match.charAt(im) != c) return false; im++; } } if (ip >= NP && im >= NM) { // Reached the end of both strings, all is good! return true; } // One last check: we may have finished the match string, but still // have a '.*' at the end of the pattern, which should still count // as a match. if (ip == NP-2 && pattern.charAt(ip) == '.' && pattern.charAt(ip+1) == '*') { return true; } return false; } /** * Parses the advanced pattern and returns an integer array representation of it. The integer * array treats each field as a character if positive and a unique token placeholder if * negative. This method will throw on any pattern structure violations. */ synchronized static int[] parseAndVerifyAdvancedPattern(String pattern) { int ip = 0; final int LP = pattern.length(); int it = 0; boolean inSet = false; boolean inRange = false; boolean inCharClass = false; boolean addToParsedPattern; while (ip < LP) { if (it > MAX_PATTERN_STORAGE - 3) { throw new IllegalArgumentException("Pattern is too large!"); } char c = pattern.charAt(ip); addToParsedPattern = false; switch (c) { case '[': if (inSet) { addToParsedPattern = true; // treat as literal or char class in set } else { if (pattern.charAt(ip + 1) == '^') { sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_INVERSE_START; ip++; // skip over the '^' } else { sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_START; } ip++; // move to the next pattern char inSet = true; continue; } break; case ']': if (!inSet) { addToParsedPattern = true; // treat as literal outside of set } else { int parsedToken = sParsedPatternScratch[it - 1]; if (parsedToken == PARSED_TOKEN_CHAR_SET_START || parsedToken == PARSED_TOKEN_CHAR_SET_INVERSE_START) { throw new IllegalArgumentException( "You must define characters in a set."); } sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_STOP; inSet = false; inCharClass = false; } break; case '{': if (!inSet) { if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { throw new IllegalArgumentException("Modifier must follow a token."); } sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_START; ip++; inRange = true; } break; case '}': if (inRange) { // only terminate the range if we're currently in one sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_STOP; inRange = false; } break; case '*': if (!inSet) { if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { throw new IllegalArgumentException("Modifier must follow a token."); } sParsedPatternScratch[it++] = PARSED_MODIFIER_ZERO_OR_MORE; } break; case '+': if (!inSet) { if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { throw new IllegalArgumentException("Modifier must follow a token."); } sParsedPatternScratch[it++] = PARSED_MODIFIER_ONE_OR_MORE; } break; case '.': if (!inSet) { sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_ANY; } break; case '\\': // escape if (ip + 1 >= LP) { throw new IllegalArgumentException("Escape found at end of pattern!"); } c = pattern.charAt(++ip); addToParsedPattern = true; break; default: addToParsedPattern = true; break; } if (inSet) { if (inCharClass) { sParsedPatternScratch[it++] = c; inCharClass = false; } else { // look forward for character class if (ip + 2 < LP && pattern.charAt(ip + 1) == '-' && pattern.charAt(ip + 2) != ']') { inCharClass = true; sParsedPatternScratch[it++] = c; // set first token as lower end of range ip++; // advance past dash } else { // literal sParsedPatternScratch[it++] = c; // set first token as literal sParsedPatternScratch[it++] = c; // set second set as literal } } } else if (inRange) { int endOfSet = pattern.indexOf('}', ip); if (endOfSet < 0) { throw new IllegalArgumentException("Range not ended with '}'"); } String rangeString = pattern.substring(ip, endOfSet); int commaIndex = rangeString.indexOf(','); try { final int rangeMin; final int rangeMax; if (commaIndex < 0) { int parsedRange = Integer.parseInt(rangeString); rangeMin = rangeMax = parsedRange; } else { rangeMin = Integer.parseInt(rangeString.substring(0, commaIndex)); if (commaIndex == rangeString.length() - 1) { // e.g. {n,} (n or more) rangeMax = Integer.MAX_VALUE; } else { rangeMax = Integer.parseInt(rangeString.substring(commaIndex + 1)); } } if (rangeMin > rangeMax) { throw new IllegalArgumentException( "Range quantifier minimum is greater than maximum"); } sParsedPatternScratch[it++] = rangeMin; sParsedPatternScratch[it++] = rangeMax; } catch (NumberFormatException e) { throw new IllegalArgumentException("Range number format incorrect", e); } ip = endOfSet; continue; // don't increment ip } else if (addToParsedPattern) { sParsedPatternScratch[it++] = c; } ip++; } if (inSet) { throw new IllegalArgumentException("Set was not terminated!"); } return Arrays.copyOf(sParsedPatternScratch, it); } private static boolean isParsedModifier(int parsedChar) { return parsedChar == PARSED_MODIFIER_ONE_OR_MORE || parsedChar == PARSED_MODIFIER_ZERO_OR_MORE || parsedChar == PARSED_MODIFIER_RANGE_STOP || parsedChar == PARSED_MODIFIER_RANGE_START; } static boolean matchAdvancedPattern(int[] parsedPattern, String match) { // create indexes int ip = 0, im = 0; // one-time length check final int LP = parsedPattern.length, LM = match.length(); // The current character being analyzed in the pattern int patternChar; int tokenType; int charSetStart = 0, charSetEnd = 0; while (ip < LP) { // we still have content in the pattern patternChar = parsedPattern[ip]; // get the match type of the next verb switch (patternChar) { case PARSED_TOKEN_CHAR_ANY: tokenType = TOKEN_TYPE_ANY; ip++; break; case PARSED_TOKEN_CHAR_SET_START: case PARSED_TOKEN_CHAR_SET_INVERSE_START: tokenType = patternChar == PARSED_TOKEN_CHAR_SET_START ? TOKEN_TYPE_SET : TOKEN_TYPE_INVERSE_SET; charSetStart = ip + 1; // start from the char after the set start while (++ip < LP && parsedPattern[ip] != PARSED_TOKEN_CHAR_SET_STOP); charSetEnd = ip - 1; // we're on the set stop, end is the previous ip++; // move the pointer to the next pattern entry break; default: charSetStart = ip; tokenType = TOKEN_TYPE_LITERAL; ip++; break; } final int minRepetition; final int maxRepetition; // look for a match length modifier if (ip >= LP) { minRepetition = maxRepetition = 1; } else { patternChar = parsedPattern[ip]; switch (patternChar) { case PARSED_MODIFIER_ZERO_OR_MORE: minRepetition = 0; maxRepetition = Integer.MAX_VALUE; ip++; break; case PARSED_MODIFIER_ONE_OR_MORE: minRepetition = 1; maxRepetition = Integer.MAX_VALUE; ip++; break; case PARSED_MODIFIER_RANGE_START: minRepetition = parsedPattern[++ip]; maxRepetition = parsedPattern[++ip]; ip += 2; // step over PARSED_MODIFIER_RANGE_STOP and on to the next token break; default: minRepetition = maxRepetition = 1; // implied literal break; } } if (minRepetition > maxRepetition) { return false; } // attempt to match as many characters as possible int matched = matchChars(match, im, LM, tokenType, minRepetition, maxRepetition, parsedPattern, charSetStart, charSetEnd); // if we found a conflict, return false immediately if (matched == NO_MATCH) { return false; } // move the match pointer the number of characters matched im += matched; } return ip >= LP && im >= LM; // have parsed entire string and regex } private static int matchChars(String match, int im, final int lm, int tokenType, int minRepetition, int maxRepetition, int[] parsedPattern, int tokenStart, int tokenEnd) { int matched = 0; while(matched < maxRepetition && matchChar(match, im + matched, lm, tokenType, parsedPattern, tokenStart, tokenEnd)) { matched++; } return matched < minRepetition ? NO_MATCH : matched; } private static boolean matchChar(String match, int im, final int lm, int tokenType, int[] parsedPattern, int tokenStart, int tokenEnd) { if (im >= lm) { // we've overrun the string, no match return false; } switch (tokenType) { case TOKEN_TYPE_ANY: return true; case TOKEN_TYPE_SET: for (int i = tokenStart; i < tokenEnd; i += 2) { char matchChar = match.charAt(im); if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) { return true; } } return false; case TOKEN_TYPE_INVERSE_SET: for (int i = tokenStart; i < tokenEnd; i += 2) { char matchChar = match.charAt(im); if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) { return false; } } return true; case TOKEN_TYPE_LITERAL: return match.charAt(im) == parsedPattern[tokenStart]; default: return false; } } }