/**
* Copyright (C) 2009 - present by OpenGamma Inc. and the OpenGamma group of companies
*
* Please see distribution for license.
*/
package com.opengamma.util;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.google.common.collect.ImmutableMap;
/**
 * Utility methods for building and applying regular expressions,
 * including conversion of simple wildcard/glob strings into {@link Pattern} objects.
 * <p>
 * This is a static thread-safe utility class.
 */
public final class RegexUtils {

  /** Regex fragment emitted for a glob wildcard that matches exactly one character. */
  private static final String GLOB_ANY_CHAR = ".";
  /** Regex fragment emitted for a glob wildcard that matches any number of characters (reluctant). */
  private static final String GLOB_ANY_SEQUENCE = ".*?";
  /**
   * Glob special characters mapped to the regex fragment that replaces them.
   * Built once rather than on every call to {@link #globToPattern(String)}.
   */
  private static final Map<Character, String> GLOB_REPLACEMENTS =
      ImmutableMap.of('?', GLOB_ANY_CHAR, '*', GLOB_ANY_SEQUENCE, '%', GLOB_ANY_SEQUENCE);

  /**
   * Restricted constructor.
   */
  private RegexUtils() {
  }

  //-------------------------------------------------------------------------
  /**
   * Converts a simple wildcard style pattern to a regex pattern.
   * <p>
   * The asterisk (<code>*</code>) matches zero or more characters.<br />
   * The question mark (<code>?</code>) matches one character.<br />
   * All other text is quoted, so it is matched literally even if it contains
   * regular expression meta-characters.
   * <p>
   * The returned pattern will be setup to match a whole string using
   * <code>^</code> and <code>$</code>, and is compiled with {@link Pattern#CASE_INSENSITIVE}.
   * A run of consecutive asterisks is collapsed into a single <code>.*</code>.
   * Note that the pattern is not compiled with {@link Pattern#DOTALL}, so the
   * wildcards do not match line terminators.
   *
   * @param text  the text to match, not null
   * @return the pattern, not null
   */
  public static Pattern wildcardsToPattern(final String text) {
    ArgumentChecker.notNull(text, "text");
    // returnDelims=true so that each '?' and '*' is delivered as its own token
    StringTokenizer tkn = new StringTokenizer(text, "?*", true);
    StringBuilder buf = new StringBuilder(text.length() + 10);
    buf.append('^');
    boolean lastStar = false;
    while (tkn.hasMoreTokens()) {
      String str = tkn.nextToken();
      if (str.equals("?")) {
        buf.append('.');
        lastStar = false;
      } else if (str.equals("*")) {
        // "**" is equivalent to "*": emit only one ".*" per run of asterisks
        if (!lastStar) {
          buf.append(".*");
        }
        lastStar = true;
      } else {
        buf.append(Pattern.quote(str));
        lastStar = false;
      }
    }
    buf.append('$');
    return Pattern.compile(buf.toString(), Pattern.CASE_INSENSITIVE);
  }

  /**
   * Creates a regular expression pattern from a simple glob string.
   * <p>
   * The special characters recognized in the glob string are {@code ?} (match any character),
   * {@code *} (match any number of characters) and {@code %} (same as {@code *}).
   * The other characters in the glob string are escaped before the pattern is created so
   * it can safely contain regular expression characters. Escaping is not supported in
   * the glob string, thus there is no way to match any of the special characters themselves.
   * <p>
   * A run of consecutive {@code *} / {@code %} characters is collapsed into a single
   * reluctant {@code .*?}; this matches exactly the same input as the uncollapsed form
   * but avoids needless backtracking.
   * Unlike {@link #wildcardsToPattern(String)}, the returned pattern is case-sensitive and
   * is not anchored with {@code ^} and {@code $}.
   *
   * @param glob  the glob string, not null
   * @return a pattern for matching the glob, not null
   */
  public static Pattern globToPattern(String glob) {
    ArgumentChecker.notNull(glob, "glob");
    StringBuilder regex = new StringBuilder(glob.length() + 16);
    StringBuilder literal = new StringBuilder();
    boolean lastWasSequence = false;
    for (int i = 0; i < glob.length(); i++) {
      char c = glob.charAt(i);
      String replacement = GLOB_REPLACEMENTS.get(c);
      if (replacement == null) {
        // ordinary character: accumulate so the whole literal run is quoted in one go
        literal.append(c);
        lastWasSequence = false;
        continue;
      }
      if (literal.length() != 0) {
        regex.append(Pattern.quote(literal.toString()));
        literal.setLength(0);
      }
      boolean isSequence = GLOB_ANY_SEQUENCE.equals(replacement);
      // skip a multi-character wildcard that directly follows another one
      if (!(isSequence && lastWasSequence)) {
        regex.append(replacement);
      }
      lastWasSequence = isSequence;
    }
    if (literal.length() != 0) {
      regex.append(Pattern.quote(literal.toString()));
    }
    return Pattern.compile(regex.toString());
  }

  //-------------------------------------------------------------------------
  /**
   * Checks if a string matches a potentially wildcard string.
   * <p>
   * The asterisk (<code>*</code>) matches zero or more characters.<br />
   * The question mark (<code>?</code>) matches one character.<br />
   * The comparison is performed using {@link #wildcardsToPattern(String)} and
   * must match the whole of the text.
   *
   * @param searchCriteriaWithWildcard  the search criteria text with wildcards, null returns false
   * @param textToMatchAgainst  the text without wildcards to match against, null returns false
   * @return true if the text matches the search criteria, false if it does not
   *  or if either argument is null
   */
  public static boolean wildcardMatch(final String searchCriteriaWithWildcard, final String textToMatchAgainst) {
    if (searchCriteriaWithWildcard == null || textToMatchAgainst == null) {
      return false;
    }
    return wildcardsToPattern(searchCriteriaWithWildcard).matcher(textToMatchAgainst).matches();
  }

  /**
   * Determines whether the given string contains a wildcard.
   *
   * @param searchCriteria  the string to check, not null
   * @return true if either <code>*</code> or <code>?</code> is present
   */
  public static boolean containsWildcard(final String searchCriteria) {
    ArgumentChecker.notNull(searchCriteria, "searchCriteria");
    return searchCriteria.contains("*") || searchCriteria.contains("?");
  }

  /**
   * Extracts the first capturing group from the first occurrence of a regex in a string.
   * <p>
   * The regex is located using {@link Matcher#find()}, so it may occur anywhere in the input.
   *
   * @param string  the input string, not null
   * @param regex  the regex string, not null
   * @return the text captured by group 1, null if the regex was not found
   */
  public static String extract(String string, String regex) {
    return extract(string, Pattern.compile(regex), 1);
  }

  /**
   * Extracts the given capturing group from the first occurrence of a regex in a string.
   * <p>
   * The regex is located using {@link Matcher#find()}, so it may occur anywhere in the input.
   *
   * @param string  the input string, not null
   * @param regex  the regex string, not null
   * @param group  the index of the capturing group to return
   * @return the text captured by the group, null if the regex was not found
   */
  public static String extract(String string, String regex, int group) {
    return extract(string, Pattern.compile(regex), group);
  }

  /**
   * Extracts the given capturing group from the first occurrence of a pattern in a string.
   * <p>
   * The pattern is located using {@link Matcher#find()}, so it may occur anywhere in the input.
   *
   * @param string  the input string, not null
   * @param pattern  the pattern object, not null
   * @param group  the index of the capturing group to return
   * @return the text captured by the group, null if the pattern was not found
   */
  public static String extract(String string, Pattern pattern, int group) {
    Matcher m = pattern.matcher(string);
    if (m.find()) {
      return m.group(group);
    }
    return null;
  }

  /**
   * Returns true if the whole of the given input string matches the given pattern.
   *
   * @param input  the input, not null
   * @param pattern  the pattern, not null
   * @return true if the entire input string matches the pattern
   */
  public static boolean matches(String input, Pattern pattern) {
    Matcher m = pattern.matcher(input);
    return m.matches();
  }

}