/*
* RapidMiner
*
* Copyright (C) 2001-2011 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.tools;
import java.util.Stack;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
/**
 * Translates a GLOB expression into an equivalent standard regular expression.
 *
 * <p>GLOB provides a subset of the functionality of regular expressions:
 * <ul>
 * <li>{@code ?} matches exactly one arbitrary character</li>
 * <li>{@code *} matches any number of arbitrary characters (including none)</li>
 * <li>{@code [abc]} matches one of the listed characters; {@code [a-z]} denotes a
 * character range and a {@code !} directly after the opening bracket negates the set</li>
 * <li>{@code {abc,def}} matches either {@code abc} or {@code def}</li>
 * </ul>
 *
 * <p>A backslash escapes the construct characters of the current context so that they are
 * matched literally. A backslash in front of any other character, or at the very end of the
 * expression, is itself matched literally.
 *
 * @author Sebastian Land
 */
public class GlobCompiler {

    /** The bracket context the compiler is in while scanning the GLOB expression. */
    private enum Commands {
        UNBRACED,
        SQUARE_BRACKETS,
        CURLY_BRACKETS
    }

    /** Characters that can be escaped by a backslash in every context. */
    private static final String ESCAPABLE_ALWAYS = "\\[]{}?*";

    /** Characters that can be escaped by a backslash inside "[...]". */
    private static final String ESCAPABLE_IN_SQUARE_BRACKETS = ESCAPABLE_ALWAYS + "!-";

    /** Characters that can be escaped by a backslash inside "{...}". */
    private static final String ESCAPABLE_IN_CURLY_BRACKETS = ESCAPABLE_ALWAYS + ",";

    /**
     * Compiles the given GLOB expression into a regular expression {@link Pattern}.
     *
     * @param glob the GLOB expression to translate, must not be {@code null}
     * @return the compiled pattern equivalent to the GLOB expression
     * @throws PatternSyntaxException if a "[" or "{" is still open at the end of the
     *         expression, or if the generated regular expression cannot be compiled
     */
    public static Pattern compileGlob(final String glob) throws PatternSyntaxException {
        // use stack to keep track if inside braces. Start unbraced...
        Stack<Commands> currentMode = new Stack<Commands>();
        currentMode.push(Commands.UNBRACED);

        final int globLength = glob.length();
        int currentIndex = 0;

        /* equivalent REGEX expression to be compiled */
        StringBuilder buffer = new StringBuilder();

        while (currentIndex < globLength) {
            final char c = glob.charAt(currentIndex++);
            final Commands mode = currentMode.peek();
            final boolean inSquareBrackets = (mode == Commands.SQUARE_BRACKETS);

            if (c == '\\') {
                if (currentIndex < globLength && isEscapable(mode, glob.charAt(currentIndex))) {
                    // escape the construct char following the backslash
                    buffer.append(Pattern.quote(String.valueOf(glob.charAt(currentIndex))));
                    currentIndex++;
                } else {
                    // nothing escapable follows, so treat '\' as literal char
                    buffer.append(Pattern.quote("\\"));
                }
            } else if (c == '*' && !inSquareBrackets) {
                buffer.append(".*");
            } else if (c == '?' && !inSquareBrackets) {
                buffer.append('.');
            } else if (c == '[') {
                // [...]
                buffer.append('[');
                currentMode.push(Commands.SQUARE_BRACKETS);
                // check for negation character '!' immediately after the opening bracket '['
                if (currentIndex < globLength && glob.charAt(currentIndex) == '!') {
                    currentIndex++;
                    buffer.append('^');
                }
            } else if (c == ']' && inSquareBrackets) {
                buffer.append(']');
                currentMode.pop();
            } else if (c == '-' && inSquareBrackets) {
                // character range '-' in "[...]"
                buffer.append('-');
            } else if (c == '{') {
                // {...}: outer group for the whole alternation, inner group for the first alternative
                buffer.append("(?:(?:");
                currentMode.push(Commands.CURLY_BRACKETS);
            } else if (c == '}' && mode == Commands.CURLY_BRACKETS) {
                buffer.append("))");
                currentMode.pop();
            } else if (c == ',' && mode == Commands.CURLY_BRACKETS) {
                // comma between strings in "{...}": close this alternative, open the next one
                buffer.append(")|(?:");
            } else {
                // simple literal; this includes '*' and '?' inside "[...]", which are plain
                // members of the character set there and not wildcards
                buffer.append(Pattern.quote(String.valueOf(c)));
            }
        }

        // finally check for mismatched [...] or {...}
        final Commands finalMode = currentMode.peek();
        if (finalMode == Commands.SQUARE_BRACKETS) {
            throw new PatternSyntaxException("Cannot find matching closing square bracket ] in GLOB expression", glob, -1);
        }
        if (finalMode == Commands.CURLY_BRACKETS) {
            throw new PatternSyntaxException("Cannot find matching closing curly brace } in GLOB expression", glob, -1);
        }
        return Pattern.compile(buffer.toString());
    }

    /** Tells whether a backslash in front of {@code next} acts as an escape in the given context. */
    private static boolean isEscapable(final Commands mode, final char next) {
        final String escapable;
        switch (mode) {
            case SQUARE_BRACKETS:
                escapable = ESCAPABLE_IN_SQUARE_BRACKETS;
                break;
            case CURLY_BRACKETS:
                escapable = ESCAPABLE_IN_CURLY_BRACKETS;
                break;
            default:
                escapable = ESCAPABLE_ALWAYS;
                break;
        }
        return escapable.indexOf(next) >= 0;
    }
}