package focusedCrawler.util;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Tests a string against a list of patterns and returns true if the entire string matches any of
* the patterns.
*/
public class RegexMatcher {

    private static final Logger logger = LoggerFactory.getLogger(RegexMatcher.class);

    /** Compiled patterns, kept in the order in which they were supplied. */
    List<Pattern> patterns = new ArrayList<Pattern>();

    /**
     * Creates a matcher from the regular expressions listed in a file, one per line.
     * If the file cannot be read, a warning is logged and the matcher is built from
     * whatever lines were read before the failure (none, if the file could not be opened).
     *
     * @param filename path of the file containing the patterns
     * @throws java.util.regex.PatternSyntaxException if a line is not a valid regular expression
     */
    protected RegexMatcher(String filename) {
        this(loadRegexesFromFile(filename));
    }

    /**
     * Creates a matcher from the given regular expressions. Every expression is compiled
     * with {@link Pattern#CASE_INSENSITIVE} and {@link Pattern#DOTALL}.
     *
     * @param textPatterns regular expressions to compile
     * @throws java.util.regex.PatternSyntaxException if an expression is not a valid regular expression
     */
    protected RegexMatcher(List<String> textPatterns) {
        for (String pattern : textPatterns) {
            patterns.add(Pattern.compile(pattern, Pattern.CASE_INSENSITIVE | Pattern.DOTALL));
        }
    }

    /**
     * Tests whether the whole text matches at least one of the configured patterns.
     *
     * @param text the text to test; {@code null} never matches
     * @return {@code true} if any pattern matches the entire text, {@code false} otherwise
     */
    public boolean matches(String text) {
        // Guard against null so the result does not depend on whether any pattern is loaded:
        // without it, an empty matcher returns false while a non-empty one throws.
        if (text == null) {
            return false;
        }
        for (Pattern pattern : patterns) {
            if (pattern.matcher(text).matches()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Builds a matcher from a file containing one regular expression per line.
     *
     * @param filename path of the file containing the patterns
     * @return a matcher for the patterns found in the file
     */
    public static RegexMatcher fromFile(String filename) {
        return new RegexMatcher(filename);
    }

    /**
     * Builds a matcher from a list of regular expressions.
     *
     * @param patterns regular expressions to compile
     * @return a matcher for the given patterns
     */
    public static RegexMatcher fromList(List<String> patterns) {
        return new RegexMatcher(patterns);
    }

    /**
     * Reads the patterns from a file, one per line. Each line is trimmed and blank lines
     * are skipped; every pattern kept is logged at info level. The file is decoded with
     * the JVM's default charset.
     *
     * @param filename path of the file to read
     * @return the non-blank, trimmed lines read; on an I/O error this holds only the lines
     *         read before the failure
     */
    private static List<String> loadRegexesFromFile(String filename) {
        List<String> textPatterns = new ArrayList<String>();
        try (BufferedReader br = new BufferedReader(new FileReader(filename))) {
            String line;
            while ((line = br.readLine()) != null) {
                String trimmedLine = line.trim();
                if (!trimmedLine.isEmpty()) {
                    textPatterns.add(trimmedLine);
                    logger.info(trimmedLine);
                }
            }
        } catch (IOException e) {
            // Pass the exception as the last argument so the cause is logged with the message.
            logger.warn("Couldn't load patterns from file: {} Using a empty list.", filename, e);
        }
        return textPatterns;
    }
}