package focusedCrawler.util;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
/**
 * Decides whether a link should be followed by combining a whitelist and a
 * blacklist of patterns. A link is accepted only when both lists accept it.
 */
public class LinkFilter {

    private final LinkWhiteList whitelist;
    private final LinkBlackList blacklist;

    /**
     * Builds a filter from the files {@code link_whitelist.txt} and
     * {@code link_blacklist.txt} located under the given directory.
     *
     * <p>The file names are joined without a leading separator so that an
     * empty {@code configPath} resolves relative to the working directory
     * instead of the filesystem root.
     *
     * @param configPath directory that contains the two list files
     */
    public LinkFilter(String configPath) {
        this(new LinkWhiteList(Paths.get(configPath, "link_whitelist.txt").toString()),
             new LinkBlackList(Paths.get(configPath, "link_blacklist.txt").toString()));
    }

    /**
     * Builds a whitelist-only filter; the blacklist is left empty.
     *
     * @param regularExpressions patterns used as the whitelist
     */
    public LinkFilter(List<String> regularExpressions) {
        this(new LinkWhiteList(regularExpressions));
    }

    /**
     * Builds a filter with the given whitelist and an empty blacklist.
     *
     * @param linkWhiteList whitelist to apply
     */
    public LinkFilter(LinkWhiteList linkWhiteList) {
        this(linkWhiteList, new LinkBlackList(new ArrayList<String>()));
    }

    /**
     * Builds a filter with the given blacklist and an empty whitelist.
     *
     * @param linkBlackList blacklist to apply
     */
    public LinkFilter(LinkBlackList linkBlackList) {
        this(new LinkWhiteList(new ArrayList<String>()), linkBlackList);
    }

    /**
     * Builds a filter from an explicit whitelist and blacklist.
     *
     * @param linkWhiteList whitelist to apply
     * @param linkBlackList blacklist to apply
     */
    public LinkFilter(LinkWhiteList linkWhiteList, LinkBlackList linkBlackList) {
        this.whitelist = linkWhiteList;
        this.blacklist = linkBlackList;
    }

    /**
     * Checks a link against both lists. The blacklist is only consulted when
     * the whitelist has already accepted the link.
     *
     * @param link link to test
     * @return {@code true} when both the whitelist and the blacklist accept the link
     */
    public boolean accept(String link) {
        return whitelist.accept(link) && blacklist.accept(link);
    }

    /**
     * Pattern list whose matches are accepted. With no patterns configured
     * every link is accepted.
     */
    public static class LinkWhiteList extends RegexMatcher {

        /**
         * @param urlPatterns patterns handed to the {@code RegexMatcher} constructor
         */
        public LinkWhiteList(List<String> urlPatterns) {
            super(urlPatterns);
        }

        /**
         * @param filename file name handed to the {@code RegexMatcher} constructor
         */
        public LinkWhiteList(String filename) {
            super(filename);
        }

        /**
         * @param link link to test
         * @return {@code true} when the pattern list is null or empty, or when
         *         {@code matches(link)} reports a match
         */
        public boolean accept(String link) {
            // A missing or empty whitelist places no restriction on links.
            if (patterns == null || patterns.size() == 0) {
                return true;
            }
            return matches(link);
        }
    }

    /**
     * Pattern list whose matches are rejected. With no patterns configured
     * every link is accepted.
     */
    public static class LinkBlackList extends RegexMatcher {

        /**
         * @param filename file name handed to the {@code RegexMatcher} constructor
         */
        public LinkBlackList(String filename) {
            super(filename);
        }

        /**
         * @param urlPatterns patterns handed to the {@code RegexMatcher} constructor
         */
        public LinkBlackList(List<String> urlPatterns) {
            super(urlPatterns);
        }

        /**
         * @param link link to test
         * @return {@code true} when the pattern list is null or empty, or when
         *         {@code matches(link)} reports no match
         */
        public boolean accept(String link) {
            // A missing or empty blacklist rejects nothing.
            if (patterns == null || patterns.size() == 0) {
                return true;
            }
            return !super.matches(link);
        }
    }
}