package org.commoncrawl.service.crawler.filters;
import static org.junit.Assert.*;
import java.io.IOException;
import java.net.InetAddress;
import java.util.Map;
import java.util.TreeMap;
import java.util.Vector;
import org.commoncrawl.protocol.CrawlURLMetadata;
import org.commoncrawl.service.crawler.filters.FilterResults;
import org.commoncrawl.util.IPAddressUtils;
import org.commoncrawl.util.IntrusiveList;
import org.commoncrawl.util.IntrusiveList.IntrusiveListElement;
import org.junit.Test;
/**
 * Filter that rejects items whose server IP address (as reported by
 * {@link CrawlURLMetadata#getServerIP()}) falls inside one of the configured
 * IPv4 ranges.
 *
 * <p>Each filter line has the form {@code address,mask[,comment]}, for example
 * {@code 69.64.144.0,255.255.240.0,FOO COMMENT}. An address matches a range when
 * {@code (address & mask) == (rangeAddress & mask)}.
 *
 * <p>Ranges whose mask covers at least the upper 16 bits are bucketed by those
 * upper 16 bits so a lookup only scans ranges that can possibly match. Ranges
 * with a broader mask (for example {@code 255.0.0.0}) span several buckets and
 * are therefore kept in a separate list that is scanned on every lookup.
 *
 * <p>NOTE(review): the bucketing assumes {@code IPAddressUtils.IPV4AddressToInteger}
 * places the first octet in the most significant byte; the embedded test relies
 * on the same assumption.
 */
public class IPAddressBlockFilter extends Filter {

  /** Creates an empty filter with no backing filter file. */
  public IPAddressBlockFilter() {
  }

  /**
   * Creates a filter bound to a filter file; both arguments are passed straight
   * through to the {@code Filter} base class.
   *
   * @param filterPath    forwarded to the base class constructor
   * @param hasMasterFile forwarded to the base class constructor
   */
  public IPAddressBlockFilter(String filterPath, boolean hasMasterFile) {
    super(filterPath, hasMasterFile);
  }

  /** Selects the upper 16 bits of an address; used as the bucket key. */
  private static final int CLASS_B_MASK = 0xFFFF0000;

  /** A single blocked range: a (pre-masked) subnet address and its mask. */
  private static class IPAddressRange extends IntrusiveListElement<IPAddressRange> {
    public IPAddressRange(int ipAddressSubnet, int ipAddressMask) {
      this.ipAddressSubnet = ipAddressSubnet;
      this.ipAddressMask = ipAddressMask;
    }

    public int ipAddressSubnet;
    public int ipAddressMask;

    /** Returns true when the given address lies inside this range. */
    boolean contains(int ipAddress) {
      return (ipAddress & ipAddressMask) == ipAddressSubnet;
    }
  }

  /** Ranges whose mask covers the upper 16 bits, keyed by those 16 bits. */
  private Map<Integer, IntrusiveList<IPAddressRange>> _rangeMap =
      new TreeMap<Integer, IntrusiveList<IPAddressRange>>();

  /**
   * Ranges whose mask does NOT cover the upper 16 bits. Such a range can match
   * addresses belonging to many different buckets, so it cannot be bucketed.
   */
  private IntrusiveList<IPAddressRange> _wideRanges = new IntrusiveList<IPAddressRange>();

  /** Drops every loaded range. */
  @Override
  public void clear() {
    _rangeMap.clear();
    // Replaced rather than emptied in place: only the constructor, addTail and
    // iteration are known to be available on IntrusiveList from this file.
    _wideRanges = new IntrusiveList<IPAddressRange>();
  }

  /**
   * Parses one filter line of the form {@code address,mask[,comment]} and
   * registers the range it describes. Lines with fewer than two comma-separated
   * tokens, or with an empty address or mask, are ignored.
   *
   * @param filterItemLine the raw filter line
   * @throws IOException if the address or mask cannot be resolved by
   *                     {@link InetAddress#getByName(String)}
   */
  @Override
  public void loadFilterItem(String filterItemLine) throws IOException {
    String tokens[] = filterItemLine.split(",");
    if (tokens.length < 2) {
      return;
    }

    String addressToken = tokens[0].trim();
    String maskToken = tokens[1].trim();
    // InetAddress.getByName("") resolves to the loopback address, which would
    // silently block 127.x; treat an empty field as a malformed line instead.
    if (addressToken.length() == 0 || maskToken.length() == 0) {
      return;
    }

    int ipAddressMask = parseIPv4(maskToken);
    // Normalize the subnet so a line with host bits set (e.g. 10.1.2.3/255.0.0.0)
    // still matches; filterItem compares against (serverIP & mask).
    int ipAddressSubnet = parseIPv4(addressToken) & ipAddressMask;

    IPAddressRange range = new IPAddressRange(ipAddressSubnet, ipAddressMask);
    if ((ipAddressMask & CLASS_B_MASK) == CLASS_B_MASK) {
      // Every address in this range shares the subnet's upper 16 bits.
      rangeListForIPAddress(ipAddressSubnet, true).addTail(range);
    } else {
      _wideRanges.addTail(range);
    }
  }

  /**
   * Rejects the item when the server IP in {@code metadata} lies inside any
   * loaded range. Only {@code metadata} is consulted; the remaining arguments
   * are not read by this filter.
   *
   * @return {@code FilterResult.Filter_Reject} on a match, otherwise
   *         {@code FilterResult.Filter_NoAction}
   */
  @Override
  public FilterResult filterItem(String rootDomainName, String fullyQualifiedDomainName, String urlPath,
      CrawlURLMetadata metadata, FilterResults results) {
    int serverIP = metadata.getServerIP();

    IntrusiveList<IPAddressRange> rangeList = rangeListForIPAddress(serverIP, false);
    if (rangeList != null && containsAddress(rangeList, serverIP)) {
      return FilterResult.Filter_Reject;
    }
    // Ranges broader than /16 are not reachable through the bucket lookup.
    if (containsAddress(_wideRanges, serverIP)) {
      return FilterResult.Filter_Reject;
    }
    return FilterResult.Filter_NoAction;
  }

  /** Returns true when any range in the list contains the given address. */
  private static boolean containsAddress(IntrusiveList<IPAddressRange> ranges, int ipAddress) {
    for (IPAddressRange rangeItem : ranges) {
      if (rangeItem.contains(ipAddress)) {
        return true;
      }
    }
    return false;
  }

  /** Converts a textual IPv4 address to its integer form. */
  private static int parseIPv4(String address) throws IOException {
    return IPAddressUtils.IPV4AddressToInteger(InetAddress.getByName(address).getAddress());
  }

  /**
   * Looks up the bucket for the upper 16 bits of {@code ipAddress}.
   *
   * @param addIfMissing when true, an empty bucket is created and registered
   *                     if none exists yet
   * @return the bucket, or {@code null} when it is missing and
   *         {@code addIfMissing} is false
   */
  private IntrusiveList<IPAddressRange> rangeListForIPAddress(int ipAddress, boolean addIfMissing) {
    Integer bucketKey = (ipAddress & CLASS_B_MASK);
    IntrusiveList<IPAddressRange> rangeList = _rangeMap.get(bucketKey);
    if (rangeList == null && addIfMissing) {
      rangeList = new IntrusiveList<IPAddressRange>();
      _rangeMap.put(bucketKey, rangeList);
    }
    return rangeList;
  }

  /** Self-test covering bucketed ranges, wide ranges and unnormalized subnets. */
  @Test
  public void validateFilter() throws Exception {
    loadFilterItem("69.64.144.0,255.255.240.0,FOO COMMENT");
    loadFilterItem("83.138.128.248,255.255.255.255,BAR COMMENT");
    CrawlURLMetadata metadata = new CrawlURLMetadata();
    metadata.setServerIP(parseIPv4("69.64.144.0"));
    assertTrue(filterItem(null, null, null, metadata, null) == FilterResult.Filter_Reject);
    metadata.setServerIP(parseIPv4("69.64.159.255"));
    assertTrue(filterItem(null, null, null, metadata, null) == FilterResult.Filter_Reject);
    metadata.setServerIP(parseIPv4("69.64.160.0"));
    assertTrue(filterItem(null, null, null, metadata, null) == FilterResult.Filter_NoAction);
    metadata.setServerIP(parseIPv4("69.64.143.0"));
    assertTrue(filterItem(null, null, null, metadata, null) == FilterResult.Filter_NoAction);
    metadata.setServerIP(parseIPv4("83.138.128.248"));
    assertTrue(filterItem(null, null, null, metadata, null) == FilterResult.Filter_Reject);
    metadata.setServerIP(parseIPv4("83.138.128.247"));
    assertTrue(filterItem(null, null, null, metadata, null) == FilterResult.Filter_NoAction);
    metadata.setServerIP(parseIPv4("83.138.128.249"));
    assertTrue(filterItem(null, null, null, metadata, null) == FilterResult.Filter_NoAction);

    // Mask broader than /16: must match addresses outside the subnet's own bucket.
    loadFilterItem("10.1.2.3,255.0.0.0,WIDE RANGE WITH HOST BITS");
    metadata.setServerIP(parseIPv4("10.200.5.6"));
    assertTrue(filterItem(null, null, null, metadata, null) == FilterResult.Filter_Reject);
    metadata.setServerIP(parseIPv4("11.0.0.1"));
    assertTrue(filterItem(null, null, null, metadata, null) == FilterResult.Filter_NoAction);

    // Host bits set in the configured address and whitespace around the mask.
    loadFilterItem("192.168.5.77, 255.255.255.0 ,UNNORMALIZED SUBNET");
    metadata.setServerIP(parseIPv4("192.168.5.1"));
    assertTrue(filterItem(null, null, null, metadata, null) == FilterResult.Filter_Reject);
    metadata.setServerIP(parseIPv4("192.168.6.1"));
    assertTrue(filterItem(null, null, null, metadata, null) == FilterResult.Filter_NoAction);

    // clear() must drop both bucketed and wide ranges.
    clear();
    metadata.setServerIP(parseIPv4("10.200.5.6"));
    assertTrue(filterItem(null, null, null, metadata, null) == FilterResult.Filter_NoAction);
    metadata.setServerIP(parseIPv4("69.64.144.0"));
    assertTrue(filterItem(null, null, null, metadata, null) == FilterResult.Filter_NoAction);
  }
}