package org.archive.wayback.accesscontrol.staticmap; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.util.TreeSet; import org.archive.util.SURT; import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.ObjectFilter; import org.archive.wayback.util.url.AggressiveUrlCanonicalizer; import junit.framework.TestCase; public class StaticListExclusionFilterTest extends TestCase { File tmpFile = null; StaticListExclusionFilterFactory factory = null; UrlCanonicalizer canonicalizer = new AggressiveUrlCanonicalizer(); protected void setUp() throws Exception { super.setUp(); factory = new StaticListExclusionFilterFactory(); tmpFile = File.createTempFile("static-map", ".tmp"); // Properties p = new Properties(); // p.put("resourceindex.exclusionpath", tmpFile.getAbsolutePath()); // factory.init(p); } /* * @see TestCase#tearDown() */ protected void tearDown() throws Exception { super.tearDown(); if(tmpFile != null && tmpFile.exists()) { tmpFile.delete(); } } /** * @throws Exception */ public void testRealWorld() throws Exception { String bases[] = { "pho-c.co.jp/~clever", "sf.net/pop/Roger", "www.eva-stu.vn", "mins.com.br/", "24.ne.jp", "24.ne.jp/~nekko"}; // setTmpContents(bases); ObjectFilter<CaptureSearchResult> filter = getFilter(bases); assertFalse("unmassaged",isBlocked(filter,"24.ne.jp.idpnt.com/robots.txt")); assertTrue("massage",isBlocked(filter,"http://24.ne.jp:80/")); assertTrue("unmassaged",isBlocked(filter,"http://www.pho-c.co.jp/~clever")); assertTrue("massage",isBlocked(filter,"http://24.ne.jp")); assertTrue("unmassaged",isBlocked(filter,"http://www.pho-c.co.jp/~clever")); assertTrue("massaged",isBlocked(filter,"http://pho-c.co.jp/~clever")); assertTrue("trailing-slash",isBlocked(filter,"http://pho-c.co.jp/~clever/")); assertTrue("subpath",isBlocked(filter,"http://pho-c.co.jp/~clever/foo.txt")); assertTrue("full-port",isBlocked(filter,"http://www.mins.com.br:80")); assertTrue("tail-slash-port",isBlocked(filter,"http://www.mins.com.br:80/")); assertTrue("full",isBlocked(filter,"http://www.mins.com.br")); assertTrue("tail-slash",isBlocked(filter,"http://www.mins.com.br/")); assertTrue("full-massage",isBlocked(filter,"http://mins.com.br")); assertTrue("tail-slash-massage",isBlocked(filter,"http://mins.com.br/")); assertTrue("massage",isBlocked(filter,"http://mins.com.br/foo.txt")); assertTrue("subpath",isBlocked(filter,"http://www13.mins.com.br/~clever/foo.txt")); assertTrue("massage",isBlocked(filter,"24.ne.jp")); assertTrue("full",isBlocked(filter,"http://www.mins.com.br")); assertTrue("subpath",isBlocked(filter,"www.24.ne.jp")); assertTrue("tail-slash-massage",isBlocked(filter,"http://mins.com.br/")); assertTrue("subpath",isBlocked(filter,"http://www.24.ne.jp:80/")); assertTrue(isBlocked(filter,"http://sf.net/pop/Roger")); assertTrue(isBlocked(filter,"http://sf.net/pop/Roger/")); assertTrue(isBlocked(filter,"http://sf.net/pop/Roger//")); assertFalse(isBlocked(filter,"http://sf.net/pop/")); assertTrue(isBlocked(filter,"http://sf.net/pop/Roger/2")); assertTrue(isBlocked(filter,"http://sf.net/pop/Roger/23")); assertTrue(isBlocked(filter,"http://www.sf.net/pop/Roger")); assertTrue(isBlocked(filter,"http://www1.sf.net/pop/Roger")); assertTrue(isBlocked(filter,"http://www23.sf.net/pop/Roger")); assertTrue(isBlocked(filter,"http://www23.eva-stu.vn/")); assertTrue(isBlocked(filter,"http://www23.eva-stu.vn")); assertTrue(isBlocked(filter,"http://eva-stu.vn")); assertTrue(isBlocked(filter,"http://www.eva-stu.vn/")); assertTrue(isBlocked(filter,"http://eva-stu.vn/")); assertTrue(isBlocked(filter,"http://www.eva-stu.vn/foo.txt")); assertTrue(isBlocked(filter,"http://www2.eva-stu.vn/foo/bar.txt")); assertTrue(isBlocked(filter,"http://eva-stu.vn/foo/bar.txt")); } /** * @throws Exception */ public void testBaseNoPrefix() throws Exception { String str = "http://peagreenboat.com/"; // String str = "http://(com,peagreenboat"; System.out.format("(%s) -> [%s]\n", str,SURT.prefixFromPlain(str)); String bases[] = {"http://www.peagreenboat.com/", "http://peagreenboat.com/"}; // setTmpContents(bases); ObjectFilter<CaptureSearchResult> filter = getFilter(bases); assertTrue("unmassaged",isBlocked(filter,"http://www.peagreenboat.com")); assertTrue("unmassaged",isBlocked(filter,"http://peagreenboat.com")); assertFalse("other1",isBlocked(filter,"http://peagreenboatt.com")); assertFalse("other2",isBlocked(filter,"http://peagreenboat.org")); assertFalse("other3",isBlocked(filter,"http://www.peagreenboat.org")); // there is a problem with the SURTTokenizer... deal with ports! // assertFalse("other4",isBlocked(filter,"http://www.peagreenboat.com:8080")); assertTrue("subpath",isBlocked(filter,"http://www.peagreenboat.com/foo")); assertTrue("emptypath",isBlocked(filter,"http://www.peagreenboat.com/")); } private boolean isBlocked(ObjectFilter<CaptureSearchResult> filter, String url) { CaptureSearchResult result = new CaptureSearchResult(); result.setOriginalUrl(url); int filterResult = filter.filterObject(result); if(filterResult == ObjectFilter.FILTER_EXCLUDE) { return true; } return false; } private ObjectFilter<CaptureSearchResult> getFilter(String lines[]) throws IOException { setTmpContents(lines); TreeSet<String> excludes = factory.loadFile(tmpFile.getAbsolutePath()); return new StaticListExclusionFilter(excludes,canonicalizer); } private void setTmpContents(String[] lines) throws IOException { if(tmpFile != null && tmpFile.exists()) { tmpFile.delete(); } // tmpFile = File.createTempFile("range-map","tmp"); FileWriter writer = new FileWriter(tmpFile); StringBuilder sb = new StringBuilder(); for(int i=0; i<lines.length; i++) { sb.append(lines[i]).append("\n"); } String contents = sb.toString(); writer.write(contents); writer.close(); //factory.reloadFile(); } }