package focusedCrawler.util;

import static org.hamcrest.CoreMatchers.*;
import static org.junit.Assert.*;

import java.util.Arrays;
import java.util.List;

import org.junit.Test;

import focusedCrawler.util.LinkFilter.LinkBlackList;
import focusedCrawler.util.LinkFilter.LinkWhiteList;

/**
 * Unit tests for {@link RegexMatcher} and for {@link LinkFilter} when it is
 * built from a {@link LinkWhiteList} and a {@link LinkBlackList}.
 *
 * <p>NOTE(review): the URL fixtures below are representative values written
 * to be consistent with the regex patterns declared in each test. Confirm
 * they cover the cases the suite is meant to protect.
 */
public class LinkFilterTest {

    /**
     * Checks that {@code RegexMatcher.matches} returns {@code true} for URLs
     * covered by at least one configured pattern and {@code false} for URLs
     * covered by none of them.
     */
    @Test
    public void matchShoudReturnTrueIfStringMatchUrlPatterns() {
        // given
        List<String> urlRegexPatterns = Arrays.asList(
            ".*/thread/.*",
            ".*/archive/index.php/t.*",
            "www\\.mydomain\\.com.*",
            "www\\.somedomain\\.com/forum/.*"
        );
        RegexMatcher matcher = new RegexMatcher(urlRegexPatterns);

        // One URL per pattern, in pattern order. Each URL satisfies its
        // pattern both as a whole-string match and as a substring search, so
        // the fixture does not depend on which of the two the matcher uses.
        List<String> urlsThatMatch = Arrays.asList(
            "http://www.somedomain.com/forum/thread/1234",
            "http://www.somedomain.com/archive/index.php/t-1234.html",
            "www.mydomain.com/index.html",
            "www.somedomain.com/forum/index.php"
        );
        // None of these contains text that any of the patterns could match.
        List<String> urlsThatDoesntMatch = Arrays.asList(
            "http://www.example.com/index.html",
            "http://www.example.com/threads",
            "www.otherdomain.com/forum/index.php",
            "http://www.example.com/archive/index.html"
        );

        for (String url : urlsThatMatch) {
            // when
            boolean matched = matcher.matches(url);
            // then
            assertThat(url, matched, is(true));
        }

        for (String url : urlsThatDoesntMatch) {
            // when
            boolean matched = matcher.matches(url);
            // then
            assertThat(url, matched, is(false));
        }
    }

    /**
     * Checks a {@link LinkFilter} configured with both a whitelist and a
     * blacklist: the test expects a URL to be accepted when it matches the
     * whitelist and no blacklist pattern, and rejected otherwise.
     */
    @Test
    public void testComposedLinkFilter() {
        // given
        List<String> whitelistRegexes = Arrays.asList(
            "http[s]?://.*\\.?mydomain\\.com.*" // allow only http(s) links whose URL contains "mydomain.com"
        );
        List<String> blacklistRegexes = Arrays.asList(
            ".*/new_reply\\.php.*", // disallow links with path "/new_reply.php"
            ".*/new_user\\.php.*"   // disallow links with path "/new_user.php"
        );
        LinkFilter linkfilter = new LinkFilter(new LinkWhiteList(whitelistRegexes),
                                               new LinkBlackList(blacklistRegexes));

        // Whitelisted URLs that hit neither blacklisted path.
        List<String> urlsThatMatch = Arrays.asList(
            "http://www.mydomain.com/index.php",
            "https://www.mydomain.com/forum/thread.php?id=1",
            "http://mydomain.com/",
            "https://forum.mydomain.com/showthread.php?t=42"
        );
        // In order: not whitelisted; whitelisted but blacklisted (twice);
        // neither whitelisted nor clear of the blacklist.
        List<String> urlsThatDoesntMatch = Arrays.asList(
            "http://www.otherdomain.com/index.php",
            "http://www.mydomain.com/new_reply.php?t=42",
            "https://www.mydomain.com/new_user.php",
            "http://www.example.org/forum/new_reply.php"
        );

        for (String url : urlsThatMatch) {
            // when
            boolean matched = linkfilter.accept(url);
            // then
            assertThat(url, matched, is(true));
        }

        for (String url : urlsThatDoesntMatch) {
            // when
            boolean matched = linkfilter.accept(url);
            // then
            assertThat(url, matched, is(false));
        }
    }

    /**
     * Checks that a {@link LinkFilter} whose whitelist and blacklist are both
     * empty accepts every URL it is given.
     */
    @Test
    public void shouldAcceptAllUrlsIfBlackListAndWhiteListAreEmpty() {
        // given
        List<String> whitelistRegexes = Arrays.asList();
        List<String> blacklistRegexes = Arrays.asList();
        LinkFilter linkfilter = new LinkFilter(new LinkWhiteList(whitelistRegexes),
                                               new LinkBlackList(blacklistRegexes));

        // A mix of hosts, schemes and paths; with no rules configured the
        // test expects all of them to pass.
        List<String> urlsThatMatch = Arrays.asList(
            "http://www.mydomain.com/index.php",
            "https://www.example.com/",
            "http://www.somedomain.com/forum/new_reply.php",
            "www.anotherdomain.org/page"
        );

        for (String url : urlsThatMatch) {
            // when
            boolean matched = linkfilter.accept(url);
            // then
            assertThat(url, matched, is(true));
        }
    }
}