package aima.test.core.unit.nlp.rank; import static org.junit.Assert.*; import java.util.ArrayList; import java.util.Arrays; import java.util.Hashtable; import java.util.List; import org.junit.Before; import org.junit.Test; import aima.core.nlp.ranking.Page; import aima.core.nlp.ranking.WikiLinkFinder; public class WikiLinkFinderTest { Page testPage; Hashtable<String,Page> pageTable; WikiLinkFinder wLF; @Before public void setUp() { testPage = new Page("tester"); pageTable = new Hashtable<String,Page>(); wLF = new WikiLinkFinder(); } @Test public void testGetOutlinks() { List<String> outLinks; List<String> validLinks = new ArrayList<String>( Arrays.asList("/wiki/thisisthefinallink")); String content = "Some example text with certain <aa href=\"link1\"></aa> links" + "inside. Here is another href=\"link2\" without the surrounding tags. " + "This isn't a link because there are no quotes -> href=notALink. The following" + "is a link < href=\"www.link3.com\" ></> and should be found. Let's do a couple" + "more. Penultimate link is <a href=\"penultimateLink.com.au\">hyperlink</a>. Final" + "link is href href=\"/wiki/thisIsTheFinalLink\" href=notLink2, href\"notLink4\". Done"; testPage.setContent(content); outLinks = wLF.getOutlinks(testPage); assertTrue( outLinks.containsAll(validLinks)); // note that locations are stored in lowercase assertTrue( !outLinks.contains("notALink")); assertTrue( !outLinks.contains("notLink4")); } @Test public void testGetInlinks() { Page targetP = new Page("targetPage"); // create some test Pages Page test1 = new Page("test1"); Page test2 = new Page("test2"); Page test3 = new Page("test3"); Page test4 = new Page("test4"); test1.getOutlinks().addAll(Arrays.asList("a","b","targetPage","d")); test2.getOutlinks().addAll(Arrays.asList("targetpage","b","c","d","e")); test3.getOutlinks().addAll(Arrays.asList("target","page","c","d")); test4.getOutlinks().addAll(Arrays.asList("TARGETPAGE","b")); pageTable.put("test1", test1); pageTable.put("test2", test2); pageTable.put("test3", test3); pageTable.put("test4", test4); List<String> outLinks = wLF.getInlinks(targetP, pageTable); assertTrue( outLinks.contains("test1")); assertTrue( outLinks.containsAll(Arrays.asList("test1","test2","test4"))); assertTrue( !outLinks.contains("test3")); } }