package arkref.tests; import java.io.IOException; import java.util.List; import org.apache.commons.lang.ArrayUtils; import arkref.analysis.ARKref; import arkref.analysis.FindMentions; import arkref.analysis.SyntacticPaths; import arkref.analysis.Types; import arkref.analysis._SimplePipeline; import arkref.data.Document; import arkref.data.Mention; import arkref.parsestuff.AnalysisUtilities; import arkref.parsestuff.U; import edu.stanford.nlp.trees.Tree; import junit.framework.TestCase; public class TestArkref extends TestCase { /** * Sets up the test fixture. * (Called before every test case method.) */ protected void setUp() { ARKref.Opts.debug = true; } /** * Tears down the test fixture. * (Called after every test case method.) */ protected void tearDown() { } public void assertLink(Mention m1, Mention m2, Document d) { assertTrue(m1.node().toString()+"\t==>\t"+m2.node().toString(), d.entGraph().getLinkedMentions(m1).contains(m2)); } public void assertLink(Document d, Mention m1, Mention m2){ assertLink(m1,m2,d); } public void assertNoLink(Mention m1, Mention m2, Document d) { assertFalse(m1.node().toString()+"\t==>\t"+m2.node().toString(), d.entGraph().getLinkedMentions(m1).contains(m2)); } public void assertNoLink(Document d, Mention m1, Mention m2){ assertNoLink(m1,m2,d); } public void assertSurface(Mention m, String surface) { // "surface" is space-sep tokens assertEquals(surface, m.node().yield().toString()); } public void assertSingleton(Document d, int mi) { Mention m = d.mentions().get(mi-1); assertEquals(1, d.entGraph().getLinkedMentions(m).size()); } public void assertLink(int m1, int m2, Document d) { assertLink(d.mentions().get(m1-1), d.mentions().get(m2-1), d); } public void assertLink(Document d, int m1, int m2) { assertLink(m1,m2,d); } public void assertNoLink(Document d, int m1, int m2) { assertNoLink(m1,m2,d); } public void assertNoLink(int m1, int m2, Document d) { assertNoLink(d.mentions().get(m1-1), d.mentions().get(m2-1), d); } public void assertSurface(Document d, int m, String s) { assertSurface(d.mentions().get(m-1), s); } public void testSpecialSymbols() throws IOException{ Document d = Document.loadFiles("data/specialsymbols"); _SimplePipeline.go(d); assertTrue(d.mentions().toString(), d.mentions().size() == 2); } public void testReflexives() throws IOException{ //John bought himself a book. (s1) //Bob knew that John bought himself a book. (s2) //John knew that Bob bought him a book. (s3) //The company ruined itself. (s4) //The corporation ruined its chances. (s5) //The bank ruined it. (s6) //James believed he could win. (s7) Document d = Document.loadFiles("data/reflexives"); _SimplePipeline.go(d); assertLink(1,2, d); //John, himself (s1) assertNoLink(4,6, d); //Bob, himself (s2) assertLink(5,6, d); //John, himself (s2) assertNoLink(9,10, d); //Bob, him (s3) assertLink(8,10, d); //John, him (s3) assertLink(12,13,d); //company, itself (s4) assertLink(14,16,d); //corporation, its (s5) assertNoLink(17,18,d); //bank, it (s6) assertLink(19,20,d); // James, he (s7) } public void testSameHead() throws IOException{ //The nice, smart boy liked to play in the park. //This boy also liked to play soccer. Document d = Document.loadFiles("data/sameHeadWordTest"); _SimplePipeline.go(d); assertSurface(d,1, "The nice , smart boy"); assertSurface(d,2, "the park"); assertSurface(d,3, "This boy"); assertLink(3,1, d); assertNoLink(3,2, d); } public void testRoleAppositives() throws IOException{ //The author John Smith wrote the book. //I learned about the painter John Smith, the subject of the exposition. //The shared Lunar Precursor Robotic Program was new. Document d = Document.loadFiles("data/roleAppositivesTest"); _SimplePipeline.go(d); assertTrue(d.mentions().toString(), d.mentions().size()==9); //Tree t = AnalysisUtilities.getInstance().readTreeFromString("(NP (NP (DT The) (NN author)) (NNP John) (NNP Smith))"); //System.err.println(t.toString()); //System.err.println(t.headTerminal(AnalysisUtilities.getInstance().getHeadFinder()).toString()); assertSurface(d, 1, "The author John Smith"); assertSurface(d, 2, "The author"); assertSurface(d, 5, "the famous painter John Smith , the subject of the exposition"); assertSurface(d, 6, "the famous painter"); assertSurface(d, 7, "the subject of the exposition"); assertSurface(d, 9, "The shared Lunar Precursor Robotic Program"); assertLink(2,1, d); assertLink(7,5, d); assertLink(6,5, d); } public void testFirstPerson() throws IOException{ Document d = Document.loadFiles("data/roleAppositivesTest"); _SimplePipeline.go(d); assertSurface(d, 4, "I"); assertSingleton(d, 4); d = Document.loadFiles("data/firstPerson1"); _SimplePipeline.go(d); assertSurface(d,1,"I"); assertSurface(d,4,"I"); assertSurface(d,6,"my"); assertSurface(d,7,"it"); assertLink(d,1,4); assertLink(d,4,6); assertNoLink(d,6,7); // BTO: should do appositive & pred-noms? low prio } public void testDefaultMale() throws IOException{ // Some of these cases should flip if we turn on hard gender constraints // Which might make more sense for certain domains. // If we make it an option, should make tests to ensure the flip happens. Document d; d = Document.loadFiles("data/defaultMale1"); _SimplePipeline.go(d); assertSurface(d,1, "Sally"); assertEquals(Types.Gender.Female, Types.gender(mention(d,1))); assertSurface(d,2, "the banker"); assertEquals(null, Types.gender(mention(d,2))); assertSurface(d,3, "He"); assertLink(d, 2,3); d = Document.loadFiles("data/defaultMale2"); _SimplePipeline.go(d); assertSurface(d,1, "Bob"); assertEquals(Types.Gender.Male, Types.gender(mention(d,1))); assertSurface(d,2, "the banker"); assertEquals(null, Types.gender(mention(d,2))); assertSurface(d,3, "He"); assertLink(d, 1,3); d = Document.loadFiles("data/defaultMale3"); _SimplePipeline.go(d); assertSurface(d,1, "The banker"); assertEquals(null, Types.gender(mention(d,1))); assertSurface(d,2, "Bob"); assertEquals(Types.Gender.Male, Types.gender(mention(d,2))); assertSurface(d,3, "He"); assertLink(d, 1,3); } public void testAppositives() throws IOException{ //example from H&K 2009 //Walmart says Gitano, its top-selling brand, is underselling. Document d = Document.loadFiles("data/IWithinI"); _SimplePipeline.go(d); assertSurface(d, 1, "Walmart"); assertSurface(d, 2, "Gitano , its top-selling brand ,"); assertSurface(d, 3, "its top-selling brand"); assertSurface(d, 4, "its"); assertLink(3,2, d); d = Document.loadFiles("data/nativeAmericans"); _SimplePipeline.go(d); assertSurface(d, 7, "the Anishinaabe , the Dakota , and other Native American inhabitants"); assertSurface(d, 8, "the Dakota"); assertSurface(d, 9, "other Native American inhabitants"); assertNoLink(7,8, d); assertNoLink(7,9, d); assertNoLink(8,9, d); } public void testPathLength() throws IOException{ //John knew that Bob was weird, but he still invited him to the party. Document d = Document.loadFiles("data/pathLengthTest"); _SimplePipeline.go(d); assertSurface(d, 1, "John"); assertSurface(d, 2, "Bob"); assertSurface(d, 3, "he"); assertLink(3,1, d); assertNoLink(3,2, d); // BTO: error, "him" != "John" but path length won't solve } public void testLargerNodeComesFirstAsMention() throws IOException{ //Nintendo of America announced its new console. Document d = Document.loadFiles("data/pathLengthTest2"); _SimplePipeline.go(d); assertSurface(d,1,"Nintendo of America"); assertSurface(d,2,"America"); assertSurface(d,3,"its new console"); assertSurface(d,4,"its"); assertTrue(""+d.mentions(), d.mentions().size()==4); } public void testPathLength2() throws IOException{ //Nintendo of America announced its new console. Document d = Document.loadFiles("data/pathLengthTest2"); _SimplePipeline.go(d); Mention m1 = d.mentions().get(0); //Nintendo of America Mention m2 = d.mentions().get(1); //America Mention m3 = d.mentions().get(2); //its new console Mention m4 = d.mentions().get(3); //its assertLink(m4,m1, d); assertNoLink(m4,m2, d); assertNoLink(m2,m1, d); assertNoLink(m3,m2, d); //TODO the following commented-out test needs semantic information (i.e., Nintendo != console) //BTO: but it works assertNoLink(m3,m1, d); } public void testEntityTypeMatching() throws IOException{ //John went to the store. //Bob also went to the store. //It was a grocery store. //He bought an item. Document d = Document.loadFiles("data/test1"); _SimplePipeline.go(d); assertLink(5,4,d); //store and it assertNoLink(3,2,d); //bob and store assertLink(7,3,d); //he and bob assertNoLink(7,6,d); //he and store assertNoLink(1,2,d); //john and store d = Document.loadFiles("data/personNounTest"); _SimplePipeline.go(d); //The astronaut went to the space with Howard. //The robot did, too. //He had fun. assertLink(1,5,d); //astronaut and he assertNoLink(5,4,d); //he and robot assertNoLink(4,1,d); //robot and astronaut assertNoLink(5,3,d); //he and Howard assertSingleton(d,6); //fun } public void testIWithinI() throws IOException{ //example from H&K 2009 //Walmart says Gitano, its top-selling brand, is underselling. Document d = Document.loadFiles("data/IWithinI"); _SimplePipeline.go(d); assertSurface(d,1,"Walmart"); assertSurface(d,2,"Gitano , its top-selling brand ,"); assertSurface(d,3,"its top-selling brand"); assertSurface(d,4,"its"); assertLink( 4,1, d); assertNoLink(4,2, d); assertNoLink(4,3, d); } public void testPredicateNominatives() throws IOException{ //Lincoln was president. //Lincoln had been president. //Lincoln was being president. //Lincoln will be president. Document d = Document.loadFiles("data/predNomTest"); _SimplePipeline.go(d); Mention m1; Mention m2; m1 = d.mentions().get(0); //Lincoln m2 = d.mentions().get(1); //president assertLink(m1,m2,d); m1 = d.mentions().get(2); //Lincoln m2 = d.mentions().get(3); //president assertLink(m1,m2,d); m1 = d.mentions().get(6); //Lincoln m2 = d.mentions().get(7); //president assertLink(m1,m2,d); } public void testConjunctions() throws IOException{ //He and Fred went to the store. //They also went to the library. Document d = Document.loadFiles("data/conjunctionsTest"); _SimplePipeline.go(d); Mention m1 = d.mentions().get(0); //He and Fred Mention m2 = d.mentions().get(1); //the store assertSurface(m1, "He and Fred"); assertSurface(m2, "the store"); assertNoLink(1, 2, d); assertLink(1, 3, d); } public void testThey() throws IOException{ //The earliest known settlers followed herds of large game to the region //during the last glacial period. They preceded the Anishinaabe, the Dakota, //and other Native American inhabitants. Document d = Document.loadFiles("data/they1"); _SimplePipeline.go(d); assertSurface(d,1,"The earliest known settlers"); assertSurface(d,6,"They"); Mention m1 = mention(d,1); assertEquals(Types.Number.Plural, Types.number(m1)); //The team practiced very hard, and later on they won the game. //The herd of animals grazed on the land, and then they moved on. d = Document.loadFiles("data/they2"); _SimplePipeline.go(d); System.out.println("!!!! Disabled data/they2 test, fails since we disabled the org check in Types.number() !!!!"); // assertLink(d,1,2); // assertLink(d,4,7); // assertNoLink(d,2,4); } public Mention mention(Document d, int mi) { return d.mentions().get(mi-1); } public void testFindNodeFromSpan() throws IOException{ //He and Fred went to the store. //They also went to the library. Document d; Tree t; d = Document.loadFiles("data/conjunctionsTest"); t = d.findNodeThatCoversSpan(0, 0, 0); assertEquals("He", t.yield().toString()); t = d.findNodeThatCoversSpan(0, 0, 2); assertEquals("He and Fred", t.yield().toString()); t = d.findNodeThatCoversSpan(0, 0, 3); assertEquals("He and Fred went to the store .", t.yield().toString()); t = d.findNodeThatCoversSpan(0, 3, 5); assertEquals("went to the store", t.yield().toString()); // Jerusalem 7-15 -LRB- AFP -RRB- - A high level Israeli army official has said today Saturday that Israel believes Iran is set to begin acquiring nuclear capability for military purposes from 2005 and will be in a position to equip missiles with nuclear warheads , capable of reaching Israel , within ten years . // [0 Jerusalem] [1 7-15] [2 -LRB-] [3 AFP] [4 -RRB-] [5 -] [6 A] [7 high] [8 level] [9 Israeli] [10 army] [11 official] [12 has] [13 said] [14 today] [15 Saturday] [16 that] [17 Israel] [18 believes] [19 Iran] [20 is] [21 set] [22 to] [23 begin] [24 acquiring] [25 nuclear] [26 capability] [27 for] [28 military] [29 purposes] [30 from] [31 2005] [32 and] [33 will] [34 be] [35 in] [36 a] [37 position] [38 to] [39 equip] [40 missiles] [41 with] [42 nuclear] [43 warheads] [44 ,] [45 capable] // [46 of] [47 reaching] [48 Israel] [49 ,] [50 within] [51 ten] [52 years] [53 .] d = Document.loadFiles("data/20000715_AFP_ARB_0072_ENG"); t = d.findNodeThatCoversSpan(0, 0,0); assertEquals("Jerusalem", t.yield().toString()); t = d.findNodeThatCoversSpan(0, 48,48); // when using Tree.subTrees(), // this gets (S (VP (VBG reaching) (NP (NNP Israel)))) // but should be just (NP (NNP Israel)) assertEquals("Israel", t.yield().toString()); } public void testFindMentionDominatingNode() throws IOException{ //example from H&K 2009 //Walmart says Gitano, its top-selling brand, is underselling. Document d = Document.loadFiles("data/IWithinI"); _SimplePipeline.go(d); Tree t; Mention m; t = d.findNodeThatCoversSpan(0, 0, 0); m = d.findMentionDominatingNode(0, t); assertTrue(t.yield().toString(), t.yield().toString().equals("Walmart")); assertTrue(m.toString(), m.node().yield().toString().equals("Walmart")); t = d.findNodeThatCoversSpan(0, 6, 6); m = d.findMentionDominatingNode(0, t); assertTrue(t.yield().toString(), t.yield().toString().equals("brand")); assertTrue(m.toString(), m.node().yield().toString().equals("its top-selling brand")); t = d.findNodeThatCoversSpan(0, 2, 2); m = d.findMentionDominatingNode(0, t); assertTrue(t.yield().toString(), t.yield().toString().equals("Gitano")); assertTrue(m.toString(), m.node().yield().toString().equals("Gitano , its top-selling brand ,")); } public void testTokenAlignments() throws IOException { Document d = arkref.data.Document.loadFiles("data/indo"); String text = arkref.parsestuff.U.readFile("data/indo.sent"); String[] lines = text.split("\n"); int[] alignments = AnalysisUtilities.alignTokens(lines[0], d.sentences().get(0).words); assertFalse(ArrayUtils.contains(alignments, -1)); assertEquals(0, alignments[0]); //[JAKARTA] assertEquals(7, alignments[1]); //[,] assertEquals(9, alignments[2]); //[Indonesia] assertEquals(19, alignments[3]); //[(] assertEquals(20, alignments[4]); //[AP] assertEquals(22, alignments[5]); //[)] assertEquals(24, alignments[6]); //[_] assertEquals(26, alignments[7]); //[The] assertEquals(30, alignments[8]); //[youngest] assertEquals(39, alignments[9]); //[son] assertEquals(43, alignments[10]); //[of] assertEquals(46, alignments[11]); //[ex-dictator] assertEquals(58, alignments[12]); //[Suharto] assertEquals(66, alignments[13]); //[disobeyed] assertEquals(76, alignments[14]); //[a] assertEquals(78, alignments[15]); //[summons] assertEquals(86, alignments[16]); //[to] assertEquals(89, alignments[17]); //[surrender] assertEquals(99, alignments[18]); //[himself] assertEquals(107, alignments[19]); //[to] assertEquals(110, alignments[20]); //[prosecutors] assertEquals(122, alignments[21]); //[Monday] assertEquals(129, alignments[22]); //[and] assertEquals(133, alignments[23]); //[be] assertEquals(136, alignments[24]); //[imprisoned] assertEquals(147, alignments[25]); //[for] assertEquals(151, alignments[26]); //[corruption] assertEquals(161, alignments[27]); //[.] alignments = AnalysisUtilities.alignTokens(lines[1], d.sentences().get(1).words); assertFalse(ArrayUtils.contains(alignments, -1)); assertEquals(0,alignments[0]); //Hutomo assertEquals(7,alignments[1]); //`` assertEquals(9,alignments[2]); //Tommy assertEquals(14,alignments[3]); //'' } public void testAdjunctPhrases() throws IOException{ //The students were tired of working. (s1) //To meet their friends, they went to the bar. (s2) //To meet new people, they talked to them. (s3) //Since Bill wanted to talk to John, he picked up the phone. (s4) //To Susan, she seemed nice. (s5) Document d = Document.loadFiles("data/adjunctPhrases"); _SimplePipeline.go(d); assertLink(1,3, d); //students, their assertLink(1,4, d); //students, they assertNoLink(2,4, d); //their friends, they assertLink(1,7, d); //students, they (s3) assertNoLink(6,7, d); //they (s3), new people assertLink(8,6, d); //them (s3), new people assertNoLink(1,8, d); //students, them (s3) assertLink(9,11,d); //Bill, he (s4) assertNoLink(10,11,d); //John, he (s4) assertNoLink(13,14,d); //Susan, she (s5) } /*public void testDisallowPronounsMatchesInQuotes() throws IOException{ //John said, "You need to see him." //Susan said, "She needs to see him." //She also said, "She needs to see him." Document d = Document.loadFiles("data/quotations"); _SimplePipeline.go(d); assertSurface(d,4,"Susan"); assertFalse(SyntacticPaths.isInQuotation(d.mentions().get(4-1))); //susan assertSurface(d,7,"She"); assertFalse(SyntacticPaths.isInQuotation(d.mentions().get(7-1))); //she (s3) assertNoLink(1,2, d); //John, you assertNoLink(1,3, d); //John, him assertNoLink(4,5, d); //Susan, she assertNoLink(1,6, d); //John, he (s2) assertLink(4,7, d); //Susan, She assertNoLink(7,8, d); //She, she assertNoLink(1,9, d); //John, him (s3) }*/ }