package edu.stanford.nlp.naturalli; import edu.stanford.nlp.classify.LinearClassifier; import edu.stanford.nlp.international.Language; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.ling.IndexedWord; import edu.stanford.nlp.semgraph.SemanticGraph; import edu.stanford.nlp.stats.ClassicCounter; import edu.stanford.nlp.trees.GrammaticalRelation; import org.junit.Test; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import static org.junit.Assert.assertEquals; /** * A test of the hard-coded clause splitting rules. * * @author Gabor Angeli */ public class OpenIETest { protected CoreLabel mkWord(String gloss, int index) { CoreLabel w = new CoreLabel(); w.setWord(gloss); w.setValue(gloss); if (index >= 0) { w.setIndex(index); } return w; } protected Set<String> clauses(String conll) { List<CoreLabel> sentence = new ArrayList<>(); SemanticGraph tree = new SemanticGraph(); for (String line : conll.split("\n")) { if (line.trim().equals("")) { continue; } String[] fields = line.trim().split("\\s+"); int index = Integer.parseInt(fields[0]); String word = fields[1]; CoreLabel label = mkWord(word, index); sentence.add(label); if (fields[2].equals("0")) { tree.addRoot(new IndexedWord(label)); } else { tree.addVertex(new IndexedWord(label)); } if (fields.length > 4) { label.setTag(fields[4]); } if (fields.length > 5) { label.setNER(fields[5]); } if (fields.length > 6) { label.setLemma(fields[6]); } } int i = 0; for (String line : conll.split("\n")) { if (line.trim().equals("")) { continue; } String[] fields = line.trim().split("\\s+"); int parent = Integer.parseInt(fields[2]); String reln = fields[3]; if (parent > 0) { tree.addEdge( new IndexedWord(sentence.get(parent - 1)), new IndexedWord(sentence.get(i)), new GrammaticalRelation(Language.English, reln, null, null), 1.0, false ); } i += 1; } // Run extractor ClauseSplitterSearchProblem problem = new ClauseSplitterSearchProblem(tree, true); Set<String> clauses = new HashSet<>(); problem.search( triple -> { clauses.add(triple.third.get().toString()); return true; }, new LinearClassifier<>(new ClassicCounter<>()), ClauseSplitterSearchProblem.HARD_SPLITS, triple -> new ClassicCounter<String>(){{setCount("__undocumented_junit_no_classifier", 1.0);}}, 100000); return clauses; } @Test public void testNoClauses() { assertEquals(new HashSet<String>() {{ add("cats have tails"); }}, clauses( "1\tcats\t2\tnsubj\tNN\n" + "2\thave\t0\troot\tVB\n" + "3\ttails\t2\tdobj\tNN\n" )); } @Test public void testXCompObj() { assertEquals(new HashSet<String>() {{ add("I persuaded Fred to leave the room"); add("Fred leave the room"); }}, clauses( "1\tI\t2\tnsubj\tPR\n" + "2\tpersuaded\t0\troot\tVBD\n" + "3\tFred\t2\tdobj\tNNP\n" + "4\tto\t5\taux\tTO\n" + "5\tleave\t2\txcomp\tVB\n" + "6\tthe\t7\tdet\tDT\n" + "7\troom\t5\tdobj\tNN\n" )); } @Test public void testXCompSubj() { assertEquals(new HashSet<String>() {{ add("I was persuaded to leave the room"); add("I leave the room"); }}, clauses( "1\tI\t3\tnsubjpass\tPR\n" + "2\twas\t3\tauxpass\tVB\n" + "3\tpersuaded\t0\troot\tVBD\n" + "4\tto\t5\taux\tTO\n" + "5\tleave\t3\txcomp\tVB\n" + "6\tthe\t7\tdet\tDT\n" + "7\troom\t5\tdobj\tNN\n" )); } @Test public void testCComp() { assertEquals(new HashSet<String>() {{ add("I suggested that he leave the room"); add("he leave the room"); }}, clauses( "1\tI\t2\tnsubj\tPR\n" + "2\tsuggested\t0\troot\tVBD\n" + "3\tthat\t5\tmark\tIN\n" + "4\the\t5\tnsubj\tPR\n" + "5\tleave\t2\tccomp\tVB\n" + "6\tthe\t7\tdet\tDT\n" + "7\troom\t5\tdobj\tNN\n" )); } }