package edu.stanford.nlp.naturalli;
import edu.stanford.nlp.ie.util.RelationTriple;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.StringUtils;
import org.junit.Ignore;
import org.junit.Test;
import java.util.*;
import static org.junit.Assert.*;
/**
* Test the natural logic OpenIE extractor at {@link edu.stanford.nlp.naturalli.OpenIE}.
*
* @author Gabor Angeli
*/
public class OpenIEITest {

  /**
   * Pipeline shared by every test in this class. It is built once, when the class is
   * initialized, from the settings in {@link #pipelineProperties()}.
   */
  protected static StanfordCoreNLP pipeline = new StanfordCoreNLP(pipelineProperties());

  /**
   * Builds the configuration handed to the shared {@link #pipeline}.
   *
   * <p>A plain {@link Properties} object is populated explicitly rather than through
   * double-brace initialization, which would create an anonymous {@code Properties} subclass.
   *
   * @return a fresh, fully populated properties object
   */
  private static Properties pipelineProperties() {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,depparse,natlog,openie");
    props.setProperty("openie.splitter.threshold", "0.25");
    props.setProperty("openie.ignoreaffinity", "false");
    props.setProperty("openie.max_entailments_per_clause", "1000");
    props.setProperty("openie.triple.strict", "true");
    // props.setProperty("openie.splitter.model", "/home/gabor/tmp/clauseSearcher.ser.gz");
    props.setProperty("ssplit.isOneSentence", "true");
    props.setProperty("tokenize.class", "PTBTokenizer");
    props.setProperty("tokenize.language", "en");
    props.setProperty("enforceRequirements", "true");
    return props;
  }

  /**
   * Collects the expected triples of a test case into a mutable set.
   *
   * @param expected tab-separated triples; may be empty when no extraction is expected
   * @return a set containing exactly the given strings
   */
  private static Set<String> expectedTriples(String... expected) {
    return new HashSet<>(Arrays.asList(expected));
  }

  /**
   * Drops everything up to and including the first tab of a rendered triple. If the string
   * contains no tab it is returned unchanged.
   *
   * @param rendered the {@code toString()} form of a triple
   * @return the text following the first tab
   */
  private static String withoutLeadingField(String rendered) {
    return rendered.substring(rendered.indexOf('\t') + 1);
  }

  /**
   * Runs the shared pipeline over the given text and returns its first sentence.
   *
   * @param text the raw text to annotate
   * @return the first element of the document's sentences annotation
   */
  public CoreMap annotate(String text) {
    Annotation ann = new Annotation(text);
    pipeline.annotate(ann);
    return ann.get(CoreAnnotations.SentencesAnnotation.class).get(0);
  }

  /**
   * Annotates the text and returns the relation triples attached to its first sentence,
   * failing with a readable message (instead of a bare NullPointerException later on) when
   * the annotation is absent.
   *
   * @param text the raw text to annotate
   * @return the relation triples of the first sentence; never null
   */
  private Collection<RelationTriple> relationTriples(String text) {
    Collection<RelationTriple> triples =
        annotate(text).get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
    assertNotNull("No relation triples annotation was produced for '" + text + "'", triples);
    return triples;
  }

  /**
   * Asserts that at least one triple extracted from the text renders exactly as
   * {@code "1.0\t" + expected}.
   *
   * <p>NOTE(review): the leading {@code 1.0} is presumably the triple's confidence as printed
   * by {@code RelationTriple#toString()} -- confirm against that class.
   *
   * @param expected the tab-separated triple to look for (subject, relation, object)
   * @param text the sentence to run extraction on
   */
  public void assertExtracted(String expected, String text) {
    String wanted = "1.0\t" + expected;
    boolean found = relationTriples(text).stream()
        .anyMatch(extraction -> extraction.toString().equals(wanted));
    assertTrue("The extraction (" + expected.replace("\t", "; ") + ") was not found in '" + text + "'", found);
  }

  /**
   * Asserts that the triples extracted from the text are exactly the expected ones.
   *
   * <p>Both sides are lower-cased, sorted and joined with newlines before being compared, so
   * the check ignores case and order. The first tab-separated field of every rendered
   * extraction is discarded before the comparison.
   *
   * @param expectedSet the tab-separated triples that should be extracted
   * @param text the sentence to run extraction on
   */
  public void assertExtracted(Set<String> expectedSet, String text) {
    Collection<RelationTriple> extractions = relationTriples(text);
    // Locale.ROOT keeps the case folding independent of the machine's default locale.
    String actual = StringUtils.join(
        extractions.stream()
            .map(RelationTriple::toString)
            .map(OpenIEITest::withoutLeadingField)
            .map(rendered -> rendered.toLowerCase(Locale.ROOT))
            .sorted(),
        "\n");
    String expected = StringUtils.join(
        expectedSet.stream().map(triple -> triple.toLowerCase(Locale.ROOT)).sorted(),
        "\n");
    assertEquals(expected, actual);
  }

  /**
   * Asserts that at least one entailed sentence fragment of the text renders exactly as the
   * expected string.
   *
   * @param expected the entailed sentence to look for
   * @param text the sentence to run the pipeline on
   */
  public void assertEntailed(String expected, String text) {
    Collection<SentenceFragment> extractions =
        annotate(text).get(NaturalLogicAnnotations.EntailedSentencesAnnotation.class);
    assertNotNull("No entailed sentences annotation was produced for '" + text + "'", extractions);
    boolean found = extractions.stream()
        .anyMatch(extraction -> extraction.toString().equals(expected));
    assertTrue("The sentence '" + expected + "' was not entailed from '" + text + "'", found);
  }

  /** Smoke test: the pipeline annotates a simple sentence without throwing. */
  @Test
  public void testAnnotatorRuns() {
    annotate("all cats have tails");
  }

  /** Shorter sentences are entailed from a quantified, modified sentence. */
  @Test
  public void testBasicEntailments() {
    assertEntailed("some cats have tails", "some blue cats have tails");
    assertEntailed("blue cats have tails", "some blue cats have tails");
    assertEntailed("cats have tails", "some blue cats have tails");
  }

  /** A single simple triple is extracted from a simple sentence. */
  @Test
  public void testBasicExtractions() {
    assertExtracted("cats\thave\ttails", "some cats have tails");
  }

  /** Examples labelled as coming from the paper; the disabled ones document known parse problems. */
  @Test
  public void testPaperExamples() {
    // assertExtracted("Fish\tlike to\tswim", "Fish like to swim");  // Parse is persistently broken
    // assertExtracted("Tom\tfighting\tJerry", "Tom and Jerry are fighting.");  // Collapsed CC Processed tree incorrectly adds nsubj from fighting->jrry
    assertExtracted("cats\tis with\ttails", "There are cats with tails.");
    assertExtracted("IBM\thas\tresearch group", "IBM's research group.");
    assertExtracted("rabbits\teat\tvegetables", "All rabbits eat vegetables.");
  }

  /** Placeholder: every assertion here is disabled, so this test currently passes vacuously. */
  @Test
  public void testOtherExamples() {
    // Preconj (but, parser currently fails)
    // assertExtracted("Mary\tis\tbeautiful", "Mary is both beautiful and smart.");
    // assertExtracted(Collections.EMPTY_SET, "Mary is neither beautiful and smart.");
  }

  /** Checks the complete extraction set for the George Boyd sentence. */
  @Test
  public void testExtractionsGeorgeBoyd() {
    assertExtracted(expectedTriples(
        "George Boyd\tjoined on\t21 february 2013",
        "George Boyd\tjoined for\tremainder",
        "George Boyd\tjoined for\tremainder of season",
        "George Boyd\tjoined on\tloan",
        "George Boyd\tjoined from\tpeterborough united"
    ), "On 21 February 2013 George Boyd joined on loan from Peterborough United for the remainder of the season.");
  }

  /** Checks the complete extraction set for the first Obama sentence. */
  @Test
  public void testExtractionsObamaWikiOne() {
    assertExtracted(expectedTriples(
        "Barack Hussein Obama II\tis 44th and current President of\tUnited States",
        "Barack Hussein Obama II\tis 44th President of\tUnited States",
        // "Barack Hussein Obama II\tis current President of\tUnited States",
        "Barack Hussein Obama II\tis President of\tUnited States",
        "Barack Hussein Obama II\tis\tPresident",
        // "Barack Hussein Obama II\tis\tcurrent President",
        "Barack Hussein Obama II\tis\t44th President"
    ), "Barack Hussein Obama II is the 44th and current President of the United States, and the first African American to hold the office.");
  }

  /** Checks the complete extraction set for the second Obama sentence (currently ignored). */
  @Test
  @Ignore  // TODO(gabor) dependency parse error.
  public void testExtractionsObamaWikiTwo() {
    assertExtracted(expectedTriples(
        "Obama\tis graduate of\tColumbia University",
        // "Obama\tis graduate of\tHarvard Law School",
        "Obama\tborn in\tHawaii",
        "Obama\tborn in\tHonolulu",
        "he\tserved as\tpresident of Harvard Law Review",
        "he\tserved as\tpresident",
        "he\tpresident of\tHarvard Law Review",
        "Obama\tis\tgraduate"
    ), "Born in Honolulu, Hawaii, Obama is a graduate of Columbia University and Harvard Law School, where he served as president of the Harvard Law Review.");
  }

  /** Checks the complete extraction set for the third Obama sentence (currently ignored). */
  @Test
  @Ignore  // TODO(gabor) why does this fail? [2016-06-07]
  public void testExtractionsObamaWikiThree() {
    assertExtracted(expectedTriples(
        "He\twas\tcommunity organizer in Chicago",
        "He\twas\tcommunity organizer",
        // "He\tearning\tlaw degree",
        "He\tearning\this law degree",
        "community organizer\tis in\tChicago"
    ), "He was a community organizer in Chicago before earning his law degree.");
  }

  /** Checks the complete extraction set for the fourth Obama sentence. */
  @Test
  public void testExtractionsObamaWikiFour() {
    assertExtracted(expectedTriples(
        "He\tworked as\tcivil rights attorney",
        "He\tworked as\trights attorney",
        "He\ttaught\tconstitutional law",
        "He\ttaught\tlaw",
        "He\ttaught law at\tUniversity of Chicago",
        "He\ttaught law at_time\tLaw School",
        // "He\ttaught law at\tUniversity of Chicago Law School from 1992",
        "He\ttaught law at\tUniversity",
        "He\ttaught law from\t1992 to 2004"  // shouldn't be here, but sometimes appears?
    ), "He worked as a civil rights attorney and taught constitutional law at the University of Chicago Law School from 1992 to 2004.");
  }

  /** Checks the complete extraction set for the fifth Obama sentence. */
  @Test
  public void testExtractionsObamaWikiFive() {
    assertExtracted(expectedTriples(
        "He\tserved\tthree terms",
        // note[gabor] Should get these
        // "He\trepresenting\t13th District in Illinois Senate",
        // "He\trepresenting\t13th District",
        // "He\trepresenting\tDistrict in Illinois Senate",
        // "He\trepresenting\tDistrict",
        // "He\trunning unsuccessfully for\tUnited States House of Representatives in 2000",
        "13th district\tis in\tIllinois Senate",
        "He\trunning unsuccessfully for\tUnited States House of Representatives",
        "He\trunning unsuccessfully for\tUnited States House",
        // "He\trunning for\tUnited States House of Representatives in 2000",
        "He\trunning for\tUnited States House of Representatives",
        "He\trunning for\tUnited States House",
        "He\trunning in\t2000",
        "He\trunning unsuccessfully in\t2000"
    ), "He served three terms representing the 13th District in the Illinois Senate from 1997 to 2004, running unsuccessfully for the United States House of Representatives in 2000.");
  }

  /** Checks the complete extraction set for the sixth Obama sentence. */
  @Test
  public void testExtractionsObamaWikiSix() {
    assertExtracted(expectedTriples(
        "He\tdefeated\tRepublican nominee John McCain",
        "He\tdefeated\tnominee John McCain",
        "He\tdefeated nominee John McCain in\telection",
        "He\tdefeated nominee John McCain in\tgeneral election",
        "He\twas\tinaugurated as president on January 20 2009",
        "He\twas inaugurated as\tpresident",
        "He\twas\tinaugurated",
        "He\twas inaugurated on\tJanuary 20 2009"
        // note[gabor] these are wrong!
        // "nominee john mccain\twas\tinaugurated",
        // "nominee john mccain\twas inaugurated as\tpresident",
        // "nominee john mccain\twas inaugurated as\tpresident on january 20 2009",
        // "Republican nominee John McCain\twas\tinaugurated",
        // "Republican nominee John McCain\twas inaugurated as\tpresident",
        // "Republican nominee John McCain\twas inaugurated as\tpresident on january 20 2009",
        // note[gabor] end wrong extractions
    ), "He then defeated Republican nominee John McCain in the general election, and was inaugurated as president on January 20, 2009.");
  }

  /** "There is no doubt that X" should yield X's triples; "There is doubt that X" should yield none. */
  @Test
  public void testThereIsNoDoubt() {
    assertExtracted(expectedTriples(
        "I\tam\tgreat"
    ), "There is no doubt that I am great.");
    assertExtracted(expectedTriples(
        "cats\thave\ttails"
    ), "There is no doubt that cats have tails.");
    assertExtracted(expectedTriples(), "There is doubt that cats have tails.");
  }

  /** A negated copula sentence should yield no triples, unlike its positive counterpart. */
  @Test
  public void testChessIsNotAPhysicalSport() {
    assertExtracted(expectedTriples(
        "Chess\tis\tphysical",
        "Chess\tis\tphysical sport",
        "Chess\tis\tsport"
    ), "Chess is a physical sport");
    assertExtracted(expectedTriples(), "Chess is not a physical sport");
  }

  /** Trivial always-passing test. */
  @Test
  public void dummyTest() {
    assertTrue(true);
  }
}