package edu.stanford.nlp.pipeline; import edu.stanford.nlp.coref.CorefCoreAnnotations; import edu.stanford.nlp.coref.data.CorefChain; import edu.stanford.nlp.util.StringUtils; import java.util.*; import junit.framework.TestCase; import org.junit.Test; /** The purpose of this test is to check all flavors of coreference work * when integrated with the CorefAnnotator. */ public class CorefAnnotatorSanityITest extends TestCase { public StanfordCoreNLP pipeline; public String englishDoc = "Barack Obama is the president of the United States. " + "He was elected in 2008. " + "Over the course of the election, Obama inspired many young voters."; public String englishCorefResult = "(2,1,[1,2]) -> (1,2,[1,3]), that is: \"He\" -> \"Barack Obama\"\n" + "(3,8,[8,9]) -> (1,2,[1,3]), that is: \"Obama\" -> \"Barack Obama\""; public String chineseDoc = "巴拉克·奥巴马是美国总统。他在2008年当选"; public String chineseCorefResult = "(2,1,[1,2]) -> (1,1,[1,2]), that is: \"他\" -> \"巴拉克·奥巴马\""; // helper to print out coref chains public String getCorefChainString(Map<Integer, CorefChain> corefChains) { String returnString = ""; if (corefChains != null) { for (CorefChain chain : corefChains.values()) { CorefChain.CorefMention representative = chain.getRepresentativeMention(); boolean outputHeading = false; for (CorefChain.CorefMention mention : chain.getMentionsInTextualOrder()) { if (mention == representative) continue; /*if (!outputHeading) { outputHeading = true; System.err.println("Coreference set:"); }*/ // all offsets start at 1! String corefResultString = String.format("(%d,%d,[%d,%d]) -> (%d,%d,[%d,%d]), that is: \"%s\" -> \"%s\"%n", mention.sentNum, mention.headIndex, mention.startIndex, mention.endIndex, representative.sentNum, representative.headIndex, representative.startIndex, representative.endIndex, mention.mentionSpan, representative.mentionSpan); returnString += corefResultString; } } } return returnString.trim(); } @Test public void testStatisticalEnglishSlow() { // build pipeline Properties props = new Properties(); props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,mention,coref"); props.setProperty("coref.algorithm", "clustering"); props.setProperty("coref.md.type", "rule"); pipeline = new StanfordCoreNLP(props); // build annotation Annotation annotation = new Annotation(englishDoc); // annotate pipeline.annotate(annotation); // check coref chains make sense Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class); assertEquals(getCorefChainString(corefChains),englishCorefResult); } @Test public void testStatisticalEnglishFast() { // build pipeline Properties props = new Properties(); props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,depparse,mention,coref"); props.setProperty("coref.algorithm", "statistical"); props.setProperty("coref.md.type", "dependency"); pipeline = new StanfordCoreNLP(props); // build annotation Annotation annotation = new Annotation(englishDoc); // annotate pipeline.annotate(annotation); // check coref chains make sense Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class); assertEquals(getCorefChainString(corefChains),englishCorefResult); } @Test public void testNeuralEnglish() { // build pipeline Properties props = new Properties(); props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,mention,coref"); props.setProperty("coref.algorithm", "neural"); props.setProperty("coref.md.type", "rule"); pipeline = new StanfordCoreNLP(props); // build annotation Annotation annotation = new Annotation(englishDoc); // annotate pipeline.annotate(annotation); // check coref chains make sense Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class); assertEquals(getCorefChainString(corefChains), englishCorefResult); } @Test public void testHybridChinese() { // build pipeline Properties props = StringUtils.argsToProperties("-props", "StanfordCoreNLP-chinese.properties"); pipeline = new StanfordCoreNLP(props); // build annotation Annotation annotation = new Annotation(chineseDoc); // annotate pipeline.annotate(annotation); // check coref chains make sense Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class); assertEquals(getCorefChainString(corefChains), chineseCorefResult); } @Test public void testNeuralChinese() { // build pipeline Properties props = StringUtils.argsToProperties("-props", "StanfordCoreNLP-chinese.properties"); props.setProperty("coref.algorithm", "neural"); props.setProperty("coref.md.liberalChineseMD", "true"); pipeline = new StanfordCoreNLP(props); // build annotation Annotation annotation = new Annotation(chineseDoc); // annotate pipeline.annotate(annotation); // check coref chains make sense Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class); assertEquals(getCorefChainString(corefChains), chineseCorefResult); } }