package edu.stanford.nlp.pipeline;

import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.junit.Assert;

import junit.framework.TestCase;

import edu.stanford.nlp.coref.CorefCoreAnnotations;
import edu.stanford.nlp.coref.data.CorefChain;
import edu.stanford.nlp.coref.data.CorefChain.CorefMention;
import edu.stanford.nlp.dcoref.Constants;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.util.CoreMap;

/**
 * Integration test for {@link DeterministicCorefAnnotator}.
 *
 * <p>Runs short English texts through a pipeline ending in the deterministic
 * coreference annotator and checks that the resulting coref chains and the
 * per-token cluster ids are consistent with each other.
 */
public class DeterministicCorefAnnotatorITest extends TestCase {

  /** Pipeline shared by all tests; built on first use by {@link #setUp()}. */
  private static AnnotationPipeline pipeline;

  /**
   * Builds the shared annotation pipeline if it has not been built yet.
   *
   * <p>The pipeline is static, so it is constructed only once (under the class
   * lock) instead of being rebuilt before every test method.
   *
   * @throws Exception if any annotator fails to initialize
   */
  @Override
  public void setUp() throws Exception {
    synchronized (DeterministicCorefAnnotatorITest.class) {
      if (pipeline != null) {
        return;
      }

      AnnotationPipeline built = new AnnotationPipeline();
      built.addAnnotator(new TokenizerAnnotator(false, "en"));
      built.addAnnotator(new WordsToSentencesAnnotator(false));
      built.addAnnotator(new POSTaggerAnnotator(false));
      built.addAnnotator(new MorphaAnnotator(false));
      built.addAnnotator(new NERCombinerAnnotator(false));
      built.addAnnotator(new ParserAnnotator(false, -1));

      // Point dcoref at the default demonym / animate / inanimate resources.
      Properties corefProps = new Properties();
      corefProps.setProperty(Constants.DEMONYM_PROP, DefaultPaths.DEFAULT_DCOREF_DEMONYM);
      corefProps.setProperty(Constants.ANIMATE_PROP, DefaultPaths.DEFAULT_DCOREF_ANIMATE);
      corefProps.setProperty(Constants.INANIMATE_PROP, DefaultPaths.DEFAULT_DCOREF_INANIMATE);
      built.addAnnotator(new DeterministicCorefAnnotator(corefProps));

      // Publish only once fully assembled.
      pipeline = built;
    }
  }

  /**
   * Checks that a pronoun in a later sentence ends up in the same coref
   * cluster as the person name it refers to.
   *
   * @throws Exception if annotation fails
   */
  public void testDeterministicCorefAnnotator() throws Exception {
    // create annotation with text
    String text = "Dan Ramage is working for\nMicrosoft. He's in Seattle!\nAt least, he used to be. Ed is not in Seattle.";
    Annotation document = new Annotation(text);

    // annotate text with pipeline
    pipeline.annotate(document);

    // test CorefChainAnnotation
    Map<Integer, CorefChain> corefChains = document.get(CorefCoreAnnotations.CorefChainAnnotation.class);
    Assert.assertNotNull(corefChains);

    // test chainID = m.corefClusterID
    assertMentionsMatchChainIds(corefChains);

    // test CorefClusterIdAnnotation
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    CoreLabel ramageToken = tokenAt(sentences, 0, 1);
    CoreLabel heToken = tokenAt(sentences, 1, 0);

    Integer ramageClusterId = clusterIdOf(ramageToken);
    Integer heClusterId = clusterIdOf(heToken);
    Assert.assertNotNull(ramageClusterId);
    // Compare by value: cluster ids are boxed Integers, so reference identity
    // only happens to hold for small cached values.
    Assert.assertEquals(ramageClusterId, heClusterId);
  }

  /**
   * Tests named entities with exact string matches (also tests some more pronouns).
   *
   * @throws Exception if annotation fails
   */
  public void testSameString() throws Exception {
    // create annotation with text
    String text = "Your mom thinks she lives in Denver, but it's a big city. She actually lives outside of Denver.";
    Annotation document = new Annotation(text);

    // annotate text with pipeline
    pipeline.annotate(document);

    // test CorefChainAnnotation
    Map<Integer, CorefChain> chains = document.get(CorefCoreAnnotations.CorefChainAnnotation.class);
    Assert.assertNotNull(chains);

    // test CorefGraphAnnotation (disabled)
    // List<Pair<IntTuple, IntTuple>> graph = document.get(CorefCoreAnnotations.CorefGraphAnnotation.class);
    // Assert.assertNotNull(graph);
    // for( Pair<IntTuple, IntTuple> pair : graph ) {
    //   System.out.println("pair " + pair);
    // }

    // test chainID = m.corefClusterID
    assertMentionsMatchChainIds(chains);

    // test CorefClusterIdAnnotation
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    CoreLabel yourMomsToken = tokenAt(sentences, 0, 1);
    CoreLabel sheToken1 = tokenAt(sentences, 0, 3);
    CoreLabel sheToken2 = tokenAt(sentences, 1, 0);
    CoreLabel denverToken1 = tokenAt(sentences, 0, 6);
    CoreLabel denverToken2 = tokenAt(sentences, 1, 5);

    Integer yourMomsClusterId = clusterIdOf(yourMomsToken);
    Integer she1ClusterId = clusterIdOf(sheToken1);
    Integer she2ClusterId = clusterIdOf(sheToken2);
    Integer denver1ClusterId = clusterIdOf(denverToken1);
    Integer denver2ClusterId = clusterIdOf(denverToken2);

    Assert.assertNotNull(yourMomsClusterId);
    Assert.assertNotNull(she1ClusterId);
    Assert.assertNotNull(she2ClusterId);
    Assert.assertNotNull(denver1ClusterId);
    Assert.assertNotNull(denver2ClusterId);

    // Value comparisons on the boxed ids (not reference identity).
    Assert.assertEquals(yourMomsClusterId, she1ClusterId);
    Assert.assertEquals(yourMomsClusterId, she2ClusterId);
    Assert.assertEquals(denver1ClusterId, denver2ClusterId);
    Assert.assertFalse("person and city must be in different clusters",
        yourMomsClusterId.equals(denver1ClusterId));

    // test CorefClusterAnnotation (disabled)
    // Assert.assertEquals(yourMomsToken.get(CorefCoreAnnotations.CorefClusterAnnotation.class), sheToken1.get(CorefCoreAnnotations.CorefClusterAnnotation.class));
    // Assert.assertEquals(yourMomsToken.get(CorefCoreAnnotations.CorefClusterAnnotation.class), sheToken2.get(CorefCoreAnnotations.CorefClusterAnnotation.class));
    // Assert.assertEquals(denverToken1.get(CorefCoreAnnotations.CorefClusterAnnotation.class), denverToken2.get(CorefCoreAnnotations.CorefClusterAnnotation.class));
  }

  /**
   * Asserts that every mention in every chain carries the id of the chain it
   * is stored under.
   *
   * @param chains coref chains keyed by chain id
   */
  private static void assertMentionsMatchChainIds(Map<Integer, CorefChain> chains) {
    for (Map.Entry<Integer, CorefChain> entry : chains.entrySet()) {
      int chainID = entry.getKey();
      for (CorefMention m : entry.getValue().getMentionsInTextualOrder()) {
        Assert.assertEquals(chainID, m.corefClusterID);
      }
    }
  }

  /**
   * Returns the token at {@code tokenIndex} of sentence {@code sentenceIndex}
   * (both zero-based).
   */
  private static CoreLabel tokenAt(List<CoreMap> sentences, int sentenceIndex, int tokenIndex) {
    return sentences.get(sentenceIndex).get(CoreAnnotations.TokensAnnotation.class).get(tokenIndex);
  }

  /** Returns the coref cluster id annotated on {@code token}; may be {@code null}. */
  private static Integer clusterIdOf(CoreLabel token) {
    return token.get(CorefCoreAnnotations.CorefClusterIdAnnotation.class);
  }

  /**
   * Runs {@link #testDeterministicCorefAnnotator()} outside of a JUnit runner.
   *
   * @param args ignored
   * @throws Exception if pipeline construction or the test itself fails
   */
  public static void main(String[] args) throws Exception {
    DeterministicCorefAnnotatorITest itest = new DeterministicCorefAnnotatorITest();
    // No test runner here, so the pipeline must be built explicitly;
    // otherwise the static pipeline is still null when the test runs.
    itest.setUp();
    itest.testDeterministicCorefAnnotator();
  }
}