DcorefExactOutputITest.java example

Explorer
CoreNLP-master
package edu.stanford.nlp.dcoref;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import edu.stanford.nlp.coref.CorefCoreAnnotations;
import edu.stanford.nlp.coref.data.CorefChain;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.Generics;

import junit.framework.TestCase;

/**
 * Run the dcoref system on a particular input file from the DEFT
 * project.  Check that the output is an exact match for the output we
 * expect to get.
 * <br>
 * Expected results are represented in a data file in the source tree.
 * Rather than try to rebuild the CorefChain objects from the expected
 * results, we keep an internal class which represents them in a very
 * simple manner.  Also included are utility methods to rewrite the
 * expected results file if we change the sample input used.
 * <br>
 * Assuming the test file has not changed, the command line to rebuild
 * the expected output is
 * <br>
 * <code>
 * java edu.stanford.nlp.dcoref.DcorefExactOutputITest projects/core/data/edu/stanford/nlp/dcoref/STILLALONEWOLF_20050102.1100.eng.LDC2005E83.sgm projects/core/data/edu/stanford/nlp/dcoref/STILLALONEWOLF_20050102.1100.eng.LDC2005E83.expectedcoref
 * </code>
 *
 * @author John Bauer
 */
public class DcorefExactOutputITest extends TestCase {
  static StanfordCoreNLP pipeline = null;

  @Override
  public void setUp() {
    synchronized (DcorefExactOutputITest.class) {
      if (pipeline == null) {
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize, cleanxml, ssplit, pos, lemma, ner, parse, dcoref");
        pipeline = new StanfordCoreNLP(props);
      }
    }
  }

  static class ExpectedMention {
    int sentNum;
    String mentionSpan;

    ExpectedMention(String line) {
      String[] pieces = line.trim().split(" +", 2);
      sentNum = Integer.valueOf(pieces[0]);
      mentionSpan = pieces[1];
    }

    @Override
    public String toString() {
      return sentNum + ": " + mentionSpan;
    }
  }

  public static Map<Integer, List<ExpectedMention>> loadExpectedResults(String filename) throws IOException {
    Map<Integer, List<ExpectedMention>> results = Generics.newHashMap();

    int id = -1;
    List<String> mentionLines = new ArrayList<String>();
    for (String line : IOUtils.readLines(filename)) {
      if (line.trim().isEmpty()) {
        if (mentionLines.isEmpty()) {
          if (id != -1) {
            throw new RuntimeException("Found coref chain without any mentions, id " + id);
          }
          continue;
        }
        List<ExpectedMention> mentions = new ArrayList<ExpectedMention>();
        for (String mentionLine : mentionLines) {
          mentions.add(new ExpectedMention(mentionLine));
        }
        results.put(id, mentions);
        id = -1;
        mentionLines.clear();
        continue;
      }

      if (id == -1) {
        id = Integer.valueOf(line);
      } else {
        mentionLines.add(line.trim());
      }
    }

    return results;
  }

  public static void saveResults(String filename, Map<Integer, CorefChain> chains) throws IOException {
    FileWriter fout = new FileWriter(filename);
    BufferedWriter bout = new BufferedWriter(fout);

    List<Integer> keys = new ArrayList<Integer>(chains.keySet());
    Collections.sort(keys);

    for (Integer key : keys) {
      saveKey(bout, key, chains.get(key));
    }

    bout.flush();
    bout.close();
    fout.close();
  }

  public static void saveKey(BufferedWriter bout, Integer key, CorefChain chain) throws IOException {
    bout.write(key.toString());
    bout.newLine();
    for (CorefChain.CorefMention mention : chain.getMentionsInTextualOrder()) {
      bout.write(mention.sentNum + " " + mention.mentionSpan);
      bout.newLine();
    }
    bout.newLine();
  }

  public static boolean compareChain(List<ExpectedMention> expectedChain, CorefChain chain) {
    for (ExpectedMention expectedMention : expectedChain) {
      boolean found = false;
      for (CorefChain.CorefMention mention : chain.getMentionsInTextualOrder()) {
        if (mention.sentNum == expectedMention.sentNum && mention.mentionSpan.equals(expectedMention.mentionSpan)) {
          found = true;
          break;
        }
      }
      if (!found) return false;
    }
    return true;
  }

  public static void compareResults(Map<Integer, List<ExpectedMention>> expected, Map<Integer, CorefChain> chains) {
    assertEquals("Unexpected difference in number of chains", expected.size(), chains.size());

    // Note that we don't insist on the chain ID numbers being the same
    for (Map.Entry<Integer, List<ExpectedMention>> mapEntry : expected.entrySet()) {
      boolean found = false;
      List<ExpectedMention> expectedChain = mapEntry.getValue();
      for (CorefChain chain : chains.values()) {
        if (compareChain(expectedChain, chain)) {
          found = true;
          break;
        }
      }
      assertTrue("Could not find expected coref chain " + mapEntry.getKey() + " " + expectedChain + " in the results", found);
    }

    for (Map.Entry<Integer, CorefChain> integerCorefChainEntry : chains.entrySet()) {
      boolean found = false;
      CorefChain chain = integerCorefChainEntry.getValue();
      for (List<ExpectedMention> expectedChain : expected.values()) {
        if (compareChain(expectedChain, chain)) {
          found = true;
          break;
        }
      }
      assertTrue("Dcoref produced chain " + chain + " which was not in the expected results", found);
    }
  }

  public void testCoref() throws IOException {
    String doc = IOUtils.slurpFile("edu/stanford/nlp/dcoref/STILLALONEWOLF_20050102.1100.eng.LDC2005E83.sgm");
    Annotation annotation = pipeline.process(doc);
    Map<Integer, CorefChain> chains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
    Map<Integer, List<ExpectedMention>> expected = loadExpectedResults("edu/stanford/nlp/dcoref/STILLALONEWOLF_20050102.1100.eng.LDC2005E83.expectedcoref");
    compareResults(expected, chains);
  }

  /**
   * If run as a program, writes the expected output of args[0] to args[1]
   */
  public static void main(String[] args) throws Exception {
    if (args.length != 2) {
      System.err.println("Expected args <input> <output>");
      throw new IllegalArgumentException();
    }

    String input = args[0];
    String output = args[1];

    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, cleanxml, ssplit, pos, lemma, ner, parse, dcoref");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    // for example
    // "edu/stanford/nlp/dcoref/STILLALONEWOLF_20050102.1100.eng.LDC2005E83.sgm"
    String doc = IOUtils.slurpFile(input);
    Annotation annotation = pipeline.process(doc);
    Map<Integer, CorefChain> chains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
    saveResults(output, chains);
  }

}