package edu.stanford.nlp.coref;
import java.io.FileOutputStream;
import java.io.PrintWriter;
import java.util.Calendar;
import java.util.Map;
import java.util.Properties;
import java.util.logging.Logger;
import edu.stanford.nlp.coref.data.CorefChain;
import edu.stanford.nlp.coref.data.CorefCluster;
import edu.stanford.nlp.coref.data.Dictionaries;
import edu.stanford.nlp.coref.data.Document;
import edu.stanford.nlp.coref.data.DocumentMaker;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.logging.Redwood;
/**
 * Class for running coreference algorithms.
 *
 * <p>Bundles a {@link DocumentMaker} with a {@link CorefAlgorithm} and offers two entry points:
 * {@link #annotate(Annotation)} for a single annotation, and {@link #runOnConll(Properties)}
 * for a batch run that writes CoNLL-formatted output files and scores them.
 *
 * @author Kevin Clark
 */
public class CorefSystem {
  /** Builds the coref {@link Document}s that the algorithm operates on. */
  private final DocumentMaker docMaker;
  /** The coreference algorithm that is run on each document. */
  private final CorefAlgorithm corefAlgorithm;
  /** Whether {@link #annotate(Annotation)} strips singleton clusters after running coref. */
  private final boolean removeSingletonClusters;
  /** Whether {@link #runOnConll(Properties)} logs the time spent on each document. */
  private final boolean verbose;

  /**
   * Creates a system whose components are all configured from {@code props}.
   *
   * @param props configuration used to build the dictionaries, the document maker and the
   *     algorithm, and to read the singleton-removal and verbosity settings
   * @throws RuntimeException wrapping any exception raised while building a component
   */
  public CorefSystem(Properties props) {
    try {
      Dictionaries dictionaries = new Dictionaries(props);
      docMaker = new DocumentMaker(props, dictionaries);
      corefAlgorithm = CorefAlgorithm.fromProps(props, dictionaries);
      removeSingletonClusters = CorefProperties.removeSingletonClusters(props);
      verbose = CorefProperties.verbose(props);
    } catch (Exception e) {
      throw new RuntimeException("Error initializing coref system", e);
    }
  }

  /**
   * Creates a system from already-constructed components.
   *
   * @param docMaker builds documents from annotations
   * @param corefAlgorithm algorithm to run on each document
   * @param removeSingletonClusters whether {@link #annotate(Annotation)} removes singleton
   *     clusters
   * @param verbose whether {@link #runOnConll(Properties)} logs per-document timing
   */
  public CorefSystem(DocumentMaker docMaker, CorefAlgorithm corefAlgorithm,
      boolean removeSingletonClusters, boolean verbose) {
    this.docMaker = docMaker;
    this.corefAlgorithm = corefAlgorithm;
    this.removeSingletonClusters = removeSingletonClusters;
    this.verbose = verbose;
  }

  /**
   * Runs coreference on {@code ann} and stores the result on it.
   *
   * <p>The result is a map from cluster ID to {@link CorefChain}, set under
   * {@link CorefCoreAnnotations.CorefChainAnnotation}.
   *
   * @param ann annotation to build a document from; receives the coref chains
   * @throws RuntimeException wrapping any exception raised while building the document
   */
  public void annotate(Annotation ann) {
    Document document;
    try {
      document = docMaker.makeDocument(ann);
    } catch (Exception e) {
      throw new RuntimeException("Error making document", e);
    }

    CorefUtils.checkForInterrupt();
    corefAlgorithm.runCoref(document);
    if (removeSingletonClusters) {
      CorefUtils.removeSingletonClusters(document);
    }
    CorefUtils.checkForInterrupt();

    // Convert each internal cluster into a CorefChain keyed by its cluster ID.
    Map<Integer, CorefChain> result = Generics.newHashMap();
    for (CorefCluster c : document.corefClusters.values()) {
      result.put(c.clusterID, new CorefChain(c, document.positions));
    }
    ann.set(CorefCoreAnnotations.CorefChainAnnotation.class, result);
  }

  /**
   * Runs the algorithm over every document supplied by the document maker, writes three
   * CoNLL-formatted files (gold, predicted before coref, predicted after coref) and logs the
   * scorer summaries for the before/after predictions against gold.
   *
   * <p>The output file names are the configured CoNLL output path followed by the current
   * time (with whitespace and colons replaced by dashes) and a per-file suffix. Singleton
   * clusters are always removed before the after-coref output is written, regardless of the
   * {@code removeSingletonClusters} setting used by {@link #annotate(Annotation)}.
   *
   * @param props configuration providing the CoNLL output path and the scorer path
   * @throws Exception if an output file cannot be opened, or if document processing or
   *     scoring fails
   */
  public void runOnConll(Properties props) throws Exception {
    String baseName = CorefProperties.conllOutputPath(props) +
        Calendar.getInstance().getTime().toString().replaceAll("\\s", "-").replaceAll(":", "-");
    String goldOutput = baseName + ".gold.txt";
    String beforeCorefOutput = baseName + ".predicted.txt";
    String afterCorefOutput = baseName + ".coref.predicted.txt";

    // The writers buffer their output, so they must be closed (and thereby flushed) BEFORE
    // the scorer is handed the file paths; otherwise it may see truncated or empty files.
    // try-with-resources also releases the file handles if processing throws.
    try (PrintWriter writerGold = new PrintWriter(new FileOutputStream(goldOutput));
         PrintWriter writerBeforeCoref = new PrintWriter(new FileOutputStream(beforeCorefOutput));
         PrintWriter writerAfterCoref = new PrintWriter(new FileOutputStream(afterCorefOutput))) {
      (new CorefDocumentProcessor() {
        @Override
        public void process(int id, Document document) {
          // Gold and pre-coref output must be written before runCoref is invoked on the document.
          writerGold.print(CorefPrinter.printConllOutput(document, true));
          writerBeforeCoref.print(CorefPrinter.printConllOutput(document, false));
          long time = System.currentTimeMillis();
          corefAlgorithm.runCoref(document);
          if (verbose) {
            Redwood.log(getName(), "Coref took "
                + (System.currentTimeMillis() - time) / 1000.0 + "s");
          }
          CorefUtils.removeSingletonClusters(document);
          writerAfterCoref.print(CorefPrinter.printConllOutput(document, false, true));
        }

        @Override
        public void finish() throws Exception {}

        @Override
        public String getName() {
          return corefAlgorithm.getClass().getName();
        }
      }).run(docMaker);
    }

    // All three files are now complete on disk; score them.
    Logger logger = Logger.getLogger(CorefSystem.class.getName());
    String scorerPath = CorefProperties.getScorerPath(props);
    String summary = CorefScorer.getEvalSummary(scorerPath, goldOutput, beforeCorefOutput);
    CorefScorer.printScoreSummary(summary, logger, false);
    summary = CorefScorer.getEvalSummary(scorerPath, goldOutput, afterCorefOutput);
    CorefScorer.printScoreSummary(summary, logger, true);
    CorefScorer.printFinalConllScore(summary);
  }

  /**
   * Command-line entry point: converts {@code args} to properties, builds a system from them
   * and runs the CoNLL evaluation.
   *
   * @param args command-line arguments, converted via {@link StringUtils#argsToProperties}
   * @throws Exception if the CoNLL run fails
   */
  public static void main(String[] args) throws Exception {
    Properties props = StringUtils.argsToProperties(args);
    CorefSystem coref = new CorefSystem(props);
    coref.runOnConll(props);
  }
}