package edu.stanford.nlp.coref;
import java.util.Properties;
import edu.stanford.nlp.coref.data.Dictionaries;
import edu.stanford.nlp.coref.data.Document;
import edu.stanford.nlp.coref.data.DocumentMaker;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.logging.Redwood;
/**
 * An interface for classes that iterate through coreference documents and process them one by
 * one.
 *
 * <p>Implementations supply {@link #process(int, Document)} and {@link #finish()}; the default
 * {@code run} methods pull documents from a {@link DocumentMaker}, hand each one to
 * {@code process}, and call {@code finish} once the maker is exhausted.
 *
 * @author Kevin Clark
 */
public interface CorefDocumentProcessor {

  /**
   * Handles a single document.
   *
   * @param id the index of the document; {@link #run(DocumentMaker)} numbers documents
   *     consecutively starting from 0
   * @param document the document to process
   */
  void process(int id, Document document);

  /**
   * Called once by {@link #run(DocumentMaker)} after the last document has been processed.
   *
   * @throws Exception if the implementation fails while finishing up
   */
  void finish() throws Exception;

  /**
   * Returns the name under which {@link #run(DocumentMaker)} logs its progress messages.
   *
   * @return by default, the fully qualified name of the runtime class
   */
  default String getName() {
    return this.getClass().getName();
  }

  /**
   * Builds a {@link DocumentMaker} from the given configuration and runs this processor over
   * every document it yields.
   *
   * @param props the properties handed to the {@code DocumentMaker}
   * @param dictionaries the dictionaries handed to the {@code DocumentMaker}
   * @throws Exception if document creation, processing, or {@link #finish()} fails
   */
  default void run(Properties props, Dictionaries dictionaries) throws Exception {
    run(new DocumentMaker(props, dictionaries));
  }

  /**
   * Same as {@link #run(Properties, Dictionaries)}, but clears the CoreNLP annotator pool first.
   *
   * @param props the properties handed to the {@code DocumentMaker}
   * @param dictionaries the dictionaries handed to the {@code DocumentMaker}
   * @throws Exception if document creation, processing, or {@link #finish()} fails
   */
  default void runFromScratch(Properties props, Dictionaries dictionaries) throws Exception {
    // Some annotators produce slightly different outputs when running over the same input data
    // twice. Clearing the annotator pool before building the DocumentMaker avoids this.
    StanfordCoreNLP.clearAnnotatorPool();
    run(new DocumentMaker(props, dictionaries));
  }

  /**
   * Feeds every document produced by {@code docMaker} to {@link #process(int, Document)}, logs
   * how long each one took, and finally calls {@link #finish()}.
   *
   * <p>Iteration stops as soon as {@code docMaker.nextDoc()} returns {@code null}.
   *
   * @param docMaker the source of documents
   * @throws Exception if fetching a document, processing it, or {@link #finish()} fails
   */
  default void run(DocumentMaker docMaker) throws Exception {
    Redwood.hideChannelsEverywhere(
        "debug-mention", "debug-preprocessor", "debug-docreader", "debug-md");

    int docId = 0;
    Document document = docMaker.nextDoc();
    // System.nanoTime() is monotonic, so the reported durations are immune to wall-clock
    // adjustments (unlike System.currentTimeMillis()).
    long startNanos = System.nanoTime();
    while (document != null) {
      process(docId, document);

      // Truncate to whole milliseconds first so the message keeps millisecond precision.
      long elapsedMillis = (System.nanoTime() - startNanos) / 1_000_000L;
      Redwood.log(getName(), "Processed document " + docId + " in "
          + (elapsedMillis / 1000.0) + "s");

      // The timer restarts before the next fetch, so for every document after the first the
      // reported time also covers the docMaker.nextDoc() call.
      startNanos = System.nanoTime();
      docId++;
      document = docMaker.nextDoc();
    }
    finish();
  }
}