package edu.stanford.nlp.coref.statistical;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import edu.stanford.nlp.coref.CorefDocumentProcessor;
import edu.stanford.nlp.coref.data.Dictionaries;
import edu.stanford.nlp.coref.data.Document;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.util.Pair;

/**
 * Runs feature extraction over coreference documents.
 *
 * @author Kevin Clark
 */
public class FeatureExtractorRunner implements CorefDocumentProcessor {
  private final FeatureExtractor extractor;
  private final Compressor<String> compressor;
  // Mention-pair labels keyed by document id: (mention index, mention index) -> coreferent?
  private final Map<Integer, Map<Pair<Integer, Integer>, Boolean>> dataset;
  private final List<DocumentExamples> documents;

  public FeatureExtractorRunner(Properties props, Dictionaries dictionaries) {
    documents = new ArrayList<>();
    compressor = new Compressor<>();
    extractor = new FeatureExtractor(props, dictionaries, compressor);
    try {
      // Load the previously built dataset of labeled mention pairs.
      dataset = IOUtils.readObjectFromFile(StatisticalCorefTrainer.datasetFile);
    } catch (Exception e) {
      throw new RuntimeException("Error initializing FeatureExtractorRunner", e);
    }
  }

  @Override
  public void process(int id, Document document) {
    // Only extract features for documents that appear in the training dataset.
    if (dataset.containsKey(id)) {
      documents.add(extractor.extract(id, document, dataset.get(id)));
    }
  }

  @Override
  public void finish() throws Exception {
    // Persist the extracted examples and the feature-name compressor for training.
    IOUtils.writeObjectToFile(documents, StatisticalCorefTrainer.extractedFeaturesFile);
    IOUtils.writeObjectToFile(compressor, StatisticalCorefTrainer.compressorFile);
  }
}
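
// A minimal usage sketch (not part of this file; everything below is an assumption).
// In CoreNLP's statistical coref pipeline, a CorefDocumentProcessor is typically driven
// through the interface's default run(Properties, Dictionaries) method, which builds each
// Document, calls process(...) on it, and then calls finish(). Assuming
// StatisticalCorefTrainer.datasetFile has already been written by an earlier step:
//
//   Properties props = StringUtils.argsToProperties(args);  // hypothetical entry point
//   Dictionaries dictionaries = new Dictionaries(props);
//   new FeatureExtractorRunner(props, dictionaries).run(props, dictionaries);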