package edu.stanford.nlp.quoteattribution; import edu.stanford.nlp.classify.Classifier; import edu.stanford.nlp.classify.GeneralDataset; import edu.stanford.nlp.classify.LinearClassifierFactory; import edu.stanford.nlp.ling.CoreAnnotations; import edu.stanford.nlp.ling.RVFDatum; import edu.stanford.nlp.io.*; import edu.stanford.nlp.pipeline.Annotation; import edu.stanford.nlp.pipeline.QuoteAttributionAnnotator; import edu.stanford.nlp.quoteattribution.Sieves.Sieve; import edu.stanford.nlp.quoteattribution.Sieves.training.SupervisedSieveTraining; import edu.stanford.nlp.util.CoreMap; import edu.stanford.nlp.util.Pair; import edu.stanford.nlp.util.*; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.ObjectInputStream; import java.util.List; /** * Created by michaelf on 3/31/16. * */ public class ExtractQuotesClassifier { boolean verbose = true; private Classifier<String, String> quoteToMentionClassifier; public ExtractQuotesClassifier(GeneralDataset<String, String> trainingSet) { LinearClassifierFactory<String, String> lcf = new LinearClassifierFactory<>(); quoteToMentionClassifier = lcf.trainClassifier(trainingSet); } public ExtractQuotesClassifier(String modelPath) { try { ObjectInputStream si = IOUtils.readStreamFromString(modelPath); quoteToMentionClassifier = (Classifier<String, String>) si.readObject(); si.close(); } catch (FileNotFoundException e) { e.printStackTrace(); throw new RuntimeException(); } catch (ClassNotFoundException e) { e.printStackTrace(); throw new RuntimeException(); } catch (IOException e) { e.printStackTrace(); throw new RuntimeException(); } } public Classifier<String, String> getClassifier() { return quoteToMentionClassifier; } public void scoreBestMentionNew(SupervisedSieveTraining.FeaturesData fd, Annotation doc) { List<CoreMap> quotes = doc.get(CoreAnnotations.QuotationsAnnotation.class); for(int i = 0; i < quotes.size(); i++) { CoreMap quote = quotes.get(i); if (quote.get(QuoteAttributionAnnotator.MentionAnnotation.class) != null) { continue; } double maxConfidence = 0; int maxDataIdx = -1; int goldDataIdx = -1; Pair<Integer, Integer> dataRange = fd.mapQuoteToDataRange.get(i); if(dataRange == null) { continue; } else { for(int dataIdx = dataRange.first; dataIdx <= dataRange.second; dataIdx++) { RVFDatum<String, String> datum = fd.dataset.getRVFDatum(dataIdx); double isMentionConfidence = quoteToMentionClassifier.scoresOf(datum).getCount("isMention"); if(isMentionConfidence > maxConfidence) { maxConfidence = isMentionConfidence; maxDataIdx = dataIdx; } } if(maxDataIdx != -1) { Sieve.MentionData mentionData = fd.mapDatumToMention.get(maxDataIdx); if(mentionData.type.equals("animate noun")) continue; quote.set(QuoteAttributionAnnotator.MentionAnnotation.class, mentionData.text); quote.set(QuoteAttributionAnnotator.MentionBeginAnnotation.class, mentionData.begin); quote.set(QuoteAttributionAnnotator.MentionEndAnnotation.class, mentionData.end); quote.set(QuoteAttributionAnnotator.MentionTypeAnnotation.class, mentionData.type); quote.set(QuoteAttributionAnnotator.MentionSieveAnnotation.class, "supervised"); } } } } }