package edu.stanford.nlp.pipeline;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Properties;
import java.util.Set;
import edu.stanford.nlp.coref.CorefCoreAnnotations;
import edu.stanford.nlp.coref.CorefProperties;
import edu.stanford.nlp.coref.data.Dictionaries;
import edu.stanford.nlp.coref.data.Mention;
import edu.stanford.nlp.coref.md.CorefMentionFinder;
import edu.stanford.nlp.coref.md.DependencyCorefMentionFinder;
import edu.stanford.nlp.coref.md.HybridCorefMentionFinder;
import edu.stanford.nlp.coref.md.RuleBasedCorefMentionFinder;
import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.SemanticHeadFinder;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.trees.international.pennchinese.ChineseSemanticHeadFinder;
import edu.stanford.nlp.util.ArraySet;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.logging.Redwood;
/**
 * This class adds mention information to an Annotation.
 *
 * After annotation, each sentence will have a {@code List<Mention>} representing the
 * mentions found in that sentence.
 *
 * The {@code List<Mention>} is stored on the sentence under the annotation key
 * {@link edu.stanford.nlp.coref.CorefCoreAnnotations.CorefMentionsAnnotation}.
 *
 * @author heeyoung
 * @author Jason Bolton
 */
public class MentionAnnotator extends TextAnnotationCreator implements Annotator {

  /** A logger for this class */
  private static final Redwood.RedwoodChannels log = Redwood.channels(MentionAnnotator.class);

  /** Head finder chosen from the configured coref language; passed to the hybrid and rule-based finders. */
  HeadFinder headFinder;

  /** Mention detector used by {@link #annotate(Annotation)}; stays null if construction failed. */
  CorefMentionFinder md;

  /** Short name of the selected mention detector ("dependency", "hybrid" or "rule"); used for logging. */
  String mdName;

  /** Dictionaries built from the constructor properties and handed to the mention finder. */
  Dictionaries dictionaries;

  /**
   * The very same {@link Properties} object that was passed to the constructor (not a copy).
   * {@link #annotate(Annotation)} writes the "removeNestedMentions" key into it.
   */
  Properties corefProperties;

  /** Annotations this annotator needs; the exact contents depend on the mention detector type. */
  Set<Class<? extends CoreAnnotation>> mentionAnnotatorRequirements = new HashSet<>();

  /**
   * Builds the dictionaries, head finder and mention finder from the given properties.
   *
   * Construction is best-effort: if any step throws, the failure is logged (including the
   * exception itself) and the object is left partially initialized. A later call to
   * {@link #annotate(Annotation)} will then fail with an {@link IllegalStateException}.
   *
   * @param props coref configuration; the reference is retained and later modified by
   *              {@link #annotate(Annotation)}
   */
  public MentionAnnotator(Properties props) {
    try {
      corefProperties = props;
      dictionaries = new Dictionaries(props);
      headFinder = getHeadFinder(props);
      md = getMentionFinder(props, headFinder);
      log.info("Using mention detector type: "+mdName);
      mentionAnnotatorRequirements.addAll(Arrays.asList(
          CoreAnnotations.TokensAnnotation.class,
          CoreAnnotations.SentencesAnnotation.class,
          CoreAnnotations.PartOfSpeechAnnotation.class,
          CoreAnnotations.NamedEntityTagAnnotation.class,
          CoreAnnotations.IndexAnnotation.class,
          CoreAnnotations.TextAnnotation.class,
          CoreAnnotations.ValueAnnotation.class,
          SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class,
          SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class
      ));
    } catch (Exception e) {
      // Route the failure through the class logger (message first, then the exception as its
      // own record) instead of printing the stack trace directly.
      log.error("Error with building coref mention annotator!");
      log.error(e);
    }
  }

  /**
   * Runs mention detection over the document and stores each sentence's mentions under
   * {@link CorefCoreAnnotations.CorefMentionsAnnotation}. Mention IDs are assigned
   * consecutively across the whole document, starting at 0, in sentence order.
   *
   * Side effect: sets the "removeNestedMentions" key on the properties object that was
   * passed to the constructor.
   *
   * @param annotation the document to annotate
   * @throws IllegalStateException if the mention finder could not be built in the constructor
   */
  @Override
  public void annotate(Annotation annotation) {
    if (md == null) {
      // Fail with the root cause rather than an anonymous NullPointerException further down.
      throw new IllegalStateException(
          "Mention finder was not initialized; see the error logged when this annotator was constructed");
    }
    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);

    // TO DO: be careful, this could introduce a really hard to find bug
    // this is necessary for Chinese coreference
    // removeNested needs to be set to "false" for newswire text or big performance drop
    String docID = annotation.get(CoreAnnotations.DocIDAnnotation.class);
    if (docID == null) {
      docID = "";
    }
    String removeNested = keepNestedMentions(docID) ? "false" : "true";
    corefProperties.setProperty("removeNestedMentions", removeNested);

    List<List<Mention>> mentions = md.findMentions(annotation, dictionaries, corefProperties);

    int mentionIndex = 0;
    int sentenceIndex = 0;
    for (CoreMap sentence : sentences) {
      // presumably findMentions returns one list per sentence, in sentence order - confirm
      List<Mention> mentionsForThisSentence = mentions.get(sentenceIndex);
      sentence.set(CorefCoreAnnotations.CorefMentionsAnnotation.class, mentionsForThisSentence);
      sentenceIndex++;
      // mention IDs keep counting across sentences
      for (Mention m : mentionsForThisSentence) {
        m.mentionID = mentionIndex;
        mentionIndex++;
      }
    }
  }

  /**
   * Decides whether nested mentions should be kept for this document. True only when the
   * doc ID contains "nw", the input is CoNLL, the language is Chinese, and the
   * "coref.specialCaseNewswire" flag is set. Conditions are checked in that order and
   * short-circuit.
   */
  private boolean keepNestedMentions(String docID) {
    if (!docID.contains("nw")) {
      return false;
    }
    boolean conllInput = CorefProperties.conll(corefProperties)
        || "conll".equals(corefProperties.getProperty("coref.input.type", "raw"));
    if (!conllInput) {
      return false;
    }
    // equals(), not ==: reference identity only matches the JDK constant instance itself
    return Locale.CHINESE.equals(CorefProperties.getLanguage(corefProperties))
        && PropertiesUtils.getBool(corefProperties, "coref.specialCaseNewswire");
  }

  /**
   * Picks the head finder for the configured coref language.
   *
   * @param props coref configuration
   * @return a head finder for English or Chinese
   * @throws RuntimeException if the configured language is neither English nor Chinese
   */
  private static HeadFinder getHeadFinder(Properties props) {
    Locale lang = CorefProperties.getLanguage(props);
    if (Locale.ENGLISH.equals(lang)) {
      return new SemanticHeadFinder();
    }
    if (Locale.CHINESE.equals(lang)) {
      return new ChineseSemanticHeadFinder();
    }
    throw new RuntimeException("Invalid language setting: cannot load HeadFinder");
  }

  /**
   * Creates the mention finder selected by the properties, records its name in
   * {@link #mdName}, and registers the extra tree-based requirements for the hybrid and
   * rule-based finders. Any unrecognized type falls back to the rule-based finder.
   *
   * @param props coref configuration
   * @param headFinder head finder passed to the hybrid and rule-based finders
   * @return the mention finder to use
   * @throws ClassNotFoundException propagated from mention finder construction
   * @throws IOException propagated from mention finder construction
   */
  private CorefMentionFinder getMentionFinder(Properties props, HeadFinder headFinder)
      throws ClassNotFoundException, IOException {
    switch (CorefProperties.mdType(props)) {
      case DEPENDENCY:
        mdName = "dependency";
        return new DependencyCorefMentionFinder(props);
      case HYBRID:
        mdName = "hybrid";
        addTreeRequirements();
        return new HybridCorefMentionFinder(headFinder, props);
      case RULE:
      default:
        mdName = "rule";
        addTreeRequirements();
        return new RuleBasedCorefMentionFinder(headFinder, props);
    }
  }

  /** Registers the tree and begin/end index annotations required by the hybrid and rule-based finders. */
  private void addTreeRequirements() {
    mentionAnnotatorRequirements.add(TreeCoreAnnotations.TreeAnnotation.class);
    mentionAnnotatorRequirements.add(CoreAnnotations.BeginIndexAnnotation.class);
    mentionAnnotatorRequirements.add(CoreAnnotations.EndIndexAnnotation.class);
  }

  /**
   * @return a read-only view of the annotations this annotator requires, so callers cannot
   *         modify the annotator's internal set
   */
  @Override
  public Set<Class<? extends CoreAnnotation>> requires() {
    return Collections.unmodifiableSet(mentionAnnotatorRequirements);
  }

  /**
   * @return an unmodifiable set of the annotations this annotator declares as satisfied
   */
  @Override
  public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
    return Collections.unmodifiableSet(new ArraySet<>(Arrays.asList(
        CorefCoreAnnotations.CorefMentionsAnnotation.class,
        CoreAnnotations.ParagraphAnnotation.class,
        CoreAnnotations.SpeakerAnnotation.class,
        CoreAnnotations.UtteranceAnnotation.class
    )));
  }
}