package edu.stanford.nlp.coref;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import edu.stanford.nlp.coref.data.Document;
import edu.stanford.nlp.coref.data.Mention;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
/**
* Class for printing out coreference output.
* @author Heeyoung Lee
* @author Kevin Clark
*/
public class CorefPrinter {

  /**
   * Prints a document in CoNLL format without filtering out any mentions.
   * Delegates to {@link #printConllOutput(Document, boolean, boolean)} with
   * {@code filterSingletons} set to {@code false}.
   *
   * @param document the document to print
   * @param gold if true, the gold mentions and gold cluster ids are printed;
   *             otherwise the predicted ones
   * @return the CoNLL-formatted text
   */
  public static String printConllOutput(Document document, boolean gold) {
    return printConllOutput(document, gold, false);
  }

  /**
   * Prints a document in CoNLL format, optionally dropping singleton mentions.
   * The mentions come from {@code document.goldMentions} when {@code gold} is true
   * and from {@code document.predictedMentions} otherwise.
   *
   * @param document the document to print
   * @param gold selects gold versus predicted mentions and cluster ids
   * @param filterSingletons if true, only mentions whose {@code corefClusterID} maps
   *                         to an entry of size greater than one in
   *                         {@code document.corefClusters} are printed; note that this
   *                         lookup uses {@code corefClusterID} regardless of {@code gold}
   * @return the CoNLL-formatted text
   */
  public static String printConllOutput(Document document, boolean gold, boolean filterSingletons) {
    List<List<Mention>> mentionsBySentence;
    if (gold) {
      mentionsBySentence = document.goldMentions;
    } else {
      mentionsBySentence = document.predictedMentions;
    }
    if (!filterSingletons) {
      return printConllOutput(document, mentionsBySentence, gold);
    }
    List<List<Mention>> kept = mentionsBySentence.stream()
        .map(sentenceMentions -> sentenceMentions.stream()
            .filter(mention -> hasMultiMentionCluster(document, mention))
            .collect(Collectors.toList()))
        .collect(Collectors.toList());
    return printConllOutput(document, kept, gold);
  }

  /**
   * Prints a document in CoNLL format using the supplied per-sentence mention lists.
   * <p>
   * The output starts with a {@code #begin document <docID>} line and ends with a
   * {@code #end document} line. Each token produces one line made of every column of
   * the corresponding {@code document.conllDoc} row except the last, each followed by
   * a tab, and then the coreference column. Each sentence is followed by an empty line.
   * <p>
   * In the coreference column a mention that starts at the token contributes
   * {@code (id}, a mention that ends at the token contributes {@code id)}, and a
   * one-token mention contributes {@code (id)}; entries are separated by {@code |},
   * and {@code -} is written when no mention touches the token.
   *
   * @param document the document to print
   * @param orderedMentions mentions to print, indexed by sentence number
   * @param gold if true, {@code goldCorefClusterID} is printed for each mention;
   *             otherwise {@code corefClusterID}
   * @return the CoNLL-formatted text
   */
  public static String printConllOutput(Document document,
                                        List<List<Mention>> orderedMentions, boolean gold) {
    Annotation annotation = document.annotation;
    List<List<String[]>> conllRowsBySentence = document.conllDoc.sentenceWordLists;
    String docID = annotation.get(CoreAnnotations.DocIDAnnotation.class);

    StringBuilder out = new StringBuilder();
    out.append("#begin document ").append(docID).append("\n");

    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    int sentenceCount = sentences.size();
    for (int s = 0; s < sentenceCount; s++) {
      List<CoreLabel> tokens = sentences.get(s).get(CoreAnnotations.TokensAnnotation.class);
      List<String[]> conllRows = conllRowsBySentence.get(s);
      int tokenCount = tokens.size();

      // One insertion-ordered bucket per token position for each marker type.
      Map<Integer, Set<Mention>> opening = Generics.newHashMap();
      Map<Integer, Set<Mention>> closing = Generics.newHashMap();
      Map<Integer, Set<Mention>> singleToken = Generics.newHashMap();
      for (int t = 0; t < tokenCount; t++) {
        opening.put(t, new LinkedHashSet<>());
        closing.put(t, new LinkedHashSet<>());
        singleToken.put(t, new LinkedHashSet<>());
      }

      // endIndex is exclusive, so the last token of a mention is endIndex - 1.
      for (Mention mention : orderedMentions.get(s)) {
        int lastToken = mention.endIndex - 1;
        if (mention.startIndex == lastToken) {
          singleToken.get(mention.startIndex).add(mention);
        } else {
          opening.get(mention.startIndex).add(mention);
          closing.get(lastToken).add(mention);
        }
      }

      for (int t = 0; t < tokenCount; t++) {
        StringBuilder corefCell = new StringBuilder();
        appendMarkers(corefCell, opening.get(t), gold, "(", "");
        appendMarkers(corefCell, singleToken.get(t), gold, "(", ")");
        appendMarkers(corefCell, closing.get(t), gold, "", ")");
        if (corefCell.length() == 0) {
          corefCell.append("-");
        }

        // Copy every input column but the last; the coreference column replaces it.
        String[] columns = conllRows.get(t);
        int lastColumn = columns.length - 1;
        for (int c = 0; c < lastColumn; c++) {
          out.append(columns[c]).append("\t");
        }
        out.append(corefCell).append("\n");
      }
      out.append("\n");
    }

    out.append("#end document").append("\n");
    return out.toString();
  }

  /**
   * Tells whether the mention's {@code corefClusterID} maps to an entry of size
   * greater than one in {@code document.corefClusters}.
   */
  private static boolean hasMultiMentionCluster(Document document, Mention mention) {
    if (document.corefClusters.get(mention.corefClusterID) == null) {
      return false;
    }
    return document.corefClusters.get(mention.corefClusterID).size() > 1;
  }

  /**
   * Appends one marker per mention to the coreference cell, writing {@code |} before
   * a marker whenever the cell already has content. Each marker is
   * {@code prefix + clusterId + suffix}.
   */
  private static void appendMarkers(StringBuilder cell, Set<Mention> mentions, boolean gold,
                                    String prefix, String suffix) {
    for (Mention mention : mentions) {
      if (cell.length() > 0) {
        cell.append("|");
      }
      int clusterId = gold ? mention.goldCorefClusterID : mention.corefClusterID;
      cell.append(prefix).append(clusterId).append(suffix);
    }
  }
}