// CorefPrinter.java example
//
// Explorer
// CoreNLP-master
package edu.stanford.nlp.coref;

import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

import edu.stanford.nlp.coref.data.Document;
import edu.stanford.nlp.coref.data.Mention;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;

/**
 * Renders the coreference mentions of a {@link Document} as text: the columns
 * stored in {@code document.conllDoc} are echoed back with the last column
 * replaced by bracketed cluster ids.
 *
 * @author Heeyoung Lee
 * @author Kevin Clark
 */
public class CorefPrinter {

  /**
   * Prints the document without removing any mentions.
   *
   * @param document the document to print
   * @param gold if true, use {@code document.goldMentions} and gold cluster ids;
   *     otherwise use {@code document.predictedMentions} and predicted cluster ids
   * @return the formatted document text
   */
  public static String printConllOutput(Document document, boolean gold) {
    return printConllOutput(document, gold, false);
  }

  /**
   * Prints the document, optionally dropping mentions whose cluster is missing
   * or holds at most one element.
   *
   * <p>NOTE(review): the singleton filter always looks mentions up in
   * {@code document.corefClusters} by {@code corefClusterID}, even when
   * {@code gold} is true - confirm that this is intended.
   *
   * @param document the document to print
   * @param gold if true, use gold mentions and gold cluster ids; otherwise predicted ones
   * @param filterSingletons if true, keep only mentions whose cluster in
   *     {@code document.corefClusters} exists and has size greater than one
   * @return the formatted document text
   */
  public static String printConllOutput(Document document, boolean gold, boolean filterSingletons) {
    List<List<Mention>> mentionsBySentence;
    if (gold) {
      mentionsBySentence = document.goldMentions;
    } else {
      mentionsBySentence = document.predictedMentions;
    }

    if (!filterSingletons) {
      return CorefPrinter.printConllOutput(document, mentionsBySentence, gold);
    }

    List<List<Mention>> retained = mentionsBySentence.stream()
        .map(perSentence -> perSentence.stream()
            .filter(mention -> hasMultiElementCluster(document, mention))
            .collect(Collectors.toList()))
        .collect(Collectors.toList());
    return CorefPrinter.printConllOutput(document, retained, gold);
  }

  /**
   * Prints the document using an explicit per-sentence list of mentions.
   *
   * <p>The output starts with a {@code #begin document <docID>} line and ends with
   * a {@code #end document} line. Each token gets one line made of every column of
   * its {@code conllDoc} row except the last, tab-separated, followed by the
   * coreference column; each sentence is followed by an empty line.
   *
   * @param document the document supplying the annotation and the {@code conllDoc} rows
   * @param orderedMentions for each sentence index, the mentions to print for that sentence
   * @param gold if true, print {@code goldCorefClusterID}; otherwise {@code corefClusterID}
   * @return the formatted document text
   */
  public static String printConllOutput(Document document,
      List<List<Mention>> orderedMentions, boolean gold) {
    Annotation annotation = document.annotation;
    List<List<String[]>> rowsBySentence = document.conllDoc.sentenceWordLists;
    String documentId = annotation.get(CoreAnnotations.DocIDAnnotation.class);

    StringBuilder out = new StringBuilder();
    out.append("#begin document ").append(documentId).append("\n");

    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    for (int s = 0; s < sentences.size(); s++) {
      List<CoreLabel> tokens = sentences.get(s).get(CoreAnnotations.TokensAnnotation.class);
      List<String[]> rows = rowsBySentence.get(s);
      int tokenCount = tokens.size();

      // Per token index: mentions that open there, close there, or span exactly that token.
      Map<Integer, Set<Mention>> opening = emptyBuckets(tokenCount);
      Map<Integer, Set<Mention>> closing = emptyBuckets(tokenCount);
      Map<Integer, Set<Mention>> singleToken = emptyBuckets(tokenCount);

      for (Mention mention : orderedMentions.get(s)) {
        int lastToken = mention.endIndex - 1; // endIndex is treated as exclusive here
        if (mention.startIndex != lastToken) {
          opening.get(mention.startIndex).add(mention);
          closing.get(lastToken).add(mention);
        } else {
          singleToken.get(mention.startIndex).add(mention);
        }
      }

      for (int t = 0; t < tokenCount; t++) {
        String corefCell = corefColumn(opening.get(t), singleToken.get(t), closing.get(t), gold);

        // Echo every stored column but the last; the coref cell takes its place.
        String[] columns = rows.get(t);
        int keep = columns.length - 1;
        for (int c = 0; c < keep; c++) {
          out.append(columns[c]).append("\t");
        }
        out.append(corefCell).append("\n");
      }
      out.append("\n");
    }

    out.append("#end document").append("\n");
    return out.toString();
  }

  /** Returns true when the mention's cluster exists in {@code corefClusters} and has size above one. */
  private static boolean hasMultiElementCluster(Document document, Mention mention) {
    if (document.corefClusters.get(mention.corefClusterID) == null) {
      return false;
    }
    return document.corefClusters.get(mention.corefClusterID).size() > 1;
  }

  /** Builds a map holding an empty insertion-ordered set for every index in {@code [0, tokenCount)}. */
  private static Map<Integer, Set<Mention>> emptyBuckets(int tokenCount) {
    Map<Integer, Set<Mention>> buckets = Generics.newHashMap();
    for (int t = 0; t < tokenCount; t++) {
      buckets.put(t, new LinkedHashSet<>());
    }
    return buckets;
  }

  /** Picks the gold or the predicted cluster id of a mention. */
  private static int clusterIdOf(Mention mention, boolean gold) {
    if (gold) {
      return mention.goldCorefClusterID;
    }
    return mention.corefClusterID;
  }

  /** Appends the entry separator unless the cell is still empty. */
  private static void separate(StringBuilder cell) {
    if (cell.length() > 0) {
      cell.append("|");
    }
  }

  /**
   * Builds the coreference cell for one token: opening brackets first, then
   * single-token mentions, then closing brackets, joined by {@code |}. Yields
   * {@code -} when no mention starts or ends at the token.
   */
  private static String corefColumn(Set<Mention> opening, Set<Mention> singleToken,
      Set<Mention> closing, boolean gold) {
    StringBuilder cell = new StringBuilder();
    for (Mention mention : opening) {
      separate(cell);
      cell.append("(").append(clusterIdOf(mention, gold));
    }
    for (Mention mention : singleToken) {
      separate(cell);
      cell.append("(").append(clusterIdOf(mention, gold)).append(")");
    }
    for (Mention mention : closing) {
      separate(cell);
      cell.append(clusterIdOf(mention, gold)).append(")");
    }
    if (cell.length() == 0) {
      cell.append("-");
    }
    return cell.toString();
  }
}