// CoreUtilities.java example
//
// Explorer
// CoreNLP-master
package edu.stanford.nlp.ling;

import java.util.ArrayList;
import java.util.List;

import edu.stanford.nlp.util.CoreMap;

/**
 * Static helpers for building, copying and printing lists of
 * {@link CoreLabel} / {@link CoreMap} tokens.
 */
public class CoreUtilities {

  private CoreUtilities() { } // class of static methods


  /**
   * Pieces a List of CoreMaps back together by appending the value each one
   * stores under {@link CoreAnnotations.TextAnnotation}, with a single space
   * between consecutive items (no leading or trailing space).
   * If the lookup returns {@code null} for an item, the text {@code "null"} is
   * appended for it (standard {@link StringBuilder} behaviour).
   * TODO: remove this (SentenceUtils.listToString does the same thing - why 2 separate classes)
   *
   * @param sentence the items to join; an empty list yields the empty string
   * @return the joined text
   */
  public static String toSentence(List<? extends CoreMap> sentence) {
    StringBuilder text = new StringBuilder();
    // Iterate instead of calling get(i) so that lists without cheap indexed
    // access are still walked in linear time.
    boolean needsSeparator = false;
    for (CoreMap item : sentence) {
      if (needsSeparator) {
        text.append(' ');
      }
      text.append(item.get(CoreAnnotations.TextAnnotation.class));
      needsSeparator = true;
    }
    return text.toString();
  }

  /**
   * Returns a new list in which every element is a fresh {@link CoreLabel}
   * created from the corresponding input label via the CoreLabel copy
   * constructor. How deep that constructor copies is defined by CoreLabel.
   *
   * @param tokens the labels to copy
   * @return a new list of copied labels, in the same order
   */
  public static List<CoreLabel> deepCopy(List<CoreLabel> tokens) {
    List<CoreLabel> copy = new ArrayList<>(tokens.size());
    for (CoreLabel original : tokens) {
      copy.add(new CoreLabel(original));  // copy the labels
    }
    return copy;
  }

  /**
   * Builds one {@link CoreLabel} per word, setting only the word.
   *
   * @param words the words, in order
   * @return a new list with one label per word
   */
  public static List<CoreLabel> toCoreLabelList(String... words) {
    List<CoreLabel> tokens = new ArrayList<>(words.length);
    for (String word : words) {
      tokens.add(newLabel(word));
    }
    return tokens;
  }

  /**
   * Builds one {@link CoreLabel} per word, setting only the word.
   *
   * @param words the words, in order
   * @return a new list with one label per word
   */
  public static List<CoreLabel> toCoreLabelList(List<String> words) {
    List<CoreLabel> tokens = new ArrayList<>(words.size());
    for (String word : words) {
      tokens.add(newLabel(word));
    }
    return tokens;
  }

  /**
   * Builds one {@link CoreLabel} per position, setting the word and the tag.
   *
   * @param words the words, in order
   * @param tags the tag for each word; must have the same length as {@code words}
   * @return a new list with one label per word
   * @throws IllegalArgumentException if the two arrays differ in length
   */
  public static List<CoreLabel> toCoreLabelList(String[] words, String[] tags) {
    requireSameLength(words, tags, "tags");
    List<CoreLabel> tokens = new ArrayList<>(words.length);
    for (int i = 0; i < words.length; i++) {
      tokens.add(newLabel(words[i], tags[i]));
    }
    return tokens;
  }

  /**
   * Builds one {@link CoreLabel} per position with word, tag, and character
   * offsets. Offsets are computed as if the words were joined by exactly one
   * character: the first word begins at 0, each word ends at its begin offset
   * plus its length, and the next word begins one past that end.
   *
   * @param words the words, in order
   * @param tags the tag for each word; must have the same length as {@code words}
   * @return a new list with one label per word
   * @throws IllegalArgumentException if the two arrays differ in length
   */
  public static List<CoreLabel> toCoreLabelListWithCharacterOffsets(String[] words, String[] tags) {
    requireSameLength(words, tags, "tags");
    List<CoreLabel> tokens = new ArrayList<>(words.length);
    int offset = 0;
    for (int i = 0; i < words.length; i++) {
      CoreLabel cl = newLabel(words[i], tags[i]);
      cl.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, offset);
      offset += words[i].length();
      cl.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, offset);
      offset++; // assume one space between words :-)
      tokens.add(cl);
    }
    return tokens;
  }

  /**
   * Builds one {@link CoreLabel} per position, setting the word, the tag, and
   * the value stored under {@link CoreAnnotations.AnswerAnnotation}.
   *
   * @param words the words, in order
   * @param tags the tag for each word; must have the same length as {@code words}
   * @param answers the answer for each word; must have the same length as {@code words}
   * @return a new list with one label per word
   * @throws IllegalArgumentException if {@code tags} or {@code answers} differs
   *         in length from {@code words}
   */
  public static List<CoreLabel> toCoreLabelList(String[] words,
                                                String[] tags,
                                                String[] answers) {
    requireSameLength(words, tags, "tags");
    requireSameLength(words, answers, "answers");
    List<CoreLabel> tokens = new ArrayList<>(words.length);
    for (int i = 0; i < words.length; i++) {
      CoreLabel cl = newLabel(words[i], tags[i]);
      cl.set(CoreAnnotations.AnswerAnnotation.class, answers[i]);
      tokens.add(cl);
    }
    return tokens;
  }

  /** Creates a label carrying only the given word. */
  private static CoreLabel newLabel(String word) {
    CoreLabel cl = new CoreLabel();
    cl.setWord(word);
    return cl;
  }

  /** Creates a label carrying the given word and tag. */
  private static CoreLabel newLabel(String word, String tag) {
    CoreLabel cl = newLabel(word);
    cl.setTag(tag);
    return cl;
  }

  /**
   * Rejects parallel arrays of different lengths up front. An explicit check is
   * used rather than {@code assert} because assertions are disabled by default,
   * which would let a mismatch surface as an index error or go unnoticed.
   */
  private static void requireSameLength(String[] words, String[] other, String otherName) {
    if (other.length != words.length) {
      throw new IllegalArgumentException(
          otherName + " has length " + other.length
              + " but words has length " + words.length);
    }
  }

}