package edu.stanford.nlp.ling; import java.util.ArrayList; import java.util.List; import edu.stanford.nlp.util.CoreMap; public class CoreUtilities { private CoreUtilities() { } // class of static methods /** * Pieces a List of CoreMaps back together using * word and setting a white space between each word * TODO: remove this (SentenceUtils.listToString does the same thing - why 2 separate classes) */ public static String toSentence(List<? extends CoreMap> sentence) { StringBuilder text = new StringBuilder(); for (int i = 0, sz = sentence.size(); i < sz; i++) { CoreMap iw = sentence.get(i); text.append(iw.get(CoreAnnotations.TextAnnotation.class)); if (i < sz - 1) { text.append(' '); } } return text.toString(); } public static List<CoreLabel> deepCopy(List<CoreLabel> tokens) { List<CoreLabel> copy = new ArrayList<>(); for (CoreLabel ml : tokens) { CoreLabel ml1 = new CoreLabel(ml); // copy the labels copy.add(ml1); } return copy; } public static List<CoreLabel> toCoreLabelList(String... words) { List<CoreLabel> tokens = new ArrayList<>(words.length); for (String word : words) { CoreLabel cl = new CoreLabel(); cl.setWord(word); tokens.add(cl); } return tokens; } public static List<CoreLabel> toCoreLabelList(List<String> words) { List<CoreLabel> tokens = new ArrayList<>(words.size()); for (String word : words) { CoreLabel cl = new CoreLabel(); cl.setWord(word); tokens.add(cl); } return tokens; } public static List<CoreLabel> toCoreLabelList(String[] words, String[] tags) { assert tags.length == words.length; List<CoreLabel> tokens = new ArrayList<>(words.length); for (int i = 0, sz = words.length; i < sz; i++) { CoreLabel cl = new CoreLabel(); cl.setWord(words[i]); cl.setTag(tags[i]); tokens.add(cl); } return tokens; } public static List<CoreLabel> toCoreLabelListWithCharacterOffsets(String[] words, String[] tags) { assert tags.length == words.length; List<CoreLabel> tokens = new ArrayList<>(words.length); int offset = 0; for (int i = 0, sz = words.length; i < sz; i++) { CoreLabel cl = new CoreLabel(); cl.setWord(words[i]); cl.setTag(tags[i]); cl.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, offset); offset += words[i].length(); cl.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, offset); offset++; // assume one space between words :-) tokens.add(cl); } return tokens; } public static List<CoreLabel> toCoreLabelList(String[] words, String[] tags, String[] answers) { assert tags.length == words.length; assert answers.length == words.length; List<CoreLabel> tokens = new ArrayList<>(words.length); for (int i = 0, sz = words.length; i < sz; i++) { CoreLabel cl = new CoreLabel(); cl.setWord(words[i]); cl.setTag(tags[i]); cl.set(CoreAnnotations.AnswerAnnotation.class, answers[i]); tokens.add(cl); } return tokens; } }