package edu.stanford.nlp.ie.util;
import edu.stanford.nlp.international.Language;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.trees.GrammaticalRelation;
import edu.stanford.nlp.util.Pair;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
/**
* Factor out some commonly used code (e.g., make a tree from a CoNLL spec)
*
* @author Gabor Angeli
*/
public class IETestUtils {

  /**
   * Create a dummy word, just with a given word at a given index.
   * Mostly useful for making semantic graphs.
   *
   * @param gloss The text of the word; it is stored as both the word and the value of the label.
   * @param index The index to assign to the word. A negative value leaves the index unset.
   * @return A new label carrying the given gloss (and the index, if it is non-negative).
   */
  public static CoreLabel mkWord(String gloss, int index) {
    CoreLabel w = new CoreLabel();
    w.setWord(gloss);
    w.setValue(gloss);
    if (index >= 0) {
      w.setIndex(index);
    }
    return w;
  }

  /**
   * Parse a CoNLL formatted string into a SemanticGraph.
   * This is useful for tests so that you don't need to load the model (and are robust to
   * model changes).
   *
   * <p>Each non-blank line is one token, given as whitespace-separated columns:
   * <ol start="0">
   *   <li>the index of the token</li>
   *   <li>the word</li>
   *   <li>the index of the parent; {@code 0} marks the root</li>
   *   <li>the name of the relation to the parent</li>
   *   <li>(optional) the part of speech tag</li>
   *   <li>(optional) the NER tag</li>
   *   <li>(optional) the lemma</li>
   * </ol>
   * The parent column is resolved positionally: parent {@code k} refers to the {@code k}-th
   * non-blank line, so lines are expected to be numbered from 1 in order.
   *
   * @param conll The CoNLL format for the tree.
   * @return A semantic graph, as well as the flat tokens of the sentence.
   */
  public static Pair<SemanticGraph,List<CoreLabel>> parseCoNLL(String conll) {
    // Tokenize every non-blank line once, so that both passes below share the same rows.
    List<String[]> rows = new ArrayList<>();
    for (String line : conll.split("\n")) {
      String trimmed = line.trim();
      if (trimmed.isEmpty()) { continue; }
      rows.add(trimmed.split("\\s+"));
    }

    // Pass 1: create the tokens and register each of them as a vertex (or root) of the graph.
    List<CoreLabel> sentence = new ArrayList<>();
    SemanticGraph tree = new SemanticGraph();
    for (String[] fields : rows) {
      int index = Integer.parseInt(fields[0]);
      String word = fields[1];
      CoreLabel label = mkWord(word, index);
      sentence.add(label);
      if (fields[2].equals("0")) {
        tree.addRoot(new IndexedWord(label));
      } else {
        tree.addVertex(new IndexedWord(label));
      }
      if (fields.length > 4) {
        label.setTag(fields[4]);
      }
      if (fields.length > 5) {
        label.setNER(fields[5]);
      }
      if (fields.length > 6) {
        label.setLemma(fields[6]);
      }
    }

    // Pass 2: add the edges. This needs its own pass, as a parent may appear after its child.
    for (int i = 0; i < rows.size(); ++i) {
      String[] fields = rows.get(i);
      int parent = Integer.parseInt(fields[2]);
      String reln = fields[3];
      if (parent > 0) {
        tree.addEdge(
            new IndexedWord(sentence.get(parent - 1)),
            new IndexedWord(sentence.get(i)),
            new GrammaticalRelation(Language.UniversalEnglish, reln, null, null),
            1.0, false
        );
      }
    }
    return Pair.makePair(tree, sentence);
  }

  /**
   * Create a sentence (list of CoreLabels) from a given text.
   * The resulting labels will have a word, lemma (guessed poorly), and
   * a part of speech if one is specified on the input.
   *
   * <p>Tokens are separated by whitespace. A token of the form {@code word/TAG} yields the
   * word {@code word} with the part of speech {@code TAG}; any other token is taken verbatim
   * as the word. Empty tokens (e.g., from leading whitespace, or from an empty text) are skipped.
   *
   * @param text The text to parse.
   *
   * @return A sentence corresponding to the text.
   */
  public static List<CoreLabel> parseSentence(String text) {
    return Arrays.stream(text.split("\\s+"))
        // split() yields an empty first element for empty text or leading whitespace
        .filter(w -> !w.isEmpty())
        .map(IETestUtils::mkToken)
        .collect(Collectors.toList());
  }

  /**
   * Create a single token from a {@code word} or {@code word/TAG} specification.
   */
  private static CoreLabel mkToken(String w) {
    CoreLabel token = new CoreLabel();
    String[] fields = w.split("/");
    if (fields.length >= 2) {
      token.setWord(fields[0]);
      token.setTag(fields[1]);
    } else {
      // Either there is no slash at all, or there is nothing after it (e.g., "/" or "word/"):
      // split() drops trailing empty strings, so there is no tag to read. Keep the token as is.
      token.setWord(w);
    }
    token.setValue(token.word());
    token.setLemma(guessLemma(token.word()));
    return token;
  }

  /**
   * Guess the lemma of a word with a few crude rules; anything not covered is its own lemma.
   */
  private static String guessLemma(String word) {
    switch (word) {
      case "is":
      case "was":
      case "are":
        return "be";
      case "has":
        return "have";
      case "did":
      case "will":
      case "does":
        return "do";
      default:
        break;
    }
    if (word.endsWith("ed")) {
      // Only the trailing "d" is dropped (e.g., "named" -> "name").
      return word.substring(0, word.length() - 1);
    }
    if (word.endsWith("ing")) {
      return word.substring(0, word.length() - 3);
    }
    return word;
  }
}