package edu.stanford.nlp.naturalli; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.ling.IndexedWord; import edu.stanford.nlp.semgraph.SemanticGraph; import edu.stanford.nlp.semgraph.SemanticGraphEdge; import edu.stanford.nlp.util.Pair; import edu.stanford.nlp.util.StringUtils; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.stream.Collectors; /** * A representation of a sentence fragment. * * @author Gabor Angeli */ public class SentenceFragment { /** * The words in this sentence fragment (e.g., for use as the gloss of the fragment). */ public final List<CoreLabel> words = new ArrayList<>(); /** * The parse tree for this sentence fragment. */ public final SemanticGraph parseTree; /** * The assumed truth of this fragment; this is relevant for what entailments are supported */ public final boolean assumedTruth; /** * A score for this fragment. This is 1.0 by default. */ public double score = 1.0; public SentenceFragment(SemanticGraph tree, boolean assumedTruth, boolean copy) { if (copy) { this.parseTree = new SemanticGraph(tree); } else { this.parseTree = tree; } this.assumedTruth = assumedTruth; words.addAll(this.parseTree.vertexListSorted().stream().map(IndexedWord::backingLabel).collect(Collectors.toList())); } /** The length of this fragment, in words */ public int length() { return words.size(); } /** * Changes the score of this fragment in place. * @param score The new score of the fragment * @return This sentence fragment. */ public SentenceFragment changeScore(double score) { this.score = score; return this; } /** * Return the tokens in this fragment, but padded with null so that the index in this * sentence matches the index of the parse tree. */ public List<CoreLabel> paddedWords() { int maxIndex = -1; for (IndexedWord vertex : parseTree.vertexSet()) { maxIndex = Math.max(maxIndex, vertex.index()); } List<CoreLabel> tokens = new ArrayList<>(maxIndex); for (int i = 0; i < maxIndex; ++i) { tokens.add(null); } for (CoreLabel token : this.words) { tokens.set(token.index() - 1, token); } return tokens; } @Override public boolean equals(Object o) { if (this == o) return true; if (!(o instanceof SentenceFragment)) return false; SentenceFragment that = (SentenceFragment) o; return this.parseTree.vertexSet().equals((that.parseTree.vertexSet())); } @Override public int hashCode() { return this.parseTree.vertexSet().hashCode(); } @Override public String toString() { List<Pair<String, Integer>> glosses = new ArrayList<>(); for (CoreLabel word : words) { // Add the word itself glosses.add(Pair.makePair(word.word(), word.index() - 1)); String addedConnective = null; // Find additional connectives for (SemanticGraphEdge edge : parseTree.incomingEdgeIterable(new IndexedWord(word))) { String rel = edge.getRelation().toString(); if (rel.contains("_")) { // for Stanford dependencies only addedConnective = rel.substring(rel.indexOf('_') + 1); } } if (addedConnective != null) { // Found a connective (e.g., a preposition or conjunction) Pair<Integer, Integer> yield = parseTree.yieldSpan(new IndexedWord(word)); glosses.add(Pair.makePair(addedConnective.replaceAll("_", " "), yield.first - 1)); } } // Sort the sentence Collections.sort(glosses, (a, b) -> a.second - b.second); // Return the sentence return StringUtils.join(glosses.stream().map(Pair::first), " "); } }