// SentenceFragment.java example
//
// Explorer
// CoreNLP-master
package edu.stanford.nlp.naturalli;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringUtils;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;

/**
 * A representation of a sentence fragment.
 *
 * @author Gabor Angeli
 */
public class SentenceFragment {

  /**
   * The words in this sentence fragment (e.g., for use as the gloss of the fragment).
   * Populated once, in the constructor, from the backing labels of the parse tree's
   * vertices in sorted order.
   */
  public final List<CoreLabel> words = new ArrayList<>();

  /**
   * The parse tree for this sentence fragment.
   */
  public final SemanticGraph parseTree;

  /**
   * The assumed truth of this fragment; this is relevant for what entailments are supported
   */
  public final boolean assumedTruth;

  /**
   * A score for this fragment. This is 1.0 by default.
   */
  public double score = 1.0;


  /**
   * Create a fragment from a dependency graph.
   *
   * @param tree The dependency graph backing this fragment.
   * @param assumedTruth The assumed truth value of the fragment.
   * @param copy If true, the fragment stores a copy of {@code tree} (made with the
   *             {@link SemanticGraph} copy constructor); if false, it stores {@code tree} itself.
   */
  public SentenceFragment(SemanticGraph tree, boolean assumedTruth, boolean copy) {
    this.parseTree = copy ? new SemanticGraph(tree) : tree;
    this.assumedTruth = assumedTruth;
    for (IndexedWord vertex : this.parseTree.vertexListSorted()) {
      words.add(vertex.backingLabel());
    }
  }

  /** The length of this fragment, in words */
  public int length() {
    return words.size();
  }

  /**
   * Changes the score of this fragment in place.
   * @param score The new score of the fragment
   * @return This sentence fragment.
   */
  public SentenceFragment changeScore(double score) {
    this.score = score;
    return this;
  }

  /**
   * Return the tokens in this fragment, but padded with null so that the index in this
   * sentence matches the index of the parse tree.
   * The token with (1-based) index {@code i} is stored at position {@code i - 1}.
   *
   * @return A list whose size is the largest vertex index in the parse tree, or an empty
   *         list if the parse tree has no vertices.
   */
  public List<CoreLabel> paddedWords() {
    // Start at 0 rather than -1: a negative initial capacity makes the ArrayList
    // constructor throw when the parse tree has no vertices.
    int maxIndex = 0;
    for (IndexedWord vertex : parseTree.vertexSet()) {
      maxIndex = Math.max(maxIndex, vertex.index());
    }
    List<CoreLabel> tokens = new ArrayList<>(maxIndex);
    for (int i = 0; i < maxIndex; ++i) {
      tokens.add(null);
    }
    for (CoreLabel token : this.words) {
      tokens.set(token.index() - 1, token);
    }
    return tokens;
  }

  /**
   * Two fragments are equal when the vertex sets of their parse trees are equal.
   * The edges, the assumed truth and the score are not compared.
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (!(o instanceof SentenceFragment)) {
      return false;
    }
    SentenceFragment that = (SentenceFragment) o;
    return this.parseTree.vertexSet().equals(that.parseTree.vertexSet());
  }

  /** Hash of the parse tree's vertex set, consistent with {@link #equals(Object)}. */
  @Override
  public int hashCode() {
    return this.parseTree.vertexSet().hashCode();
  }

  /**
   * Render the fragment as a space-separated gloss. Each word contributes its surface
   * form; in addition, if an incoming edge's relation name contains an underscore, the
   * text after the first underscore (e.g., a preposition or conjunction) is emitted at
   * the position where the word's yield starts.
   */
  @Override
  public String toString() {
    List<Pair<String, Integer>> glosses = new ArrayList<>();
    for (CoreLabel word : words) {
      // Wrap the label once; the same vertex is used for both graph lookups below.
      IndexedWord vertex = new IndexedWord(word);
      // Add the word itself
      glosses.add(Pair.makePair(word.word(), word.index() - 1));
      // Find additional connectives; if several incoming edges qualify, the last one wins
      String addedConnective = null;
      for (SemanticGraphEdge edge : parseTree.incomingEdgeIterable(vertex)) {
        String rel = edge.getRelation().toString();
        int underscore = rel.indexOf('_');
        if (underscore >= 0) {  // for Stanford dependencies only
          addedConnective = rel.substring(underscore + 1);
        }
      }
      if (addedConnective != null) {
        // Found a connective (e.g., a preposition or conjunction)
        Pair<Integer, Integer> span = parseTree.yieldSpan(vertex);
        glosses.add(Pair.makePair(addedConnective.replace('_', ' '), span.first - 1));
      }
    }
    // Sort the sentence by position. The sort is stable, so entries sharing a position
    // keep their insertion order. Integer.compare avoids the overflow a subtraction
    // comparator can hit.
    glosses.sort((a, b) -> Integer.compare(a.second, b.second));
    // Return the sentence
    return StringUtils.join(glosses.stream().map(Pair::first), " ");
  }

}