AddDep.java example

Explorer
CoreNLP-master
package edu.stanford.nlp.semgraph.semgrex.ssurgeon;

import java.io.StringWriter;
import java.util.*;

import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
import edu.stanford.nlp.trees.EnglishGrammaticalRelations;
import edu.stanford.nlp.trees.GrammaticalRelation;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphUtils;
import edu.stanford.nlp.util.Generics;

/**
 * Adds a new dependent node, based off of a prototype IndexedWord, with the given relation.
 * The new node's sentence index is inherited from the governing node.  Currently a cheap heuristic
 * is made, placing the new node as the leftmost child of the governing node.
 *
 * TODO: add position (a la Tregex)
 * TODO: determine consistent and intuitive arguments
 * TODO: because word position is important for certain features (such as bigram lexical overlap), need
 * ability to specify in which position the new node is inserted.
 *
 * @author Eric Yeh
 *
 */
public class AddDep extends SsurgeonEdit {
  /** Instruction name written at the start of the edit string by {@link #toEditString()}. */
  public static final String LABEL = "addDep";

  // Keys and delimiters of the "cheap" node string, which looks like
  // {word=W lemma=L POS=T value=V current=O}
  public static final String WORD_KEY = "word";
  public static final String LEMMA_KEY = "lemma";
  public static final String VALUE_KEY = "value";
  public static final String CURRENT_KEY = "current";
  public static final String POS_KEY = "POS";
  /** Separates a key from its value inside one tuple. */
  public static final String TUPLE_DELIMITER="=";
  /** Separates one key/value tuple from the next. */
  public static final String ATOM_DELIMITER = " ";

  /** Template that is copied for every node added by {@link #evaluate}. */
  IndexedWord newNodePrototype;
  /** Relation placed on the edge from the governor to the new node. */
  GrammaticalRelation relation;
  /** Name under which the governor node is looked up in the matcher. */
  String govNodeName;
  /** Weight placed on the new edge. */
  double weight;

  /**
   * Creates an EnglishGrammaticalRelation AddDep edit.
   *
   * @param govNodeName name of the governor node, as bound in the Semgrex pattern
   * @param engRelation name of the relation, resolved through
   *                    {@code EnglishGrammaticalRelations.valueOf}
   * @param newNode String representation of the new dependent, in the format read by
   *                {@link #fromCheapString(String)}
   * @return an edit with weight 0
   */
  public static AddDep createEngAddDep(String govNodeName, String engRelation,  String newNode) {
    GrammaticalRelation relation = EnglishGrammaticalRelations.valueOf(engRelation);
    IndexedWord newNodeObj = fromCheapString(newNode);
    return new AddDep(govNodeName, relation, newNodeObj);
  }

  /**
   * Creates an edit whose new edge has weight 0.
   *
   * @param govNodeName name of the governor node, as bound in the Semgrex pattern
   * @param relation relation for the new edge
   * @param newNodePrototype template for the node to add
   */
  public AddDep(String govNodeName, GrammaticalRelation relation, IndexedWord newNodePrototype) {
    this(govNodeName, relation, newNodePrototype, 0);
  }

  /**
   * Creates an edit with an explicit edge weight.
   *
   * @param govNodeName name of the governor node, as bound in the Semgrex pattern
   * @param relation relation for the new edge
   * @param newNodePrototype template for the node to add
   * @param weight weight for the new edge
   */
  public AddDep(String govNodeName, GrammaticalRelation relation, IndexedWord newNodePrototype, double weight) {
    this.newNodePrototype = newNodePrototype;
    this.relation = relation;
    this.govNodeName = govNodeName;
    this.weight = weight;
  }

  /**
   * Emits a parseable instruction string: the label, then the governor name, relation,
   * quoted node prototype and weight, each introduced by its Ssurgeon argument flag and
   * separated from the next by a tab.
   */
  @Override
  public String toEditString() {
    StringBuilder buf = new StringBuilder();
    buf.append(LABEL).append("\t");
    buf.append(Ssurgeon.GOV_NODENAME_ARG).append(" ").append(govNodeName).append("\t");
    buf.append(Ssurgeon.RELN_ARG).append(" ").append(relation.toString()).append("\t");
    // The node prototype is wrapped in double quotes because it contains spaces
    buf.append(Ssurgeon.NODE_PROTO_ARG).append(" ");
    buf.append("\"").append(cheapWordToString(newNodePrototype)).append("\"\t");
    buf.append(Ssurgeon.WEIGHT_ARG).append(" ").append(weight);
    return buf.toString();
  }

  /**
   * Copies the prototype node, attaches the copy to the graph as a dependent of the
   * governor node named {@code govNodeName} in the matcher, and gives the copy the
   * governor's document ID and sentence index.
   *
   * TODO: figure out how to specify where in the sentence this node goes.
   * TODO: determine if we should be copying an IndexedWord, or working just with a FeatureLabel.
   * TODO: bombproof if this gov, dep, and reln already exist.
   *
   * @param sg graph that receives the new vertex and edge
   * @param sm matcher from which the governor node is looked up by name
   */
  @Override
  public void evaluate(SemanticGraph sg, SemgrexMatcher sm) {
    IndexedWord govNode = sm.getNode(govNodeName);
    IndexedWord newNode = new IndexedWord(newNodePrototype);
    // cheap En-specific hack for placing copula (beginning of governing phrase)
    // NOTE(review): this reuses the index of an existing vertex for the new node - confirm
    // that the graph tolerates two vertices sharing docID, sentence index and index.
    int newIndex = SemanticGraphUtils.leftMostChildVertice(govNode, sg).index();
    newNode.setDocID(govNode.docID());
    newNode.setIndex(newIndex);
    newNode.setSentIndex(govNode.sentIndex());
    sg.addVertex(newNode);
    sg.addEdge(govNode, newNode, relation, weight,false);
  }

  // Simple mapping of all the stuff we care about (until IndexedFeatureLabel --> CoreLabel map pain is fixed)
  /**
   * This converts the node into a simple string based representation of the form
   * {@code {word=W lemma=L POS=T value=V current=O}}; null fields are written as empty strings.
   * NOTE: this is extremely brittle, and presumes values do not contain {@link #ATOM_DELIMITER}
   *
   * @param node node to serialize
   * @return the string form, readable by {@link #fromCheapString(String)}
   */
  public static String cheapWordToString(IndexedWord node) {
    StringBuilder buf = new StringBuilder("{");
    appendTuple(buf, WORD_KEY, node.word()).append(ATOM_DELIMITER);
    appendTuple(buf, LEMMA_KEY, node.lemma()).append(ATOM_DELIMITER);
    appendTuple(buf, POS_KEY, node.tag()).append(ATOM_DELIMITER);
    appendTuple(buf, VALUE_KEY, node.value()).append(ATOM_DELIMITER);
    appendTuple(buf, CURRENT_KEY, node.originalText());
    return buf.append("}").toString();
  }

  /** Appends {@code key=value} to the buffer, writing a null value as the empty string. */
  private static StringBuilder appendTuple(StringBuilder buf, String key, String value) {
    return buf.append(key).append(TUPLE_DELIMITER).append(nullShield(value));
  }

  /**
   * Given the node arg string, converts it into an IndexedWord.  The first and last
   * characters of the argument are discarded (they are the braces written by
   * {@link #cheapWordToString(IndexedWord)}); the rest is read as
   * {@link #ATOM_DELIMITER}-separated {@code key=value} tuples.  A tuple without a
   * value yields the empty string, and a key that never appears leaves that field null.
   *
   * @param rawArg the string form of the node, including its enclosing characters
   * @return a new IndexedWord with word, lemma, tag, value and original text set
   * @throws IllegalArgumentException if {@code rawArg} is null or shorter than two characters
   */
  public static IndexedWord fromCheapString(String rawArg) {
    if (rawArg == null || rawArg.length() < 2) {
      throw new IllegalArgumentException(
          "Node string needs an enclosing first and last character, e.g. {word=is}, but was: " + rawArg);
    }
    String arg = rawArg.substring(1, rawArg.length()-1);
    Map<String,String> args = Generics.newHashMap();
    for (String tuple : arg.split(ATOM_DELIMITER)) {
      // Split at the first delimiter only: a value that is or contains "=" is kept whole,
      // and a tuple consisting of just "=" no longer produces an empty array.
      String[] vals = tuple.split(TUPLE_DELIMITER, 2);
      String value = vals.length == 2 ? vals[1] : "";
      args.put(vals[0], value);
    }
    IndexedWord newWord = new IndexedWord();
    newWord.setWord(args.get(WORD_KEY));
    newWord.setLemma(args.get(LEMMA_KEY));
    newWord.setTag(args.get(POS_KEY));
    newWord.setValue(args.get(VALUE_KEY));
    newWord.setOriginalText(args.get(CURRENT_KEY));
    return newWord;
  }

  /**
   * Replaces null with the empty string.
   *
   * @param str possibly-null string
   * @return {@code str}, or {@code ""} when {@code str} is null
   */
  public static String nullShield(String str) {
    return str == null ? "" : str;
  }
}