package edu.stanford.nlp.semgraph.semgrex.ssurgeon; import java.io.StringWriter; import java.util.*; import edu.stanford.nlp.ling.IndexedWord; import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher; import edu.stanford.nlp.trees.EnglishGrammaticalRelations; import edu.stanford.nlp.trees.GrammaticalRelation; import edu.stanford.nlp.semgraph.SemanticGraph; import edu.stanford.nlp.semgraph.SemanticGraphUtils; import edu.stanford.nlp.util.Generics; /** * Adds a new dependent node, based off of a prototype IndexedWord, with the given relation. * The new node's sentence index is inherited from the governing node. Currently a cheap heuristic * is made, placing the new node as the leftmost child of the governing node. * * TODO: add position (a la Tregex) * TODO: determine consistent and intuitive arguments * TODO: because word position is important for certain features (such as bigram lexical overlap), need * ability to specify in which position the new node is inserted. * * @author Eric Yeh * */ public class AddDep extends SsurgeonEdit { public static final String LABEL = "addDep"; IndexedWord newNodePrototype; GrammaticalRelation relation; String govNodeName; double weight; /** * Creates an EnglishGrammaticalRelation AddDep edit. * @param newNode String representation of new dependent IndexedFeatureNode map. */ public static AddDep createEngAddDep(String govNodeName, String engRelation, String newNode) { GrammaticalRelation relation = EnglishGrammaticalRelations.valueOf(engRelation); // IndexedWord newNodeObj = new IndexedWord(CoreLabel.fromAbstractMapLabel(IndexedFeatureLabel.valueOf(newNode, MapFactory.HASH_MAP_FACTORY))); IndexedWord newNodeObj = fromCheapString(newNode); return new AddDep(govNodeName, relation, newNodeObj); } public AddDep(String govNodeName, GrammaticalRelation relation, IndexedWord newNodePrototype) { this.newNodePrototype = newNodePrototype; this.relation = relation; this.govNodeName = govNodeName; this.weight = 0; } public AddDep(String govNodeName, GrammaticalRelation relation, IndexedWord newNodePrototype, double weight) { this(govNodeName, relation, newNodePrototype); this.weight = weight; } /** * Emits a parseable instruction string. */ @Override public String toEditString() { StringWriter buf = new StringWriter(); buf.write(LABEL); buf.write("\t"); buf.write(Ssurgeon.GOV_NODENAME_ARG);buf.write(" "); buf.write(govNodeName); buf.write("\t"); buf.write(Ssurgeon.RELN_ARG);buf.write(" "); buf.write(relation.toString()); buf.write("\t"); buf.write(Ssurgeon.NODE_PROTO_ARG);buf.write(" "); buf.write("\""); // buf.write(newNodePrototype.toString("map")); buf.write("\"\t") buf.write(cheapWordToString(newNodePrototype)); buf.write("\"\t"); buf.write(Ssurgeon.WEIGHT_ARG);buf.write(" "); buf.write(String.valueOf(weight)); return buf.toString(); } /** * TODO: figure out how to specify where in the sentence this node goes. * TODO: determine if we should be copying an IndexedWord, or working just with a FeatureLabel. * TODO: bombproof if this gov, dep, and reln already exist. */ @Override public void evaluate(SemanticGraph sg, SemgrexMatcher sm) { IndexedWord govNode = sm.getNode(govNodeName); IndexedWord newNode = new IndexedWord(newNodePrototype); int newIndex = SemanticGraphUtils.leftMostChildVertice(govNode, sg).index(); // cheap En-specific hack for placing copula (beginning of governing phrase) newNode.setDocID(govNode.docID()); newNode.setIndex(newIndex); newNode.setSentIndex(govNode.sentIndex()); sg.addVertex(newNode); sg.addEdge(govNode, newNode, relation, weight,false); } public static final String WORD_KEY = "word"; public static final String LEMMA_KEY = "lemma"; public static final String VALUE_KEY = "value"; public static final String CURRENT_KEY = "current"; public static final String POS_KEY = "POS"; public static final String TUPLE_DELIMITER="="; public static final String ATOM_DELIMITER = " "; // Simple mapping of all the stuff we care about (until IndexedFeatureLabel --> CoreLabel map pain is fixed) /** * This converts the node into a simple string based representation. * NOTE: this is extremely brittle, and presumes values do not contain delimiters */ public static String cheapWordToString(IndexedWord node) { StringWriter buf = new StringWriter(); buf.write("{"); buf.write(WORD_KEY); buf.write(TUPLE_DELIMITER); buf.write(nullShield(node.word())); buf.write(ATOM_DELIMITER); buf.write(LEMMA_KEY); buf.write(TUPLE_DELIMITER); buf.write(nullShield(node.lemma())); buf.write(ATOM_DELIMITER); buf.write(POS_KEY); buf.write(TUPLE_DELIMITER); buf.write(nullShield(node.tag())); buf.write(ATOM_DELIMITER); buf.write(VALUE_KEY); buf.write(TUPLE_DELIMITER); buf.write(nullShield(node.value())); buf.write(ATOM_DELIMITER); buf.write(CURRENT_KEY); buf.write(TUPLE_DELIMITER); buf.write(nullShield(node.originalText())); buf.write("}"); return buf.toString(); } /** * Given the node arg string, converts it into an IndexedWord. */ public static IndexedWord fromCheapString(String rawArg) { String arg = rawArg.substring(1, rawArg.length()-1); String[] tuples=arg.split(ATOM_DELIMITER); Map<String,String> args = Generics.newHashMap(); for (String tuple : tuples) { String[] vals = tuple.split(TUPLE_DELIMITER); String key = vals[0]; String value = ""; if (vals.length == 2) value = vals[1]; args.put(key, value); } IndexedWord newWord = new IndexedWord(); newWord.setWord(args.get(WORD_KEY)); newWord.setLemma(args.get(LEMMA_KEY)); newWord.setTag(args.get(POS_KEY)); newWord.setValue(args.get(VALUE_KEY)); newWord.setOriginalText(args.get(CURRENT_KEY)); return newWord; } public static String nullShield(String str) { return str == null ? "" : str; } }