package edu.stanford.nlp.semgraph.semgrex.ssurgeon;
import java.io.StringWriter;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.StringUtils;
/**
*
* Collapses a subtree into a single node.
* The new node keeps the POS tag and index of the root node; its value and
* lemma are the space-separated concatenation of those of the subtree's nodes.
*
* One intended use is to collapse multi-word expressions into one node
* to facilitate relation extraction and related tasks.
*
* @author Sebastian Schuster
*
*/
public class CollapseSubtree extends SsurgeonEdit {

  public static final String LABEL = "collapseSubtree";

  protected String rootName; // Name of the root node in match

  /**
   * @param rootNodeName name, as bound in the Semgrex match, of the node
   *                     whose subtree is to be collapsed
   */
  public CollapseSubtree(String rootNodeName) {
    this.rootName = rootNodeName;
  }

  /**
   * Replaces the matched root node and every node reachable from it with a
   * single merged node.
   *
   * The merged node takes the place of the old root: it is registered as a
   * graph root if the old root was one, and it receives a copy of every edge
   * that pointed at the old root. Edges inside the subtree disappear together
   * with the removed nodes.
   *
   * @param sg the graph to modify in place
   * @param sm the matcher holding the binding for the root node name
   * @throws IllegalArgumentException if some child of the root can reach the
   *         root again, i.e. the "subtree" contains a cycle through the root
   */
  @Override
  public void evaluate(SemanticGraph sg, SemgrexMatcher sm) {
    IndexedWord rootNode = this.getNamedNode(rootName, sm);
    Set<IndexedWord> subgraphNodeSet = sg.getSubgraphVertices(rootNode);

    if ( ! sg.isDag(rootNode)) {
      /* Check if there is a cycle going back to the root. */
      for (IndexedWord child : sg.getChildren(rootNode)) {
        Set<IndexedWord> reachableSet = sg.getSubgraphVertices(child);
        if (reachableSet.contains(rootNode)) {
          throw new IllegalArgumentException("Subtree cannot contain cycle leading back to root node!");
        }
      }
    }

    /* Natural ordering of the nodes decides the order of the concatenated text. */
    List<IndexedWord> sortedSubgraphNodes = Generics.newArrayList(subgraphNodeSet);
    Collections.sort(sortedSubgraphNodes);

    IndexedWord newNode = buildCollapsedNode(rootNode, sortedSubgraphNodes);

    /*
     * Snapshot everything we need from the old root BEFORE touching the graph:
     * the incoming-edge iterable is a view of the graph, so edges must not be
     * added or removed while it is being walked.
     */
    boolean wasRoot = sg.getRoots().contains(rootNode);
    List<SemanticGraphEdge> incomingEdges = Generics.newArrayList();
    for (SemanticGraphEdge edge : sg.incomingEdgeIterable(rootNode)) {
      incomingEdges.add(edge);
    }

    /*
     * Remove the old subtree first. newNode carries the same docID, sentence
     * index and index as rootNode, so the two may be indistinguishable as graph
     * keys (NOTE(review): depends on IndexedWord.equals -- confirm). Removing
     * rootNode after wiring up newNode could therefore strip the replacement's
     * edges and root entry again.
     */
    for (IndexedWord node : sortedSubgraphNodes) {
      sg.removeVertex(node);
    }

    /* Insert the merged node even if it ends up with no root status and no edges. */
    sg.addVertex(newNode);

    if (wasRoot) {
      /* The merged node, not the removed root, must take over as graph root. */
      sg.getRoots().remove(rootNode);
      sg.addRoot(newNode);
    }

    /* Governors lie outside the subtree (guaranteed by the cycle check), so they still exist. */
    for (SemanticGraphEdge edge : incomingEdges) {
      sg.addEdge(edge.getGovernor(), newNode, edge.getRelation(), edge.getWeight(), edge.isExtra());
    }
  }

  /**
   * Builds the node that stands in for the whole subtree: all annotations of
   * the root, with value, word and lemma replaced by the space-joined
   * value/word/lemma of the given nodes (a node without lemma contributes its
   * word instead).
   */
  @SuppressWarnings({ "unchecked", "rawtypes" })
  private static IndexedWord buildCollapsedNode(IndexedWord rootNode, List<IndexedWord> sortedSubgraphNodes) {
    IndexedWord newNode = new IndexedWord(rootNode.docID(), rootNode.sentIndex(), rootNode.index());

    /*
     * Copy all attributes from rootNode. The keys must come from rootNode: a
     * freshly constructed node only has the keys its constructor set, so
     * iterating its own key set would never pick up e.g. the POS tag.
     */
    for (Class key : rootNode.backingLabel().keySet()) {
      newNode.set(key, rootNode.get(key));
    }

    newNode.setValue(StringUtils.join(sortedSubgraphNodes.stream().map(IndexedWord::value), " "));
    newNode.setWord(StringUtils.join(sortedSubgraphNodes.stream().map(IndexedWord::word), " "));
    newNode.setLemma(StringUtils.join(sortedSubgraphNodes.stream().map(x -> x.lemma() == null ? x.word() : x.lemma()), " "));
    return newNode;
  }

  /**
   * Serializes this edit as the edit label, a tab, the node-name argument
   * flag, a space and the root node name.
   */
  @Override
  public String toEditString() {
    StringWriter buf = new StringWriter();
    buf.write(LABEL);
    buf.write("\t");
    buf.write(Ssurgeon.NODENAME_ARG);
    buf.write(" ");
    buf.write(rootName);
    return buf.toString();
  }

}