package edu.stanford.nlp.trees.tregex.tsurgeon;
import edu.stanford.nlp.util.logging.Redwood;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.LabelFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Pair;
import java.util.*;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
/**
* @author Roger Levy (rog@nlp.stanford.edu)
*/
public class AuxiliaryTree {
/** A logger for this class */
private static Redwood.RedwoodChannels log = Redwood.channels(AuxiliaryTree.class);
private final String originalTreeString;
final Tree tree;
Tree foot;
private final IdentityHashMap<Tree,String> nodesToNames; // no one else should be able to get this one.
private final Map<String,Tree> namesToNodes; // this one has a getter.
public AuxiliaryTree(Tree tree, boolean mustHaveFoot) {
originalTreeString = tree.toString();
this.tree = tree;
this.foot = findFootNode(tree);
if (foot == null && mustHaveFoot) {
throw new TsurgeonParseException("Error -- no foot node found for " + originalTreeString);
}
namesToNodes = Generics.newHashMap();
nodesToNames = new IdentityHashMap<>();
initializeNamesNodesMaps(tree);
}
private AuxiliaryTree(Tree tree, Tree foot, Map<String, Tree> namesToNodes, String originalTreeString) {
this.originalTreeString = originalTreeString;
this.tree = tree;
this.foot = foot;
this.namesToNodes = namesToNodes;
nodesToNames = null;
}
public Map<String, Tree> namesToNodes() {
return namesToNodes;
}
@Override
public String toString() {
return originalTreeString;
}
/**
* Copies the Auxiliary tree. Also, puts the new names->nodes map in the TsurgeonMatcher that called copy.
* <br>
* The trees and labels to use when making the copy are specified
* with treeFactory and labelFactory. This lets the tsurgeon script
* produce trees which are of the same type as the input trees.
* Each of the tsurgeon relations which copies a tree should include
* pass in the correct factories.
*/
public AuxiliaryTree copy(TsurgeonMatcher matcher, TreeFactory treeFactory, LabelFactory labelFactory) {
if (labelFactory == null) {
labelFactory = CoreLabel.factory();
}
Map<String,Tree> newNamesToNodes = Generics.newHashMap();
Pair<Tree,Tree> result = copyHelper(tree, newNamesToNodes, treeFactory, labelFactory);
//if(! result.first().dominates(result.second()))
//log.info("Error -- aux tree copy doesn't dominate foot copy.");
matcher.newNodeNames.putAll(newNamesToNodes);
return new AuxiliaryTree(result.first(), result.second(), newNamesToNodes, originalTreeString);
}
// returns Pair<node,foot>
private Pair<Tree,Tree> copyHelper(Tree node, Map<String,Tree> newNamesToNodes, TreeFactory treeFactory, LabelFactory labelFactory) {
Tree clone;
Tree newFoot = null;
if (node.isLeaf()) {
if (node == foot) { // found the foot node; pass it up.
clone = treeFactory.newTreeNode(node.label(), new ArrayList<>(0));
newFoot = clone;
} else {
clone = treeFactory.newLeaf(labelFactory.newLabel(node.label()));
}
} else {
List<Tree> newChildren = new ArrayList<>(node.children().length);
for (Tree child : node.children()) {
Pair<Tree,Tree> newChild = copyHelper(child, newNamesToNodes, treeFactory, labelFactory);
newChildren.add(newChild.first());
if (newChild.second() != null) {
if (newFoot != null) {
log.info("Error -- two feet found when copying auxiliary tree " + tree.toString() + "; using last foot found.");
}
newFoot = newChild.second();
}
}
clone = treeFactory.newTreeNode(labelFactory.newLabel(node.label()),newChildren);
}
if (nodesToNames.containsKey(node))
newNamesToNodes.put(nodesToNames.get(node),clone);
return new Pair<>(clone, newFoot);
}
/***********************************************************/
/* below here is init stuff for finding the foot node. */
/***********************************************************/
private static final String footNodeCharacter = "@";
private static final Pattern footNodeLabelPattern = Pattern.compile("^(.*)" + footNodeCharacter + '$');
private static final Pattern escapedFootNodeCharacter = Pattern.compile('\\' + footNodeCharacter);
/**
* Returns the foot node of the adjunction tree, which is the terminal node
* that ends in @. In the process, turns the foot node into a TreeNode
* (rather than a leaf), and destructively un-escapes all the escaped
* instances of @ in the tree. Note that final @ in a non-terminal node is
* ignored, and left in.
*/
private static Tree findFootNode(Tree t) {
Tree footNode = findFootNodeHelper(t);
Tree result = footNode;
if (footNode != null) {
Tree newFootNode = footNode.treeFactory().newTreeNode(footNode.label(), new ArrayList<>());
Tree parent = footNode.parent(t);
if (parent != null) {
int i = parent.objectIndexOf(footNode);
parent.setChild(i, newFootNode);
}
result = newFootNode;
}
return result;
}
private static Tree findFootNodeHelper(Tree t) {
Tree foundDtr = null;
if (t.isLeaf()) {
Matcher m = footNodeLabelPattern.matcher(t.label().value());
if (m.matches()) {
t.label().setValue(m.group(1));
return t;
} else {
return null;
}
}
for (Tree child : t.children()) {
Tree thisFoundDtr = findFootNodeHelper(child);
if (thisFoundDtr != null) {
if (foundDtr != null) {
throw new TsurgeonParseException("Error -- two foot nodes in subtree" + t.toString());
} else {
foundDtr = thisFoundDtr;
}
}
}
Matcher m = escapedFootNodeCharacter.matcher(t.label().value());
t.label().setValue(m.replaceAll(footNodeCharacter));
return foundDtr;
}
/* ******************************************************* *
* below here is init stuff for getting node -> names maps *
* ******************************************************* */
// There are two ways in which you can can match the start of a name
// expression.
// The first is if you have any number of non-escaping characters
// preceding an "=" and a name. This is the ([^\\\\]*) part.
// The second is if you have any number of any characters, followed
// by a non-"\" character, as "\" is used to escape the "=". After
// that, any number of pairs of "\" are allowed, as we let "\" also
// escape itself. After that comes "=" and a name.
static final Pattern namePattern = Pattern.compile("^((?:[^\\\\]*)|(?:(?:.*[^\\\\])?)(?:\\\\\\\\)*)=([^=]+)$");
/**
* Looks for new names, destructively strips them out.
* Destructively unescapes escaped chars, including "=", as well.
*/
private void initializeNamesNodesMaps(Tree t) {
for (Tree node : t.subTreeList()) {
Matcher m = namePattern.matcher(node.label().value());
if (m.find()) {
namesToNodes.put(m.group(2), node);
nodesToNames.put(node, m.group(2));
node.label().setValue(m.group(1));
}
node.label().setValue(unescape(node.label().value()));
}
}
static String unescape(String input) {
return input.replaceAll("\\\\(.)", "$1");
}
}