AuxiliaryTree.java example

package edu.stanford.nlp.trees.tregex.tsurgeon; 
import edu.stanford.nlp.util.logging.Redwood;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.LabelFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Pair;

import java.util.*;
import java.util.regex.Pattern;
import java.util.regex.Matcher;

/**
 * @author Roger Levy (rog@nlp.stanford.edu)
 */
public class AuxiliaryTree  {

  /** A logger for this class */
  private static Redwood.RedwoodChannels log = Redwood.channels(AuxiliaryTree.class);

  private final String originalTreeString;
  final Tree tree;
  Tree foot;
  private final IdentityHashMap<Tree,String> nodesToNames; // no one else should be able to get this one.
  private final Map<String,Tree> namesToNodes; // this one has a getter.


  public AuxiliaryTree(Tree tree, boolean mustHaveFoot) {
    originalTreeString = tree.toString();
    this.tree = tree;
    this.foot = findFootNode(tree);
    if (foot == null && mustHaveFoot) {
      throw new TsurgeonParseException("Error -- no foot node found for " + originalTreeString);
    }
    namesToNodes = Generics.newHashMap();
    nodesToNames = new IdentityHashMap<>();
    initializeNamesNodesMaps(tree);
  }

  private AuxiliaryTree(Tree tree, Tree foot, Map<String, Tree> namesToNodes, String originalTreeString) {
    this.originalTreeString = originalTreeString;
    this.tree = tree;
    this.foot = foot;
    this.namesToNodes = namesToNodes;
    nodesToNames = null;
  }

  public Map<String, Tree> namesToNodes() {
    return namesToNodes;
  }

  @Override
  public String toString() {
    return originalTreeString;
  }

  /**
   * Copies the Auxiliary tree.  Also, puts the new names->nodes map in the TsurgeonMatcher that called copy.
   * <br>
   * The trees and labels to use when making the copy are specified
   * with treeFactory and labelFactory.  This lets the tsurgeon script
   * produce trees which are of the same type as the input trees.
   * Each of the tsurgeon relations which copies a tree should include
   * pass in the correct factories.
   */
  public AuxiliaryTree copy(TsurgeonMatcher matcher, TreeFactory treeFactory, LabelFactory labelFactory) {
    if (labelFactory == null) {
      labelFactory = CoreLabel.factory();
    }
    Map<String,Tree> newNamesToNodes = Generics.newHashMap();
    Pair<Tree,Tree> result = copyHelper(tree, newNamesToNodes, treeFactory, labelFactory);
    //if(! result.first().dominates(result.second()))
      //log.info("Error -- aux tree copy doesn't dominate foot copy.");
    matcher.newNodeNames.putAll(newNamesToNodes);
    return new AuxiliaryTree(result.first(), result.second(), newNamesToNodes, originalTreeString);
  }

  // returns Pair<node,foot>
  private Pair<Tree,Tree> copyHelper(Tree node, Map<String,Tree> newNamesToNodes, TreeFactory treeFactory, LabelFactory labelFactory) {
    Tree clone;
    Tree newFoot = null;
    if (node.isLeaf()) {
      if (node == foot) { // found the foot node; pass it up.
        clone = treeFactory.newTreeNode(node.label(), new ArrayList<>(0));
        newFoot = clone;
      } else {
        clone = treeFactory.newLeaf(labelFactory.newLabel(node.label()));
      }
    } else {
      List<Tree> newChildren = new ArrayList<>(node.children().length);
      for (Tree child : node.children()) {
        Pair<Tree,Tree> newChild = copyHelper(child, newNamesToNodes, treeFactory, labelFactory);
        newChildren.add(newChild.first());
        if (newChild.second() != null) {
          if (newFoot != null) {
            log.info("Error -- two feet found when copying auxiliary tree " + tree.toString() + "; using last foot found.");
          }
          newFoot = newChild.second();
        }
      }
      clone = treeFactory.newTreeNode(labelFactory.newLabel(node.label()),newChildren);
    }

    if (nodesToNames.containsKey(node))
      newNamesToNodes.put(nodesToNames.get(node),clone);

    return new Pair<>(clone, newFoot);
  }



  /***********************************************************/
  /* below here is init stuff for finding the foot node.     */
  /***********************************************************/


  private static final String footNodeCharacter = "@";
  private static final Pattern footNodeLabelPattern = Pattern.compile("^(.*)" + footNodeCharacter + '$');
  private static final Pattern escapedFootNodeCharacter = Pattern.compile('\\' + footNodeCharacter);

  /**
   * Returns the foot node of the adjunction tree, which is the terminal node
   * that ends in @.  In the process, turns the foot node into a TreeNode
   * (rather than a leaf), and destructively un-escapes all the escaped
   * instances of @ in the tree.  Note that final @ in a non-terminal node is
   * ignored, and left in.
   */
  private static Tree findFootNode(Tree t) {
    Tree footNode = findFootNodeHelper(t);
    Tree result = footNode;
    if (footNode != null) {
      Tree newFootNode = footNode.treeFactory().newTreeNode(footNode.label(), new ArrayList<>());

      Tree parent = footNode.parent(t);
      if (parent != null) {
        int i = parent.objectIndexOf(footNode);
        parent.setChild(i, newFootNode);
      }

      result = newFootNode;
    }
    return result;
  }

  private static Tree findFootNodeHelper(Tree t) {
    Tree foundDtr = null;
    if (t.isLeaf()) {
      Matcher m = footNodeLabelPattern.matcher(t.label().value());
      if (m.matches()) {
        t.label().setValue(m.group(1));
        return t;
      } else {
        return null;
      }
    }
    for (Tree child : t.children()) {
      Tree thisFoundDtr = findFootNodeHelper(child);
      if (thisFoundDtr != null) {
        if (foundDtr != null) {
          throw new TsurgeonParseException("Error -- two foot nodes in subtree" + t.toString());
        } else {
          foundDtr = thisFoundDtr;
        }
      }
    }
    Matcher m = escapedFootNodeCharacter.matcher(t.label().value());
    t.label().setValue(m.replaceAll(footNodeCharacter));
    return foundDtr;
  }


  /* ******************************************************* *
   * below here is init stuff for getting node -> names maps *
   * ******************************************************* */

  // There are two ways in which you can can match the start of a name
  // expression.
  // The first is if you have any number of non-escaping characters
  // preceding an "=" and a name.  This is the ([^\\\\]*) part.
  // The second is if you have any number of any characters, followed
  // by a non-"\" character, as "\" is used to escape the "=".  After
  // that, any number of pairs of "\" are allowed, as we let "\" also
  // escape itself.  After that comes "=" and a name.
  static final Pattern namePattern = Pattern.compile("^((?:[^\\\\]*)|(?:(?:.*[^\\\\])?)(?:\\\\\\\\)*)=([^=]+)$");

  /**
   * Looks for new names, destructively strips them out.
   * Destructively unescapes escaped chars, including "=", as well.
   */
  private void initializeNamesNodesMaps(Tree t) {
    for (Tree node : t.subTreeList()) {
      Matcher m = namePattern.matcher(node.label().value());
      if (m.find()) {
        namesToNodes.put(m.group(2), node);
        nodesToNames.put(node, m.group(2));
        node.label().setValue(m.group(1));
      }
      node.label().setValue(unescape(node.label().value()));
    }
  }

  static String unescape(String input) {
    return input.replaceAll("\\\\(.)", "$1");
  }

}