package edu.stanford.nlp.trees; import edu.stanford.nlp.util.logging.Redwood; import java.io.StringReader; import java.util.List; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.ling.Label; import edu.stanford.nlp.ling.LabelFactory; /** * <p> * A <code>TreeGraphNode</code> is simply a * {@link Tree <code>Tree</code>} * with some additional functionality. For example, the * <code>parent()</code> method works without searching from the root. * Labels are always assumed to be * {@link CoreLabel <code>CoreLabel</code>} * * <p>This class makes the horrible mistake of changing the semantics of * equals and hashCode to go back to "==" and System.identityHashCode, * despite the semantics of the superclass's equality.</p> * * @author Bill MacCartney */ public class TreeGraphNode extends Tree implements HasParent { /** A logger for this class */ private static Redwood.RedwoodChannels log = Redwood.channels(TreeGraphNode.class); /** * Label for this node. */ protected CoreLabel label; /** * Parent of this node. */ protected TreeGraphNode parent; // = null; /** * Children of this node. */ protected TreeGraphNode[] children = ZERO_TGN_CHILDREN; /** * For internal nodes, the head word of this subtree. */ private TreeGraphNode headWordNode; /** * A leaf node should have a zero-length array for its * children. For efficiency, subclasses can use this array as a * return value for children() for leaf nodes if desired. Should * this be public instead? */ protected static final TreeGraphNode[] ZERO_TGN_CHILDREN = new TreeGraphNode[0]; private static final LabelFactory mlf = CoreLabel.factory(); /** * Create a new <code>TreeGraphNode</code> with the supplied * label. * * @param label the label for this node. */ public TreeGraphNode(Label label) { this.label = (CoreLabel) mlf.newLabel(label); } /** * Create a new <code>TreeGraphNode</code> with the supplied * label and list of child nodes. * * @param label the label for this node. * @param children the list of child <code>TreeGraphNode</code>s * for this node. */ public TreeGraphNode(Label label, List<Tree> children) { this(label); setChildren(children); } /** * Create a new <code>TreeGraphNode</code> having the same tree * structure and label values as an existing tree (but no shared * storage). Operates recursively to construct an entire * subtree. * * @param t the tree to copy * @param parent the parent node */ protected TreeGraphNode(Tree t, TreeGraphNode parent) { this.parent = parent; Tree[] tKids = t.children(); int numKids = tKids.length; children = new TreeGraphNode[numKids]; for (int i = 0; i < numKids; i++) { children[i] = new TreeGraphNode(tKids[i], this); if (t.isPreTerminal()) { // add the tags to the leaves children[i].label.setTag(t.label().value()); } } this.label = (CoreLabel) mlf.newLabel(t.label()); } /** * Implements equality for <code>TreeGraphNode</code>s. Unlike * <code>Tree</code>s, <code>TreeGraphNode</code>s should be * considered equal only if they are ==. <i>Implementation note:</i> * TODO: This should be changed via introducing a Tree interface with the current Tree and this class implementing it, since what is done here breaks the equals() contract. * * @param o The object to compare with * @return Whether two things are equal */ @Override public boolean equals(Object o) { return o == this; } @Override public int hashCode() { return System.identityHashCode(this); } /** * Returns the label associated with the current node, or null * if there is no label. * * @return the label of the node */ @Override public CoreLabel label() { return label; } @Override public void setLabel(Label label) { if (label instanceof CoreLabel) { this.setLabel((CoreLabel) label); } else { this.setLabel((CoreLabel) mlf.newLabel(label)); } } /** * Sets the label associated with the current node. * * @param label the new label to use. */ public void setLabel(final CoreLabel label) { this.label = label; } /** * Get the index for the current node. */ public int index() { return label.index(); } /** * Set the index for the current node. */ protected void setIndex(int index) { label.setIndex(index); } /** * Get the parent for the current node. */ @Override public TreeGraphNode parent() { return parent; } /** * Set the parent for the current node. */ public void setParent(TreeGraphNode parent) { this.parent = parent; } /** * Returns an array of the children of this node. */ @Override public TreeGraphNode[] children() { return children; } /** * Sets the children of this <code>TreeGraphNode</code>. If * given <code>null</code>, this method sets * the node's children to the canonical zero-length Tree[] array. * * @param children an array of child trees */ @Override public void setChildren(Tree[] children) { if (children == null || children.length == 0) { this.children = ZERO_TGN_CHILDREN; } else { if (children instanceof TreeGraphNode[]) { this.children = (TreeGraphNode[]) children; for (TreeGraphNode child : this.children) { child.setParent(this); } } else { this.children = new TreeGraphNode[children.length]; for (int i = 0; i < children.length; i++) { this.children[i] = (TreeGraphNode)children[i]; this.children[i].setParent(this); } } } } /** {@inheritDoc} */ @Override public void setChildren(List<? extends Tree> childTreesList) { if (childTreesList == null || childTreesList.isEmpty()) { setChildren(ZERO_TGN_CHILDREN); } else { int leng = childTreesList.size(); TreeGraphNode[] childTrees = new TreeGraphNode[leng]; childTreesList.toArray(childTrees); setChildren(childTrees); } } @Override public Tree setChild(int i, Tree t) { if (!(t instanceof TreeGraphNode)) { throw new IllegalArgumentException("Horrible error"); } ((TreeGraphNode) t).setParent(this); return super.setChild(i, t); } /** * Adds a child in the ith location. Does so without overwriting * the parent pointers of the rest of the children, which might be * relevant in case there are add and remove operations mixed * together. */ @Override public void addChild(int i, Tree t) { if (!(t instanceof TreeGraphNode)) { throw new IllegalArgumentException("Horrible error"); } ((TreeGraphNode) t).setParent(this); TreeGraphNode[] kids = this.children; TreeGraphNode[] newKids = new TreeGraphNode[kids.length + 1]; if (i != 0) { System.arraycopy(kids, 0, newKids, 0, i); } newKids[i] = (TreeGraphNode) t; if (i != kids.length) { System.arraycopy(kids, i, newKids, i + 1, kids.length - i); } this.children = newKids; } /** * Removes the ith child from the TreeGraphNode. Needs to override * the parent removeChild so it can avoid setting the parent * pointers on the remaining children. This is useful if you want * to add and remove children from one node to another node; this way, * it won't matter what order you do the add and remove operations. */ @Override public Tree removeChild(int i) { TreeGraphNode[] kids = children(); TreeGraphNode kid = kids[i]; TreeGraphNode[] newKids = new TreeGraphNode[kids.length - 1]; for (int j = 0; j < newKids.length; j++) { if (j < i) { newKids[j] = kids[j]; } else { newKids[j] = kids[j + 1]; } } this.children = newKids; return kid; } /** * Uses the specified {@link HeadFinder <code>HeadFinder</code>} * to determine the heads for this node and all its descendants, * and to store references to the head word node and head tag node * in this node's {@link CoreLabel <code>CoreLabel</code>} and the * <code>CoreLabel</code>s of all its descendants.<p> * <p/> * Note that, in contrast to {@link Tree#percolateHeads * <code>Tree.percolateHeads()</code>}, which assumes {@link * edu.stanford.nlp.ling.CategoryWordTag * <code>CategoryWordTag</code>} labels and therefore stores head * words and head tags merely as <code>String</code>s, this * method stores references to the actual nodes. This mitigates * potential problems in sentences which contain the same word * more than once. * * @param hf The headfinding algorithm to use */ @Override public void percolateHeads(HeadFinder hf) { if (isLeaf()) { TreeGraphNode hwn = headWordNode(); if (hwn == null) { setHeadWordNode(this); } } else { for (Tree child : children()) { child.percolateHeads(hf); } TreeGraphNode head = safeCast(hf.determineHead(this,parent)); if (head != null) { TreeGraphNode hwn = head.headWordNode(); if (hwn == null && head.isLeaf()) { // below us is a leaf setHeadWordNode(head); } else { setHeadWordNode(hwn); } } else { log.info("Head is null: " + this); } } } /** * Return the node containing the head word for this node (or * <code>null</code> if none), as recorded in this node's {@link * CoreLabel <code>CoreLabel</code>}. (In contrast to {@link * edu.stanford.nlp.ling.CategoryWordTag * <code>CategoryWordTag</code>}, we store head words and head * tags as references to nodes, not merely as * <code>String</code>s.) * * @return the node containing the head word for this node */ public TreeGraphNode headWordNode() { return headWordNode; } /** * Store the node containing the head word for this node by * storing it in this node's {@link CoreLabel * <code>CoreLabel</code>}. (In contrast to {@link * edu.stanford.nlp.ling.CategoryWordTag * <code>CategoryWordTag</code>}, we store head words and head * tags as references to nodes, not merely as * <code>String</code>s.) * * @param hwn the node containing the head word for this node */ private void setHeadWordNode(final TreeGraphNode hwn) { this.headWordNode = hwn; } /** * Safely casts an <code>Object</code> to a * <code>TreeGraphNode</code> if possible, else returns * <code>null</code>. * * @param t any <code>Object</code> * @return <code>t</code> if it is a <code>TreeGraphNode</code>; * <code>null</code> otherwise */ private static TreeGraphNode safeCast(Object t) { if (t == null || !(t instanceof TreeGraphNode)) { return null; } return (TreeGraphNode) t; } /** * Checks the node's ancestors to find the highest ancestor with the * same <code>headWordNode</code> as this node. */ public TreeGraphNode highestNodeWithSameHead() { TreeGraphNode node = this; while (true) { TreeGraphNode parent = safeCast(node.parent()); if (parent == null || parent.headWordNode() != node.headWordNode()) { return node; } node = parent; } } // extra class guarantees correct lazy loading (Bloch p.194) private static class TreeFactoryHolder { static final TreeGraphNodeFactory tgnf = new TreeGraphNodeFactory(); private TreeFactoryHolder() { } } /** * Returns a <code>TreeFactory</code> that produces * <code>TreeGraphNode</code>s. The <code>Label</code> of * <code>this</code> is examined, and providing it is not * <code>null</code>, a <code>LabelFactory</code> which will * produce that kind of <code>Label</code> is supplied to the * <code>TreeFactory</code>. If the <code>Label</code> is * <code>null</code>, a * <code>CoreLabel.factory()</code> will be used. The factories * returned on different calls are different: a new one is * allocated each time. * * @return a factory to produce treegraphs */ @Override public TreeFactory treeFactory() { LabelFactory lf; if (label() != null) { lf = label().labelFactory(); } else { lf = CoreLabel.factory(); } return new TreeGraphNodeFactory(lf); } /** * Return a <code>TreeFactory</code> that produces trees of type * <code>TreeGraphNode</code>. The factory returned is always * the same one (a singleton). * * @return a factory to produce treegraphs */ public static TreeFactory factory() { return TreeFactoryHolder.tgnf; } /** * Return a <code>TreeFactory</code> that produces trees of type * <code>TreeGraphNode</code>, with the <code>Label</code> made * by the supplied <code>LabelFactory</code>. The factory * returned is a different one each time. * * @param lf The <code>LabelFactory</code> to use * @return a factory to produce treegraphs */ public static TreeFactory factory(LabelFactory lf) { return new TreeGraphNodeFactory(lf); } /** * Returns a <code>String</code> representation of this node and * its subtree with one node per line, indented according to * <code>indentLevel</code>. * * @param indentLevel how many levels to indent (0 for root node) * @return <code>String</code> representation of this subtree */ public String toPrettyString(int indentLevel) { StringBuilder buf = new StringBuilder("\n"); for (int i = 0; i < indentLevel; i++) { buf.append(" "); } if (children == null || children.length == 0) { buf.append(label.toString(CoreLabel.OutputFormat.VALUE_INDEX_MAP)); } else { buf.append('(').append(label.toString(CoreLabel.OutputFormat.VALUE_INDEX_MAP)); for (TreeGraphNode child : children) { buf.append(' ').append(child.toPrettyString(indentLevel + 1)); } buf.append(')'); } return buf.toString(); } /** * Returns a <code>String</code> representation of this node and * its subtree as a one-line parenthesized list. * * @return <code>String</code> representation of this subtree */ public String toOneLineString() { StringBuilder buf = new StringBuilder(); if (children == null || children.length == 0) { buf.append(label); } else { buf.append('(').append(label); for (TreeGraphNode child : children) { buf.append(' ').append(child.toOneLineString()); } buf.append(')'); } return buf.toString(); } @Override public String toString() { return toString(CoreLabel.DEFAULT_FORMAT); } public String toString(CoreLabel.OutputFormat format) { return label.toString(format); } /** * Just for testing. */ public static void main(String[] args) { try { TreeReader tr = new PennTreeReader(new StringReader("(S (NP (NNP Sam)) (VP (VBD died) (NP (NN today))))"), new LabeledScoredTreeFactory()); Tree t = tr.readTree(); System.out.println(t); TreeGraphNode tgn = new TreeGraphNode(t, (TreeGraphNode) null); System.out.println(tgn.toPrettyString(0)); EnglishGrammaticalStructure gs = new EnglishGrammaticalStructure(tgn); System.out.println(tgn.toPrettyString(0)); tgn.percolateHeads(new SemanticHeadFinder()); System.out.println(tgn.toPrettyString(0)); } catch (Exception e) { log.info("Horrible error: " + e); e.printStackTrace(); } } // Automatically generated by Eclipse private static final long serialVersionUID = 5080098143617475328L; }