/** * Copyright (c) 2007, Regents of the University of Colorado All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. Redistributions in binary * form must reproduce the above copyright notice, this list of conditions and * the following disclaimer in the documentation and/or other materials provided * with the distribution. Neither the name of the University of Colorado at * Boulder nor the names of its contributors may be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ package clear.treebank; import clear.dep.DepLib; import clear.dep.DepNode; import clear.dep.DepTree; import clear.dep.srl.SRLHead; import clear.propbank.PBLib; import clear.propbank.PBLoc; import clear.util.tuple.JIntIntTuple; import java.util.ArrayList; import java.util.Collections; /** * Tree as in Penn Treebank. * * @author Jinho D. Choi <b>Last update:</b> 1/27/2011 */ public class TBTree { /** * Pointer to the root node */ private TBNode nd_root; /** * Pointer to terminal nodes */ private ArrayList<TBNode> ls_terminal; /** * Initializes the tree. */ public TBTree() { ls_terminal = new ArrayList<>(); } /** * @return node in * <code>terminalId:height</code>. */ public TBNode getNode(int terminalId, int height) { TBNode node; if ((node = getTerminalNode(terminalId)) != null) { for (int i = 0; i < height; i++) { if (node.getParent() == null) { return null; } node = node.getParent(); } } return node; } public TBNode getNode(PBLoc pbLoc) { return getNode(pbLoc.terminalId, pbLoc.height); } /** * @return the root node. */ public TBNode getRootNode() { return nd_root; } /** * Sets the root node of this tree. */ public void setRootNode(TBNode root) { nd_root = root; } /** * @return terminal node with the terminal ID. If the terminal ID is not * within the range, return null. */ public TBNode getTerminalNode(int terminalId) { if (0 <= terminalId && terminalId < ls_terminal.size()) { return ls_terminal.get(terminalId); } return null; } /** * @return list of terminal nodes. */ public ArrayList<TBNode> getTerminalNodes() { return ls_terminal; } /** * Adds a terminal node. */ public void addTerminalNode(TBNode node) { ls_terminal.add(node); } /** * Assigns the PropBank locations to all nodes. */ public void setPBLocs() { TBNode parent; int height; for (TBNode node : ls_terminal) { parent = node; height = 0; node.pbLoc = new PBLoc(null, node.terminalId, height++); while ((parent = parent.getParent()) != null) { if (parent.pbLoc != null) { break; } parent.pbLoc = new PBLoc("", node.terminalId, height++); } } } /** * @return antecedent node indicated by the co-index. If there is no such * node, return null. */ public TBNode getAntecedent(int coIndex) { return getAntecedentAux(nd_root, coIndex); } /** * Called from {@link TBTree#getAntecedent(int)}. */ public TBNode getAntecedentAux(TBNode curr, int coIndex) { if (curr.coIndex == coIndex) { return curr; } if (!curr.isPhrase()) { return null; } for (TBNode child : curr.getChildren()) { TBNode node = getAntecedentAux(child, coIndex); if (node != null) { return node; } } return null; } /** * Finds antecedents of all empty categories and complementizer. */ public void setAntecedents() { setAntecedentsEmptyCategory(); setAntecedentsComplementizer(nd_root); } /** * Called from {@link TBTree#setAntecedents()}. */ private void setAntecedentsEmptyCategory() { TBNode ante; int coIndex; for (TBNode node : ls_terminal) { coIndex = node.getEmptyCategoryCoIndex(); if (coIndex == -1) { continue; } ante = getAntecedent(coIndex); if (ante == null) { // if (!node.isForm("\\*PRO\\*.*") && !node.getParent().isPos("WH.*")) System.err.println("Missing antecedent " + coIndex + ": " + node.form);//+"\n"+toTree()); } else { ante.pbLoc.type = PBLib.PROP_OP_ANTE; node.antecedent = ante; } } } /** * Called from {@link TBTree#setAntecedents()}. */ private void setAntecedentsComplementizer(TBNode curr) { if (!curr.isPhrase()) { return; } for (TBNode child : curr.getChildren()) { if (curr.isPos(TBEnLib.POS_SBAR) && child.isPos("WH.*")) { TBNode parent = curr.getParent(); if (parent != null && parent.isPos("NP|VP")) { ArrayList<TBNode> siblings = parent.getChildren(); TBNode ante; for (int i = curr.childId - 1; i >= 0; i--) { ante = siblings.get(i); if ((parent.isPos("NP") && ante.isPos("NP")) || (parent.isPos("VP") && ante.isPos("PP"))) { TBNode comp = child.getComplementizer(); if (comp != null) { ante.pbLoc.type = PBLib.PROP_OP_ANTE; comp.pbLoc.type = PBLib.PROP_OP_COMP; comp.antecedent = ante; } break; } } } } setAntecedentsComplementizer(child); } } /** * @return tree representation in Penn Treebank style. */ public String toTree() { return "(" + toTreeAux(nd_root, "") + ")"; } /** * Called from {@link TBTree#toTreeAux(TBNode, String).} */ private String toTreeAux(TBNode node, String indent) { String str = indent + "(" + node.getTags(); if (!node.isPhrase()) { return str += " " + node.form + ")"; } for (TBNode child : node.getChildren()) { str += "\n" + toTreeAux(child, indent + " "); } return str + ")"; } public boolean isUnder(int terminalId, String phrase) { TBNode curr; for (int i = 1; i < 100; i++) { curr = getNode(terminalId, i); if (curr == null) { return false; } if (curr.isPos(phrase)) { return true; } } return false; } public boolean isLastRNR(TBNode node) { for (int i = node.terminalId + 1; i < ls_terminal.size(); i++) { if (ls_terminal.get(i).form.equals((node.form))) { return false; } } return true; } public void mapSRLTree(DepTree tree) { mapSRLTreeAux(tree, getRootNode()); mapSRLTreeAuxClean(tree); } private void mapSRLTreeAux(DepTree tree, TBNode tNode) { if (tNode.headId >= 0) { DepNode dNode = tree.get(tNode.headId + 1); if (tNode.pb_heads != null) { dNode.addSRLHeads(tNode.pb_heads); Collections.sort(dNode.srlInfo.heads); } else if (tNode.rolesetId != null) { dNode.setRolesetId(tNode.rolesetId); } if (tNode.antecedent != null) { TBNode ante = getNode(tNode.antecedent.pbLoc); dNode.antecedent = tree.get(ante.headId + 1); } } if (tNode.isPhrase()) { for (TBNode child : tNode.getChildren()) { mapSRLTreeAux(tree, child); } } } private void mapSRLTreeAuxClean(DepTree tree) { ArrayList<SRLHead> list; DepNode node, head; for (int i = 1; i < tree.size(); i++) { node = tree.get(i); head = tree.get(node.headId); list = new ArrayList<>(); while (!head.isRoot()) { for (SRLHead tmp : head.srlInfo.heads) { if (!tree.isAncestor(head.id, tmp.headId)) { list.add(tmp); } else if (tree.get(tmp.headId).isDeprel(DepLib.DEPREL_PRN)) { list.add(tmp); } } head = tree.get(head.headId); } node.removeSRLHeads(list); } } public String formsWithoutSpace() { StringBuilder build = new StringBuilder(); for (TBNode node : ls_terminal) { if (node.isEmptyCategory()) { continue; } node.form = node.form.replaceAll("\\\\/", "/"); build.append(node.form); } return build.toString(); } public ArrayList<JIntIntTuple> getCharIdToTerminalIdMap() { ArrayList<JIntIntTuple> map = new ArrayList<>(); int length = 0; for (TBNode node : ls_terminal) { if (node.isEmptyCategory()) { continue; } map.add(new JIntIntTuple(length, node.terminalId)); length += node.form.length(); } return map; } }