package edu.berkeley.nlp.syntax; import edu.berkeley.nlp.ling.HeadFinder; import edu.berkeley.nlp.ling.CollinsHeadFinder; import edu.berkeley.nlp.util.Pair; import java.util.List; import java.util.ArrayList; import java.io.StringReader; /** * Created by IntelliJ IDEA. * User: aria42 * Date: Oct 25, 2008 * Time: 4:04:53 PM */ public class RichLabel { private String headWord; private String headTag; private int start; private int stop; private int headIndex; private String label; private Tree<String> origNode; public int getSpanSize() { return stop-start; } public int getHeadIndex() { return headIndex; } public void setHeadIndex(int headIndex) { this.headIndex = headIndex; } public String getHeadWord() { return headWord; } public void setHeadWord(String headWord) { this.headWord = headWord; } public String getHeadTag() { return headTag; } public void setHeadTag(String headTag) { this.headTag = headTag; } public int getStart() { return start; } public void setStart(int start) { this.start = start; } public int getStop() { return stop; } public void setStop(int stop) { this.stop = stop; } public String getLabel() { return label; } public void setLabel(String label) { this.label = label; } public Tree<String> getOriginalNode() { return origNode; } public void setOriginalNode(Tree<String> origNode) { this.origNode = origNode; } @Override public String toString() { return String.format("%s(%s[%d]-%s)[%d,%d]",label,headWord,headIndex,headTag,start,stop); } private static final CollinsHeadFinder cf = new CollinsHeadFinder(); public static Tree<RichLabel> getRichTree(Tree<String> tree) { return getRichTree(tree, cf); } public static Tree<RichLabel> getRichTree(Tree<String> tree, HeadFinder headFinder) { return buildRecursive(tree,headFinder, 0); } private static Pair<String,String> getHeadWordTag(Tree<String> tree, HeadFinder headFinder) { if (tree.isPreTerminal()) { Tree<String> term = tree.getChildren().get(0); return Pair.newPair(term.getLabel(),tree.getLabel()); } if (tree.isLeaf()) { return Pair.newPair(tree.getLabel(),null); } Tree<String> head = headFinder.determineHead(tree); return getHeadWordTag(head,headFinder); } private static Tree<RichLabel> buildRecursive(Tree<String> tree, HeadFinder headFinder, int start) { RichLabel label = new RichLabel(); label.setStart(start); label.setStop(start + tree.getYield().size()); label.setLabel(tree.getLabel()); label.setOriginalNode(tree); Pair<String,String> headWordTagPair = getHeadWordTag(tree,headFinder); label.setHeadWord(headWordTagPair.getFirst()); label.setHeadTag(headWordTagPair.getSecond()); int offset = start; List<Tree<RichLabel>> richChildren = new ArrayList<Tree<RichLabel>>(); for (Tree<String> child : tree.getChildren()) { Tree<RichLabel> richChild = buildRecursive(child,headFinder,offset); richChildren.add(richChild); offset += child.getYield().size(); } // Head Index if (tree.isPhrasal()) { Tree<String> headChild = headFinder.determineHead(tree); for (Tree<RichLabel> child: richChildren) { if (child.getLabel().origNode == headChild) { label.setHeadIndex(child.getLabel().getHeadIndex()); } } } else { label.setHeadIndex(label.start); } return new Tree<RichLabel>(label,richChildren); } public static void main(String[] args) { String tStr = "((S (NP (DT The) (NN man)) (VP (VBD ran) (PP (IN down) (NP (DT the) (NNS stairs))))))"; Tree<String> t = new Trees.PennTreeReader(new StringReader(tStr)).next(); System.out.println("Rich Tree: " + getRichTree(t, new CollinsHeadFinder())); } }