/** * */ package edu.berkeley.nlp.PCFGLA; import java.util.AbstractCollection; import java.util.Iterator; import java.util.List; import java.util.ArrayList; import edu.berkeley.nlp.syntax.Tree; import edu.berkeley.nlp.syntax.StateSet; import edu.berkeley.nlp.util.Numberer; /** * Essentially equivalent to a List<Tree<StateSet>>, but each Tree<StateSet> is re-built every time * from the corresponding Tree<String>. This saves a lot of memory at the expense of some time. * Most of the code is contained in the subclass StringTreeListIterator. * * Beware of the behavior of hasNext(), which deallocates the current tree (the last one returned * by next()). This is PRESUMABLY when the current tree is no longer needed, but be careful. * * @author Romain Thibaux */ public class StateSetTreeList extends AbstractCollection<Tree<StateSet>> { List<Tree<StateSet>> trees; static short zero = 0, one = 1; /* * Allocate the inside and outside score arrays for the whole tree */ void allocate(Tree<StateSet> tree) { tree.getLabel().allocate(); for (Tree<StateSet> child : tree.getChildren()) { allocate(child); } } /* * Deallocate the inside and outside score arrays for the whole tree */ void deallocate(Tree<StateSet> tree) { tree.getLabel().deallocate(); for (Tree<StateSet> child : tree.getChildren()) { deallocate(child); } } /* * create a deep copy of this object */ public StateSetTreeList copy(){ StateSetTreeList copy = new StateSetTreeList(); for (Tree<StateSet> tree : trees){ copy.add(copyTree(tree)); } return copy; } /** * @param tree * @return */ private Tree<StateSet> copyTree(Tree<StateSet> tree) { ArrayList<Tree<StateSet>> newChildren = new ArrayList<Tree<StateSet>>(tree.getChildren().size()); for (Tree<StateSet> child : tree.getChildren()) { newChildren.add(copyTree(child)); } return new Tree<StateSet>(tree.getLabel().copy(), newChildren); } public class StateSetTreeListIterator implements Iterator<Tree<StateSet>> { Iterator<Tree<StateSet>> stringTreeListIterator; Tree<StateSet> currentTree; public StateSetTreeListIterator() { stringTreeListIterator = trees.iterator(); currentTree = null; } public boolean hasNext() { // A somewhat crappy API, the tree is deallocated when hasNext() is called, // which is PRESUMABLY when the current tree is no longer needed. if (currentTree != null) { deallocate(currentTree); } return stringTreeListIterator.hasNext(); } public Tree<StateSet> next() { currentTree = stringTreeListIterator.next(); //allocate(currentTree); return currentTree; } public void remove() { stringTreeListIterator.remove(); } } /** * * @param trees * @param numStates * @param allSplitTheSame * This should be true only if all states are being split the same * number of times. This number is taken from numStates[0]. * @param tagNumberer * @param dontSplitTags */ public StateSetTreeList(List<Tree<String>> trees, short[] numStates, boolean allSplitTheSame, Numberer tagNumberer) { this.trees = new ArrayList<Tree<StateSet>>(); for (Tree<String> tree : trees) { this.trees.add(stringTreeToStatesetTree(tree, numStates, allSplitTheSame, tagNumberer)); tree = null; } } public StateSetTreeList(StateSetTreeList treeList, short[] numStates, boolean constant) { this.trees = new ArrayList<Tree<StateSet>>(); for (Tree<StateSet> tree : treeList.trees) { this.trees.add(resizeStateSetTree(tree,numStates,constant)); } } public StateSetTreeList() { this.trees = new ArrayList<Tree<StateSet>>(); } public boolean add(Tree<StateSet> tree){ return trees.add(tree); } public Tree<StateSet> get(int i){ return trees.get(i); } public int size() { return trees.size(); } public boolean isEmpty() { return trees.isEmpty(); } /* * An iterator over the StateSet trees (which are re-built on the fly) */ public Iterator<Tree<StateSet>> iterator() { return new StateSetTreeListIterator(); } /** * Convert a single Tree[String] to Tree[StateSet] * * @param tree * @param numStates * @param tagNumberer * @return */ public static Tree<StateSet> stringTreeToStatesetTree (Tree<String> tree, short[] numStates, boolean allSplitTheSame, Numberer tagNumberer){ Tree<StateSet> result = stringTreeToStatesetTree(tree,numStates,allSplitTheSame,tagNumberer,false,0,tree.getYield().size()); // set the positions properly: List<StateSet> words = result.getYield(); //for all words in sentence for (short position = 0; position < words.size(); position++) { words.get(position).from = position; words.get(position).to = (short)(position + 1); } return result; } private static Tree<StateSet> stringTreeToStatesetTree (Tree<String> tree, short[] numStates, boolean allSplitTheSame, Numberer tagNumberer, boolean splitRoot, int from, int to){ if (tree.isLeaf()) { StateSet newState = new StateSet(zero, one, tree.getLabel().intern(),(short)from,(short)to); return new Tree<StateSet>(newState); } short label = (short)tagNumberer.number(tree.getLabel()); if (label<0) label =0; // System.out.println(label + " " +tree.getLabel()); if (label>=numStates.length){ // System.err.println("Have never seen this state before: "+tree.getLabel()); // StateSet newState = new StateSet(zero, one, tree.getLabel().intern(),(short)from,(short)to); // return new Tree<StateSet>(newState); } short nodeNumStates = (allSplitTheSame||numStates.length<=label) ? numStates[0] : numStates[label]; if (!splitRoot) nodeNumStates = 1; StateSet newState = new StateSet(label, nodeNumStates, null, (short)from , (short)to); Tree<StateSet> newTree = new Tree<StateSet>(newState); List<Tree<StateSet>> newChildren = new ArrayList<Tree<StateSet>>(); for (Tree<String> child : tree.getChildren()) { short length = (short) child.getYield().size(); Tree<StateSet> newChild = stringTreeToStatesetTree(child, numStates, allSplitTheSame, tagNumberer, true, from, from+length); from += length; newChildren.add(newChild); } newTree.setChildren(newChildren); return newTree; } private static Tree<StateSet> resizeStateSetTree (Tree<StateSet> tree, short[] numStates, boolean constant) { if (tree.isLeaf()) { return tree; } short state = tree.getLabel().getState(); short newNumStates = constant ? numStates[0] : numStates[state]; StateSet newState = new StateSet(tree.getLabel(), newNumStates); Tree<StateSet> newTree = new Tree<StateSet>(newState); List<Tree<StateSet>> newChildren = new ArrayList<Tree<StateSet>>(); for (Tree<StateSet> child : tree.getChildren()) { newChildren.add(resizeStateSetTree(child, numStates, constant)); } newTree.setChildren(newChildren); return newTree; } /** * @param trainTrees * @param tagNumberer */ public static void initializeTagNumberer(List<Tree<String>> trees, Numberer tagNumberer) { short[] nSub = new short[2]; nSub[0] = 1; nSub[1] = 1; for (Tree<String> tree : trees) { Tree<StateSet> tmp = stringTreeToStatesetTree(tree, nSub, true, tagNumberer); } } }