package joshua.decoder.chart_parser;
import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;
import java.util.logging.Level;
import java.util.logging.Logger;
import joshua.decoder.Support;
/**
* Beam pruning for chart items:
* (1) relative threshold pruning: when the logP of a new edge (or an existing node)
*     is worse than the best by more than a threshold, prune it;
* (2) bin limit pruning: when the number of nodes is greater than a threshold, prune some nodes;
* (3) maintains the cutoff logP (derived from the best logP seen so far) and nodesHeap.
*/
public class BeamPruner<Obj extends Prunable> {

    // ===============================================================
    // Static fields
    // ===============================================================

    /**
     * Margin subtracted from the worst logP in the heap when the cutoff
     * is tightened because the heap has reached its capacity.
     */
    private static final double EPSILON = 0.000001;

    private static final Logger logger =
        Logger.getLogger(BeamPruner.class.getName());

    // ===============================================================
    // Instance fields
    // ===============================================================

    /**
     * Number of dead items that are still physically stored in
     * {@link #nodesHeap}.
     *
     * Ideally, if the goodness of an object changes, it should be
     * removed from the heap (linear time) and re-inserted (logN time).
     * That is too expensive, so instead the stale object is marked as
     * dead and a new object with the updated goodness is added.
     * Dead items are discarded lazily when they are polled.
     */
    private int qtyDeadItems = 0;

    /** cutoff = bestItemLogP - relativeThreshold; starts at negative infinity. */
    private double cutoffLogP = Double.NEGATIVE_INFINITY;

    /** Width of the beam, in logP units, below the best item. */
    private final double relativeThreshold;

    /** Maximum number of live objects kept in the heap (bin limit). */
    private final int maxNumObjs;

    /**
     * Heap of candidate objects, supplied by the caller.
     * It is expected to behave as a MIN-HEAP on goodness, i.e. its
     * comparator puts the worst item at the head. The only purpose of
     * this heap is to help decide which nodes should be removed during
     * pruning.
     */
    private final PriorityQueue<Obj> nodesHeap;

    /**
     * @param nodesHeap         queue used to store the objects; its head is
     *                          treated as the worst object
     * @param relativeThreshold distance below the best logP at which
     *                          objects are pruned
     * @param maxNumObjs        maximum number of live objects to keep
     */
    public BeamPruner(PriorityQueue<Obj> nodesHeap, double relativeThreshold, int maxNumObjs) {
        this.nodesHeap = nodesHeap;
        this.relativeThreshold = relativeThreshold;
        this.maxNumObjs = maxNumObjs;
    }

    /**
     * Threshold cutoff pruning.
     *
     * @param logP the score to test
     * @return true if {@code logP} is not better than the current cutoff,
     *         i.e. the item should be pruned
     */
    public boolean relativeThresholdPrune(double logP) {
        return (logP <= this.cutoffLogP);
    }

    /**
     * Records that one more object stored in the heap has been marked dead.
     *
     * @return the updated number of dead objects
     */
    public int incrementDeadObjs() {
        return ++this.qtyDeadItems;
    }

    /**
     * Adds the object, updates the cutoff logP, and triggers pruning.
     *
     * @param obj the object to add
     * @return the live objects removed from the heap by pruning (possibly
     *         empty, possibly containing {@code obj} itself), or
     *         {@code null} if the heap contained only dead objects and
     *         was simply cleared
     */
    public List<Obj> addOneObjInHeapWithPrune(Obj obj) {
        this.nodesHeap.add(obj);
        updateCutoffLogP(obj.getPruneLogP());
        return pruningObjs();
    }

    /**
     * Adds the object and updates the cutoff logP, without pruning.
     *
     * @param obj the object to add
     * @return always {@code null}, since nothing is pruned
     */
    public List<Obj> addOneObjInHeapWithoutPrune(Obj obj) {
        this.nodesHeap.add(obj);
        updateCutoffLogP(obj.getPruneLogP());
        return null;
    }

    /** @return the current cutoff logP */
    public double getCutoffLogP() {
        return this.cutoffLogP;
    }

    /**
     * Pruning at the object level: applies bin limit pruning and
     * relative threshold pruning to the head of the heap until neither
     * applies or the heap runs out of items.
     *
     * @return the live objects that were pruned, or {@code null} if
     *         every object in the heap was already dead
     */
    private List<Obj> pruningObjs() {
        if (logger.isLoggable(Level.FINEST)) {
            logger.finest(String.format("Pruning: heap size: %d; n_dead_items: %d", this.nodesHeap.size(), this.qtyDeadItems));
        }

        if (this.nodesHeap.size() == this.qtyDeadItems) {
            this.nodesHeap.clear();
            this.qtyDeadItems = 0;
            // Since all these objects are already dead, we do not
            // consider them pruned objects, so return null.
            return null;
        }

        List<Obj> prunedObjs = new ArrayList<Obj>();

        // The emptiness check must come first: peek() returns null on an
        // empty heap, which can be reached when every remaining item
        // falls under the cutoff (e.g. a non-positive relative threshold)
        // or when the bin limit is zero.
        while (!this.nodesHeap.isEmpty()
                && (liveObjCount() > this.maxNumObjs // bin limit pruning
                    // Threshold pruning is re-checked here because the
                    // cutoff may have changed after the object was
                    // inserted into the heap.
                    || relativeThresholdPrune(this.nodesHeap.peek().getPruneLogP()))) {
            Obj worstNode = this.nodesHeap.poll();
            if (worstNode.isDead()) {
                // A dead object is silently dropped; it is not reported as pruned.
                this.qtyDeadItems--;
            } else {
                prunedObjs.add(worstNode);
            }
        }

        // If the heap reaches its capacity, we do more aggressive
        // threshold pruning by increasing the cutoffLogP.
        if (!this.nodesHeap.isEmpty() && liveObjCount() >= this.maxNumObjs) {
            greedyUpdateCutoffLogP(this.nodesHeap.peek().getPruneLogP());
        }
        return prunedObjs;
    }

    /** @return heap size minus the number of dead items recorded for it */
    private int liveObjCount() {
        return this.nodesHeap.size() - this.qtyDeadItems;
    }

    /**
     * Raises the cutoff to {@code newLogP - relativeThreshold} if that
     * is higher than the current cutoff; the cutoff never decreases.
     */
    private void updateCutoffLogP(double newLogP) {
        this.cutoffLogP =
            Support.findMax(this.cutoffLogP, newLogP - this.relativeThreshold);
    }

    /**
     * If the heap is already full, do more aggressive threshold
     * pruning: raise the cutoff to just below the logP of the item at
     * the head of the heap, if that is higher than the current cutoff.
     */
    private void greedyUpdateCutoffLogP(double worstHeapLogP) {
        this.cutoffLogP = Support.findMax(
            this.cutoffLogP,
            worstHeapLogP - EPSILON);
    }
}