/* This file is part of the Joshua Machine Translation System. * * Joshua is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307 USA */ package joshua.decoder.hypergraph; import joshua.corpus.vocab.SymbolTable; import joshua.decoder.JoshuaConfiguration; import joshua.decoder.hypergraph.HyperGraph; import java.util.HashMap; /** * during the pruning process, many Item/Deductions may not be * explored at all due to the early-stop in pruning_deduction * * @author Zhifei Li, <zhifei.work@gmail.com> * @version $LastChangedDate: 2010-02-01 08:21:59 -0600 (Mon, 01 Feb 2010) $ */ public class HyperGraphPruning extends TrivialInsideOutside { HashMap<HGNode,Boolean> processedNodesTbl = new HashMap<HGNode,Boolean>(); double bestLogProb;//viterbi unnormalized log prob in the hypergraph boolean ViterbiPruning = true;//Viterbi or Posterior pruning boolean fixThresholdPruning = true; double THRESHOLD_GENERAL = 10;//if the merit is worse than the best_log_prob by this number, then prune double THRESHOLD_GLUE = 10;//if the merit is worse than the best_log_prob by this number, then prune int numSurvivedEdges = 0; int numSurvivedNodes = 0; int glueGrammarOwner=0;//TODO public HyperGraphPruning(SymbolTable symbolTable, boolean fixThreshold, double thresholdGeneral, double thresholdGlue){ fixThresholdPruning = fixThreshold; THRESHOLD_GENERAL = thresholdGeneral; THRESHOLD_GLUE = thresholdGlue; glueGrammarOwner = symbolTable.addTerminal(JoshuaConfiguration.glue_owner);//TODO } public void clearState(){ processedNodesTbl.clear(); super.clearState(); } // ######################### pruning here ############## public void pruningHG(HyperGraph hg) { runInsideOutside(hg, 0, 1, 1.0);//sum-max, log-semiring if (fixThresholdPruning) { pruningHGHelper(hg); super.clearState(); } else { throw new RuntimeException("wrong call"); } } private void pruningHGHelper(HyperGraph hg) { this.bestLogProb = getLogNormalizationConstant();//set the best_log_prob numSurvivedEdges = 0; numSurvivedNodes = 0; processedNodesTbl.clear(); pruningNode(hg.goalNode); //clear up processedNodesTbl.clear(); System.out.println("Item suvived ratio: "+ numSurvivedNodes*1.0/hg.numNodes + " = " + numSurvivedNodes + "/" + hg.numNodes); System.out.println("Deduct suvived ratio: "+ numSurvivedEdges*1.0/hg.numEdges + " = " + numSurvivedEdges + "/" + hg.numEdges); } private void pruningNode(HGNode it) { if (processedNodesTbl.containsKey(it)) return; processedNodesTbl.put(it,true); boolean shouldSurvive = false; //### recursive call on each deduction for (int i = 0; i < it.hyperedges.size(); i++) { HyperEdge dt = it.hyperedges.get(i); boolean survived = pruningEdge(dt, it);//deduction-specifc operation if (survived) { shouldSurvive = true; // at least one deduction survive } else { it.hyperedges.remove(i); i--; } } //TODO: now we simply remove the pruned deductions, but in general, we may want to update the variables mainted in the item (e.g., best_deduction); this depends on the pruning method used /*by defintion: "should_surive==false" should be impossible, since if I got called, then my upper-deduction must survive, then i will survive * because there must be one way to reach me from lower part in order for my upper-deduction survive*/ if (! shouldSurvive) { throw new RuntimeException("item explored but does not survive"); //TODO: since we always keep the best_deduction, this should never be true } else { numSurvivedNodes++; } } //if survive, return true //best-deduction is always kept private boolean pruningEdge(HyperEdge dt, HGNode parent) { /**TODO: theoretically, if an item is get called, then its best deduction should always be kept even just by the threshold-checling. * In reality, due to precision of Double, the threshold-checking may not be perfect*/ if (dt != parent.bestHyperedge) { // best deduction should always survive if the Item is get called //### prune? if (shouldPruneHyperedge(dt, parent)) { return false; // early stop } } //### still survive, recursive call all my ant-items if (null != dt.getAntNodes()) { for (HGNode ant_it : dt.getAntNodes()) { pruningNode(ant_it); // recursive call on each ant item, note: the ant_it will not be pruned as I need it } } //### if get to here, then survive; remember: if I survive, then my upper-item must survive numSurvivedEdges++; //System.err.println("survived"); return true; // survive } private boolean shouldPruneHyperedge(HyperEdge dt, HGNode parent) { //### get merit double postLogProb = getEdgeUnormalizedPosteriorLogProb(dt, parent); //System.err.println(dt.toString()); if (dt.getRule() != null && dt.getRule().getOwner() == glueGrammarOwner && dt.getRule().getArity() == 2) { // specicial rule: S->S X //TODO //System.err.println((postLogProb + " " + this.bestLogProb + " " + THRESHOLD_GLUE)); return (this.bestLogProb - postLogProb > THRESHOLD_GLUE); } else { //System.err.println((postLogProb + " " + this.bestLogProb + " " + THRESHOLD_GENERAL)); return (this.bestLogProb - postLogProb > THRESHOLD_GENERAL); } } }