package joshua.discriminative.training.risk_annealer.hypergraph.deprecated;

import java.util.HashMap;
import java.util.Map;
import java.util.logging.Logger;

import joshua.discriminative.semiring_parsingv2.applications.min_risk_da.MinRiskDADenseFeaturesSemiringParser;
import joshua.discriminative.training.risk_annealer.GradientComputer;
import joshua.discriminative.training.risk_annealer.hypergraph.FeatureForest;

/**
 * Design notes:
 * (1) feature weights are stored in an array; the position in the array is, implicitly, the feature ID
 * (2) features have a table mapping from String to Integer (the feature ID)
 * (3) each edge stores a table mapping feature ID to feature score
 * (4) the semiring parser never sees feature strings, only feature IDs and feature scores
 * (5) the semiring parser returns a table of gradients, mapping feature ID to gradient
 */
@Deprecated
public class HGRiskGradientComputerBasedOnSemringV2 extends GradientComputer {

	private int numSentence;
	private boolean fixFirstFeature = false;
	private FeatureForestFactory hgFactory;

	private double sumGain = 0; // negative risk
	private double sumEntropy = 0;
	private int numCalls = 0;

	/** Logger for this class. */
	private static final Logger logger =
		Logger.getLogger(HGRiskGradientComputerBasedOnSemringV2.class.getName());

	public HGRiskGradientComputerBasedOnSemringV2(int numSent_, int numFeatures_, double gainFactor_,
			double scale_, double temperature_, boolean computeScalingGradient,
			boolean fixFirstFeature_, FeatureForestFactory hgFactory_) {
		super(numFeatures_, gainFactor_, scale_, temperature_, computeScalingGradient);
		this.numSentence = numSent_;
		this.fixFirstFeature = fixFirstFeature_;
		this.hgFactory = hgFactory_;
		logger.info("using HGRiskGradientComputerBasedOnSemringV2");
	}

	@Override
	public void reComputeFunctionValueAndGradient(double[] weights) {

		//== set up the current values
		double[] weights2 = weights;
		if (shouldComputeGradientForScalingFactor) { // the first weight is the scaling parameter
			//== sanity check
			if (weights.length != numFeatures + 1) {
				logger.severe("number of weights is not right");
				System.exit(1);
			}

			// NOTE: this assignment was missing in the old code (a serious bug)
			scalingFactor = weights[0];
			//System.out.println("scaling is " + annealing_scale + "; weight is " + weights[0]);

			weights2 = new double[numFeatures];
			for (int i = 0; i < numFeatures; i++)
				weights2[i] = weights[i + 1];
		}

		//== reset values
		for (int i = 0; i < numFeatures; i++)
			gradientsForTheta[i] = 0;
		if (shouldComputeGradientForScalingFactor)
			gradientForScalingFactor = 0;
		functionValue = 0;
		sumGain = 0;
		sumEntropy = 0;

		/*
		System.out.println("=====optimizeScale: " + shouldComputeGradientForScalingFactor
			+ "; scalingFactor: " + scalingFactor + "; temperature: " + temperature);
		System.out.print("=====weights: ");
		for (int i = 0; i < weights2.length; i++)
			System.out.print(" " + weights2[i]);
		System.out.print("\n");
		*/

		//== compute gradients and function value
		hgFactory.startLoop();
		reComputeFunctionValueAndGradientHelper(weights2);
		hgFactory.endLoop();

		printLastestStatistics();
		numCalls++;
		//logger.info("numTimesCalled=" + numCalls);
	}
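	/*
	 * Illustrative sketch, not part of the original class: how the design in the
	 * class comment plays out per edge. Each edge carries a table mapping
	 * feature ID -> feature score, the weights array is indexed by feature ID,
	 * and the semiring parser only ever sees the combined, scaled score. The
	 * helper name and signature below are hypothetical, added for illustration.
	 */
	@SuppressWarnings("unused")
	private static double scaledEdgeScore(Map<Integer, Double> edgeFeatureTbl,
			double[] weights, double scale) {
		double dotProduct = 0;
		for (Map.Entry<Integer, Double> feature : edgeFeatureTbl.entrySet())
			dotProduct += weights[feature.getKey()] * feature.getValue();
		return scale * dotProduct; // the only quantity the semiring parser needs
	}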
	@Override
	public void printLastestStatistics() {
		//logger.info("one iteration (i.e. computing gradient and function value).......");
		System.out.println("Func value=" + functionValue + "=" + sumGain + "*" + gainFactor
			+ "+" + temperature + "*" + sumEntropy);
		/*
		System.out.print("GradientsV2 are: ");
		for (int i = 0; i < numFeatures; i++) {
			System.out.print(gradientsForTheta[i] + " ");
		}
		System.out.print("\n");
		System.out.println("Gradient for scale is: " + gradientForScalingFactor);
		*/
	}

	private void reComputeFunctionValueAndGradientHelper(double[] weightsForTheta) {

		MinRiskDADenseFeaturesSemiringParser gradientSemiringParser =
			new MinRiskDADenseFeaturesSemiringParser(temperature);

		for (int sentID = 0; sentID < numSentence; sentID++) {
			//System.out.println("#Process sentence " + sentID);
			FeatureForest fForest = hgFactory.nextHG(sentID);
			fForest.setFeatureWeights(weightsForTheta);
			fForest.setScale(scalingFactor);

			/* Based on a model and a test hypergraph (which provides the topology and
			 * feature/risk annotation), compute the gradient. */
			//TODO: we should check whether hg_test is a feature forest or not
			gradientSemiringParser.setHyperGraph(fForest);

			//== compute gradient and function value
			HashMap<Integer, Double> gradients = gradientSemiringParser.computeGradientForTheta();
			double gradientForScalingFactor = 0;
			if (shouldComputeGradientForScalingFactor)
				gradientForScalingFactor =
					computeGradientForScalingFactor(gradients, weightsForTheta, scalingFactor);
			double funcVal = gradientSemiringParser.getFuncVal();
			double risk = gradientSemiringParser.getRisk();
			double entropy = gradientSemiringParser.getEntropy();

			//== accumulate gradient and function value
			accumulateGradient(gradients, gradientForScalingFactor, weightsForTheta,
				funcVal, risk, entropy);

			if (sentID > 0 && sentID % 1000 == 0) {
				logger.info("======processed sentID=" + sentID);
			}
		}
	}

	private double computeGradientForScalingFactor(HashMap<Integer, Double> gradientForTheta,
			double[] weightsForTheta, double scale) {

		/* Chain rule: the model score of an edge is scale * (w . f), so the gradient
		 * with respect to the scale is sum_i w_i * (dF/dw_i) / scale. */
		double gradientForScale = 0;
		for (Map.Entry<Integer, Double> feature : gradientForTheta.entrySet()) {
			gradientForScale += weightsForTheta[feature.getKey()] * feature.getValue();
		}
		gradientForScale /= scale;

		if (Double.isNaN(gradientForScale)) {
			logger.severe("gradient value for scaling is NaN");
			System.exit(1);
		}
		return gradientForScale;
	}

	public synchronized void accumulateGradient(HashMap<Integer, Double> gradients,
			double gradientForScalingFactor_, double[] weightsForTheta,
			double funcVal, double risk, double entropy) {

		for (Map.Entry<Integer, Double> feature : gradients.entrySet()) {
			// we are maximizing, instead of minimizing
			gradientsForTheta[feature.getKey()] -= feature.getValue();
		}

		if (this.fixFirstFeature) // do not tune the baseline feature
			gradientsForTheta[0] = 0;

		if (shouldComputeGradientForScalingFactor)
			gradientForScalingFactor -= gradientForScalingFactor_; // maximizing, not minimizing

		//== accumulate function value
		functionValue -= funcVal; // maximizing, not minimizing
		sumGain -= risk;
		sumEntropy += entropy;
	}
}
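/*
 * Illustrative sketch, not part of the original file: a self-contained check of
 * the chain-rule identity used in computeGradientForScalingFactor above,
 * assuming the gradient table holds dF/dw_i for a model score of the form
 * scale * (w . f). The toy objective F(theta) = sum_i theta_i^2 with
 * theta_i = scale * w_i stands in for the semiring parser's output; this class
 * and all names in it are hypothetical.
 */
class ScalingGradientSketch {

	public static void main(String[] args) {
		double[] w = {0.5, -1.2, 2.0};
		double scale = 1.3;

		// Gradient table as a hypothetical parser would return it:
		// dF/dw_i = 2 * scale^2 * w_i for the toy objective above.
		HashMap<Integer, Double> gradients = new HashMap<Integer, Double>();
		for (int i = 0; i < w.length; i++)
			gradients.put(i, 2.0 * scale * scale * w[i]);

		// The identity used above: dF/dscale = sum_i w_i * (dF/dw_i) / scale.
		double gradScale = 0;
		for (Map.Entry<Integer, Double> e : gradients.entrySet())
			gradScale += w[e.getKey()] * e.getValue();
		gradScale /= scale;

		// Finite-difference check of dF/dscale; the two numbers should agree.
		double eps = 1e-6;
		double finiteDiff = (f(w, scale + eps) - f(w, scale - eps)) / (2 * eps);
		System.out.println("analytic=" + gradScale + "; finiteDiff=" + finiteDiff);
	}

	private static double f(double[] w, double scale) {
		double sum = 0;
		for (double wi : w)
			sum += (scale * wi) * (scale * wi);
		return sum;
	}
}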