/*
* Created on Oct 28, 2005
* This implements the exponentiated gradient optimization algorithm
* for training graphical models as described in
* @incollection{NIPS2005_621,
* author = {Peter L. {Bartlett} and Michael {Collins} and Ben {Taskar} and David {McAllester}},
* title = {Exponentiated Gradient Algorithms for Large-margin Structured Classification},
* booktitle = {Advances in Neural Information Processing Systems 17},
* year = {2005}}
*
*/
package iitb.CRF;
import cern.colt.function.tdouble.DoubleFunction;
import cern.colt.matrix.tdouble.DoubleMatrix1D;
import cern.colt.matrix.tdouble.DoubleMatrix2D;
import cern.colt.matrix.tdouble.impl.DenseDoubleMatrix2D;
/**
*
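 * Trains a CRF with the exponentiated gradient (EG) algorithm of Bartlett,
 * Collins, Taskar and McAllester (NIPS 17), cited in the file header above.
 * <p>
 * A minimal usage sketch (hypothetical wiring; the construction of the model,
 * the data iterator, and the exact Trainer.train entry point are assumptions
 * about the surrounding iitb.CRF package):
 * <pre>{@code
 *   CrfParams params = new CrfParams();                 // assumed default constructor
 *   Trainer trainer = new ExponentiatedGradientTrainer(params);
 *   double[] lambda = new double[numFeatures];          // numFeatures: hypothetical
 *   trainer.train(model, trainData, lambda, null);      // assumed Trainer.train signature
 * }</pre>
 *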
* @author Sunita Sarawagi
* @since 1.3
* @version 1.3
*/
public class ExponentiatedGradientTrainer extends Trainer {
    double C = 1000;  // trade-off constant in the primal objective 0.5*||w||^2 + C*sum_i xi_i
    double eta = 1/C; // the learning rate; with these defaults eta*C is effectively 1
    double zt[];      // dual-space parameters (z_t in the paper)
    double wt[];      // learned primal parameter values
DoubleMatrix2D fMi_YY;
Viterbi viterbi;
    // helper for shifting every element of a Colt matrix by a constant
    static class AddConstant implements DoubleFunction {
        public double addend = 1.0;
        public double apply(double a) {return a + addend;}
    }
    static AddConstant addConstant = new AddConstant();
public ExponentiatedGradientTrainer(CrfParams p) {
super(p);
}
@Override
protected void init(CRF model, DataIter data, double[] l) {
super.init(model, data, l);
zt = new double[numF];
        wt = lambda; // share lambda's array: the CRF holds a reference to lambda, so writing into wt updates the model's weights in place.
        fMi_YY = new DenseDoubleMatrix2D(Mi_YY.rows(),Mi_YY.columns());
        logProcessing = true;
        viterbi = new LossAugmentedViterbi(model,1); // beam size 1: only the single best labeling is needed
}
protected void doTrain() {
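        // One EG training iteration:
        //   1. evaluate the gradient of the dual objective at zt
        //   2. set the primal weights wt = C * gradient
        //   3. accumulate wt into the dual-space parameters zt
        //   4. report the primal objective 0.5*||w||^2 + C*sum_i xi_i, where
        //      xi_i = max(0, best loss-augmented score - correct-path score)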
        icall = 0;
        params.invSigmaSquare = 0; // turn off the Gaussian prior; regularization comes from C instead
        for (int j = 0 ; j < zt.length ; j++) {
            zt[j] = getInitValue();
        }
double prevObjValue = Double.MAX_VALUE;
do {
            double f = computeFunctionGradient(zt,gradLogli); // only the gradient is used below
            for (int j = 0 ; j < zt.length ; j++) {
                wt[j] = C*gradLogli[j]; // primal weights from the current gradient
                zt[j] += wt[j];         // gradient step in dual space
            }
if ((evaluator != null) && (evaluator.evaluate() == false))
break;
icall += 1;
            // calculate the value of the primal objective 0.5*||w||^2 + C*sum_i xi_i
double objective = 0.5*normSquare(wt);
double sumXi = 0;
diter.startScan();
            for (int numRecord = 0; diter.hasNext(); numRecord++) {
                // slack for this sequence: the margin violation between the best
                // loss-augmented labeling and the correct labeling, clipped at zero
                double corrScore = viterbi.viterbiSearch(diter.next(),wt,true);
                double topViterbiScore = viterbi.getBestSoln(0).score;
                if (corrScore < topViterbiScore)
                    sumXi += C*(topViterbiScore-corrScore);
            }
objective += sumXi;
System.out.println("Primal objective " + objective + " norm^sq " + normSquare(wt) + " sumXi " + sumXi);
            // convergence test on the primal objective (currently disabled):
            // if (prevObjValue-objective < params.epsForConvergence) break;
            prevObjValue = objective;
        } while (icall <= params.maxIters);
}
private double normSquare(double[] vec) {
double v = norm(vec);
return v*v;
}
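    /**
     * Fills Ri_Y and Mi_YY with the log-space potentials for position i,
     * scaled by eta*C, with the iteration-weighted Hamming loss folded into
     * the node scores Ri_Y as required by the EG updates.
     */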
protected boolean computeLogMiTrainMode(FeatureGenerator featureGen, double lambda[],
DataSequence dataSeq, int i,
DoubleMatrix2D Mi_YY,
DoubleMatrix1D Ri_Y, boolean takeExp, boolean reuseM, boolean initMDone) {
boolean retVal = Trainer.computeLogMi(featureGen,lambda,dataSeq,i,fMi_YY,Ri_Y,false,reuseM,initMDone);
        int iterNum = icall;
        for (int c = 0; c < Ri_Y.size();c++) {
            int loss = (dataSeq.y(i)!=c)?1:0; // Hamming loss of label c at position i
            Ri_Y.set(c,eta*C*(Ri_Y.get(c)+iterNum*loss));
            // the edge potentials need no loss term: the loss is already
            // accounted for in the node potentials Ri_Y above
            if (i > 0) {
                for (int r = 0; r < Mi_YY.rows(); r++) {
                    Mi_YY.set(r,c,eta*C*(fMi_YY.get(r,c)));
                }
            }
        }
}
return retVal;
}
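    /**
     * A Viterbi search whose node potentials are augmented with the Hamming
     * loss, so the highest-scoring path is the most violating labeling; used
     * above to measure the slack terms of the primal objective.
     */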
class LossAugmentedViterbi extends Viterbi {
private static final long serialVersionUID = 3806943864517186914L;
LossAugmentedViterbi(CRF model, int bs) {
super(model, bs);
}
        protected void computeLogMi(DataSequence dataSeq, int i, int ell, double lambda[]) {
            Trainer.computeLogMi(model.featureGenerator,lambda,dataSeq,i,Mi,Ri,false);
            // add the Hamming loss to each node potential so that the search
            // favors labelings that disagree with the true labels
            for (int c = 0; c < Ri.size();c++) {
                Ri.set(c,Ri.get(c)+((dataSeq.y(i)!=c)?1:0));
            }
        }
}
}
}