// SparseTrainer.java example
//
// Explorer
// MinorThird-master
package iitb.CRF;

import cern.colt.function.tdouble.*;
import cern.colt.matrix.tdouble.*;
import cern.colt.matrix.tdouble.impl.*;
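/**
 * Trainer subclass whose working matrices are sparse matrices, or the
 * Log* matrix variants when the trainer type is "ll" (see initMatrices).
 *
 * @author Sunita Sarawagi
 *
 */ 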

public class SparseTrainer extends Trainer {
    // Set in the constructor: true when params.trainerType equals "ll".
    // initMatrices() reads it to pick the Log* matrix classes over the plain sparse ones.
    boolean logTrainer;
    /**
     * Functor that maps a value to its exponential.
     */
    static class ExpFunc implements DoubleFunction {
        /**
         * @param value the exponent
         * @return {@code Math.exp(value)}
         */
        public double apply(double value) {
            return Math.exp(value);
        }
    }
    /**
     * Functor taking two ints and a double; both int arguments are ignored and
     * the exponential of the double argument is returned.
     */
    static class ExpFunc2D implements IntIntDoubleFunction {
        /**
         * @param unusedFirst  not used
         * @param unusedSecond not used
         * @param value        the exponent
         * @return {@code Math.exp(value)}
         */
        public double apply(int unusedFirst, int unusedSecond, double value) {
            return Math.exp(value);
        }
    }
    /**
     * Functor taking an int and a double; the int argument is ignored and
     * the exponential of the double argument is returned.
     */
    static class ExpFunc1D implements IntDoubleFunction {
        /**
         * @param unusedIndex not used
         * @param value       the exponent
         * @return {@code Math.exp(value)}
         */
        public double apply(int unusedIndex, double value) {
            return Math.exp(value);
        }
    }
    
    // Shared instances of the exponential functors declared in this class.
    // expFunc is the one handed to assign(...) in computeMi.
    static ExpFunc expFunc = new ExpFunc(); 
    static IntDoubleFunction expFunc1D = new ExpFunc1D();
    // Within this file expFunc2D appears only in a commented-out forEachNonZero call in computeMi.
    static IntIntDoubleFunction expFunc2D = new ExpFunc2D();
 
    /**
     * Creates a trainer configured by {@code p}.
     * The {@code logTrainer} flag is turned on when the trainer type equals {@code "ll"}.
     *
     * @param p parameters passed to the superclass constructor and stored in {@code params}
     */
    public SparseTrainer(CrfParams p) {
        super(p);
        this.params = p;
        // params refers to p from here on, so the trainer type is read straight from p.
        this.logTrainer = p.trainerType.equals("ll");
    }
    /**
     * Entry point for training: calls {@code init(model, data, l)}, stores
     * {@code eval} in {@code evaluator}, optionally reports {@code lambda.length}
     * through {@code Util.printDbg}, and finally calls {@code doTrain()}.
     *
     * @param model forwarded to {@code init}
     * @param data  forwarded to {@code init}
     * @param l     forwarded to {@code init}
     * @param eval  evaluator assigned to the {@code evaluator} field
     */
    public void train(CRF model, DataIter data, double[] l, Evaluator eval) {
        init(model, data, l);
        evaluator = eval;

        // The feature count is only printed when debugging is switched on.
        final boolean debugEnabled = params.debugLvl > 0;
        if (debugEnabled) {
            Util.printDbg("Number of features :" + lambda.length);
        }

        doTrain();
    }
    /**
     * Creates a vector from the Log* matrix family.
     * The "sparse" entry of {@code params.miscOptions} (default "false") decides the
     * implementation: a {@code LogSparseDoubleMatrix1D} when it parses as true,
     * a {@code LogDenseDoubleMatrix1D} otherwise.
     *
     * @param numY size passed to the matrix constructor
     * @return the newly created vector
     */
    protected DoubleMatrix1D newLogDoubleMatrix1D(int numY) {
        final String sparseOption = params.miscOptions.getProperty("sparse", "false");
        if (!Boolean.parseBoolean(sparseOption)) {
            return new LogDenseDoubleMatrix1D(numY);
        }
        return new LogSparseDoubleMatrix1D(numY);
    }
    /**
     * Creates a two-dimensional matrix from the Log* matrix family.
     * The "sparse" entry of {@code params.miscOptions} (default "false") decides the
     * implementation: a {@code LogSparseDoubleMatrix2D} when it parses as true,
     * a {@code LogDenseDoubleMatrix2D} otherwise.
     *
     * @param numR first size argument passed to the matrix constructor
     * @param numC second size argument passed to the matrix constructor
     * @return the newly created matrix
     */
    protected DoubleMatrix2D newLogDoubleMatrix2D(int numR, int numC) {
        final String sparseOption = params.miscOptions.getProperty("sparse", "false");
        if (!Boolean.parseBoolean(sparseOption)) {
            return new LogDenseDoubleMatrix2D(numR, numC);
        }
        return new LogSparseDoubleMatrix2D(numR, numC);
    }
    /**
     * Allocates the working matrices {@code Mi_YY}, {@code Ri_Y}, {@code alpha_Y},
     * {@code newAlpha_Y} and {@code tmp_Y}, each sized by {@code numY}.
     * When {@code logTrainer} is set they come from the newLogDoubleMatrix factories;
     * otherwise plain sparse matrices are created.
     */
    void initMatrices() {
        if (logTrainer) {
            Mi_YY = newLogDoubleMatrix2D(numY, numY);
            Ri_Y = newLogDoubleMatrix1D(numY);
            alpha_Y = newLogDoubleMatrix1D(numY);
            newAlpha_Y = newLogDoubleMatrix1D(numY);
            tmp_Y = newLogDoubleMatrix1D(numY);
            return;
        }

        Mi_YY = new SparseDoubleMatrix2D(numY, numY);
        Ri_Y = new SparseDoubleMatrix1D(numY);
        alpha_Y = new SparseDoubleMatrix1D(numY);
        newAlpha_Y = new SparseDoubleMatrix1D(numY);
        tmp_Y = new SparseDoubleMatrix1D(numY);
    }

    /*
    protected double computeFunctionGradient(double lambda[], double grad[], double expFVals[]) {
        if (params.trainerType.equals("ll"))
            return computeFunctionGradientLL(lambda,  grad);
        double logli = 0;
        try {
            for (int f = 0; f < lambda.length; f++) {
                grad[f] = -1*lambda[f]*params.invSigmaSquare;
                logli -= ((lambda[f]*lambda[f])*params.invSigmaSquare)/2;
            }
            boolean doScaling = params.doScaling;
            
            diter.startScan();
            if (featureGenCache != null) featureGenCache.startDataScan();
            for (int numRecord = 0; diter.hasNext(); numRecord++) {
                DataSequence dataSeq = (DataSequence)diter.next();
                if (featureGenCache != null) featureGenCache.nextDataIndex();
                if (params.debugLvl > 1) {
                    Util.printDbg("Read next seq: " + numRecord + " logli " + logli);
                }
                alpha_Y.assign(1);
                for (int f = 0; f < lambda.length; f++)
                    ExpF[f] = 0;
                
                if ((beta_Y == null) || (beta_Y.length < dataSeq.length())) {
                    beta_Y = new DoubleMatrix1D[2*dataSeq.length()];
                    for (int i = 0; i < beta_Y.length; i++)
                        beta_Y[i] = new SparseDoubleMatrix1D(numY);
                    
                    scale = new double[2*dataSeq.length()];
                }
                // compute beta values in a backward scan.
                // also scale beta-values to 1 to avoid numerical problems.
                scale[dataSeq.length()-1] = (doScaling)?numY:1;
                beta_Y[dataSeq.length()-1].assign(1.0/scale[dataSeq.length()-1]);
                for (int i = dataSeq.length()-1; i > 0; i--) {
                    if (params.debugLvl > 2) {
                        Util.printDbg("Features fired");
                        //featureGenerator.startScanFeaturesAt(dataSeq, i);    
                        //while (featureGenerator.hasNext()) { 
                        //Feature feature = featureGenerator.next();
                        //Util.printDbg(feature.toString());
                        //}
                    }
                    
                    // compute the Mi matrix
                    computeMi(featureGenerator,lambda,dataSeq,i,Mi_YY,Ri_Y);
                    tmp_Y.assign(beta_Y[i]);
                    tmp_Y.assign(Ri_Y,multFunc);
                    // RobustMath.Mult(Mi_YY, tmp_Y, beta_Y[i-1],1,0,false,edgeGen);
                    Mi_YY.zMult(tmp_Y, beta_Y[i-1]);
                    
                    // need to scale the beta-s to avoid overflow
                    scale[i-1] = doScaling?beta_Y[i-1].zSum():1;
                    if ((scale[i-1] < 1) && (scale[i-1] > -1))
                        scale[i-1] = 1;
                    constMultiplier.multiplicator = 1.0/scale[i-1];
                    beta_Y[i-1].assign(constMultiplier);
                }
                
                double thisSeqLogli = 0;
                for (int i = 0; i < dataSeq.length(); i++) {
                    // compute the Mi matrix
                    computeMi(featureGenerator,lambda,dataSeq,i,Mi_YY,Ri_Y);
                    // find features that fire at this position..
                    featureGenerator.startScanFeaturesAt(dataSeq, i);
                    
                    if (i > 0) {
                        //		    tmp_Y.assign(alpha_Y);
                        //		    RobustMath.Mult(Mi_YY, tmp_Y, newAlpha_Y,1,0,true,edgeGen);
                        Mi_YY.zMult(alpha_Y, newAlpha_Y,1,0,true);
                        newAlpha_Y.assign(Ri_Y,multFunc); 
                    } else {
                        newAlpha_Y.assign(Ri_Y);     
                    }
                    while (featureGenerator.hasNext()) { 
                        Feature feature = featureGenerator.next();
                        int f = feature.index();
                        
                        int yp = feature.y();
                        int yprev = feature.yprev();
                        float val = feature.value();
                        if ((dataSeq.y(i) == yp) && (((i-1 >= 0) && (yprev == dataSeq.y(i-1))) || (yprev < 0))) {
                            grad[f] += val;
                            thisSeqLogli += val*lambda[f];
                        }
                        if (yprev < 0) {
                            ExpF[f] += newAlpha_Y.get(yp)*val*beta_Y[i].get(yp);
                        } else {
                            ExpF[f] += alpha_Y.get(yprev)*Ri_Y.get(yp)*Mi_YY.get(yprev,yp)*val*beta_Y[i].get(yp);
                        }
                    }
                    
                    alpha_Y.assign(newAlpha_Y);
                    // now scale the alpha-s to avoid overflow problems.
                    constMultiplier.multiplicator = 1.0/scale[i];
                    alpha_Y.assign(constMultiplier);
                    
                    if (params.debugLvl > 2) {
                        System.out.println("Alpha-i " + alpha_Y.toString());
                        System.out.println("Ri " + Ri_Y.toString());
                        System.out.println("Mi " + Mi_YY.toString());
                        System.out.println("Beta-i " + beta_Y[i].toString());
                    }
                    //badVector(alpha_Y);
                }
                double Zx = alpha_Y.zSum();
                //if (Zx == 0) {
                //Zx = (Double.MIN_VALUE*100000000);
                //}
                thisSeqLogli -= log(Zx);
                // correct for the fact that alpha-s were scaled.
                for (int i = 0; i < dataSeq.length(); i++) {
                    thisSeqLogli -= log(scale[i]);
                }
                if (thisSeqLogli > 0) {
                    System.out.println("This is shady: something is wrong Pr(y|x) > 1!");
                }
                logli += thisSeqLogli;
                // update grad.
                for (int f = 0; f < grad.length; f++)
                    grad[f] -= ExpF[f]/Zx;
                
                if (params.debugLvl > 1) {
                    System.out.println("Sequence "  + thisSeqLogli + " " + logli);
                }
                
            }
            if (params.debugLvl > 2) {
                for (int f = 0; f < lambda.length; f++)
                    System.out.print(lambda[f] + " ");
                System.out.println(" :x");
                for (int f = 0; f < lambda.length; f++)
                    System.out.print(grad[f] + " ");
                System.out.println(" :g");
            }
            
            if (params.debugLvl > 0)
                Util.printDbg("Iter " + icall + " log likelihood "+logli + " norm(grad logli) " + norm(grad) + " norm(x) "+ norm(lambda));
            
        } catch (Exception e) {
            System.out.println("Alpha-i " + alpha_Y.toString());
            System.out.println("Ri " + Ri_Y.toString());
            System.out.println("Mi " + Mi_YY.toString());
            
            e.printStackTrace();
            System.exit(0);
        }
        return logli;
    }
    */
    /**
     * Fills both matrices with 0 and then hands them to
     * {@code computeLogMiInitDone} together with that same initial value.
     * This overload does not call {@code startScanFeaturesAt} itself.
     *
     * @param featureGen feature generator forwarded to {@code computeLogMiInitDone}
     * @param lambda     weight array forwarded to {@code computeLogMiInitDone}
     * @param Mi_YY      matrix that is reset and then passed on
     * @param Ri_Y       vector that is reset and then passed on
     */
    static void computeLogMi(FeatureGenerator featureGen, double[] lambda,
            DoubleMatrix2D Mi_YY,
            DoubleMatrix1D Ri_Y) {
        // Every cell starts from this value before computeLogMiInitDone runs.
        final double initialValue = 0;

        Mi_YY.assign(initialValue);
        Ri_Y.assign(initialValue);

        computeLogMiInitDone(featureGen, lambda, Mi_YY, Ri_Y, initialValue);
    }
    
    /**
     * Computes the matrices for position {@code i} of {@code dataSeq} and then
     * exponentiates every entry of {@code Ri_Y} and {@code Mi_YY} in place.
     *
     * @param featureGen feature generator, positioned with {@code startScanFeaturesAt(dataSeq, i)}
     * @param lambda     weight array forwarded to computeLogMi
     * @param dataSeq    sequence being scanned
     * @param i          position within {@code dataSeq}
     * @param Mi_YY      matrix that is overwritten
     * @param Ri_Y       vector that is overwritten
     */
    static void computeMi(FeatureGenerator featureGen, double[] lambda,
            DataSequence dataSeq, int i,
            DoubleMatrix2D Mi_YY,
            DoubleMatrix1D Ri_Y) {
        // The six-argument overload starts the feature scan at (dataSeq, i)
        // and then fills both matrices.
        computeLogMi(featureGen, lambda, dataSeq, i, Mi_YY, Ri_Y);

        // Apply exp to each entry of the results.
        Ri_Y.assign(expFunc);
        Mi_YY.assign(expFunc);
        // Disabled alternative kept for reference: Mi_YY.forEachNonZero(expFunc2D);
    }
    /**
     * Starts the feature scan at position {@code i} of {@code dataSeq} and then
     * delegates to the four-argument {@code computeLogMi}.
     *
     * @param featureGen feature generator to position and forward
     * @param lambda     weight array forwarded to the four-argument overload
     * @param dataSeq    sequence being scanned
     * @param i          position within {@code dataSeq}
     * @param Mi_YY      matrix forwarded to the four-argument overload
     * @param Ri_Y       vector forwarded to the four-argument overload
     */
    static void computeLogMi(FeatureGenerator featureGen, double[] lambda,
            DataSequence dataSeq, int i,
            DoubleMatrix2D Mi_YY,
            DoubleMatrix1D Ri_Y) {
        // Position the generator first; the overload below does not do it.
        featureGen.startScanFeaturesAt(dataSeq, i);

        computeLogMi(featureGen, lambda, Mi_YY, Ri_Y);
    }
    /*
    protected double computeFunctionGradientLL(double lambda[], double grad[]) {
        double logli = 0;
        try {
            for (int f = 0; f < lambda.length; f++) {
                grad[f] = -1*lambda[f]*params.invSigmaSquare;
                logli -= ((lambda[f]*lambda[f])*params.invSigmaSquare)/2;
            }
            diter.startScan();
            if (featureGenCache != null) featureGenCache.startDataScan();
            for (int numRecord = 0; diter.hasNext(); numRecord++) {
                DataSequence dataSeq = (DataSequence)diter.next();
                if (featureGenCache != null) featureGenCache.nextDataIndex();
                if (params.debugLvl > 1) {
                    Util.printDbg("Read next seq: " + numRecord + " logli " + logli);
                }
                alpha_Y.assign(0);
                for (int f = 0; f < lambda.length; f++)
                    ExpF[f] = RobustMath.LOG0;
                
                if ((beta_Y == null) || (beta_Y.length < dataSeq.length())) {
                    beta_Y = new DoubleMatrix1D[2*dataSeq.length()];
                    for (int i = 0; i < beta_Y.length; i++)
                        beta_Y[i] = newLogDoubleMatrix1D(numY);
                }
                // compute beta values in a backward scan.
                // also scale beta-values to 1 to avoid numerical problems.
                beta_Y[dataSeq.length()-1].assign(0);
                for (int i = dataSeq.length()-1; i > 0; i--) {
                    if (params.debugLvl > 3) {
                        Util.printDbg("Features fired");
                        featureGenerator.startScanFeaturesAt(dataSeq, i);    
                        while (featureGenerator.hasNext()) { 
                            Feature feature = featureGenerator.next();
                            Util.printDbg(feature.toString());
                        }
                    }
                    
                    // compute the Mi matrix
                    computeLogMi(featureGenerator,lambda,dataSeq,i,Mi_YY,Ri_Y);
                    tmp_Y.assign(beta_Y[i]);
                    tmp_Y.assign(Ri_Y,sumFunc);
                    Mi_YY.zMult(tmp_Y, beta_Y[i-1],1,0,false);
                }
                
                
                double thisSeqLogli = 0;
                for (int i = 0; i < dataSeq.length(); i++) {
                    // compute the Mi matrix
                    computeLogMi(featureGenerator,lambda,dataSeq,i,Mi_YY,Ri_Y);
                    // find features that fire at this position..
                    featureGenerator.startScanFeaturesAt(dataSeq, i);
                    
                    if (i > 0) {
                        //tmp_Y.assign(alpha_Y);
                        Mi_YY.zMult(alpha_Y, newAlpha_Y,1,0,true);
                        newAlpha_Y.assign(Ri_Y,sumFunc); 
                    } else {
                        newAlpha_Y.assign(Ri_Y);
                    }
                    
                    while (featureGenerator.hasNext()) { 
                        Feature feature = featureGenerator.next();
                        int f = feature.index();
                        
                        int yp = feature.y();
                        int yprev = feature.yprev();
                        float val = feature.value();
                        if ((dataSeq.y(i) == yp) && (((i-1 >= 0) && (yprev == dataSeq.y(i-1))) || (yprev < 0))) {
                            grad[f] += val;
                            thisSeqLogli += val*lambda[f];
                        }
                        if (yprev < 0) {
                            ExpF[f] = RobustMath.logSumExp(ExpF[f], newAlpha_Y.get(yp) + RobustMath.log(val) + beta_Y[i].get(yp));
                        } else {
                            ExpF[f] = RobustMath.logSumExp(ExpF[f], alpha_Y.get(yprev)+Ri_Y.get(yp)+Mi_YY.get(yprev,yp)+RobustMath.log(val)+beta_Y[i].get(yp));
                        }
                    }
                    alpha_Y.assign(newAlpha_Y);
                    
                    if (params.debugLvl > 2) {
                        System.out.println("Alpha-i " + alpha_Y.toString());
                        System.out.println("Ri " + Ri_Y.toString());
                        System.out.println("Mi " + Mi_YY.toString());
                        System.out.println("Beta-i " + beta_Y[i].toString());
                    }
                }
                double lZx = alpha_Y.zSum();
                thisSeqLogli -= lZx;
                logli += thisSeqLogli;
                // update grad.
                for (int f = 0; f < grad.length; f++)
                    grad[f] -= RobustMath.exp(ExpF[f]-lZx);
                
                if (params.debugLvl > 1) {
                    System.out.println("Sequence "  + thisSeqLogli + " " + logli );
                }
                if (thisSeqLogli > 0) {
                    System.out.println("This is shady: something is wrong Pr(y|x) > 1!");
                }
            }
            if (params.debugLvl > 2) {
                for (int f = 0; f < lambda.length; f++)
                    System.out.print(lambda[f] + " ");
                System.out.println(" :x");
                for (int f = 0; f < lambda.length; f++)
                    System.out.print(grad[f] + " ");
                System.out.println(" :g");
            }
            
            if (params.debugLvl > 0)
                Util.printDbg("Iteration " + icall + " log-likelihood "+logli + " norm(grad logli) " + norm(grad) + " norm(x) "+ norm(lambda));
            
        } catch (Exception e) {
            System.out.println("Alpha-i " + alpha_Y.toString());
            System.out.println("Ri " + Ri_Y.toString());
            System.out.println("Mi " + Mi_YY.toString());
            
            e.printStackTrace();
            System.exit(0);
        }
        return logli;
    }
 */
}