// Trainer.java example
//
// Explorer
// MinorThird-master
/** Trainer.java
 * 
 * @author Sunita Sarawagi
 * @version 1.3 
 */
package iitb.CRF;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.StringTokenizer;

import riso.numerical.LBFGS;
import cern.colt.function.tdouble.DoubleDoubleFunction;
import cern.colt.function.tdouble.DoubleFunction;
import cern.colt.matrix.tdouble.DoubleMatrix1D;
import cern.colt.matrix.tdouble.DoubleMatrix2D;
import cern.colt.matrix.tdouble.impl.DenseDoubleMatrix1D;
import cern.colt.matrix.tdouble.impl.DenseDoubleMatrix2D;
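/**
 * Trains a CRF by maximizing the prior-penalized log-likelihood of the
 * training sequences with the L-BFGS optimizer.
 *
 * @author Sunita Sarawagi
 *
 */ 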

public class Trainer {
    // numF: number of features reported by the feature generator; numY: number of labels of the model (both set in init()).
    protected int numF,numY;
    // Gradient buffer of length numF that is handed to the optimizer.
    protected double gradLogli[];
    // Work array of length numF needed by the optimizer.
    double diag[];
    // Parameter vector being trained; init() stores the caller's array here without copying.
    protected double lambda[];
    // reuseM: copied from params.reuseM in init().
    // initMDone: result of the most recent computeLogMi call; reset to false at the start of every data scan and in reInit().
    // logProcessing: set to true in init() when params.trainerType is "ll"; routes sumProduct() to the log-domain code.
    protected boolean reuseM, initMDone=false, logProcessing=false;
    
    // ExpF: per-sequence accumulator of feature expectations (log domain in the "ll" path).
    // lZx: log partition value written by sumProductInner().
    protected double ExpF[], lZx;
    // scale: per-position scaling factors used by sumProduct(). rLogScale is not referenced anywhere in this file.
    double scale[], rLogScale[];
    
    // numY x numY edge matrix for the current position (allocated in initMatrices()).
    protected DoubleMatrix2D Mi_YY;
    // Length-numY node vector for the current position.
    protected DoubleMatrix1D Ri_Y;
    // Forward vectors: alpha_Y for the previous position, newAlpha_Y for the current one.
    protected DoubleMatrix1D alpha_Y, newAlpha_Y;
    // Backward vectors, one per sequence position (the array is over-allocated to limit reallocation).
    protected DoubleMatrix1D beta_Y[];
    // Scratch vector of length numY.
    protected DoubleMatrix1D tmp_Y;
    
    /** Binary function object that returns the product of its two arguments. */
    static class  MultFunc implements DoubleDoubleFunction {
        public double apply(double left, double right) {
            return left * right;
        }
    };
    /** Binary function object that returns the sum of its two arguments. */
    static class  SumFunc implements DoubleDoubleFunction {
        public double apply(double left, double right) {
            return left + right;
        }
    };
    // Shared instances of the element-wise product and sum functions, passed to DoubleMatrix1D.assign(...).
    static MultFunc multFunc = new MultFunc(); 
    protected static SumFunc sumFunc = new SumFunc(); 
    
    /**
     * Unary function object that multiplies its argument by {@link #multiplicator}.
     * The factor is mutable so a single instance can be reused for different scalings.
     */
    class MultSingle implements DoubleFunction {
        public double multiplicator = 1.0;
        public double apply(double value) {
            return value * multiplicator;
        }
    };
    // Reusable scaling function; sumProduct() sets its multiplicator to 1/scale before rescaling alpha/beta vectors.
    MultSingle constMultiplier = new MultSingle();
    
    // Iterator over the training sequences (set in init() and in computeFeatureExpectedValue()).
    protected DataIter diter;
    // Feature generator of the model; replaced by featureGenCache in init() when caching is enabled.
    protected FeatureGenerator featureGenerator;
    // Training options supplied to the constructor.
    protected CrfParams params;
    // Edge generator of the model, passed to the RobustMath matrix-vector products.
    protected EdgeGenerator edgeGen;
    // Number of optimizer iterations completed so far in doTrain().
    protected int icall;
    // Optional per-record weights indexed by record number; null means every record has weight 1.
    protected float instanceWts[];
    // Optional callback consulted once per iteration; training stops when evaluate() returns false.
    Evaluator evaluator = null;
    
    // Non-null only when the "cache" misc option is "true" and training data was supplied to init().
    protected FeatureGenCache featureGenCache;
    
    /**
     * Computes the Euclidean (L2) norm of a vector.
     *
     * @param ar the vector
     * @return the square root of the sum of squared components; 0 for an empty array
     */
    protected double norm(double ar[]) {
        double sumOfSquares = 0;
        for (double component : ar) {
            sumOfSquares += component * component;
        }
        return Math.sqrt(sumOfSquares);
    }
    /**
     * Creates a trainer that reads its options from the given parameters.
     *
     * @param p training options; stored by reference
     */
    public Trainer(CrfParams p) {
        this.params = p;
    }
    /**
     * Trains the model with unit instance weights and no misclassification costs.
     *
     * @param model the CRF whose feature and edge generators are used
     * @param data  the training sequences
     * @param l     parameter vector, updated in place
     * @param eval  optional per-iteration callback; may be null
     */
    public void train(CRF model, DataIter data, double[] l, Evaluator eval) {
        final float[] noInstanceWts = null;
        final float[][] noMisClassifyCost = null;
        trainInternal(model, data, l, eval, noInstanceWts, noMisClassifyCost);
    }
    /**
     * Trains the model with optional per-record weights.
     *
     * @param model       the CRF whose feature and edge generators are used
     * @param data        the training sequences
     * @param l           parameter vector, updated in place
     * @param eval        optional per-iteration callback; may be null
     * @param instanceWts per-record weights, or null for unit weights
     */
    public void train(CRF model, DataIter data, double[] l, Evaluator eval, float[] instanceWts) {
        if (instanceWts != null) {
            trainInternal(model, data, l, eval, instanceWts, null);
        } else {
            // Route through the four-argument overload so that trainers which
            // overrode it keep working (backward compatibility).
            train(model, data, l, eval);
        }
    }
    /**
     * Trains the model with optional per-record weights and misclassification costs.
     *
     * @param model           the CRF whose feature and edge generators are used
     * @param data            the training sequences
     * @param l               parameter vector, updated in place
     * @param eval            optional per-iteration callback; may be null
     * @param instanceWts     per-record weights, or null for unit weights
     * @param misClassifyCost misclassification cost matrix, or null
     */
    public void train(CRF model, DataIter data, double[] l, Evaluator eval, float[] instanceWts, float misClassifyCost[][]) {
        boolean hasExtras = (instanceWts != null) || (misClassifyCost != null);
        if (hasExtras) {
            trainInternal(model, data, l, eval, instanceWts, misClassifyCost);
        } else {
            // Route through the four-argument overload so that trainers which
            // overrode it keep working (backward compatibility).
            train(model, data, l, eval);
        }
    }
    /**
     * Common entry point of all train() overloads: initializes the trainer
     * state, records the evaluator and instance weights, and runs the optimizer.
     * The misClassifyCost argument is ignored for logistic trainers on sequence data.
     */
    private void trainInternal(CRF model, DataIter data, double[] l, Evaluator eval, float[] instanceWts, float misClassifyCost[][]) {
        init(model, data, l);
        this.evaluator = eval;
        this.instanceWts = instanceWts;
        boolean debugOn = params.debugLvl > 0;
        if (debugOn) {
            Util.printDbg("Number of features :" + lambda.length);
        }
        doTrain();
    }
    
    /**
     * Fills the given vector with the starting point of the optimization.
     * <ul>
     * <li>If the misc option "initValues" names a file, values are read from it
     *     (see {@link #loadInitValuesFromFile}). If reading fails, the default
     *     initialization below is used, as the error message announces.</li>
     * <li>Otherwise, if "initValuesUseExisting" is true, the vector is left untouched.</li>
     * <li>Otherwise every entry is set to {@link #getInitValue()}.</li>
     * </ul>
     *
     * @param lambda vector to initialize in place
     */
    protected void setInitValue(double lambda[]) {
        String fname = params.miscOptions.getProperty("initValues");
        if (fname != null) {
            // starting values stored in a file where each line has (featureName, value) pair
            if (loadInitValuesFromFile(fname, lambda)) {
                return;
            }
            // Reading failed: fall through to the default initialization.
        } else if (Boolean.parseBoolean(params.miscOptions.getProperty("initValuesUseExisting", "false"))) {
            // use existing values of lambda from the model as starting point.
            return;
        }
        for (int j = 0; j < lambda.length; j++) {
            lambda[j] = getInitValue();
        }
    }

    /**
     * Reads starting values from a file with one "featureName value" pair per line.
     * When the misc option "initValuesOrdered" is true the k-th entry is stored in
     * lambda[k]; otherwise values are matched by feature name and features missing
     * from the file get {@link #getInitValue()}. Blank lines are skipped.
     *
     * @return true if the file was read completely, false if an I/O error occurred
     */
    private boolean loadInitValuesFromFile(String fname, double lambda[]) {
        BufferedReader in = null;
        try {
            in = new BufferedReader(new FileReader(fname));
            boolean idOrdered = Boolean.parseBoolean(params.miscOptions.getProperty("initValuesOrdered", "false"));
            Hashtable<String, Double> initVals = new Hashtable<String, Double>();
            String line;
            int entryIndex = 0;
            while ((line = in.readLine()) != null) {
                StringTokenizer entry = new StringTokenizer(line);
                if (!entry.hasMoreTokens()) {
                    continue; // blank line, e.g. a trailing newline at end of file
                }
                String featureName = entry.nextToken();
                double fval = Double.parseDouble(entry.nextToken());
                if (idOrdered) {
                    lambda[entryIndex] = fval;
                } else {
                    initVals.put(featureName, fval);
                }
                entryIndex++;
            }
            if (!idOrdered) {
                for (int j = 0; j < lambda.length; j++) {
                    Double stored = initVals.get(featureGenerator.featureName(j));
                    lambda[j] = (stored != null) ? stored.doubleValue() : getInitValue();
                }
            }
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("ERROR: in file initialization, using default init process");
            return false;
        } finally {
            // Always release the file handle, whether or not reading succeeded.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a read-only stream fails.
                }
            }
        }
    }
    /** Returns the default starting value of a parameter, as configured in params.initValue. */
    double getInitValue() {
        final double configured = params.initValue;
        return configured;
    }
    /**
     * Binds the trainer to a model, a data set and a parameter vector, and
     * allocates all work buffers.
     *
     * @param model supplies the edge generator, feature generator and label count
     * @param data  training sequences; when non-null and the misc option "cache"
     *              is "true", the feature generator is wrapped in a FeatureGenCache
     * @param l     parameter vector; stored by reference
     */
    protected void init(CRF model, DataIter data, double[] l) {
        // bind to the model and data
        edgeGen = model.edgeGen;
        lambda = l;
        numY = model.numY;
        diter = data;
        featureGenerator = model.featureGenerator;
        numF = featureGenerator.numFeatures();

        // work buffers
        gradLogli = new double[numF];
        diag = new double[numF]; // needed by the optimizer
        ExpF = new double[lambda.length];
        initMatrices();

        reuseM = params.reuseM;
        if (params.trainerType.equals("ll")) {
            logProcessing = true;
        }

        boolean useCache = (data != null)
                && params.miscOptions.getProperty("cache", "false").equals("true");
        if (!useCache) {
            featureGenCache = null;
            return;
        }
        featureGenCache = new FeatureGenCache(featureGenerator, reuseM);
        featureGenCache.setDataKeys(data);
        featureGenerator = featureGenCache;
    }
    /** Allocates the dense numY-sized work vectors and the numY x numY edge matrix. */
    void initMatrices() {
        final int labels = numY;
        Mi_YY = new DenseDoubleMatrix2D(labels, labels);
        Ri_Y = new DenseDoubleMatrix1D(labels);

        alpha_Y = new DenseDoubleMatrix1D(labels);
        newAlpha_Y = new DenseDoubleMatrix1D(labels);
        tmp_Y = new DenseDoubleMatrix1D(labels);
    }
    
    /**
     * Runs the L-BFGS loop: repeatedly evaluates the log-likelihood and its
     * gradient, negates both (the optimizer minimizes), and lets LBFGS update
     * the variables until it reports convergence, the evaluator asks to stop,
     * or params.maxIters is exceeded.
     * <p>
     * With the "exp" prior the optimizer works on unconstrained variables v
     * with lambda = exp(v), and the gradient is rescaled by exp(v) accordingly.
     * If LBFGS throws, the error is reported and the method returns without
     * calling reInit().
     */
    protected void doTrain() {
        final double xtol = 1.0e-16; // machine precision
        final int[] iprint = { params.debugLvl-2, params.debugLvl-1 };
        final int[] iflag = { 0 };
        icall = 0;

        final boolean positiveConstraint = params.miscOptions.getProperty("prior", "gaussian").equals("exp");
        final double[] variables = positiveConstraint ? new double[lambda.length] : lambda;

        setInitValue(variables);

        do {
            if (positiveConstraint) {
                for (int i = 0; i < variables.length; i++) {
                    lambda[i] = Math.exp(variables[i]);
                }
            }
            double objective = computeFunctionGradient(lambda, gradLogli);
            if (positiveConstraint) {
                // chain rule for lambda = exp(variables)
                for (int i = 0; i < gradLogli.length; i++) {
                    gradLogli[i] *= Math.exp(variables[i]);
                }
            }
            // since the routine below minimizes and we want to maximize logli
            objective = -objective;
            for (int j = 0; j < lambda.length; j++) {
                gradLogli[j] = -gradLogli[j];
            }

            if ((evaluator != null) && !evaluator.evaluate()) {
                break;
            }
            try {
                LBFGS.lbfgs(numF, params.mForHessian, variables, objective, gradLogli, false, diag, iprint, params.epsForConvergence, xtol, iflag);
            } catch (LBFGS.ExceptionWithIflag e) {
                System.err.println( "CRF: lbfgs failed.\n"+e );
                if (e.iflag == -1) {
                    System.err.println("Possible reasons could be: \n \t 1. Bug in the feature generation or data handling code\n\t 2. Not enough features to make observed feature value==expected value\n");
                }
                return;
            }
            icall += 1;
        } while ((iflag[0] != 0) && (icall <= params.maxIters));
        reInit();
    }
    /**
     * Computes the objective and its gradient over the training data using the
     * trainer's own feature generator and without collecting expected feature values.
     *
     * @param lambda current parameters
     * @param grad   receives the gradient
     * @return the objective value
     */
    protected double computeFunctionGradient(double lambda[], double grad[]) {
        final double[] noExpectedValues = null;
        return computeFunctionGradient(lambda, grad, noExpectedValues, featureGenerator);
    }
    /**
     * Hook invoked after all records have been scanned; subclasses may adjust
     * the gradient or the objective. This implementation changes nothing.
     *
     * @return the objective value to report, here the unchanged logli
     */
    protected double finishGradCompute(double grad[], double lambda[], double logli) {
        final double unchanged = logli;
        return unchanged;
    }
    /**
     * Accumulates the expected value of every feature of fgen over the given data.
     * Replaces the trainer's data iterator, disables the feature cache, and
     * grows ExpF if fgen has more features than ExpF can hold.
     *
     * @param dataIter sequences to scan
     * @param fgen     feature generator whose expectations are wanted
     * @param lambda   current parameters
     * @param expFVals output; zeroed first, then filled with the expectations
     */
    protected void computeFeatureExpectedValue(DataIter dataIter, FeatureGenerator fgen, double lambda[], double expFVals[]) {
        diter = dataIter;
        featureGenCache = null;
        for (int i = 0; i < expFVals.length; i++) {
            expFVals[i] = 0;
        }
        boolean expFTooSmall = fgen.numFeatures() > ExpF.length;
        if (expFTooSmall) {
            // a different feature generator..
            ExpF = new double[fgen.numFeatures()];
        }
        computeFunctionGradient(lambda, null, expFVals, fgen);
    }
    /**
     * Initializes the gradient with the derivative of the log-prior and
     * subtracts the prior penalty from the objective. The prior is chosen by
     * the misc option "prior" (case-insensitive): "exp", "laplaceApprox"
     * (a smoothed absolute value), or anything else for gaussian.
     *
     * @param lambda current parameters
     * @param grad   overwritten (not accumulated) with the prior's gradient
     * @param logli  objective value so far
     * @return logli minus the prior penalty
     */
    protected double addPrior(double lambda[], double grad[], double logli) {
        final String prior = params.miscOptions.getProperty("prior", "gaussian");
        final double invSigmaSq = params.invSigmaSquare;
        final int n = lambda.length;

        if (prior.equalsIgnoreCase("exp")) {
            for (int k = 0; k < n; k++) {
                grad[k] = -1*invSigmaSq;
                logli -= (lambda[k]*invSigmaSq);
            }
            return logli;
        }
        if (prior.equalsIgnoreCase("laplaceApprox")) {
            for (int k = 0; k < n; k++) {
                // sqrt(x^2 + eps) is a differentiable stand-in for |x|
                double smoothAbs = Math.sqrt(lambda[k]*lambda[k]+1e-3);
                grad[k] = -1*lambda[k]/smoothAbs*invSigmaSq;
                logli -= invSigmaSq*smoothAbs;
            }
            return logli;
        }
        for (int k = 0; k < n; k++) {
            grad[k] = -1*lambda[k]*invSigmaSq;
            logli -= ((lambda[k]*lambda[k])*invSigmaSq)/2;
        }
        return logli;
    }
    /**
     * Scans all training records once and returns the objective; optionally
     * fills the gradient and/or accumulates expected feature values.
     *
     * @param lambda               current parameters
     * @param grad                 receives prior plus data gradient; may be null
     *                             (then no prior is added and no gradient is computed)
     * @param expFVals             accumulates expected feature values; may be null
     * @param fgenForExpValCompute feature generator used for the expectation scan
     * @return the objective value, or 0 if any exception occurred during the scan
     *         (the exception and the current alpha/Ri/Mi state are printed)
     */
    protected double computeFunctionGradient(double lambda[], double grad[], double expFVals[], 
            FeatureGenerator fgenForExpValCompute) {    
        try {
            double logli = 0;
            if (grad != null) {
                logli = addPrior(lambda,grad,logli);
            }
            diter.startScan();
            initMDone=false;
            if (featureGenCache != null) featureGenCache.startDataScan();
            int numRecord;
            for (numRecord = 0; diter.hasNext(); numRecord++) {
                if (params.debugLvl > 1)
                    Util.printDbg("Read next seq: " + numRecord + " logli " + logli);
                if (featureGenCache != null) featureGenCache.nextDataIndex();
                logli += sumProduct(diter.next(),featureGenerator,lambda,grad,expFVals,
                        false, numRecord, fgenForExpValCompute);
            } 
            logli = finishGradCompute(grad,lambda,logli);
            if (params.debugLvl > 2) {
                for (int f = 0; f < lambda.length; f++)
                    System.out.print(lambda[f] + " ");
                System.out.println(" :x");
                // grad is null when only expected values are requested; dumping it
                // unguarded would raise a NullPointerException that the catch
                // below turns into a bogus return value of 0.
                if (grad != null) {
                    for (int f = 0; f < lambda.length; f++)
                        System.out.println(f + " " + featureGenerator.featureName(f) + " " + grad[f] + " ");
                    System.out.println(" :g");
                }
            }
            if (params.debugLvl > 0) {
                if (icall == 0) {
                    Util.printDbg("Number of training records " + numRecord);
                }
                if (grad != null) Util.printDbg("Iter " + icall + " loglikelihood "+logli + " gnorm " + norm(grad) + " xnorm "+ norm(lambda));
            }
            return logli;
        } catch (Exception e) {
            // Best-effort diagnostics: dump the forward state at the point of failure.
            System.out.println("Alpha-i " + alpha_Y.toString());
            System.out.println("Ri " + Ri_Y.toString());
            System.out.println("Mi " + Mi_YY.toString());
            
            e.printStackTrace();
        }
        return 0;
    }
    /**
     * Runs the forward-backward (sum-product) pass on one sequence in the
     * probability domain with optional scaling, and returns the weighted
     * log-likelihood of the sequence. Delegates to sumProductLL() when
     * log-domain processing is enabled.
     *
     * @param dataSeq          the labelled sequence
     * @param featureGenerator generator used to build the Mi/Ri potentials
     * @param lambda           current parameters
     * @param grad             if non-null, observed minus expected feature counts
     *                         (times the instance weight) are added to it
     * @param expFVals         if non-null, expected feature values are added to it
     * @param onlyForwardPass  only forwarded to sumProductLL(); this method
     *                         always computes the backward pass
     * @param numRecord        index of the record, used to look up its weight
     * @param fgenForExpVals   generator scanned for gradient/expectation updates
     * @return instance weight times the log-likelihood of the sequence
     */
    protected double sumProduct(DataSequence dataSeq, FeatureGenerator featureGenerator, 
            double lambda[], double grad[], double expFVals[], boolean onlyForwardPass, int numRecord, 
            FeatureGenerator fgenForExpVals) {
        if (logProcessing) {
            return sumProductLL(dataSeq,featureGenerator,lambda,grad,expFVals,onlyForwardPass,numRecord,fgenForExpVals);
        }
        boolean doScaling = params.doScaling;
        final int seqLen = dataSeq.length();
        alpha_Y.assign(1);
        for (int f = 0; f < lambda.length; f++)
            ExpF[f] = 0;
        
        // beta_Y can also be allocated by allocateAlphaBeta(), which does not
        // allocate scale, so both buffers have to be checked here.
        if ((beta_Y == null) || (beta_Y.length < seqLen)
                || (scale == null) || (scale.length < seqLen)) {
            beta_Y = new DenseDoubleMatrix1D[2*seqLen];
            for (int i = 0; i < beta_Y.length; i++)
                beta_Y[i] = new DenseDoubleMatrix1D(numY);
            
            scale = new double[2*seqLen];
        }
        float instanceWt = (float) ((instanceWts!=null)?instanceWts[numRecord]:1);
        // compute beta values in a backward scan.
        // also scale beta-values to 1 to avoid numerical problems.
        scale[seqLen-1] = (doScaling)?numY:1;
        beta_Y[seqLen-1].assign(1.0/scale[seqLen-1]);
        for (int i = seqLen-1; i > 0; i--) {
            
            // compute the Mi matrix
            initMDone = computeLogMi(featureGenerator,lambda,dataSeq,i,Mi_YY,Ri_Y,true,reuseM,initMDone);
            tmp_Y.assign(beta_Y[i]);
            tmp_Y.assign(Ri_Y,multFunc);
            RobustMath.Mult(Mi_YY, tmp_Y, beta_Y[i-1],1,0,false,edgeGen);
            
            // need to scale the beta-s to avoid overflow
            scale[i-1] = doScaling?beta_Y[i-1].zSum():1;
            // never scale by a factor of magnitude below 1
            if ((scale[i-1] < 1) && (scale[i-1] > -1))
                scale[i-1] = 1;
            constMultiplier.multiplicator = 1.0/scale[i-1];
            beta_Y[i-1].assign(constMultiplier);
        }
        
        double thisSeqLogli = 0;
        for (int i = 0; i < seqLen; i++) {
            // compute the Mi matrix
            initMDone = computeLogMi(featureGenerator,lambda,dataSeq,i,Mi_YY,Ri_Y,true,reuseM,initMDone);
            if (i > 0) {
                tmp_Y.assign(alpha_Y);
                RobustMath.Mult(Mi_YY, tmp_Y, newAlpha_Y,1,0,true,edgeGen);
                newAlpha_Y.assign(Ri_Y,multFunc); 
            } else {
                newAlpha_Y.assign(Ri_Y);     
            }
            if ((grad !=null) || (expFVals!=null)) {
                // find features that fire at this position..
                fgenForExpVals.startScanFeaturesAt(dataSeq, i);
                while (fgenForExpVals.hasNext()) { 
                    Feature feature = fgenForExpVals.next();
                    int f = feature.index();
                    
                    int yp = feature.y();
                    int yprev = feature.yprev();
                    float val = feature.value();
                    // observed count: the feature agrees with the true labels at i (and i-1 for edge features)
                    if ((grad != null) && (dataSeq.y(i) == yp) && (((i-1 >= 0) && (yprev == dataSeq.y(i-1))) || (yprev < 0))) {
                        grad[f] += instanceWt*val;
                        thisSeqLogli += val*lambda[f];
                    }
                    // unnormalized expected count (divided by Zx after the scan)
                    if (yprev < 0) {
                        ExpF[f] += newAlpha_Y.get(yp)*val*beta_Y[i].get(yp);
                    } else {
                        ExpF[f] += alpha_Y.get(yprev)*Ri_Y.get(yp)*Mi_YY.get(yprev,yp)*val*beta_Y[i].get(yp);
                    }
                }
            }
            alpha_Y.assign(newAlpha_Y);
            // now scale the alpha-s to avoid overflow problems.
            constMultiplier.multiplicator = 1.0/scale[i];
            alpha_Y.assign(constMultiplier);
            
            if (params.debugLvl > 2) {
                System.out.println("Alpha-i " + alpha_Y.toString());
                System.out.println("Ri " + Ri_Y.toString());
                System.out.println("Mi " + Mi_YY.toString());
                System.out.println("Beta-i " + beta_Y[i].toString());
            }
        }
        double Zx = alpha_Y.zSum();
        thisSeqLogli -= log(Zx);
        // correct for the fact that alpha-s were scaled.
        for (int i = 0; i < seqLen; i++) {
            thisSeqLogli -= log(scale[i]);
        }
        // update grad.
        if (grad != null) {
            for (int f = 0; f < grad.length; f++)
                grad[f] -= instanceWt*ExpF[f]/Zx;
        }
        if (expFVals!=null) {
            for (int f = 0; f < lambda.length; f++) {
                expFVals[f] += ExpF[f]/Zx;
            }
        }
        if (params.debugLvl > 1) {
            System.out.println("Sequence "  + thisSeqLogli + " log(Zx) " + Math.log(Zx) + " Zx " + Zx);
        }
        return thisSeqLogli*instanceWt;
    }
    /**
     * Fills Mi_YY and Ri_Y from the features currently being scanned by
     * featureGen, without reusing a previously computed edge matrix.
     */
    static void computeLogMi(FeatureGenerator featureGen, double lambda[], 
            DoubleMatrix2D Mi_YY,
            DoubleMatrix1D Ri_Y, boolean takeExp) {
        final boolean reuseM = false;
        final boolean initMDone = false;
        computeLogMi(featureGen, lambda, Mi_YY, Ri_Y, takeExp, reuseM, initMDone);
    }
    /**
     * Adds lambda[f]*value of every remaining feature of featureGen to the node
     * vector (state features, yprev == -1) or to the edge matrix (edge features).
     * Cells that still hold DEFAULT_VALUE are treated as 0 before adding.
     *
     * @param Mi_YY edge matrix, or null to skip edge features
     * @return true if at least one edge cell of Mi_YY was written
     */
    static boolean computeLogMiInitDone(FeatureGenerator featureGen, double lambda[], 
            DoubleMatrix2D Mi_YY,
            DoubleMatrix1D Ri_Y, double DEFAULT_VALUE) {
        boolean canSkipEdges = (Mi_YY==null) && (featureGen instanceof FeatureGenCache) && (DEFAULT_VALUE==0);
        if (canSkipEdges) {
            ((FeatureGenCache)featureGen).noEdgeFeatures();
        }
        boolean edgeWritten = false;
        while (featureGen.hasNext()) {
            Feature feat = featureGen.next();
            int idx = feat.index();
            int cur = feat.y();
            int prev = feat.yprev();
            float value = feat.value();

            if (prev == -1) {
                // this is a single state feature.
                // if default value was a negative_infinity, need to reset to 0.
                double nodeBase = Ri_Y.get(cur);
                if (nodeBase == DEFAULT_VALUE) {
                    nodeBase = 0;
                }
                Ri_Y.set(cur, nodeBase+lambda[idx]*value);
                continue;
            }
            if (Mi_YY == null) {
                continue;
            }
            double edgeBase = Mi_YY.get(prev, cur);
            if (edgeBase == DEFAULT_VALUE) {
                edgeBase = 0;
            }
            Mi_YY.set(prev, cur, edgeBase+lambda[idx]*value);
            edgeWritten = true;
        }
        return edgeWritten;
    }
    /**
     * Initializes the log-potentials. Without constraints every cell is set to
     * defaultValue. With constraints every cell is set to RobustMath.LOG0 and
     * only the cells allowed by ALLOW_ONLY constraints are reset to 0.
     *
     * @param Mi edge matrix; may be null
     * @return the default value actually used
     */
    public static double initLogMi(double defaultValue, Iterator constraints,
            DoubleMatrix2D Mi, DoubleMatrix1D Ri) {
        if (constraints == null) {
            if (Mi != null) Mi.assign(defaultValue);
            Ri.assign(defaultValue);
            return defaultValue;
        }

        defaultValue = RobustMath.LOG0;
        if (Mi != null) Mi.assign(defaultValue);
        Ri.assign(defaultValue);
        while (constraints.hasNext()) {
            Constraint constraint = (Constraint)constraints.next();
            if (constraint.type() != Constraint.ALLOW_ONLY) {
                continue;
            }
            RestrictConstraint allowed = (RestrictConstraint)constraint;
            allowed.startScan();
            while (allowed.hasNext()) {
                allowed.advance();
                int y = allowed.y();
                int yprev = allowed.yprev();
                if (yprev >= 0) {
                    if (Mi != null) Mi.set(yprev,y,0);
                } else {
                    Ri.set(y,0);
                }
            }
        }
        return defaultValue;
    }
    /**
     * Recomputes the node vector, and unless an already computed edge matrix is
     * being reused, the edge matrix, from the features featureGen is scanning.
     *
     * @param takeExp   if true, every computed entry is exponentiated via expE()
     * @param reuseM    together with initMDone: when both are true Mi_YY is left untouched
     * @param initMDone whether Mi_YY already holds valid values
     * @return true if an edge cell was written during this call
     */
    static boolean computeLogMi(FeatureGenerator featureGen, double lambda[], 
            DoubleMatrix2D Mi_YY,
            DoubleMatrix1D Ri_Y, boolean takeExp,boolean reuseM, boolean initMDone) {

        // null means "keep the caller's edge matrix as it is"
        DoubleMatrix2D edges = (reuseM && initMDone) ? null : Mi_YY;
        if (edges != null) {
            edges.assign(0);
        }
        Ri_Y.assign(0);
        boolean edgeWritten = computeLogMiInitDone(featureGen, lambda, edges, Ri_Y, 0);
        if (!takeExp) {
            return edgeWritten;
        }
        for (int row = (int) (Ri_Y.size()-1); row >= 0; row--) {
            Ri_Y.setQuick(row, expE(Ri_Y.getQuick(row)));
            if (edges == null) {
                continue;
            }
            for (int col = edges.columns()-1; col >= 0; col--) {
                edges.setQuick(row, col, expE(edges.getQuick(row, col)));
            }
        }
        return edgeWritten;
    }
    /**
     * Computes Mi_YY and Ri_Y for position i of dataSeq, always recomputing
     * the edge matrix.
     */
    public static void computeLogMi(FeatureGenerator featureGen, double lambda[], 
            DataSequence dataSeq, int i, 
            DoubleMatrix2D Mi_YY,
            DoubleMatrix1D Ri_Y, boolean takeExp) {
        final boolean reuseM = false;
        final boolean initMDone = false;
        computeLogMi(featureGen, lambda, dataSeq, i, Mi_YY, Ri_Y, takeExp, reuseM, initMDone);
    }
    /**
     * Starts the feature scan at position i of dataSeq and computes Mi_YY and
     * Ri_Y from the features that fire there.
     *
     * @return true if an edge cell was written during this call
     */
    public static boolean computeLogMi(FeatureGenerator featureGen, double lambda[], 
            DataSequence dataSeq, int i, 
            DoubleMatrix2D Mi_YY,
            DoubleMatrix1D Ri_Y, boolean takeExp, boolean reuseM, boolean initMDone) {
        featureGen.startScanFeaturesAt(dataSeq, i);
        boolean edgeWritten = computeLogMi(featureGen, lambda, Mi_YY, Ri_Y, takeExp, reuseM, initMDone);
        return edgeWritten;
    }
    /**
     * Replaces beta_Y with newSize freshly created vectors of length numY.
     *
     * @param newSize number of positions to allocate
     */
    protected void allocateAlphaBeta(int newSize) {
        beta_Y = new DoubleMatrix1D[newSize];
        int pos = 0;
        while (pos < beta_Y.length) {
            beta_Y[pos] = newLogDoubleMatrix1D(numY);
            pos++;
        }
    }
    /** Factory for the vectors used in log-domain processing; subclasses may override it. */
    protected DoubleMatrix1D newLogDoubleMatrix1D(int numY) {
        DoubleMatrix1D vector = new DenseDoubleMatrix1D(numY);
        return vector;
    }
    /** Factory for the matrices used in log-domain processing; subclasses may override it. */
    protected DoubleMatrix2D newLogDoubleMatrix2D(int numR, int numC) {
        DoubleMatrix2D matrix = new DenseDoubleMatrix2D(numR, numC);
        return matrix;
    }
    /**
     * Log-domain counterpart of sumProduct(): runs sumProductInner() on one
     * sequence and folds its results into the gradient and expected values.
     * When the instance weight differs from 1 the observed counts are collected
     * in a scratch array so they can be weighted before being added to grad.
     *
     * @return -lZx when grad is null, otherwise the instance weight times the
     *         log-likelihood of the sequence
     */
    protected double sumProductLL(DataSequence dataSeq, FeatureGenerator featureGenerator, double lambda[], 
            double grad[], double expFVals[], boolean onlyForwardPass, int numRecord, FeatureGenerator fgenForExpVals) {

        float instanceWt = (float) ((instanceWts!=null)?instanceWts[numRecord]:1);

        // reset the log-domain expectation accumulator
        for (int k = 0; k < ExpF.length; k++) {
            ExpF[k] = RobustMath.LOG0;
        }

        double observed[] = grad;
        if ((instanceWt != 1) && (grad != null)) {
            observed = new double[grad.length];
        }
        boolean needExpectations = (grad != null) || (expFVals != null);
        double thisSeqLogli = sumProductInner(dataSeq, featureGenerator, lambda, observed,
                onlyForwardPass, numRecord, needExpectations ? fgenForExpVals : null);

        thisSeqLogli -= lZx;

        // update grad.
        if (grad != null) {
            boolean separateBuffer = (observed != grad);
            for (int k = 0; k < grad.length; k++) {
                grad[k] -= RobustMath.exp(ExpF[k]-lZx)*instanceWt;
                if (separateBuffer) {
                    grad[k] += observed[k]*instanceWt;
                }
            }
        }
        if (expFVals != null) {
            for (int k = 0; k < expFVals.length; k++) {
                expFVals[k] += RobustMath.exp(ExpF[k]-lZx)*instanceWt;
            }
        }
        if (params.debugLvl > 1) {
            System.out.println("Sequence "  + thisSeqLogli  + " log(Zx) " + lZx + " Zx " + Math.exp(lZx));
        }
        if (grad == null) {
            return -lZx;
        }
        return thisSeqLogli * instanceWt;
    }
    
    /**
     * Computes node and edge marginal probabilities of a sequence with a
     * log-domain forward-backward pass.
     *
     * @param dataSeq          the sequence
     * @param featureGenerator generator used to build the potentials
     * @param lambda           current parameters
     * @param nodeMargs        output, indexed [position][y]
     * @param edgeMargs        output, indexed [position][yprev][y]; entries at position 0 are not written
     */
    protected void getMarginals(DataSequence dataSeq, FeatureGenerator featureGenerator, double lambda[], float nodeMargs[][], float edgeMargs[][][]) {
        final int seqLen = dataSeq.length();
        allocateAlphaBeta(2*seqLen+1);
        beta_Y = computeBetaArray(dataSeq,lambda,featureGenerator);
        alpha_Y.assign(0);
        // First pass: store unnormalized log-marginals.
        for (int i = 0; i < seqLen; i++) {
            // compute the Mi matrix
            initMDone = computeLogMiTrainMode(featureGenerator,lambda,dataSeq,i,Mi_YY,Ri_Y,false,reuseM,initMDone);
            
            if (i > 0) {
                tmp_Y.assign(alpha_Y);
                RobustMath.logMult(Mi_YY, tmp_Y, newAlpha_Y,1,0,true,edgeGen);
                newAlpha_Y.assign(Ri_Y,sumFunc); 
            } else {
                newAlpha_Y.assign(Ri_Y);
            }
            for (int y = 0; y < numY; y++) {
                nodeMargs[i][y] = (float) (newAlpha_Y.get(y)+beta_Y[i].get(y));
                if (i > 0) {
                    for (int yprev = 0; yprev < numY; yprev++) {
                        edgeMargs[i][yprev][y] = (float) (alpha_Y.get(yprev)+beta_Y[i].get(y)+Ri_Y.get(y)+Mi_YY.get(yprev,y));
                    }
                }
            }
            alpha_Y.assign(newAlpha_Y);
        }
        double logZx = RobustMath.logSumExp(alpha_Y);
        // Second pass: normalize exactly the cells written above. Positions run
        // over the sequence and labels over numY; the label loop must not be
        // bounded by the number of positions.
        for (int i = 0; i < seqLen; i++) {
            for (int y = 0; y < numY; y++) {
                nodeMargs[i][y] = (float) Math.exp(nodeMargs[i][y] - logZx);
                if (i==0) continue;
                for (int yprev = 0; yprev < numY; yprev++) {
                    edgeMargs[i][yprev][y] = (float) Math.exp(edgeMargs[i][yprev][y]-logZx);
                }
            }
        }
    }

    /**
     * Log-domain forward pass (preceded by a backward pass unless
     * onlyForwardPass is set) over one sequence. Adds observed feature values
     * to grad, accumulates log expected feature values in ExpF, and stores the
     * log partition value in lZx.
     *
     * @param grad           receives observed feature values; may be null
     * @param fgenForExpVals generator scanned for observed/expected counts;
     *                       null skips that scan entirely
     * @return the sum of lambda[f]*value over the features consistent with the true labels
     */
    protected double sumProductInner(DataSequence dataSeq, FeatureGenerator featureGenerator, double lambda[], 
            double grad[], boolean onlyForwardPass, int numRecord, FeatureGenerator fgenForExpVals) {
        boolean betaTooSmall = (beta_Y == null) || (beta_Y.length < dataSeq.length());
        if (betaTooSmall) {
            allocateAlphaBeta(2*dataSeq.length()+1);
        }
        // compute beta values in a backward scan.
        if (!onlyForwardPass) {
            beta_Y = computeBetaArray(dataSeq, lambda, featureGenerator);
        }
        alpha_Y.assign(0);
        double goldScore = 0;
        for (int pos = 0; pos < dataSeq.length(); pos++) {
            // compute the Mi matrix
            initMDone = computeLogMiTrainMode(featureGenerator, lambda, dataSeq, pos, Mi_YY, Ri_Y, false, reuseM, initMDone);

            if (pos == 0) {
                newAlpha_Y.assign(Ri_Y);
            } else {
                tmp_Y.assign(alpha_Y);
                RobustMath.logMult(Mi_YY, tmp_Y, newAlpha_Y, 1, 0, true, edgeGen);
                newAlpha_Y.assign(Ri_Y, sumFunc);
            }

            if (fgenForExpVals != null) {
                // find features that fire at this position..
                fgenForExpVals.startScanFeaturesAt(dataSeq, pos);
                while (fgenForExpVals.hasNext()) {
                    Feature feat = fgenForExpVals.next();
                    int idx = feat.index();

                    int cur = feat.y();
                    int prev = feat.yprev();
                    float value = feat.value();

                    // observed count: feature agrees with the true labels here
                    if ((grad != null) && (dataSeq.y(pos) == cur)
                            && (((pos > 0) && (prev == dataSeq.y(pos-1))) || (prev < 0))) {
                        grad[idx] += value;
                        goldScore += value*lambda[idx];
                        if (params.debugLvl > 2) {
                            System.out.println("Feature fired " + idx + " " + feat);
                        }
                    }
                    // zero-valued features contribute nothing to the expectation
                    if (Math.abs(value) < Double.MIN_VALUE) continue;
                    if (value < 0) {
                        System.out.println("ERROR: Cannot process negative feature values in log domains: " 
                                + "either disable the '-trainer=ll' flag or ensure feature values are not -ve");
                        continue;
                    }
                    double logTerm;
                    if (prev < 0) {
                        logTerm = newAlpha_Y.get(cur) + RobustMath.log(value) + beta_Y[pos].get(cur);
                    } else {
                        logTerm = alpha_Y.get(prev)+Ri_Y.get(cur)+Mi_YY.get(prev,cur)+RobustMath.log(value)+beta_Y[pos].get(cur);
                    }
                    ExpF[idx] = RobustMath.logSumExp(ExpF[idx], logTerm);
                }
            }
            alpha_Y.assign(newAlpha_Y);

            if (params.debugLvl > 2) {
                System.out.println("Alpha-i " + alpha_Y.toString());
                System.out.println("Ri " + Ri_Y.toString());
                System.out.println("Mi " + Mi_YY.toString());
                System.out.println("Beta-i " + beta_Y[pos].toString());
            }
        }
        lZx = RobustMath.logSumExp(alpha_Y);
        return goldScore;
    }
    /**
     * Fills beta_Y with the log-domain backward vectors of the sequence,
     * starting from 0 at the last position and working towards position 0.
     *
     * @return the beta_Y array (the same object as the field)
     */
    protected DoubleMatrix1D[] computeBetaArray(DataSequence dataSeq, double[] lambda, FeatureGenerator featureGenerator) {
        int pos = dataSeq.length()-1;
        beta_Y[pos].assign(0);
        while (pos > 0) {
            // compute the Mi matrix
            initMDone = computeLogMiTrainMode(featureGenerator, lambda, dataSeq, pos, Mi_YY, Ri_Y, false, reuseM, initMDone);
            tmp_Y.assign(beta_Y[pos]);
            tmp_Y.assign(Ri_Y, sumFunc);
            RobustMath.logMult(Mi_YY, tmp_Y, beta_Y[pos-1], 1, 0, false, edgeGen);
            pos--;
        }
        return beta_Y;
    }
    /**
     * Computes the log-potentials for position i during training. The takeExp
     * argument is not used: potentials are always left in the log domain.
     *
     * @return true if the edge matrix was written now or was already initialized
     */
    protected boolean computeLogMiTrainMode(FeatureGenerator featureGen, double lambda[], 
            DataSequence dataSeq, int i, 
            DoubleMatrix2D Mi_YY,
            DoubleMatrix1D Ri_Y, boolean takeExp, boolean reuseM, boolean initMDone) {
        boolean edgeWrittenNow = computeLogMi(featureGen, lambda, dataSeq, i, Mi_YY, Ri_Y, false, reuseM, initMDone);
        return edgeWrittenNow || initMDone;
    }
    /**
     * Natural logarithm that never throws: if the result is NaN or infinite the
     * problem is reported on standard output and -Double.MAX_VALUE is returned.
     */
    static double log(double val) {
        try {
            return logE(val);
        } catch (Exception failure) {
            System.out.println(failure.getMessage());
            failure.printStackTrace();
            return -Double.MAX_VALUE;
        }
    }
    
    /**
     * Natural logarithm with a range check.
     *
     * @param val the argument
     * @return Math.log(val)
     * @throws Exception if the result is NaN or infinite
     */
    static double logE(double val) throws Exception {
        final double result = Math.log(val);
        boolean usable = !(Double.isNaN(result) || Double.isInfinite(result));
        if (usable) {
            return result;
        }
        throw new Exception("Overflow error when taking log of " + val);
    } 
    /**
     * Exponential with an overflow check: when the result is NaN or infinite,
     * the problem (with a hint to switch to log-space training) and a stack
     * trace are printed and Double.MAX_VALUE is returned instead.
     */
    static double expE(double val)  {
        double result = RobustMath.exp(val);
        boolean usable = !(Double.isNaN(result) || Double.isInfinite(result));
        if (usable) {
            return result;
        }
        // The exception is only created to obtain a message and a stack trace.
        Exception overflow = new Exception("Overflow error when taking exp of " + val + "\n Try running the CRF with the following option \"trainer ll\" to perform computations in the log-space.");
        System.out.println(overflow.getMessage());
        overflow.printStackTrace();
        return Double.MAX_VALUE;
    }
    /**
     * Exponential with an overflow check: when the result is NaN or infinite,
     * the problem (with a hint to redesign the feature values) and a stack
     * trace are printed and Double.MAX_VALUE is returned instead.
     */
    static double expLE(double val) {
        double result = RobustMath.exp(val);
        boolean usable = !(Double.isNaN(result) || Double.isInfinite(result));
        if (usable) {
            return result;
        }
        // The exception is only created to obtain a message and a stack trace.
        Exception overflow = new Exception("Overflow error when taking exp of " + val 
                + " you might need to redesign feature values so as to not reach such high values");
        System.out.println(overflow.getMessage());
        overflow.printStackTrace();
        return Double.MAX_VALUE;
    }
    /**
     * Adds the observed feature vector of a labelled sequence to grad: the value
     * of every feature whose label (and, for edge features, previous label)
     * matches the sequence's own labels.
     *
     * @param dataSeq the labelled sequence
     * @param grad    accumulator indexed by feature id; when null the features
     *                are still scanned but nothing is accumulated
     */
    public void addFeatureVector(DataSequence dataSeq, double[] grad) {
        for (int pos = 0; pos < dataSeq.length(); pos++) {
            // find features that fire at this position..
            featureGenerator.startScanFeaturesAt(dataSeq, pos);
            while (featureGenerator.hasNext()) {
                Feature feat = featureGenerator.next();
                int idx = feat.index();
                int cur = feat.y();
                int prev = feat.yprev();
                float value = feat.value();

                if (grad == null) continue;
                if (dataSeq.y(pos) != cur) continue;
                boolean prevMatches = ((pos > 0) && (prev == dataSeq.y(pos-1))) || (prev < 0);
                if (prevMatches) {
                    grad[idx] += value;
                }
            }
        }
    }
    /** Marks the edge matrix as not initialized so that the next use recomputes it. */
    public void reInit() {
        this.initMDone = false;
    }
}