// PartialTrainer.java example
//
// Explorer
// MinorThird-master
package iitb.CRF;

import java.util.Arrays;

import cern.colt.matrix.tdouble.DoubleMatrix1D;

/**
 * Log-domain trainer that keeps, next to the inherited alpha/beta recursions,
 * a second "constrained" set of forward/backward vectors.
 *
 * <p>In the constrained vectors every position {@code i} whose label
 * {@code dataSeq.y(i)} is non-negative is restricted to that label (all other
 * entries are set to {@link RobustMath#LOG0}); positions with a negative label
 * are left unrestricted. The constructor and {@link #init} both force
 * {@code logTrainer} and {@code logProcessing} to {@code true}.
 */
public class PartialTrainer extends SparseTrainer {

    /** Constrained forward vector for the previous / current position. */
    protected DoubleMatrix1D constrained_alpha_Y, constrained_newAlpha_Y;
    /** Constrained backward vectors, one per sequence position. */
    protected DoubleMatrix1D constrained_beta_Y[];
    /**
     * {@code constrainedExpF[f]} accumulates (in log space) the constrained
     * expectation of feature {@code f}; {@code constrainedlZx} is the
     * {@code zSum()} of the final constrained alpha vector of the most recent
     * {@link #sumProductInner} call.
     */
    protected double constrainedExpF[], constrainedlZx;

    public PartialTrainer(CrfParams p) {
        super(p);
        logTrainer = true;
        logProcessing = true;
    }

    /**
     * Runs the inherited initialisation and then allocates the constrained
     * alpha vectors (size {@code numY}) and the constrained expectation array
     * (size {@code l.length}).
     */
    protected void init(CRF model, DataIter data, double[] l) {
        super.init(model, data, l);
        constrained_alpha_Y = newLogDoubleMatrix1D(numY);
        constrained_newAlpha_Y = newLogDoubleMatrix1D(numY);
        constrainedExpF = new double[l.length];
        logTrainer = true;
        logProcessing = true;
    }

    /**
     * Fills {@code Mi_YY} / {@code Ri_Y} for position {@code pos}, choosing the
     * segment-aware overload when the sequence is a {@link CandSegDataSequence}.
     * Shared by the forward pass, the backward pass and {@link #getMarginals}
     * so that all three treat plain and segment sequences the same way.
     */
    private void computeLogMiForPosition(DataSequence dataSeq, int pos,
            FeatureGenerator featureGenerator, double[] lambda) {
        if (dataSeq instanceof CandSegDataSequence) {
            initMDone = SegmentTrainer.computeLogMi((CandSegDataSequence) dataSeq, pos - 1, pos,
                    (FeatureGeneratorNested) featureGenerator, lambda, Mi_YY, Ri_Y, reuseM, initMDone);
        } else {
            initMDone = SegmentTrainer.computeLogMi(featureGenerator, lambda, dataSeq, pos,
                    Mi_YY, Ri_Y, false, reuseM, initMDone);
        }
    }

    /**
     * If {@code label} is non-negative, keeps only the entry at {@code label}
     * in {@code vec} and sets every other entry to {@link RobustMath#LOG0}.
     * A negative label leaves {@code vec} untouched.
     */
    private static void restrictToObservedLabel(DoubleMatrix1D vec, int label) {
        if (label < 0) {
            return;
        }
        double kept = vec.get(label);
        vec.assign(RobustMath.LOG0);
        vec.set(label, kept);
    }

    /**
     * Backward scan that fills both {@code beta_Y} and
     * {@code constrained_beta_Y} for positions {@code 0..length-1}.
     *
     * @return the (unconstrained) {@code beta_Y} array
     */
    protected DoubleMatrix1D[] computeBetaArray(DataSequence dataSeq, double[] lambda, FeatureGenerator featureGenerator) {
        final int last = dataSeq.length() - 1;
        final int lastLabel = dataSeq.y(last);

        // Seed the constrained recursion: all labels allowed (log 1 = 0) when the
        // last label is negative, otherwise only the given label is allowed.
        if (lastLabel < 0) {
            constrained_beta_Y[last].assign(0);
        } else {
            constrained_beta_Y[last].assign(RobustMath.LOG0);
            constrained_beta_Y[last].set(lastLabel, 0);
        }
        beta_Y[last].assign(0);

        for (int i = last; i > 0; i--) {
            computeLogMiForPosition(dataSeq, i, featureGenerator, lambda);

            tmp_Y.assign(beta_Y[i]);
            tmp_Y.assign(Ri_Y, sumFunc);
            RobustMath.logMult(Mi_YY, tmp_Y, beta_Y[i - 1], 1, 0, false);

            tmp_Y.assign(constrained_beta_Y[i]);
            tmp_Y.assign(Ri_Y, sumFunc);
            RobustMath.logMult(Mi_YY, tmp_Y, constrained_beta_Y[i - 1], 1, 0, false);

            // Rule out every label except the given one at position i-1.
            final int prevLabel = dataSeq.y(i - 1);
            if (prevLabel >= 0) {
                for (int y = 0; y < numY; y++) {
                    if (y != prevLabel) {
                        constrained_beta_Y[i - 1].set(y, RobustMath.LOG0);
                    }
                }
            }
        }
        return beta_Y;
    }

    /**
     * Allocates the inherited alpha/beta storage and a matching array of
     * {@code newSize} constrained beta vectors.
     */
    protected void allocateAlphaBeta(int newSize) {
        super.allocateAlphaBeta(newSize);
        constrained_beta_Y = new DoubleMatrix1D[newSize];
        for (int i = 0; i < constrained_beta_Y.length; i++) {
            constrained_beta_Y[i] = newLogDoubleMatrix1D(numY);
        }
    }

    /**
     * Forward pass over one sequence, run in parallel for the unconstrained
     * and the constrained vectors.
     *
     * <p>Side effects: updates {@code ExpF}, {@code constrainedExpF},
     * {@code lZx} and {@code constrainedlZx}; when {@code grad} is non-null,
     * adds {@code exp(constrainedExpF[i] - constrainedlZx)} to each
     * {@code grad[i]}.
     *
     * @param onlyForwardPass when {@code true} the backward scan is skipped
     * @param numRecord       not used by this implementation
     * @param fgenForExpVals  feature generator scanned for expectations; may be
     *                        {@code null}, in which case no expectations are
     *                        accumulated
     * @return {@code zSum()} of the final constrained alpha vector
     */
    protected double sumProductInner(DataSequence dataSeq, FeatureGenerator featureGenerator, double lambda[],
            double grad[], boolean onlyForwardPass, int numRecord, FeatureGenerator fgenForExpVals) {
        final int seqLen = dataSeq.length();
        // Both beta arrays are indexed below, so both must be large enough.
        if ((beta_Y == null) || (beta_Y.length < seqLen)
                || (constrained_beta_Y == null) || (constrained_beta_Y.length < seqLen)) {
            allocateAlphaBeta(2 * seqLen + 1);
        }

        // compute beta values in a backward scan.
        if (!onlyForwardPass) {
            beta_Y = computeBetaArray(dataSeq, lambda, featureGenerator);
        }
        alpha_Y.assign(0);
        constrained_alpha_Y.assign(0);

        // Write to the field (a local of the same name previously shadowed it,
        // leaving the field permanently unset).
        constrainedlZx = RobustMath.LOG0;

        Arrays.fill(constrainedExpF, RobustMath.LOG0);

        double wDotF = 0;

        for (int i = 0; i < seqLen; i++) {
            computeLogMiForPosition(dataSeq, i, featureGenerator, lambda);

            if (i > 0) {
                tmp_Y.assign(alpha_Y);
                RobustMath.logMult(Mi_YY, tmp_Y, newAlpha_Y, 1, 0, true);
                newAlpha_Y.assign(Ri_Y, sumFunc);

                tmp_Y.assign(constrained_alpha_Y);
                RobustMath.logMult(Mi_YY, tmp_Y, constrained_newAlpha_Y, 1, 0, true);
                constrained_newAlpha_Y.assign(Ri_Y, sumFunc);
            } else {
                newAlpha_Y.assign(Ri_Y);
                constrained_newAlpha_Y.assign(Ri_Y);
            }

            restrictToObservedLabel(constrained_newAlpha_Y, dataSeq.y(i));

            if (fgenForExpVals != null) {
                // find features that fire at this position..
                fgenForExpVals.startScanFeaturesAt(dataSeq, i);
                while (fgenForExpVals.hasNext()) {
                    Feature feature = fgenForExpVals.next();
                    int f = feature.index();

                    int yp = feature.y();
                    int yprev = feature.yprev();
                    float val = feature.value();

                    // Score contribution of features consistent with the given labels
                    // (only reported in the debug output below).
                    if ((dataSeq.y(i) == yp) && (((i - 1 >= 0) && (yprev == dataSeq.y(i - 1)) && (dataSeq.y(i - 1) >= 0)) || (yprev < 0))) {
                        wDotF += lambda[f] * val;
                    }

                    if (Math.abs(val) < Double.MIN_VALUE) continue;
                    if (val < 0) {
                        System.out.println("ERROR: Cannot process negative feature values in log domains: "
                                + "either disable the '-trainer=ll' flag or ensure feature values are not -ve");
                        continue;
                    }
                    if (yprev < 0) {
                        // State feature: uses the alpha of the current position.
                        ExpF[f] = RobustMath.logSumExp(ExpF[f], newAlpha_Y.get(yp) + RobustMath.log(val) + beta_Y[i].get(yp));
                        constrainedExpF[f] = RobustMath.logSumExp(constrainedExpF[f], constrained_newAlpha_Y.get(yp) + RobustMath.log(val) + constrained_beta_Y[i].get(yp));
                    } else {
                        // Edge feature: uses the alpha of the previous position plus Ri and Mi.
                        ExpF[f] = RobustMath.logSumExp(ExpF[f], alpha_Y.get(yprev) + Ri_Y.get(yp) + Mi_YY.get(yprev, yp) + RobustMath.log(val) + beta_Y[i].get(yp));
                        constrainedExpF[f] = RobustMath.logSumExp(constrainedExpF[f], constrained_alpha_Y.get(yprev) + Ri_Y.get(yp) + Mi_YY.get(yprev, yp) + RobustMath.log(val) + constrained_beta_Y[i].get(yp));
                    }
                }
            }
            alpha_Y.assign(newAlpha_Y);
            constrained_alpha_Y.assign(constrained_newAlpha_Y);

            if (params.debugLvl > 2) {
                System.out.println("Alpha-i " + alpha_Y.toString());
                System.out.println("Ri " + Ri_Y.toString());
                System.out.println("Mi " + Mi_YY.toString());
                System.out.println("Beta-i " + beta_Y[i].toString());
            }
        }
        lZx = alpha_Y.zSum();
        constrainedlZx = constrained_alpha_Y.zSum();
        if (params.debugLvl > 1) {
            System.out.println("constrainedLZx " + constrainedlZx + ", wDotF " + wDotF + " constrainedLZxBeta " + constrained_beta_Y[0].zSum());
        }
        if (grad != null) {
            for (int i = 0; i < grad.length; i++) {
                grad[i] += RobustMath.exp(constrainedExpF[i] - constrainedlZx);
            }
        }

        return constrainedlZx;
    }

    /**
     * Fills {@code nodeMargs[i][y]} and {@code edgeMargs[i][yprev][y]} for
     * every position {@code i} of {@code dataSeq} from the constrained
     * alpha/beta vectors, normalised by the {@code zSum()} of the final
     * constrained alpha vector. {@code edgeMargs[0]} is not written.
     *
     * <p>Both arrays must have at least {@code dataSeq.length()} rows; entries
     * beyond the sequence length are left untouched.
     */
    protected void getMarginals(DataSequence dataSeq, FeatureGenerator featureGenerator, double lambda[], float nodeMargs[][], float edgeMargs[][][]) {
        final int seqLen = dataSeq.length();
        allocateAlphaBeta(2 * seqLen + 1);
        computeBetaArray(dataSeq, lambda, featureGenerator);
        constrained_alpha_Y.assign(0);
        for (int i = 0; i < seqLen; i++) {
            // Same plain/segment dispatch as the training passes (an unconditional
            // cast here used to fail for non-segment sequences).
            computeLogMiForPosition(dataSeq, i, featureGenerator, lambda);
            if (i > 0) {
                tmp_Y.assign(constrained_alpha_Y);
                RobustMath.logMult(Mi_YY, tmp_Y, constrained_newAlpha_Y, 1, 0, true);
                constrained_newAlpha_Y.assign(Ri_Y, sumFunc);
            } else {
                constrained_newAlpha_Y.assign(Ri_Y);
            }
            restrictToObservedLabel(constrained_newAlpha_Y, dataSeq.y(i));

            // Store unnormalised log values; they are exponentiated below.
            for (int y = 0; y < numY; y++) {
                nodeMargs[i][y] = (float) (constrained_newAlpha_Y.get(y) + constrained_beta_Y[i].get(y));
                if (i > 0) {
                    for (int yprev = 0; yprev < numY; yprev++) {
                        edgeMargs[i][yprev][y] = (float) (constrained_alpha_Y.get(yprev) + constrained_beta_Y[i].get(y) + Ri_Y.get(y) + Mi_YY.get(yprev, y));
                    }
                }
            }
            constrained_alpha_Y.assign(constrained_newAlpha_Y);
        }
        double logZx = constrained_alpha_Y.zSum();
        // Normalise exactly the positions filled in above.
        for (int i = 0; i < seqLen; i++) {
            for (int y = 0; y < numY; y++) {
                nodeMargs[i][y] = (float) Math.exp(nodeMargs[i][y] - logZx);
                assert (!Float.isNaN(nodeMargs[i][y]));
                if (i == 0) continue;
                for (int yprev = 0; yprev < numY; yprev++) {
                    edgeMargs[i][yprev][y] = (float) Math.exp(edgeMargs[i][yprev][y] - logZx);
                    assert (!Float.isNaN(edgeMargs[i][yprev][y]));
                }
            }
        }
    }
}