package iitb.CRF;
import java.util.Arrays;
import cern.colt.matrix.tdouble.DoubleMatrix1D;
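/**
 * Trainer for CRFs whose training sequences may be only partially labeled:
 * positions with dataSeq.y(i) < 0 are treated as unobserved. In addition to
 * the usual unconstrained forward-backward pass, it runs a "constrained"
 * pass in which the alpha and beta vectors are clamped to the observed label
 * wherever one is present, so the constrained partition value sums over all
 * completions of the hidden labels. The gradient contribution added here is
 * the normalized constrained expectation exp(constrainedExpF[f] - constrainedlZx);
 * the unconstrained ExpF and lZx are also filled in, presumably for the
 * inherited SparseTrainer code to subtract as in fully supervised training,
 * giving the usual gradient form E_constrained[f] - E[f].
 */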
public class PartialTrainer extends SparseTrainer {
    // Forward vectors restricted to the observed labels (log domain).
    protected DoubleMatrix1D constrained_alpha_Y, constrained_newAlpha_Y;
    // Backward vectors restricted to the observed labels (log domain).
    protected DoubleMatrix1D constrained_beta_Y[];
    // Constrained feature expectations and constrained log partition value.
    protected double constrainedExpF[], constrainedlZx;

    public PartialTrainer(CrfParams p) {
        super(p);
        logTrainer = true;
        logProcessing = true;
    }
    protected void init(CRF model, DataIter data, double[] l) {
        super.init(model, data, l);
        constrained_alpha_Y = newLogDoubleMatrix1D(numY);
        constrained_newAlpha_Y = newLogDoubleMatrix1D(numY);
        constrainedExpF = new double[l.length];
        logTrainer = true;
        logProcessing = true;
    }
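    /**
     * Backward pass that fills both beta_Y (unconstrained) and
     * constrained_beta_Y. The two recursions share the same Mi/Ri scores;
     * the constrained vectors are additionally clamped to dataSeq.y(i) at
     * every labeled position.
     */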
    protected DoubleMatrix1D[] computeBetaArray(DataSequence dataSeq, double[] lambda, FeatureGenerator featureGenerator) {
        if (dataSeq.y(dataSeq.length()-1) < 0) {
            // The last label is unobserved: all states are allowed.
            constrained_beta_Y[dataSeq.length()-1].assign(0);
        } else {
            // Clamp the constrained beta to the observed last label.
            constrained_beta_Y[dataSeq.length()-1].assign(RobustMath.LOG0);
            constrained_beta_Y[dataSeq.length()-1].set(dataSeq.y(dataSeq.length()-1), 0);
        }
        beta_Y[dataSeq.length()-1].assign(0);
        for (int i = dataSeq.length()-1; i > 0; i--) {
            // compute the Mi matrix
            //initMDone = computeLogMiTrainMode(featureGenerator,lambda,dataSeq,i,Mi_YY,Ri_Y,false,reuseM,initMDone);
            if (dataSeq instanceof CandSegDataSequence)
                initMDone = SegmentTrainer.computeLogMi((CandSegDataSequence) dataSeq, i-1, i, (FeatureGeneratorNested) featureGenerator, lambda, Mi_YY, Ri_Y, reuseM, initMDone);
            else
                initMDone = SegmentTrainer.computeLogMi(featureGenerator, lambda, dataSeq, i, Mi_YY, Ri_Y, false, reuseM, initMDone);
            // Unconstrained backward recursion: beta[i-1] = Mi * (Ri + beta[i]) in the log domain.
            tmp_Y.assign(beta_Y[i]);
            tmp_Y.assign(Ri_Y, sumFunc);
            RobustMath.logMult(Mi_YY, tmp_Y, beta_Y[i-1], 1, 0, false);
            // Constrained backward recursion over the same Mi and Ri scores.
            tmp_Y.assign(constrained_beta_Y[i]);
            tmp_Y.assign(Ri_Y, sumFunc);
            RobustMath.logMult(Mi_YY, tmp_Y, constrained_beta_Y[i-1], 1, 0, false);
            if (dataSeq.y(i-1) >= 0) {
                // Position i-1 is labeled: rule out every other state.
                for (int y = 0; y < numY; y++) {
                    if (y != dataSeq.y(i-1)) {
                        constrained_beta_Y[i-1].set(y, RobustMath.LOG0);
                    }
                }
            }
        }
        return beta_Y;
    }
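    /** Allocates the constrained beta vectors alongside the base trainer's arrays. */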
    protected void allocateAlphaBeta(int newSize) {
        super.allocateAlphaBeta(newSize);
        constrained_beta_Y = new DoubleMatrix1D[newSize];
        for (int i = 0; i < constrained_beta_Y.length; i++)
            constrained_beta_Y[i] = newLogDoubleMatrix1D(numY);
    }
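    /**
     * Forward pass: maintains an unconstrained and a clamped alpha recursion,
     * accumulates both sets of per-feature expectations in the log domain,
     * adds the normalized constrained expectations to grad, and returns the
     * constrained log partition value.
     */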
    protected double sumProductInner(DataSequence dataSeq, FeatureGenerator featureGenerator, double lambda[],
            double grad[], boolean onlyForwardPass, int numRecord, FeatureGenerator fgenForExpVals) {
        if ((beta_Y == null) || (beta_Y.length < dataSeq.length())) {
            allocateAlphaBeta(2*dataSeq.length()+1);
        }
        // compute beta values in a backward scan; all work stays in the
        // log domain, which avoids the numerical problems of direct products.
        if (!onlyForwardPass) {
            beta_Y = computeBetaArray(dataSeq, lambda, featureGenerator);
        }
        alpha_Y.assign(0);
        constrained_alpha_Y.assign(0);
        constrainedlZx = RobustMath.LOG0;
        Arrays.fill(constrainedExpF, RobustMath.LOG0);
        double wDotF = 0;
        for (int i = 0; i < dataSeq.length(); i++) {
            // compute the Mi matrix
            //initMDone = computeLogMiTrainMode(featureGenerator,lambda,dataSeq,i,Mi_YY,Ri_Y,false,reuseM,initMDone);
            if (dataSeq instanceof CandSegDataSequence)
                initMDone = SegmentTrainer.computeLogMi((CandSegDataSequence) dataSeq, i-1, i, (FeatureGeneratorNested) featureGenerator, lambda, Mi_YY, Ri_Y, reuseM, initMDone);
            else
                initMDone = SegmentTrainer.computeLogMi(featureGenerator, lambda, dataSeq, i, Mi_YY, Ri_Y, false, reuseM, initMDone);
            if (i > 0) {
                // Forward recursion: alpha[i] = (alpha[i-1] * Mi) + Ri, in the log domain.
                tmp_Y.assign(alpha_Y);
                RobustMath.logMult(Mi_YY, tmp_Y, newAlpha_Y, 1, 0, true);
                newAlpha_Y.assign(Ri_Y, sumFunc);
                tmp_Y.assign(constrained_alpha_Y);
                RobustMath.logMult(Mi_YY, tmp_Y, constrained_newAlpha_Y, 1, 0, true);
                constrained_newAlpha_Y.assign(Ri_Y, sumFunc);
            } else {
                newAlpha_Y.assign(Ri_Y);
                constrained_newAlpha_Y.assign(Ri_Y);
            }
            if (dataSeq.y(i) >= 0) {
                // Position i is labeled: clamp the constrained alpha to the observed label.
                double d = constrained_newAlpha_Y.get(dataSeq.y(i));
                constrained_newAlpha_Y.assign(RobustMath.LOG0);
                constrained_newAlpha_Y.set(dataSeq.y(i), d);
            }
            if (fgenForExpVals != null) {
                // find features that fire at this position..
                fgenForExpVals.startScanFeaturesAt(dataSeq, i);
                while (fgenForExpVals.hasNext()) {
                    Feature feature = fgenForExpVals.next();
                    int f = feature.index();
                    int yp = feature.y();
                    int yprev = feature.yprev();
                    float val = feature.value();
                    // Accumulate w.f(x,y) over positions whose labels are observed.
                    if ((dataSeq.y(i) == yp) && (((i-1 >= 0) && (yprev == dataSeq.y(i-1)) && (dataSeq.y(i-1) >= 0)) || (yprev < 0))) {
                        wDotF += lambda[f]*val;
                    }
                    /*
                    if ((grad != null) && (dataSeq.y(i) == yp) && (((i-1 >= 0) && (yprev == dataSeq.y(i-1))) || (yprev < 0))) {
                        grad[f] += val;
                        if (params.debugLvl > 2) {
                            System.out.println("Feature fired " + f + " " + feature);
                        }
                    }
                    */
                    if (Math.abs(val) < Double.MIN_VALUE) continue;
                    if (val < 0) {
                        System.out.println("ERROR: Cannot process negative feature values in log domains: "
                                + "either disable the '-trainer=ll' flag or ensure feature values are non-negative");
                        continue;
                    }
                    if (yprev < 0) {
                        // State feature: combine alpha[i] and beta[i] at state yp.
                        ExpF[f] = RobustMath.logSumExp(ExpF[f], newAlpha_Y.get(yp) + RobustMath.log(val) + beta_Y[i].get(yp));
                        constrainedExpF[f] = RobustMath.logSumExp(constrainedExpF[f], constrained_newAlpha_Y.get(yp) + RobustMath.log(val) + constrained_beta_Y[i].get(yp));
                    } else {
                        // Edge feature: combine alpha[i-1], the transition score, and beta[i].
                        ExpF[f] = RobustMath.logSumExp(ExpF[f], alpha_Y.get(yprev) + Ri_Y.get(yp) + Mi_YY.get(yprev,yp) + RobustMath.log(val) + beta_Y[i].get(yp));
                        constrainedExpF[f] = RobustMath.logSumExp(constrainedExpF[f], constrained_alpha_Y.get(yprev) + Ri_Y.get(yp) + Mi_YY.get(yprev,yp) + RobustMath.log(val) + constrained_beta_Y[i].get(yp));
                    }
                }
            }
            alpha_Y.assign(newAlpha_Y);
            //System.out.println("alpha_Y at i="+i+": "+alpha_Y);
            constrained_alpha_Y.assign(constrained_newAlpha_Y);
            if (params.debugLvl > 2) {
                System.out.println("Alpha-i " + alpha_Y.toString());
                System.out.println("Ri " + Ri_Y.toString());
                System.out.println("Mi " + Mi_YY.toString());
                System.out.println("Beta-i " + beta_Y[i].toString());
            }
        }
        // zSum() on the log-domain vectors aggregates via log-sum-exp,
        // yielding the (log) partition values.
        lZx = alpha_Y.zSum();
        constrainedlZx = constrained_alpha_Y.zSum();
        if (params.debugLvl > 1) {
            System.out.println("constrainedLZx " + constrainedlZx + ", wDotF " + wDotF
                    + " constrainedLZxBeta " + constrained_beta_Y[0].zSum());
        }
        if (grad != null) {
            // Gradient contribution: the normalized constrained feature expectations.
            for (int i = 0; i < grad.length; i++) {
                grad[i] += RobustMath.exp(constrainedExpF[i] - constrainedlZx);
            }
        }
        return constrainedlZx;
    }
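    /**
     * Node and edge marginals under the label constraints: a constrained
     * forward pass combined with the constrained betas from
     * computeBetaArray(), normalized by the constrained log partition value.
     */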
    protected void getMarginals(DataSequence dataSeq, FeatureGenerator featureGenerator, double lambda[], float nodeMargs[][], float edgeMargs[][][]) {
        if ((beta_Y == null) || (beta_Y.length < dataSeq.length())) {
            allocateAlphaBeta(2*dataSeq.length()+1);
        }
        computeBetaArray(dataSeq, lambda, featureGenerator);
        constrained_alpha_Y.assign(0);
        for (int i = 0; i < dataSeq.length(); i++) {
            // compute the Mi matrix
            if (dataSeq instanceof CandSegDataSequence)
                initMDone = SegmentTrainer.computeLogMi((CandSegDataSequence) dataSeq, i-1, i, (FeatureGeneratorNested) featureGenerator, lambda, Mi_YY, Ri_Y, reuseM, initMDone);
            else
                initMDone = SegmentTrainer.computeLogMi(featureGenerator, lambda, dataSeq, i, Mi_YY, Ri_Y, false, reuseM, initMDone);
            if (i > 0) {
                tmp_Y.assign(constrained_alpha_Y);
                RobustMath.logMult(Mi_YY, tmp_Y, constrained_newAlpha_Y, 1, 0, true);
                constrained_newAlpha_Y.assign(Ri_Y, sumFunc);
            } else {
                constrained_newAlpha_Y.assign(Ri_Y);
            }
            if (dataSeq.y(i) >= 0) {
                // Clamp the forward vector to the observed label at position i.
                double d = constrained_newAlpha_Y.get(dataSeq.y(i));
                constrained_newAlpha_Y.assign(RobustMath.LOG0);
                constrained_newAlpha_Y.set(dataSeq.y(i), d);
            }
            for (int y = 0; y < numY; y++) {
                // Unnormalized log marginals; constrained_alpha_Y still holds alpha[i-1] here.
                nodeMargs[i][y] = (float) (constrained_newAlpha_Y.get(y) + constrained_beta_Y[i].get(y));
                if (i > 0) {
                    for (int yprev = 0; yprev < numY; yprev++) {
                        edgeMargs[i][yprev][y] = (float) (constrained_alpha_Y.get(yprev) + constrained_beta_Y[i].get(y) + Ri_Y.get(y) + Mi_YY.get(yprev,y));
                    }
                }
            }
            constrained_alpha_Y.assign(constrained_newAlpha_Y);
        }
        double logZx = constrained_alpha_Y.zSum();
        // Normalize: subtract the log partition value and exponentiate.
        for (int i = 0; i < dataSeq.length(); i++) {
            for (int y = 0; y < numY; y++) {
                nodeMargs[i][y] = (float) Math.exp(nodeMargs[i][y] - logZx);
                assert(!Float.isNaN(nodeMargs[i][y]));
                if (i == 0) continue;
                for (int yprev = 0; yprev < numY; yprev++) {
                    edgeMargs[i][yprev][y] = (float) Math.exp(edgeMargs[i][yprev][y] - logZx);
                    assert(!Float.isNaN(edgeMargs[i][yprev][y]));
                }
            }
        }
    }
}