package iitb.CRF;
import cern.colt.function.tdouble.*;
import cern.colt.matrix.tdouble.*;
import cern.colt.matrix.tdouble.impl.*;
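/**
 * Trainer subclass whose work matrices are allocated as sparse Colt matrices, or, when the
 * trainer type is "ll", as Log* matrices (sparse or dense depending on the "sparse" misc option).
 *
 * @author Sunita Sarawagi
 *
 */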
public class SparseTrainer extends Trainer {
// Set once in the constructor: true when params.trainerType equals "ll".
// initMatrices() reads it to choose between the Log* matrix factories and plain sparse matrices.
boolean logTrainer;
/** Value-only function object that maps {@code x} to {@code Math.exp(x)}. */
static class ExpFunc implements DoubleFunction {
    public double apply(double x) {
        final double exponentiated = Math.exp(x);
        return exponentiated;
    }
}
/**
 * Two-index function object that exponentiates the supplied value.
 * Both integer arguments are accepted to satisfy the interface and are otherwise ignored.
 */
static class ExpFunc2D implements IntIntDoubleFunction {
    public double apply(int unusedFirst, int unusedSecond, double value) {
        final double exponentiated = Math.exp(value);
        return exponentiated;
    }
}
/**
 * Single-index function object that exponentiates the supplied value.
 * The integer argument is accepted to satisfy the interface and is otherwise ignored.
 */
static class ExpFunc1D implements IntDoubleFunction {
    public double apply(int unusedIndex, double value) {
        final double exponentiated = Math.exp(value);
        return exponentiated;
    }
}
// Shared exp function object; computeMi() passes it to assign(...) on both Ri_Y and Mi_YY.
static ExpFunc expFunc = new ExpFunc();
// Shared (int, double) exp function object; not referenced anywhere else in the visible part of this file.
static IntDoubleFunction expFunc1D = new ExpFunc1D();
// Shared (int, int, double) exp function object; its only other mention here is a commented-out
// forEachNonZero call inside computeMi().
static IntIntDoubleFunction expFunc2D = new ExpFunc2D();
public SparseTrainer(CrfParams p) {
super(p);
params = p;
logTrainer = params.trainerType.equals("ll");
}
/**
 * Runs one complete training pass: calls {@code init(model, data, l)}, stores the evaluator,
 * prints the feature count when debugging is enabled, and finally invokes {@code doTrain()}.
 *
 * @param model forwarded to {@code init}
 * @param data  forwarded to {@code init}
 * @param l     forwarded to {@code init}
 * @param eval  stored in the {@code evaluator} field after {@code init} has returned
 */
public void train(CRF model, DataIter data, double[] l, Evaluator eval) {
    init(model, data, l);
    // Assigned only after init(...) so the statement order of the original is kept.
    evaluator = eval;

    final boolean debugEnabled = params.debugLvl > 0;
    if (debugEnabled) {
        final String featureCountMessage = "Number of features :" + lambda.length;
        Util.printDbg(featureCountMessage);
    }

    doTrain();
}
/**
 * Allocates a vector of one of the Log* implementations.
 * The choice is driven by the "sparse" entry of {@code params.miscOptions} (default "false"):
 * a value that parses as {@code true} yields a {@code LogSparseDoubleMatrix1D}, anything else a
 * {@code LogDenseDoubleMatrix1D}.
 *
 * @param numY size passed to the vector constructor
 * @return the newly created vector
 */
protected DoubleMatrix1D newLogDoubleMatrix1D(int numY) {
    final String sparseSetting = params.miscOptions.getProperty("sparse", "false");
    return Boolean.parseBoolean(sparseSetting)
            ? new LogSparseDoubleMatrix1D(numY)
            : new LogDenseDoubleMatrix1D(numY);
}
/**
 * Allocates a matrix of one of the Log* implementations.
 * The choice is driven by the "sparse" entry of {@code params.miscOptions} (default "false"):
 * a value that parses as {@code true} yields a {@code LogSparseDoubleMatrix2D}, anything else a
 * {@code LogDenseDoubleMatrix2D}.
 *
 * @param numR first size argument passed to the matrix constructor
 * @param numC second size argument passed to the matrix constructor
 * @return the newly created matrix
 */
protected DoubleMatrix2D newLogDoubleMatrix2D(int numR, int numC) {
    final String sparseSetting = params.miscOptions.getProperty("sparse", "false");
    return Boolean.parseBoolean(sparseSetting)
            ? new LogSparseDoubleMatrix2D(numR, numC)
            : new LogDenseDoubleMatrix2D(numR, numC);
}
/**
 * (Re)allocates the five work containers Mi_YY, Ri_Y, alpha_Y, newAlpha_Y and tmp_Y, all sized
 * by numY. When {@code logTrainer} is set they come from the newLogDoubleMatrix* factories;
 * otherwise plain SparseDoubleMatrix2D / SparseDoubleMatrix1D instances are created.
 */
void initMatrices() {
    if (logTrainer) {
        // Factory calls are kept in the original order because the factories are overridable.
        Mi_YY = newLogDoubleMatrix2D(numY, numY);
        Ri_Y = newLogDoubleMatrix1D(numY);
        alpha_Y = newLogDoubleMatrix1D(numY);
        newAlpha_Y = newLogDoubleMatrix1D(numY);
        tmp_Y = newLogDoubleMatrix1D(numY);
        return;
    }

    Mi_YY = new SparseDoubleMatrix2D(numY, numY);
    Ri_Y = new SparseDoubleMatrix1D(numY);
    alpha_Y = new SparseDoubleMatrix1D(numY);
    newAlpha_Y = new SparseDoubleMatrix1D(numY);
    tmp_Y = new SparseDoubleMatrix1D(numY);
}
/*
protected double computeFunctionGradient(double lambda[], double grad[], double expFVals[]) {
if (params.trainerType.equals("ll"))
return computeFunctionGradientLL(lambda, grad);
double logli = 0;
try {
for (int f = 0; f < lambda.length; f++) {
grad[f] = -1*lambda[f]*params.invSigmaSquare;
logli -= ((lambda[f]*lambda[f])*params.invSigmaSquare)/2;
}
boolean doScaling = params.doScaling;
diter.startScan();
if (featureGenCache != null) featureGenCache.startDataScan();
for (int numRecord = 0; diter.hasNext(); numRecord++) {
DataSequence dataSeq = (DataSequence)diter.next();
if (featureGenCache != null) featureGenCache.nextDataIndex();
if (params.debugLvl > 1) {
Util.printDbg("Read next seq: " + numRecord + " logli " + logli);
}
alpha_Y.assign(1);
for (int f = 0; f < lambda.length; f++)
ExpF[f] = 0;
if ((beta_Y == null) || (beta_Y.length < dataSeq.length())) {
beta_Y = new DoubleMatrix1D[2*dataSeq.length()];
for (int i = 0; i < beta_Y.length; i++)
beta_Y[i] = new SparseDoubleMatrix1D(numY);
scale = new double[2*dataSeq.length()];
}
// compute beta values in a backward scan.
// also scale beta-values to 1 to avoid numerical problems.
scale[dataSeq.length()-1] = (doScaling)?numY:1;
beta_Y[dataSeq.length()-1].assign(1.0/scale[dataSeq.length()-1]);
for (int i = dataSeq.length()-1; i > 0; i--) {
if (params.debugLvl > 2) {
Util.printDbg("Features fired");
//featureGenerator.startScanFeaturesAt(dataSeq, i);
//while (featureGenerator.hasNext()) {
//Feature feature = featureGenerator.next();
//Util.printDbg(feature.toString());
//}
}
// compute the Mi matrix
computeMi(featureGenerator,lambda,dataSeq,i,Mi_YY,Ri_Y);
tmp_Y.assign(beta_Y[i]);
tmp_Y.assign(Ri_Y,multFunc);
// RobustMath.Mult(Mi_YY, tmp_Y, beta_Y[i-1],1,0,false,edgeGen);
Mi_YY.zMult(tmp_Y, beta_Y[i-1]);
// need to scale the beta-s to avoid overflow
scale[i-1] = doScaling?beta_Y[i-1].zSum():1;
if ((scale[i-1] < 1) && (scale[i-1] > -1))
scale[i-1] = 1;
constMultiplier.multiplicator = 1.0/scale[i-1];
beta_Y[i-1].assign(constMultiplier);
}
double thisSeqLogli = 0;
for (int i = 0; i < dataSeq.length(); i++) {
// compute the Mi matrix
computeMi(featureGenerator,lambda,dataSeq,i,Mi_YY,Ri_Y);
// find features that fire at this position..
featureGenerator.startScanFeaturesAt(dataSeq, i);
if (i > 0) {
// tmp_Y.assign(alpha_Y);
// RobustMath.Mult(Mi_YY, tmp_Y, newAlpha_Y,1,0,true,edgeGen);
Mi_YY.zMult(alpha_Y, newAlpha_Y,1,0,true);
newAlpha_Y.assign(Ri_Y,multFunc);
} else {
newAlpha_Y.assign(Ri_Y);
}
while (featureGenerator.hasNext()) {
Feature feature = featureGenerator.next();
int f = feature.index();
int yp = feature.y();
int yprev = feature.yprev();
float val = feature.value();
if ((dataSeq.y(i) == yp) && (((i-1 >= 0) && (yprev == dataSeq.y(i-1))) || (yprev < 0))) {
grad[f] += val;
thisSeqLogli += val*lambda[f];
}
if (yprev < 0) {
ExpF[f] += newAlpha_Y.get(yp)*val*beta_Y[i].get(yp);
} else {
ExpF[f] += alpha_Y.get(yprev)*Ri_Y.get(yp)*Mi_YY.get(yprev,yp)*val*beta_Y[i].get(yp);
}
}
alpha_Y.assign(newAlpha_Y);
// now scale the alpha-s to avoid overflow problems.
constMultiplier.multiplicator = 1.0/scale[i];
alpha_Y.assign(constMultiplier);
if (params.debugLvl > 2) {
System.out.println("Alpha-i " + alpha_Y.toString());
System.out.println("Ri " + Ri_Y.toString());
System.out.println("Mi " + Mi_YY.toString());
System.out.println("Beta-i " + beta_Y[i].toString());
}
//badVector(alpha_Y);
}
double Zx = alpha_Y.zSum();
//if (Zx == 0) {
//Zx = (Double.MIN_VALUE*100000000);
//}
thisSeqLogli -= log(Zx);
// correct for the fact that alpha-s were scaled.
for (int i = 0; i < dataSeq.length(); i++) {
thisSeqLogli -= log(scale[i]);
}
if (thisSeqLogli > 0) {
System.out.println("This is shady: something is wrong Pr(y|x) > 1!");
}
logli += thisSeqLogli;
// update grad.
for (int f = 0; f < grad.length; f++)
grad[f] -= ExpF[f]/Zx;
if (params.debugLvl > 1) {
System.out.println("Sequence " + thisSeqLogli + " " + logli);
}
}
if (params.debugLvl > 2) {
for (int f = 0; f < lambda.length; f++)
System.out.print(lambda[f] + " ");
System.out.println(" :x");
for (int f = 0; f < lambda.length; f++)
System.out.print(grad[f] + " ");
System.out.println(" :g");
}
if (params.debugLvl > 0)
Util.printDbg("Iter " + icall + " log likelihood "+logli + " norm(grad logli) " + norm(grad) + " norm(x) "+ norm(lambda));
} catch (Exception e) {
System.out.println("Alpha-i " + alpha_Y.toString());
System.out.println("Ri " + Ri_Y.toString());
System.out.println("Mi " + Mi_YY.toString());
e.printStackTrace();
System.exit(0);
}
return logli;
}
*/
/**
 * Resets both output containers to 0 and then lets {@code computeLogMiInitDone} fill them from
 * the feature generator. This overload does not call {@code startScanFeaturesAt} itself; the
 * overloads that take a sequence and position do that before delegating here.
 *
 * @param featureGen feature source handed to {@code computeLogMiInitDone}
 * @param lambda     weights handed to {@code computeLogMiInitDone}
 * @param Mi_YY      matrix that is reset and then filled
 * @param Ri_Y       vector that is reset and then filled
 */
static void computeLogMi(FeatureGenerator featureGen, double[] lambda,
        DoubleMatrix2D Mi_YY,
        DoubleMatrix1D Ri_Y) {
    // The same value is used for the reset and as the default passed to computeLogMiInitDone.
    final double initialValue = 0;
    Mi_YY.assign(initialValue);
    Ri_Y.assign(initialValue);
    computeLogMiInitDone(featureGen, lambda, Mi_YY, Ri_Y, initialValue);
}
/**
 * Fills Mi_YY and Ri_Y for position {@code i} of {@code dataSeq} and then replaces every entry
 * by its exponential: the log-valued containers are computed first, after which
 * {@code expFunc} is applied to Ri_Y and then to Mi_YY.
 *
 * @param featureGen feature source; its scan is started at ({@code dataSeq}, {@code i})
 * @param lambda     weights forwarded to the log computation
 * @param dataSeq    sequence whose features are scanned
 * @param i          position within {@code dataSeq}
 * @param Mi_YY      matrix to fill and exponentiate
 * @param Ri_Y       vector to fill and exponentiate
 */
static void computeMi(FeatureGenerator featureGen, double[] lambda,
        DataSequence dataSeq, int i,
        DoubleMatrix2D Mi_YY,
        DoubleMatrix1D Ri_Y) {
    // The position-taking computeLogMi overload starts the feature scan at (dataSeq, i) and
    // then fills both containers: exactly the two calls this method used to spell out.
    computeLogMi(featureGen, lambda, dataSeq, i, Mi_YY, Ri_Y);

    Ri_Y.assign(expFunc);
    Mi_YY.assign(expFunc);
    // Disabled alternative kept for reference: Mi_YY.forEachNonZero(expFunc2D);
    // NOTE(review): presumably that variant would skip zero cells, which assign(...) does not; confirm.
}
/**
 * Position-aware variant: starts the feature scan at ({@code dataSeq}, {@code i}) and then
 * delegates to the four-argument {@code computeLogMi}, which resets and fills the containers.
 *
 * @param featureGen feature source to position and read
 * @param lambda     weights forwarded to the four-argument overload
 * @param dataSeq    sequence whose features are scanned
 * @param i          position within {@code dataSeq}
 * @param Mi_YY      matrix to fill
 * @param Ri_Y       vector to fill
 */
static void computeLogMi(FeatureGenerator featureGen, double[] lambda,
        DataSequence dataSeq, int i,
        DoubleMatrix2D Mi_YY,
        DoubleMatrix1D Ri_Y) {
    // The scan must be positioned before the fill, which only reads from the generator.
    featureGen.startScanFeaturesAt(dataSeq, i);
    SparseTrainer.computeLogMi(featureGen, lambda, Mi_YY, Ri_Y);
}
/*
protected double computeFunctionGradientLL(double lambda[], double grad[]) {
double logli = 0;
try {
for (int f = 0; f < lambda.length; f++) {
grad[f] = -1*lambda[f]*params.invSigmaSquare;
logli -= ((lambda[f]*lambda[f])*params.invSigmaSquare)/2;
}
diter.startScan();
if (featureGenCache != null) featureGenCache.startDataScan();
for (int numRecord = 0; diter.hasNext(); numRecord++) {
DataSequence dataSeq = (DataSequence)diter.next();
if (featureGenCache != null) featureGenCache.nextDataIndex();
if (params.debugLvl > 1) {
Util.printDbg("Read next seq: " + numRecord + " logli " + logli);
}
alpha_Y.assign(0);
for (int f = 0; f < lambda.length; f++)
ExpF[f] = RobustMath.LOG0;
if ((beta_Y == null) || (beta_Y.length < dataSeq.length())) {
beta_Y = new DoubleMatrix1D[2*dataSeq.length()];
for (int i = 0; i < beta_Y.length; i++)
beta_Y[i] = newLogDoubleMatrix1D(numY);
}
// compute beta values in a backward scan.
// also scale beta-values to 1 to avoid numerical problems.
beta_Y[dataSeq.length()-1].assign(0);
for (int i = dataSeq.length()-1; i > 0; i--) {
if (params.debugLvl > 3) {
Util.printDbg("Features fired");
featureGenerator.startScanFeaturesAt(dataSeq, i);
while (featureGenerator.hasNext()) {
Feature feature = featureGenerator.next();
Util.printDbg(feature.toString());
}
}
// compute the Mi matrix
computeLogMi(featureGenerator,lambda,dataSeq,i,Mi_YY,Ri_Y);
tmp_Y.assign(beta_Y[i]);
tmp_Y.assign(Ri_Y,sumFunc);
Mi_YY.zMult(tmp_Y, beta_Y[i-1],1,0,false);
}
double thisSeqLogli = 0;
for (int i = 0; i < dataSeq.length(); i++) {
// compute the Mi matrix
computeLogMi(featureGenerator,lambda,dataSeq,i,Mi_YY,Ri_Y);
// find features that fire at this position..
featureGenerator.startScanFeaturesAt(dataSeq, i);
if (i > 0) {
//tmp_Y.assign(alpha_Y);
Mi_YY.zMult(alpha_Y, newAlpha_Y,1,0,true);
newAlpha_Y.assign(Ri_Y,sumFunc);
} else {
newAlpha_Y.assign(Ri_Y);
}
while (featureGenerator.hasNext()) {
Feature feature = featureGenerator.next();
int f = feature.index();
int yp = feature.y();
int yprev = feature.yprev();
float val = feature.value();
if ((dataSeq.y(i) == yp) && (((i-1 >= 0) && (yprev == dataSeq.y(i-1))) || (yprev < 0))) {
grad[f] += val;
thisSeqLogli += val*lambda[f];
}
if (yprev < 0) {
ExpF[f] = RobustMath.logSumExp(ExpF[f], newAlpha_Y.get(yp) + RobustMath.log(val) + beta_Y[i].get(yp));
} else {
ExpF[f] = RobustMath.logSumExp(ExpF[f], alpha_Y.get(yprev)+Ri_Y.get(yp)+Mi_YY.get(yprev,yp)+RobustMath.log(val)+beta_Y[i].get(yp));
}
}
alpha_Y.assign(newAlpha_Y);
if (params.debugLvl > 2) {
System.out.println("Alpha-i " + alpha_Y.toString());
System.out.println("Ri " + Ri_Y.toString());
System.out.println("Mi " + Mi_YY.toString());
System.out.println("Beta-i " + beta_Y[i].toString());
}
}
double lZx = alpha_Y.zSum();
thisSeqLogli -= lZx;
logli += thisSeqLogli;
// update grad.
for (int f = 0; f < grad.length; f++)
grad[f] -= RobustMath.exp(ExpF[f]-lZx);
if (params.debugLvl > 1) {
System.out.println("Sequence " + thisSeqLogli + " " + logli );
}
if (thisSeqLogli > 0) {
System.out.println("This is shady: something is wrong Pr(y|x) > 1!");
}
}
if (params.debugLvl > 2) {
for (int f = 0; f < lambda.length; f++)
System.out.print(lambda[f] + " ");
System.out.println(" :x");
for (int f = 0; f < lambda.length; f++)
System.out.print(grad[f] + " ");
System.out.println(" :g");
}
if (params.debugLvl > 0)
Util.printDbg("Iteration " + icall + " log-likelihood "+logli + " norm(grad logli) " + norm(grad) + " norm(x) "+ norm(lambda));
} catch (Exception e) {
System.out.println("Alpha-i " + alpha_Y.toString());
System.out.println("Ri " + Ri_Y.toString());
System.out.println("Mi " + Mi_YY.toString());
e.printStackTrace();
System.exit(0);
}
return logli;
}
*/
}