/** Trainer.java
*
* @author Sunita Sarawagi
* @version 1.3
*/
package iitb.CRF;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.StringTokenizer;
import riso.numerical.LBFGS;
import cern.colt.function.tdouble.DoubleDoubleFunction;
import cern.colt.function.tdouble.DoubleFunction;
import cern.colt.matrix.tdouble.DoubleMatrix1D;
import cern.colt.matrix.tdouble.DoubleMatrix2D;
import cern.colt.matrix.tdouble.impl.DenseDoubleMatrix1D;
import cern.colt.matrix.tdouble.impl.DenseDoubleMatrix2D;
/**
*
* @author Sunita Sarawagi
*
*/
public class Trainer {
protected int numF,numY;
protected double gradLogli[];
double diag[];
protected double lambda[];
protected boolean reuseM, initMDone=false, logProcessing=false;
protected double ExpF[], lZx;
double scale[], rLogScale[];
protected DoubleMatrix2D Mi_YY;
protected DoubleMatrix1D Ri_Y;
protected DoubleMatrix1D alpha_Y, newAlpha_Y;
protected DoubleMatrix1D beta_Y[];
protected DoubleMatrix1D tmp_Y;
static class MultFunc implements DoubleDoubleFunction {
public double apply(double a, double b) {return a*b;}
};
static class SumFunc implements DoubleDoubleFunction {
public double apply(double a, double b) {return a+b;}
};
static MultFunc multFunc = new MultFunc();
protected static SumFunc sumFunc = new SumFunc();
class MultSingle implements DoubleFunction {
public double multiplicator = 1.0;
public double apply(double a) {return a*multiplicator;}
};
MultSingle constMultiplier = new MultSingle();
protected DataIter diter;
protected FeatureGenerator featureGenerator;
protected CrfParams params;
protected EdgeGenerator edgeGen;
protected int icall;
protected float instanceWts[];
Evaluator evaluator = null;
protected FeatureGenCache featureGenCache;
protected double norm(double ar[]) {
double v = 0;
for (int f = 0; f < ar.length; f++)
v += ar[f]*ar[f];
return Math.sqrt(v);
}
public Trainer(CrfParams p) {
params = p;
}
public void train(CRF model, DataIter data, double[] l, Evaluator eval) {
trainInternal(model,data,l,eval,null,null);
}
public void train(CRF model, DataIter data, double[] l, Evaluator eval, float[] instanceWts) {
if (instanceWts==null) {
// this is to ensure backward compatibility with trainers who might have overridden the above function.
train(model,data,l,eval);
return;
}
trainInternal(model,data,l,eval,instanceWts,null);
}
public void train(CRF model, DataIter data, double[] l, Evaluator eval, float[] instanceWts, float misClassifyCost[][]) {
if ((instanceWts==null) && (misClassifyCost==null)) {
// this is to ensure backward compatibility with trainers who might have overridden the above function.
train(model,data,l,eval);
return;
}
trainInternal(model,data,l,eval,instanceWts, misClassifyCost);
}
// this last argument is ignored for logistic trainers on sequence data.
private void trainInternal(CRF model, DataIter data, double[] l, Evaluator eval, float[] instanceWts, float misClassifyCost[][]) {
init(model,data,l);
evaluator = eval;
this.instanceWts = instanceWts;
if (params.debugLvl > 0) {
Util.printDbg("Number of features :" + lambda.length);
}
doTrain();
}
protected void setInitValue(double lambda[]) {
if (params.miscOptions.getProperty("initValues") != null) {
// starting values stored in a file where each line has (featureName, value) pair
String fname = params.miscOptions.getProperty("initValues");
BufferedReader in;
try {
in = new BufferedReader(new FileReader(fname));
String line;
boolean idOrdered=Boolean.parseBoolean(params.miscOptions.getProperty("initValuesOrdered", "false"));
Hashtable<String, Double> initVals = new Hashtable<String, Double>();
for(int l = 0; ((line=in.readLine())!=null); l++) {
StringTokenizer entry = new StringTokenizer(line);
String featureName = entry.nextToken();
double fval = Double.parseDouble(entry.nextToken());
if (!idOrdered)
initVals.put(featureName,fval);
else
lambda[l] = fval;
}
if (!idOrdered) {
for (int j = 0 ; j < lambda.length ; j ++) {
String featureName = featureGenerator.featureName(j);
lambda[j] = (initVals.get(featureName) != null)?initVals.get(featureName):getInitValue();
}
}
return;
} catch (IOException e) {
e.printStackTrace();
System.err.println("ERROR: in file initialization, using default init process");
}
} else if (Boolean.parseBoolean(params.miscOptions.getProperty("initValuesUseExisting", "false"))) {
// use existing values of lambda from the model as starting point.
return;
} else
for (int j = 0 ; j < lambda.length ; j ++) {
lambda[j] = getInitValue();
}
}
double getInitValue() {
return params.initValue;
}
protected void init(CRF model, DataIter data, double[] l) {
edgeGen = model.edgeGen;
lambda = l;
numY = model.numY;
diter = data;
featureGenerator = model.featureGenerator;
numF = featureGenerator.numFeatures();
gradLogli = new double[numF];
diag = new double [ numF ]; // needed by the optimizer
ExpF = new double[lambda.length];
initMatrices();
reuseM = params.reuseM;
if (params.trainerType.equals("ll"))
logProcessing=true;
if ((data != null) && params.miscOptions.getProperty("cache", "false").equals("true")) {
featureGenCache = new FeatureGenCache(featureGenerator,reuseM);
featureGenCache.setDataKeys(data);
featureGenerator = featureGenCache;
} else
featureGenCache = null;
}
void initMatrices() {
Mi_YY = new DenseDoubleMatrix2D(numY,numY);
Ri_Y = new DenseDoubleMatrix1D(numY);
alpha_Y = new DenseDoubleMatrix1D(numY);
newAlpha_Y = new DenseDoubleMatrix1D(numY);
tmp_Y = new DenseDoubleMatrix1D(numY);
}
protected void doTrain() {
double f, xtol = 1.0e-16; // machine precision
int iprint[] = new int [2], iflag[] = new int[1];
icall=0;
iprint [0] = params.debugLvl-2;
iprint [1] = params.debugLvl-1;
iflag[0]=0;
double variables[] = lambda;
boolean positiveConstraint = params.miscOptions.getProperty("prior", "gaussian").equals("exp");
if (positiveConstraint) {
variables = new double[lambda.length];
}
setInitValue(variables);
do {
if (positiveConstraint) {
for (int i = 0; i < variables.length; i++) {
lambda[i] = Math.exp(variables[i]);
}
f = computeFunctionGradient(lambda,gradLogli);
for (int i = 0; i < gradLogli.length; i++) {
gradLogli[i] *= Math.exp(variables[i]);
}
} else {
f = computeFunctionGradient(lambda,gradLogli);
}
f = -1*f; // since the routine below minimizes and we want to maximize logli
for (int j = 0 ; j < lambda.length ; j ++) {
gradLogli[j] *= -1;
}
if ((evaluator != null) && (evaluator.evaluate() == false))
break;
try {
LBFGS.lbfgs (numF, params.mForHessian, variables, f, gradLogli, false, diag, iprint, params.epsForConvergence, xtol, iflag);
} catch (LBFGS.ExceptionWithIflag e) {
System.err.println( "CRF: lbfgs failed.\n"+e );
if (e.iflag == -1) {
System.err.println("Possible reasons could be: \n \t 1. Bug in the feature generation or data handling code\n\t 2. Not enough features to make observed feature value==expected value\n");
}
return;
}
icall += 1;
} while (( iflag[0] != 0) && (icall <= params.maxIters));
reInit();
}
protected double computeFunctionGradient(double lambda[], double grad[]) {
return computeFunctionGradient(lambda,grad,null,featureGenerator);
}
protected double finishGradCompute(double grad[], double lambda[], double logli) {
return logli;
}
protected void computeFeatureExpectedValue(DataIter dataIter, FeatureGenerator fgen, double lambda[], double expFVals[]) {
diter = dataIter;
featureGenCache = null;
for (int i = 0; i < expFVals.length; expFVals[i++] = 0);
if (fgen.numFeatures() > ExpF.length) {
// a different feature generator..
ExpF = new double[fgen.numFeatures()];
}
computeFunctionGradient(lambda,null,expFVals,fgen);
}
protected double addPrior(double lambda[], double grad[], double logli) {
if (params.miscOptions.getProperty("prior", "gaussian").equalsIgnoreCase("exp")) {
for (int f = 0; f < lambda.length; f++) {
grad[f] = -1*params.invSigmaSquare;
logli -= (lambda[f]*params.invSigmaSquare);
}
} else if (params.miscOptions.getProperty("prior", "gaussian").equalsIgnoreCase("laplaceApprox")) {
for (int f = 0; f < lambda.length; f++) {
double approxL = Math.sqrt(lambda[f]*lambda[f]+1e-3);
grad[f] = -1*lambda[f]/approxL*params.invSigmaSquare;
logli -= params.invSigmaSquare*approxL;
}
} else {
for (int f = 0; f < lambda.length; f++) {
grad[f] = -1*lambda[f]*params.invSigmaSquare;
logli -= ((lambda[f]*lambda[f])*params.invSigmaSquare)/2;
}
}
return logli;
}
protected double computeFunctionGradient(double lambda[], double grad[], double expFVals[],
FeatureGenerator fgenForExpValCompute) {
try {
double logli = 0;
if (grad != null) {
logli = addPrior(lambda,grad,logli);
}
diter.startScan();
initMDone=false;
if (featureGenCache != null) featureGenCache.startDataScan();
int numRecord;
for (numRecord = 0; diter.hasNext(); numRecord++) {
if (params.debugLvl > 1)
Util.printDbg("Read next seq: " + numRecord + " logli " + logli);
if (featureGenCache != null) featureGenCache.nextDataIndex();
logli += sumProduct(diter.next(),featureGenerator,lambda,grad,expFVals,
false, numRecord, fgenForExpValCompute);
}
logli = finishGradCompute(grad,lambda,logli);
if (params.debugLvl > 2) {
for (int f = 0; f < lambda.length; f++)
System.out.print(lambda[f] + " ");
System.out.println(" :x");
for (int f = 0; f < lambda.length; f++)
System.out.println(f + " " + featureGenerator.featureName(f) + " " + grad[f] + " ");
System.out.println(" :g");
}
if (params.debugLvl > 0) {
if (icall == 0) {
Util.printDbg("Number of training records " + numRecord);
}
if (grad != null) Util.printDbg("Iter " + icall + " loglikelihood "+logli + " gnorm " + norm(grad) + " xnorm "+ norm(lambda));
}
return logli;
} catch (Exception e) {
System.out.println("Alpha-i " + alpha_Y.toString());
System.out.println("Ri " + Ri_Y.toString());
System.out.println("Mi " + Mi_YY.toString());
e.printStackTrace();
}
return 0;
}
protected double sumProduct(DataSequence dataSeq, FeatureGenerator featureGenerator,
double lambda[], double grad[], double expFVals[], boolean onlyForwardPass, int numRecord,
FeatureGenerator fgenForExpVals) {
if (logProcessing) {
return sumProductLL(dataSeq,featureGenerator,lambda,grad,expFVals,onlyForwardPass,numRecord,fgenForExpVals);
}
boolean doScaling = params.doScaling;
alpha_Y.assign(1);
for (int f = 0; f < lambda.length; f++)
ExpF[f] = 0;
if ((beta_Y == null) || (beta_Y.length < dataSeq.length())) {
beta_Y = new DenseDoubleMatrix1D[2*dataSeq.length()];
for (int i = 0; i < beta_Y.length; i++)
beta_Y[i] = new DenseDoubleMatrix1D(numY);
scale = new double[2*dataSeq.length()];
}
float instanceWt = (float) ((instanceWts!=null)?instanceWts[numRecord]:1);
// compute beta values in a backward scan.
// also scale beta-values to 1 to avoid numerical problems.
scale[dataSeq.length()-1] = (doScaling)?numY:1;
beta_Y[dataSeq.length()-1].assign(1.0/scale[dataSeq.length()-1]);
for (int i = dataSeq.length()-1; i > 0; i--) {
// compute the Mi matrix
initMDone = computeLogMi(featureGenerator,lambda,dataSeq,i,Mi_YY,Ri_Y,true,reuseM,initMDone);
tmp_Y.assign(beta_Y[i]);
tmp_Y.assign(Ri_Y,multFunc);
RobustMath.Mult(Mi_YY, tmp_Y, beta_Y[i-1],1,0,false,edgeGen);
// Mi_YY.zMult(tmp_Y, beta_Y[i-1]);
// need to scale the beta-s to avoid overflow
scale[i-1] = doScaling?beta_Y[i-1].zSum():1;
if ((scale[i-1] < 1) && (scale[i-1] > -1))
scale[i-1] = 1;
constMultiplier.multiplicator = 1.0/scale[i-1];
beta_Y[i-1].assign(constMultiplier);
}
double thisSeqLogli = 0;
for (int i = 0; i < dataSeq.length(); i++) {
// compute the Mi matrix
initMDone = computeLogMi(featureGenerator,lambda,dataSeq,i,Mi_YY,Ri_Y,true,reuseM,initMDone);
if (i > 0) {
tmp_Y.assign(alpha_Y);
RobustMath.Mult(Mi_YY, tmp_Y, newAlpha_Y,1,0,true,edgeGen);
// Mi_YY.zMult(tmp_Y, newAlpha_Y,1,0,true);
newAlpha_Y.assign(Ri_Y,multFunc);
} else {
newAlpha_Y.assign(Ri_Y);
}
if ((grad !=null) || (expFVals!=null)) {
// find features that fire at this position..
fgenForExpVals.startScanFeaturesAt(dataSeq, i);
while (fgenForExpVals.hasNext()) {
Feature feature = fgenForExpVals.next();
int f = feature.index();
int yp = feature.y();
int yprev = feature.yprev();
float val = feature.value();
if ((grad != null) && (dataSeq.y(i) == yp) && (((i-1 >= 0) && (yprev == dataSeq.y(i-1))) || (yprev < 0))) {
grad[f] += instanceWt*val;
thisSeqLogli += val*lambda[f];
}
if (yprev < 0) {
ExpF[f] += newAlpha_Y.get(yp)*val*beta_Y[i].get(yp);
} else {
ExpF[f] += alpha_Y.get(yprev)*Ri_Y.get(yp)*Mi_YY.get(yprev,yp)*val*beta_Y[i].get(yp);
}
}
}
alpha_Y.assign(newAlpha_Y);
// now scale the alpha-s to avoid overflow problems.
constMultiplier.multiplicator = 1.0/scale[i];
alpha_Y.assign(constMultiplier);
if (params.debugLvl > 2) {
System.out.println("Alpha-i " + alpha_Y.toString());
System.out.println("Ri " + Ri_Y.toString());
System.out.println("Mi " + Mi_YY.toString());
System.out.println("Beta-i " + beta_Y[i].toString());
}
}
double Zx = alpha_Y.zSum();
thisSeqLogli -= log(Zx);
// correct for the fact that alpha-s were scaled.
for (int i = 0; i < dataSeq.length(); i++) {
thisSeqLogli -= log(scale[i]);
}
// update grad.
if (grad != null) {
for (int f = 0; f < grad.length; f++)
grad[f] -= instanceWt*ExpF[f]/Zx;
}
if (expFVals!=null) {
for (int f = 0; f < lambda.length; f++) {
expFVals[f] += ExpF[f]/Zx;
}
}
if (params.debugLvl > 1) {
System.out.println("Sequence " + thisSeqLogli + " log(Zx) " + Math.log(Zx) + " Zx " + Zx);
}
return thisSeqLogli*instanceWt;
}
static void computeLogMi(FeatureGenerator featureGen, double lambda[],
DoubleMatrix2D Mi_YY,
DoubleMatrix1D Ri_Y, boolean takeExp) {
computeLogMi(featureGen,lambda,Mi_YY,Ri_Y,takeExp,false,false);
}
static boolean computeLogMiInitDone(FeatureGenerator featureGen, double lambda[],
DoubleMatrix2D Mi_YY,
DoubleMatrix1D Ri_Y, double DEFAULT_VALUE) {
if ((Mi_YY==null) && (featureGen instanceof FeatureGenCache) && (DEFAULT_VALUE==0)) {
((FeatureGenCache)featureGen).noEdgeFeatures();
}
boolean mSet = false;
while (featureGen.hasNext()) {
Feature feature = featureGen.next();
int f = feature.index();
int yp = feature.y();
int yprev = feature.yprev();
float val = feature.value();
if (yprev == -1) {
// this is a single state feature.
// if default value was a negative_infinity, need to
// reset to.
double oldVal = Ri_Y.get(yp);
if (oldVal == DEFAULT_VALUE)
oldVal = 0;
Ri_Y.set(yp,oldVal+lambda[f]*val);
} else {
//if (Ri_Y.get(yp) == DEFAULT_VALUE)
// Ri_Y.set(yp,0);
if (Mi_YY != null) {
double oldVal = Mi_YY.get(yprev,yp);
if (oldVal == DEFAULT_VALUE) {
oldVal = 0;
}
Mi_YY.set(yprev,yp,oldVal+lambda[f]*val);
mSet = true;
}
}
}
return mSet;
}
public static double initLogMi(double defaultValue, Iterator constraints,
DoubleMatrix2D Mi, DoubleMatrix1D Ri) {
if (constraints != null) {
defaultValue = RobustMath.LOG0;
if (Mi != null) Mi.assign(defaultValue);
Ri.assign(defaultValue);
for (; constraints.hasNext();) {
Constraint constraint = (Constraint)constraints.next();
if (constraint.type() == Constraint.ALLOW_ONLY) {
RestrictConstraint cons = (RestrictConstraint)constraint;
/*
for (int c = cons.numAllowed()-1; c >= 0; c--) {
Ri.set(cons.allowed(c),0);
}
*/
for (cons.startScan(); cons.hasNext();) {
cons.advance();
int y = cons.y();
int yprev = cons.yprev();
if (yprev < 0) {
Ri.set(y,0);
} else {
if (Mi != null) Mi.set(yprev,y,0);
}
}
}
}
} else {
if (Mi != null) Mi.assign(defaultValue);
Ri.assign(defaultValue);
}
return defaultValue;
}
static boolean computeLogMi(FeatureGenerator featureGen, double lambda[],
DoubleMatrix2D Mi_YY,
DoubleMatrix1D Ri_Y, boolean takeExp,boolean reuseM, boolean initMDone) {
if (reuseM && initMDone) {
Mi_YY = null;
} else {
initMDone = false;
}
if (Mi_YY != null) Mi_YY.assign(0);
Ri_Y.assign(0);
initMDone = computeLogMiInitDone(featureGen,lambda,Mi_YY,Ri_Y,0);
if (takeExp) {
for(int r = (int) (Ri_Y.size()-1); r >= 0; r--) {
Ri_Y.setQuick(r,expE(Ri_Y.getQuick(r)));
if (Mi_YY != null)
for(int c = Mi_YY.columns()-1; c >= 0; c--) {
Mi_YY.setQuick(r,c,expE(Mi_YY.getQuick(r,c)));
}
}
}
return initMDone;
}
public static void computeLogMi(FeatureGenerator featureGen, double lambda[],
DataSequence dataSeq, int i,
DoubleMatrix2D Mi_YY,
DoubleMatrix1D Ri_Y, boolean takeExp) {
computeLogMi(featureGen, lambda, dataSeq, i, Mi_YY, Ri_Y, takeExp,false,false);
}
public static boolean computeLogMi(FeatureGenerator featureGen, double lambda[],
DataSequence dataSeq, int i,
DoubleMatrix2D Mi_YY,
DoubleMatrix1D Ri_Y, boolean takeExp, boolean reuseM, boolean initMDone) {
featureGen.startScanFeaturesAt(dataSeq, i);
return computeLogMi(featureGen, lambda, Mi_YY, Ri_Y, takeExp,reuseM, initMDone);
}
protected void allocateAlphaBeta(int newSize) {
beta_Y = new DoubleMatrix1D[newSize];
for (int i = 0; i < beta_Y.length; i++)
beta_Y[i] = newLogDoubleMatrix1D(numY);
}
protected DoubleMatrix1D newLogDoubleMatrix1D(int numY) {
return new DenseDoubleMatrix1D(numY);
}
protected DoubleMatrix2D newLogDoubleMatrix2D(int numR, int numC) {
return new DenseDoubleMatrix2D(numR,numC);
}
protected double sumProductLL(DataSequence dataSeq, FeatureGenerator featureGenerator, double lambda[],
double grad[], double expFVals[], boolean onlyForwardPass, int numRecord, FeatureGenerator fgenForExpVals) {
float instanceWt = (float) ((instanceWts!=null)?instanceWts[numRecord]:1);
for (int f = 0; f < ExpF.length; f++)
ExpF[f] = RobustMath.LOG0;
double gradThisInstance[] =grad;
if ((instanceWt != 1) && (grad != null)) {
gradThisInstance = new double[grad.length];
}
double thisSeqLogli = sumProductInner(dataSeq,featureGenerator,lambda,gradThisInstance
,onlyForwardPass, numRecord, ((grad != null)||(expFVals!=null))?fgenForExpVals:null);
thisSeqLogli -= lZx;
// update grad.
if (grad != null) {
for (int f = 0; f < grad.length; f++) {
grad[f] -= RobustMath.exp(ExpF[f]-lZx)*instanceWt;
if (gradThisInstance != grad) {
grad[f] += gradThisInstance[f]*instanceWt;
}
}
}
if (expFVals!=null) {
for (int f = 0; f < expFVals.length; f++) {
expFVals[f] += RobustMath.exp(ExpF[f]-lZx)*instanceWt;
}
}
if (params.debugLvl > 1) {
System.out.println("Sequence " + thisSeqLogli + " log(Zx) " + lZx + " Zx " + Math.exp(lZx));
}
return (grad == null)?-lZx:thisSeqLogli * instanceWt;
}
protected void getMarginals(DataSequence dataSeq, FeatureGenerator featureGenerator, double lambda[], float nodeMargs[][], float edgeMargs[][][]) {
allocateAlphaBeta(2*dataSeq.length()+1);
beta_Y = computeBetaArray(dataSeq,lambda,featureGenerator);
alpha_Y.assign(0);
for (int i = 0; i < dataSeq.length(); i++) {
// compute the Mi matrix
initMDone = computeLogMiTrainMode(featureGenerator,lambda,dataSeq,i,Mi_YY,Ri_Y,false,reuseM,initMDone);
if (i > 0) {
tmp_Y.assign(alpha_Y);
RobustMath.logMult(Mi_YY, tmp_Y, newAlpha_Y,1,0,true,edgeGen);
newAlpha_Y.assign(Ri_Y,sumFunc);
} else {
newAlpha_Y.assign(Ri_Y);
}
for (int y = 0; y < numY; y++) {
nodeMargs[i][y] = (float) (newAlpha_Y.get(y)+beta_Y[i].get(y));
if (i > 0) {
for (int yprev = 0; yprev < numY; yprev++) {
edgeMargs[i][yprev][y] = (float) (alpha_Y.get(yprev)+beta_Y[i].get(y)+Ri_Y.get(y)+Mi_YY.get(yprev,y));
}
}
}
alpha_Y.assign(newAlpha_Y);
}
double logZx = RobustMath.logSumExp(alpha_Y);
for (int i = 0; i < edgeMargs.length; i++) {
for (int y = 0; y < numY; y++) {
nodeMargs[i][y] = (float) Math.exp(nodeMargs[i][y] - logZx);
if (i==0) continue;
for (int yprev = 0; yprev < edgeMargs.length; yprev++) {
edgeMargs[i][yprev][y] = (float) Math.exp(edgeMargs[i][yprev][y]-logZx);
}
}
}
}
protected double sumProductInner(DataSequence dataSeq, FeatureGenerator featureGenerator, double lambda[],
double grad[], boolean onlyForwardPass, int numRecord, FeatureGenerator fgenForExpVals) {
if ((beta_Y == null) || (beta_Y.length < dataSeq.length())) {
allocateAlphaBeta(2*dataSeq.length()+1);
}
// compute beta values in a backward scan.
// also scale beta-values to 1 to avoid numerical problems.
if (!onlyForwardPass) {
beta_Y = computeBetaArray(dataSeq,lambda,featureGenerator);
}
alpha_Y.assign(0);
double thisSeqLogli = 0;
for (int i = 0; i < dataSeq.length(); i++) {
// compute the Mi matrix
initMDone = computeLogMiTrainMode(featureGenerator,lambda,dataSeq,i,Mi_YY,Ri_Y,false,reuseM,initMDone);
if (i > 0) {
tmp_Y.assign(alpha_Y);
RobustMath.logMult(Mi_YY, tmp_Y, newAlpha_Y,1,0,true,edgeGen);
newAlpha_Y.assign(Ri_Y,sumFunc);
} else {
newAlpha_Y.assign(Ri_Y);
}
if (fgenForExpVals != null) {
// find features that fire at this position..
fgenForExpVals.startScanFeaturesAt(dataSeq, i);
while (fgenForExpVals.hasNext()) {
Feature feature = fgenForExpVals.next();
int f = feature.index();
int yp = feature.y();
int yprev = feature.yprev();
float val = feature.value();
if ((grad != null) && (dataSeq.y(i) == yp) && (((i-1 >= 0) && (yprev == dataSeq.y(i-1))) || (yprev < 0))) {
grad[f] += val;
thisSeqLogli += val*lambda[f];
if (params.debugLvl > 2) {
System.out.println("Feature fired " + f + " " + feature);
}
}
if (Math.abs(val) < Double.MIN_VALUE) continue;
if (val < 0) {
System.out.println("ERROR: Cannot process negative feature values in log domains: "
+ "either disable the '-trainer=ll' flag or ensure feature values are not -ve");
continue;
}
if (yprev < 0) {
ExpF[f] = RobustMath.logSumExp(ExpF[f], newAlpha_Y.get(yp) + RobustMath.log(val) + beta_Y[i].get(yp));
} else {
ExpF[f] = RobustMath.logSumExp(ExpF[f], alpha_Y.get(yprev)+Ri_Y.get(yp)+Mi_YY.get(yprev,yp)+RobustMath.log(val)+beta_Y[i].get(yp));
}
}
}
alpha_Y.assign(newAlpha_Y);
if (params.debugLvl > 2) {
System.out.println("Alpha-i " + alpha_Y.toString());
System.out.println("Ri " + Ri_Y.toString());
System.out.println("Mi " + Mi_YY.toString());
System.out.println("Beta-i " + beta_Y[i].toString());
}
}
lZx = RobustMath.logSumExp(alpha_Y);
return thisSeqLogli;
}
protected DoubleMatrix1D[] computeBetaArray(DataSequence dataSeq, double[] lambda, FeatureGenerator featureGenerator) {
beta_Y[dataSeq.length()-1].assign(0);
for (int i = dataSeq.length()-1; i > 0; i--) {
// compute the Mi matrix
initMDone = computeLogMiTrainMode(featureGenerator,lambda,dataSeq,i,Mi_YY,Ri_Y,false,reuseM,initMDone);
tmp_Y.assign(beta_Y[i]);
tmp_Y.assign(Ri_Y,sumFunc);
RobustMath.logMult(Mi_YY, tmp_Y, beta_Y[i-1],1,0,false,edgeGen);
}
return beta_Y;
}
protected boolean computeLogMiTrainMode(FeatureGenerator featureGen, double lambda[],
DataSequence dataSeq, int i,
DoubleMatrix2D Mi_YY,
DoubleMatrix1D Ri_Y, boolean takeExp, boolean reuseM, boolean initMDone) {
return computeLogMi(featureGen,lambda,dataSeq,i,Mi_YY,Ri_Y,false,reuseM,initMDone) || initMDone;
}
static double log(double val) {
try {
return logE(val);
} catch (Exception e) {
System.out.println(e.getMessage());
e.printStackTrace();
}
return -1*Double.MAX_VALUE;
}
static double logE(double val) throws Exception {
double pr = Math.log(val);
if (Double.isNaN(pr) || Double.isInfinite(pr)) {
throw new Exception("Overflow error when taking log of " + val);
}
return pr;
}
static double expE(double val) {
double pr = RobustMath.exp(val);
if (Double.isNaN(pr) || Double.isInfinite(pr)) {
try {
throw new Exception("Overflow error when taking exp of " + val + "\n Try running the CRF with the following option \"trainer ll\" to perform computations in the log-space.");
} catch (Exception e) {
System.out.println(e.getMessage());
e.printStackTrace();
return Double.MAX_VALUE;
}
}
return pr;
}
static double expLE(double val) {
double pr = RobustMath.exp(val);
if (Double.isNaN(pr) || Double.isInfinite(pr)) {
try {
throw new Exception("Overflow error when taking exp of " + val
+ " you might need to redesign feature values so as to not reach such high values");
} catch (Exception e) {
System.out.println(e.getMessage());
e.printStackTrace();
return Double.MAX_VALUE;
}
}
return pr;
}
public void addFeatureVector(DataSequence dataSeq, double[] grad) {
for (int i = 0; i < dataSeq.length(); i++) {
// find features that fire at this position..
featureGenerator.startScanFeaturesAt(dataSeq, i);
while (featureGenerator.hasNext()) {
Feature feature = featureGenerator.next();
int f = feature.index();
int yp = feature.y();
int yprev = feature.yprev();
float val = feature.value();
if ((grad != null) && (dataSeq.y(i) == yp) && (((i-1 >= 0) && (yprev == dataSeq.y(i-1))) || (yprev < 0))) {
grad[f] += val;
}
}
}
}
public void reInit() {
initMDone=false;
}
}