/* BSegmentTrainer.java
 * Created on Apr 2, 2005
 *
 * @author Sunita Sarawagi
 * @version 1.3
 */
package iitb.BSegmentCRF;

import iitb.CRF.CRF;
import iitb.CRF.CrfParams;
import iitb.CRF.DataIter;
import iitb.CRF.DataSequence;
import iitb.CRF.FeatureGenerator;
import iitb.CRF.RobustMath;
import iitb.CRF.SegmentTrainer;
import iitb.CRF.Segmentation;
import cern.colt.matrix.tdouble.DoubleMatrix1D;

/**
 * Trainer for a {@link BSegmentCRF}: a {@link SegmentTrainer} that takes its
 * features from a {@link BFeatureGenerator} and evaluates them through a
 * {@link FeatureStoreCache}.
 *
 * <p>Each feature carries a start and end position plus two flags,
 * {@code startOpen()} and {@code endOpen()}. Besides the alpha/beta vectors
 * inherited from the superclass, this class maintains {@code openAlpha} and
 * {@code openBeta} vectors so that segments longer than
 * {@code bfgen.maxBoundaryGap()} can be handled incrementally.
 *
 * <p>All vectors are created with {@code newLogDoubleMatrix1D}, reset with
 * {@code RobustMath.LOG0} and accumulated with
 * {@code RobustMath.logSumExpFunc}, i.e. they appear to hold log-domain values.
 * {@code sumFunc}, {@code tmp_Y}, {@code Ri_Y}, {@code Mi_YY}, {@code beta_Y},
 * {@code alpha_Y_Array}, {@code alpha_Y_ArrayM}, {@code initAlphaMDone},
 * {@code newAlpha_Y}, {@code allZeroVector}, {@code ExpF}, {@code lZx},
 * {@code icall}, {@code reuseM}, {@code numY} and {@code params} are inherited
 * and not visible in this file.
 *
 * <p>The methods share mutable scratch vectors, so statement order inside them
 * is significant.
 *
 * @author Sunita Sarawagi
 *
 */
class BSegmentTrainer extends SegmentTrainer {
    /** Feature generator of the model; set in {@link #init(CRF, DataIter, double[])}. */
    BFeatureGenerator bfgen;
    /** Per-position "open" backward vectors; filled by the backward loop of sumProductInner. */
    DoubleMatrix1D openBeta[];
    /** Per-position "open" forward vectors; filled by the forward loop of sumProductInner. */
    DoubleMatrix1D openAlpha[];
    /** Output buffers for fstore.deltaR_LShift / fstore.deltaR_RShift. */
    DoubleMatrix1D deltaRi,openDeltaRi;
    /** Scratch vectors; only element 0 is used in this file (as the running aMdR accumulator). */
    DoubleMatrix1D aMdRs[]=null;
    /**
     * Terms indexed by segment length (e - s) for the current and the previous
     * start position; the two arrays are swapped at each start position in
     * computeFeatureGradOpt.
     */
    DoubleMatrix1D exactETerms[], prevExactETerms[];
    /** termSE is read for features with both boundaries open; termSEPart is allocated but not otherwise used in this file. */
    DoubleMatrix1D termSE = null, termSEPart;
    /** exactETerm is read for start-open/end-exact features, exactSTerm for start-exact/end-open features. */
    DoubleMatrix1D exactETerm=null, exactSTerm=null;
    /** Allocated in init; the only other rbetaTerms used in this file is a local of the same name in exactCompute. */
    DoubleMatrix1D rbetaTerms = null;
    /** Scratch vector used in computeFeatureGradOpt (exactCompute shadows it with a local). */
    DoubleMatrix1D rbeta=null;
    /** iOpenR is the range-tracked matrix used in computeFeatureGradOpt; endOpenRi is allocated but not otherwise used in this file. */
    MatrixWithRange iOpenR = null,endOpenRi;
    /** Iterator over the features held by {@link #fstore}. */
    FeatureStore.Iter fIter;

    /**
     * A vector paired with the {@code [start, end]} range it currently
     * describes. Instances are handed to the FeatureStore
     * increment/decrement methods.
     */
    static class MatrixWithRange {
        DoubleMatrix1D mat;
        int start;
        int end;
        /**
         * Wraps the given vector; the range fields keep their default value
         * until one of the init/assign methods is called.
         *
         * @param mat backing vector (stored by reference, not copied)
         */
        public MatrixWithRange(DoubleMatrix1D mat) {
            this.mat = mat;
        }
        /**
         * Sets the range and resets every entry of the vector to
         * {@code RobustMath.LOG0}.
         */
        void init(int start, int end) {
            this.start = start;
            this.end = end;
            mat.assign(RobustMath.LOG0);
        }
        /*
        void assign(MatrixWithRange arg) {
            this.start = arg.start;
            this.end = arg.end;
            this.mat.assign(arg.mat);
        }
        */
        /**
         * Initialize with all features where the end boundary is open.
         *
         * <p>Resets the range to {@code [start, min(start-1, end)]} (an empty
         * range when {@code end >= start}) and then calls
         * {@code fstore.incrementRightB(null, this, startOpen)} once per
         * position in {@code start..end}. Presumably each call extends
         * {@code this.end} by one position and folds in the matching features
         * (confirm in FeatureStore).
         *
         * @param start first position of the range
         * @param end last position of the range
         * @param fstore feature store that performs the increments
         * @param startOpen forwarded unchanged to incrementRightB
         */
        public void init(int start, int end, FeatureStore fstore,boolean startOpen) {
            init(start,Math.min(start-1,end));
            for (int i = start; i <= end; i++) {
                fstore.incrementRightB(null,this,startOpen);
            }
        }
        /**
         * Initialize with all features where start and end boundaries are
         * open; same as the four-argument init with {@code startOpen = true}.
         */
        public void init(int start, int end, FeatureStore fstore) {
            init(start,end,fstore,true);
        }
        /**
         * Copies the range from {@code arg} and the values from {@code ri_Y}.
         * Note that the values of {@code arg.mat} are not used.
         *
         * @param arg source of the start/end range
         * @param ri_Y source of the vector values
         */
        public void assign(MatrixWithRange arg, DoubleMatrix1D ri_Y) {
            this.start = arg.start;
            this.end = arg.end;
            this.mat.assign(ri_Y);
        }
    }

    /** Range-tracked matrix used by the forward and backward loops of sumProductInner. */
    MatrixWithRange openRi;
    /** Feature store; re-initialised per sequence in sumProductInner. */
    FeatureStoreCache fstore;
    /**
     * Per-feature sum of the values of features that hold in the training
     * data; accumulated in computeFeatureGradOpt while {@code icall == 0} and
     * consumed in finishGradCompute.
     */
    double F[];

    public BSegmentTrainer(CrfParams p) {
        super(p);
    }

    /**
     * Runs the superclass initialisation, then allocates this class's buffers.
     *
     * <p>Each scratch vector has {@code numY} entries. The feature store is
     * created with caching enabled only when the misc option {@code "cache"}
     * equals {@code "true"} (default {@code "false"}).
     *
     * @param model must be a {@link BSegmentCRF}; it is cast unconditionally
     * @param data forwarded to the superclass
     * @param l forwarded to the superclass
     */
    protected void init(CRF model, DataIter data, double[] l) {
        super.init(model,data,l);
        bfgen = ((BSegmentCRF)model).bfgen;
        F = new double[bfgen.numFeatures()];
        aMdRs = new DoubleMatrix1D[3*bfgen.maxBoundaryGap()];
        for (int i = 0; i < aMdRs.length; i++) {
            aMdRs[i] = newLogDoubleMatrix1D(numY);
        }
        // One slot per segment length 0..maxBoundaryGap.
        exactETerms = new DoubleMatrix1D[bfgen.maxBoundaryGap()+1];
        prevExactETerms = new DoubleMatrix1D[bfgen.maxBoundaryGap()+1];
        for (int i = 0; i < exactETerms.length; i++) {
            prevExactETerms[i] = newLogDoubleMatrix1D(numY);
            exactETerms[i] = newLogDoubleMatrix1D(numY);
        }
        termSE = newLogDoubleMatrix1D(numY);
        termSEPart = newLogDoubleMatrix1D(numY);
        exactETerm = newLogDoubleMatrix1D(numY);
        exactSTerm = newLogDoubleMatrix1D(numY);
        rbetaTerms = newLogDoubleMatrix1D(numY);
        rbeta = newLogDoubleMatrix1D(numY);
        fstore = new FeatureStoreCache(model.params.miscOptions.getProperty("cache","false").equals("true"), reuseM);
        fIter = fstore.getIterator();
        iOpenR = new MatrixWithRange(newLogDoubleMatrix1D(numY));
        openRi = new MatrixWithRange(newLogDoubleMatrix1D(numY));
        endOpenRi = new MatrixWithRange(newLogDoubleMatrix1D(numY));
        deltaRi = newLogDoubleMatrix1D(numY);
        openDeltaRi = newLogDoubleMatrix1D(numY);
    }

    /**
     * Lets the superclass (re)allocate its arrays, then replaces
     * {@code openAlpha} and {@code openBeta} with fresh arrays of
     * {@code newSize} vectors. Previous contents are discarded.
     */
    protected void allocateAlphaBeta(int newSize) {
        super.allocateAlphaBeta(newSize);
        openAlpha = new DoubleMatrix1D[newSize];
        for (int i = 0; i < openAlpha.length; i++) {
            openAlpha[i] = newLogDoubleMatrix1D(numY);
        }
        openBeta = new DoubleMatrix1D[newSize];
        for (int i = 0; i < openBeta.length; i++) {
            openBeta[i] = newLogDoubleMatrix1D(numY);
        }
    }

    /**
     * Tests whether a feature fires on the labelled segmentation.
     *
     * <p>The feature holds when all of the following are true:
     * <ul>
     * <li>its start and end positions lie in the same segment;</li>
     * <li>that segment's label equals {@code feature.y()};</li>
     * <li>if the start is not open, the segment starts at {@code feature.start()};</li>
     * <li>if the end is not open, the segment ends at {@code feature.end()};</li>
     * <li>if {@code feature.yprev() >= 0}, a previous segment exists and its
     *     label equals {@code feature.yprev()}.</li>
     * </ul>
     *
     * @param feature feature to test
     * @param data labelled segmentation
     * @return {@code true} when every condition above is met
     */
    boolean holdsInTrainingData(BFeature feature, Segmentation data) {
        if (data.getSegmentId(feature.start()) != data.getSegmentId(feature.end()))
            return false;
        int segNum = data.getSegmentId(feature.start());
        if (data.segmentLabel(segNum) != feature.y())
            return false;
        if (!feature.startOpen() && (data.segmentStart(segNum) != feature.start()))
            return false;
        if (!feature.endOpen() && (data.segmentEnd(segNum) != feature.end()))
            return false;
        // A feature that constrains the previous label cannot hold on the first segment.
        if ((segNum==0) && (feature.yprev() >= 0))
            return false;
        if ((segNum > 0) && (feature.yprev() >= 0) && (data.segmentLabel(segNum-1) != feature.yprev()))
            return false;
        return true;
    }

    /**
     * Brute-force computation over every (start, end) pair with start in
     * {@code le..ls} and end in {@code rs..re}.
     *
     * <p>Its only references in this file are the commented-out assertions in
     * computeFeatureGradOpt, where it serves as a cross-check of the
     * incremental terms.
     *
     * <p>For each start {@code ip} (from {@code ls} down to {@code le}) and
     * each end {@code i}, it combines the segment vector with
     * {@code beta_Y[i]}. When {@code multiplyAlpha} is set and {@code ip > 0},
     * it also combines {@code alpha_Y_ArrayM[ip]}. Everything is accumulated
     * with logSumExpFunc.
     *
     * <p>Uses freshly allocated locals that shadow the same-named fields, but
     * still writes to the shared {@code tmp_Y} and advances {@code fstore}.
     *
     * @param ls largest start position
     * @param le smallest start position
     * @param rs smallest end position
     * @param re largest end position
     * @param multiplyAlpha whether to include the alpha term for {@code ip > 0}
     * @return a newly allocated vector with the accumulated result
     */
    DoubleMatrix1D exactCompute(int ls, int le, int rs, int re, boolean multiplyAlpha) {
        int start = ls;
        MatrixWithRange openRi = new MatrixWithRange(newLogDoubleMatrix1D(numY));
        MatrixWithRange iOpenR = new MatrixWithRange(newLogDoubleMatrix1D(numY));
        DoubleMatrix1D rbeta = newLogDoubleMatrix1D(numY);
        DoubleMatrix1D rbetaTerms = newLogDoubleMatrix1D(numY);
        DoubleMatrix1D Ri_Y = newLogDoubleMatrix1D(numY);
        openRi.init(start+1,rs-1,fstore);
        rbeta.assign(RobustMath.LOG0);
        for (int ip = start; ip >= le; ip--) {
            rbetaTerms.assign(RobustMath.LOG0);
            fstore.decrementLeftB(Ri_Y,openRi,true);
            // Restart the right-extension from the range (ip, rs-1) for every start.
            iOpenR.assign(openRi, Ri_Y);
            for (int i = rs; i <= re; i++) {
                fstore.incrementRightB(Ri_Y,iOpenR);
                assert ((iOpenR.end == i) && (iOpenR.start == ip));
                tmp_Y.assign(beta_Y[i]);
                tmp_Y.assign(Ri_Y,sumFunc);
                rbetaTerms.assign(tmp_Y,RobustMath.logSumExpFunc);
            }
            // With base == -1, alpha_Y_ArrayM[ip-1-base] is alpha_Y_ArrayM[ip].
            int base = -1;
            if ((ip > 0) && multiplyAlpha)
                rbetaTerms.assign(alpha_Y_ArrayM[ip-1-base],sumFunc);
            rbeta.assign(rbetaTerms,RobustMath.logSumExpFunc);
        }
        return rbeta;
    }

    /**
     * Walks the sorted feature stream once and accumulates each feature's
     * contribution into {@code ExpF} (via {@code RobustMath.logSumExp}) and,
     * on the first call only, into {@link #F}.
     *
     * <p>Features are consumed in the order the loops visit boundaries:
     * start {@code s} increasing and, for a fixed start, end {@code e}
     * decreasing from {@code min(dataSize, s+m) - 1} down to {@code s}. The
     * assertion after {@code fIter.next()} checks that the iterator delivers
     * them in that order.
     *
     * <p>Which precomputed term is added to {@code log(f.value())} depends on
     * the feature's boundary flags:
     * <ul>
     * <li>both exact: {@code Ri_Y + beta_Y[e]} (plus alpha when {@code s > 0});</li>
     * <li>start exact, end open: {@code exactSTerm} plus an alpha term;</li>
     * <li>start open, end exact: {@code exactETerm};</li>
     * <li>both open: {@code termSE}.</li>
     * </ul>
     *
     * <p>Must run after the alpha/beta/openAlpha/openBeta passes of
     * sumProductInner, since it reads those arrays.
     *
     * <p>NOTE(review): {@code lambda} is not read here, and
     * {@code thisSeqLogli} is returned unchanged.
     *
     * @param fIter iterator already positioned by scanFeaturesSorted
     * @param dataSeq the sequence; cast to {@link Segmentation} when F is accumulated
     * @param m maximum boundary gap
     * @param grad gradient array; only tested for {@code null} here
     * @param lambda unused
     * @param thisSeqLogli value to return
     * @return {@code thisSeqLogli}, unmodified
     */
    private double computeFeatureGradOpt(FeatureStore.Iter fIter, DataSequence dataSeq, int m, double grad[], double lambda[], double thisSeqLogli) {
        if (!fIter.hasNext())
            return thisSeqLogli;
        // Resets both term arrays; the increment happens in the update clause,
        // so the body sees the same i as the exactETerms reset that follows it.
        for (int i = 0; i <= m; exactETerms[i++].assign(RobustMath.LOG0)) {
            prevExactETerms[i].assign(RobustMath.LOG0);
        }
        int dataSize = dataSeq.length();
        // alpha arrays are indexed with an offset of one: position p lives at p-base = p+1.
        int base = -1;
        // now compute the feature gradient for state features.
        // go over each feature in increasing (s,e) order.
        BFeature f = fIter.next();
        // aMdR is carried over from one start position to the next.
        DoubleMatrix1D aMdR = aMdRs[0];
        for (int s = 0; s < dataSize; s++) {
            fstore.deltaR_LShift(s,s+m,deltaRi,openDeltaRi);
            if (s > 0) {
                // aMdR(s) = logsumexp(aMdR(s-1) + openDeltaRi, deltaRi + alpha_Y_ArrayM[s])
                aMdR.assign(openDeltaRi,sumFunc);
                tmp_Y.assign(deltaRi);
                tmp_Y.assign(alpha_Y_ArrayM[s-1-base],sumFunc);
                aMdR.assign(tmp_Y,RobustMath.logSumExpFunc);
            } else {
                aMdR.assign(deltaRi);
            }
            // if ((f != null) && (f.start() > s))
            //   continue;
            if (!reuseM) fstore.getLogMi(s,Mi_YY);
            int e = Math.min(dataSize,s+m);
            // Swap: what was computed for start s-1 becomes "previous".
            DoubleMatrix1D tmpArr[]=prevExactETerms;
            prevExactETerms = exactETerms;
            exactETerms = tmpArr;
            if (s+m < dataSize) {
                // Seed the terms from the open vectors for segments reaching past s+m-1.
                exactETerm.assign(openAlpha[e-1]);
                exactETerm.assign(beta_Y[e],sumFunc);
                fstore.deltaR_RShift(s,e,deltaRi, openDeltaRi);
                exactETerm.assign(deltaRi,sumFunc);
                exactETerms[m].assign(exactETerm);
                // deltaRi/openDeltaRi were overwritten by the RShift call above; recompute the LShift values.
                fstore.deltaR_LShift(s,s+m,deltaRi, openDeltaRi);
                exactSTerm.assign(deltaRi);
                exactSTerm.assign(openBeta[s], sumFunc);
                termSE.assign(aMdR);
                termSE.assign(openBeta[s],sumFunc);
            } else {
                exactSTerm.assign(RobustMath.LOG0);
                termSE.assign(RobustMath.LOG0);
            }
            iOpenR.init(s,e,fstore,false);
            for (e--; e >= s; e--) {
                int l = e-s;
                assert (l < m);
                fstore.decrementRightB(Ri_Y,iOpenR);
                rbeta.assign(Ri_Y);
                rbeta.assign(beta_Y[e],sumFunc);
                tmp_Y.assign(rbeta);
                if (s > 0) {
                    tmp_Y.assign(alpha_Y_ArrayM[s-1-base],sumFunc);
                }
                exactETerm.assign(tmp_Y);
                if (s > 0) {
                    // Same end position seen from start s-1 has length l+1.
                    exactETerm.assign(prevExactETerms[l+1],RobustMath.logSumExpFunc);
                }
                exactETerms[l].assign(exactETerm);
                exactSTerm.assign(rbeta,RobustMath.logSumExpFunc);
                termSE.assign(exactETerm,RobustMath.logSumExpFunc);
                // Disabled brute-force cross-checks against exactCompute:
                //assert (exactCompute(s,0,e,dataSize-1,true).equals(termSE));
                // termSE.assign(exactCompute(s,0,e,dataSize-1,true));
                //assert (exactCompute(s,0,e,e,true).equals(exactETerm));
                //exactETerm.assign(exactCompute(s,0,e,e,true));
                //assert (exactCompute(s,s,e,dataSize-1,false).equals(exactSTerm));
                //exactSTerm.assign(exactCompute(s,s,e,dataSize-1,false));
                // process all features with boundary (s,e)
                if (params.debugLvl > 2)
                    System.out.println("Features for boundary: [" + s + " " + e + "]");
                while ((f != null) && ((f.start()==s) && (f.end()==e))) {
                    // Empirical counts are collected only on the first call (icall == 0).
                    if ((icall == 0) && (grad != null) && holdsInTrainingData(f, (Segmentation)dataSeq)) {
                        F[f.index()] += f.value();
                        if (params.debugLvl > 2)
                            System.out.println("Holds " + f.index() + " " + bfgen.featureName(f.index()) + " " + f.start() + " " + f.startOpen() + " " + f.end());
                    }
                    if (params.debugLvl > 3)
                        System.out.println(f);
                    double val = RobustMath.log(f.value());
                    if (!f.endOpen() && !f.startOpen()) {
                        // Both boundaries exact.
                        val += (Ri_Y.get(f.y())+beta_Y[e].get(f.y()));
                        if (s > 0)
                            val += alpha_Y_ArrayM[s-base-1].get(f.y());
                    } else if (f.endOpen() && !f.startOpen()) {
                        // Start exact, end open.
                        val += exactSTerm.get(f.y());
                        if (s > 0) {
                            if (f.yprev() >= 0) {
                                // Edge feature: use the specific (yprev, y) entry rather than the marginalised alpha.
                                val += (alpha_Y_Array[s-1-base].get(f.yprev()) + Mi_YY.get(f.yprev(), f.y()));
                            } else {
                                val += (alpha_Y_ArrayM[s-1-base].get(f.y()));
                            }
                        }
                    } else if (f.startOpen() && !f.endOpen()) {
                        // Start open, end exact.
                        val += exactETerm.get(f.y());
                    } else {
                        // Both boundaries open.
                        val += termSE.get(f.y());
                    }
                    ExpF[f.index()] = RobustMath.logSumExp(ExpF[f.index()], val);
                    if (fIter.hasNext()) {
                        f = fIter.next();
                        assert ((f.start() > s) || ((f.start() == s) && (f.end() <= e)));
                    } else
                        f = null;
                }
            }
        }
        return thisSeqLogli;
    }

    /**
     * Processes one sequence: backward pass, forward pass, then (optionally)
     * the expected-feature accumulation.
     *
     * <p>Steps, in order:
     * <ol>
     * <li>Initialise {@code fstore} for the sequence and grow the alpha/beta
     *     arrays if {@code beta_Y} has fewer than {@code dataSize+1} entries.</li>
     * <li>Backward loop: fill {@code beta_Y[i]} and {@code openBeta[i]} for
     *     {@code i = dataSize-2 .. 0}. {@code beta_Y[dataSize-1]} is
     *     temporarily replaced with {@code allZeroVector} and restored before
     *     returning.</li>
     * <li>Forward loop: fill {@code alpha_Y_Array[i+1]} and
     *     {@code openAlpha[i]} for {@code i = 0 .. dataSize-1}, computing
     *     {@code alpha_Y_ArrayM} lazily (guarded by {@code initAlphaMDone}).</li>
     * <li>If {@code fgenForExpVals} is non-null, scan the features in sorted
     *     order and call computeFeatureGradOpt. A separate FeatureStore is
     *     built when {@code fgenForExpVals} is not the training generator.</li>
     * <li>Set {@code lZx} from the final alpha vector's {@code zSum()}.</li>
     * </ol>
     *
     * <p>NOTE(review): {@code onlyForwardPass} is not consulted; the backward
     * pass always runs.
     *
     * <p>NOTE(review): a zero-length sequence would index
     * {@code beta_Y[-1]}; callers presumably never pass one (confirm).
     *
     * @param dataSeq sequence to process
     * @param featureGenerator generator used for training; only compared by reference here
     * @param lambda feature weights, forwarded to the feature store
     * @param grad gradient array, forwarded to computeFeatureGradOpt
     * @param onlyForwardPass unused in this override
     * @param numRecord forwarded to {@code fstore.init}
     * @param fgenForExpVals generator for expected values, or {@code null} to skip that step
     * @return 0, or whatever computeFeatureGradOpt returns (which is its input, i.e. 0)
     */
    protected double sumProductInner(DataSequence dataSeq, FeatureGenerator featureGenerator, double lambda[], double grad[], boolean onlyForwardPass, int numRecord, FeatureGenerator fgenForExpVals) {
        fstore.init(dataSeq,bfgen,lambda,numY,numRecord);
        int m = bfgen.maxBoundaryGap();
        // alpha arrays are indexed with an offset of one: position p lives at p-base = p+1.
        int base = -1;
        int dataSize = dataSeq.length();
        if ((beta_Y==null) || beta_Y.length < dataSize+1)
            allocateAlphaBeta(2*dataSize);
        for (int i = dataSeq.length(); i >= 0; i--)
            initAlphaMDone[i] = false;
        // Temporarily alias the last beta to the shared all-zero vector; restored at the end.
        DoubleMatrix1D oldBeta = beta_Y[dataSeq.length()-1];
        beta_Y[dataSeq.length()-1] = allZeroVector;
        for (int i = dataSeq.length()-2; i >= 0; i--) {
            beta_Y[i].assign(RobustMath.LOG0);
        }
        // When Mi is reused across positions it is fetched once, here.
        if (reuseM && (dataSeq.length() > 0))
            fstore.getLogMi(1,Mi_YY);
        for (int i = dataSeq.length()-2; i >= 0; i--) {
            // compute beta[i]
            openRi.init(i+1,i);
            // Segments (i+1, ip) of length at most m.
            for (int ip = i+1; (ip <= i+m) && (ip < dataSize); ip++) {
                fstore.incrementRightB(Ri_Y,openRi);
                tmp_Y.assign(Ri_Y);
                // The last position's beta is the all-zero vector, so adding it is skipped.
                if (ip < dataSize-1)
                    tmp_Y.assign(beta_Y[ip], sumFunc);
                beta_Y[i].assign(tmp_Y, RobustMath.logSumExpFunc);
            }
            if (i <= dataSize-1-m) {
                fstore.removeExactStartFeatures(Ri_Y,i+1,i+m);
                openBeta[i].assign(Ri_Y); // R(i+1,i+m)
            }
            if (i < dataSize-m-1) {
                // Contribution of segments longer than m, built from openBeta[i+1].
                fstore.deltaR_LShift(i+1,i+m+1,deltaRi, openDeltaRi);
                tmp_Y.assign(deltaRi);
                tmp_Y.assign(openBeta[i+1], sumFunc);
                beta_Y[i].assign(tmp_Y, RobustMath.logSumExpFunc);
                openBeta[i].assign(beta_Y[i+m], sumFunc);
                tmp_Y.assign(openDeltaRi);
                tmp_Y.assign(openBeta[i+1], sumFunc);
                openBeta[i].assign(tmp_Y, RobustMath.logSumExpFunc);
            }
            // Always true inside this loop.
            if (i >= 0) {
                // get Mi.
                if (!reuseM) fstore.getLogMi(i+1,Mi_YY);
                // zMult reads tmp_Y and writes beta_Y[i], hence the copy.
                tmp_Y.assign(beta_Y[i]);
                Mi_YY.zMult(tmp_Y, beta_Y[i],1,0,false);
            }
        }
        double thisSeqLogli = 0;
        alpha_Y_Array[0] = allZeroVector;
        for (int i = 0; i < dataSize; i++) {
            alpha_Y_Array[i-base].assign(RobustMath.LOG0);
            openRi.init(i+1,i);
            // Segments (ip, i) of length at most m.
            for (int ip = i; (ip > i-m) && (ip >= 0); ip--) {
                fstore.decrementLeftB(Ri_Y,openRi);
                if (ip-1 >= 0) {
                    if (!reuseM) {
                        fstore.getLogMi(ip,Mi_YY);
                    }
                    // alpha_Y_ArrayM[ip] is computed at most once per sequence.
                    if (!initAlphaMDone[ip-1-base]) {
                        alpha_Y_ArrayM[ip-1-base].assign(RobustMath.LOG0);
                        Mi_YY.zMult(alpha_Y_Array[ip-1-base],alpha_Y_ArrayM[ip-1-base],1,0,true);
                        initAlphaMDone[ip-1-base] = true;
                    }
                    newAlpha_Y.assign(alpha_Y_ArrayM[ip-1-base]);
                    newAlpha_Y.assign(Ri_Y,sumFunc);
                } else {
                    newAlpha_Y.assign(Ri_Y);
                }
                if (params.debugLvl > 2) {
                    System.out.println("At sequence position "+i + " " + ip);
                    System.out.println("Alpha-i " + newAlpha_Y.toString());
                }
                alpha_Y_Array[i-base].assign(newAlpha_Y, RobustMath.logSumExpFunc);
            }
            // Ri at this point contains features for segment (i-m+1, i)
            if (i-m >= -1) {
                fstore.removeExactEndFeatures(Ri_Y, i-m+1, i);
                openAlpha[i].assign(Ri_Y);
            }
            if (i-m >= 0) {
                // Contribution of segments longer than m, built from openAlpha[i-1].
                fstore.deltaR_RShift(i-m,i,deltaRi, openDeltaRi);
                tmp_Y.assign(openAlpha[i-1]);
                tmp_Y.assign(deltaRi, sumFunc);
                alpha_Y_Array[i-base].assign(tmp_Y, RobustMath.logSumExpFunc);
                // compute open-alpha
                tmp_Y.assign(openDeltaRi);
                tmp_Y.assign(openAlpha[i-1], sumFunc);
                openAlpha[i].assign(alpha_Y_ArrayM[i-m-base],sumFunc);
                openAlpha[i].assign(tmp_Y, RobustMath.logSumExpFunc);
            }
            if (params.debugLvl > 2) {
                System.out.println("At sequence position "+i);
                System.out.println("Alpha-i " + alpha_Y_Array[i-base].toString());
                System.out.println("Ri " + Ri_Y.toString());
                if (!reuseM) System.out.println("Mi " + Mi_YY.toString());
                System.out.println("Beta-i " + beta_Y[i].toString());
            }
        }
        if (fgenForExpVals != null) {
            FeatureStore.Iter featureIter = fIter;
            if (fgenForExpVals != featureGenerator) {
                // a feature generator different than used for training.
                FeatureStore featureStore = new FeatureStore(reuseM);
                featureStore.init(dataSeq,(BFeatureGenerator)fgenForExpVals,lambda,numY);
                featureIter = featureStore.getIterator();
                featureStore.scanFeaturesSorted(featureIter);
            } else {
                fstore.scanFeaturesSorted(fIter);
            }
            thisSeqLogli = computeFeatureGradOpt(featureIter, dataSeq,m,grad,lambda,thisSeqLogli);
        }
        // Presumably zSum() on the log-domain vector type yields the log partition value (confirm in the matrix class).
        lZx = alpha_Y_Array[dataSeq.length()-1-base].zSum();
        beta_Y[dataSeq.length()-1] = oldBeta;
        return thisSeqLogli;
    }

    /**
     * Folds the accumulated counts {@link #F} into the objective and gradient:
     * for every index {@code fi} of {@code grad}, adds
     * {@code F[fi]*lambda[fi]} to {@code logli} and {@code F[fi]} to
     * {@code grad[fi]}. Does nothing when {@code grad} is {@code null}.
     *
     * @param grad gradient array to update in place, or {@code null}
     * @param lambda feature weights
     * @param logli running log-likelihood
     * @return the updated log-likelihood
     */
    protected double finishGradCompute(double grad[], double lambda[], double logli) {
        if (grad != null) {
            for (int fi = 0; fi < grad.length; fi++) {
                logli += F[fi]*lambda[fi];
                grad[fi] += F[fi];
            }
        }
        return logli;
    }
};