/** FeatureStore.java
 * Created on Apr 2, 2005
 *
 * @author Sunita Sarawagi
 * @since 1.2
 * @version 1.3
 */
package iitb.BSegmentCRF;

import iitb.BSegmentCRF.BSegmentTrainer.MatrixWithRange;
import iitb.CRF.DataSequence;
import iitb.CRF.Feature;
import iitb.CRF.LogSparseDoubleMatrix1D;
import iitb.CRF.RobustMath;
import iitb.CRF.FeatureGenCache;

import java.util.Iterator;

import cern.colt.matrix.tdouble.DoubleMatrix1D;
import cern.colt.matrix.tdouble.DoubleMatrix2D;

/**
 * Per-sequence store of the boundary features produced by a
 * {@link BFeatureGenerator}.
 *
 * <p>State features (those with {@code yprev() < 0}) are bucketed into
 * {@code stateFeatures[type][end - start][start]}, where {@code type} is
 * {@code (endOpen ? 1 : 0) + (startOpen ? 2 : 0)}. Edge features (those with
 * {@code yprev() >= 0}) are kept per start position in {@link EdgeFeatures}.
 *
 * <p>The public query methods all reuse the single mutable {@link #cond}
 * instance and the scratch arrays, so one instance must not be queried
 * concurrently.
 */
public class FeatureStore {
    BFeatureGenerator bfgen;
    int numLabels;

    /** Snapshot of a feature's value, label and index; has no previous label. */
    class BasicFeature {
        float val;
        int _y;
        int _index;

        BasicFeature() {
        }

        BasicFeature(BFeature feature) {
            this.val = feature.value();
            this._y = feature.y();
            this._index = feature.index();
        }

        /* (non-Javadoc)
         * @see iitb.CRF.Feature#index()
         */
        public int index() {
            return _index;
        }

        /* (non-Javadoc)
         * @see iitb.CRF.Feature#y()
         */
        public int y() {
            return _y;
        }

        /* (non-Javadoc)
         * @see iitb.CRF.Feature#yprev()
         */
        public int yprev() {
            return -1;
        }

        /* (non-Javadoc)
         * @see iitb.CRF.Feature#value()
         */
        public float value() {
            return val;
        }

        @Override
        public String toString() {
            return bfgen.featureName(_index) + " v" + value() + " y" + y();
        }
    }

    /** A {@link BasicFeature} that additionally records the previous label. */
    class BasicEdgeFeature extends BasicFeature {
        int _yprev;

        /**
         * @param f feature whose value, labels and index are copied
         */
        public BasicEdgeFeature(BFeature f) {
            super(f);
            _yprev = f.yprev();
        }

        public BasicEdgeFeature() {
        }

        /* (non-Javadoc)
         * @see iitb.CRF.Feature#yprev()
         */
        @Override
        public int yprev() {
            return _yprev;
        }
    }

    /**
     * Mutable {@link BFeature} carrying start/end boundaries and their
     * open/exact flags. {@link Iter} re-initialises one instance per step.
     */
    class FeatureImpl extends BasicEdgeFeature implements BFeature {
        int startB;
        boolean openS;
        int endB;
        boolean openE;

        public FeatureImpl() {
            super();
        }

        /**
         * @param f feature to copy, including its boundaries
         */
        public FeatureImpl(BFeature f) {
            super(f);
            this.startB = f.start();
            endB = f.end();
            openS = f.startOpen();
            openE = f.endOpen();
        }

        /**
         * Re-initialises this object as a state feature (previous label -1).
         *
         * @param f    source of label, value and index
         * @param s    start boundary
         * @param e    end boundary
         * @param type state-feature type: {@code type/2 == 1} marks the start
         *             as open, an odd value marks the end as open
         */
        public void init(Feature f, int s, int e, int type) {
            startB = s;
            endB = e;
            openS = (type / 2) == 1;
            openE = (type & 1) == 1;
            _y = f.y();
            val = f.value();
            _index = f.index();
            _yprev = -1;
        }

        /**
         * Re-initialises this object as an edge feature: exact start, open
         * end (type 1), with the previous label taken from {@code f}.
         */
        public void init(Feature f, int s, int e) {
            init(f, s, e, 1);
            _yprev = f.yprev();
        }

        /* (non-Javadoc)
         * @see iitb.BSegmentCRF.BFeature#start()
         */
        public int start() {
            return startB;
        }

        /* (non-Javadoc)
         * @see iitb.BSegmentCRF.BFeature#startOpen()
         */
        public boolean startOpen() {
            return openS;
        }

        /* (non-Javadoc)
         * @see iitb.BSegmentCRF.BFeature#end()
         */
        public int end() {
            return endB;
        }

        /* (non-Javadoc)
         * @see iitb.BSegmentCRF.BFeature#endOpen()
         */
        public boolean endOpen() {
            return openE;
        }

        /* (non-Javadoc)
         * @see iitb.CRF.Feature#yprevArray()
         */
        public int[] yprevArray() {
            return null;
        }

        @Override
        public String toString() {
            return bfgen.featureName(_index) + " v" + value() + " y" + y()
                    + " s" + start() + ":" + startOpen()
                    + " e" + end() + ":" + endOpen();
        }
    }

    // Selector codes for the "type" argument of addFeatures/removeFeatures.
    // The two end* codes mean "index is an end position"; the two start*
    // codes mean "index is a start position". These are NOT the same
    // encoding as the first index of stateFeatures.
    static int endOpen = 1;
    static int endExact = 0;
    static int startExact = 2;
    static int startOpen = 3;

    FeatureGenCache.AllFeatureCache allFeatureCache;

    /** Indexed as {@code [type][end - start][start]}; see the class comment. */
    FeatureGenCache.AllFeatureCache.Flist[][] stateFeatures[] = null;

    /** Per-position edge feature vectors. */
    class EdgeFeatures {
        FeatureGenCache.AllFeatureCache.FeatureVector edgeFeatureArray[];

        public EdgeFeatures(int size) {
            edgeFeatureArray = new FeatureGenCache.AllFeatureCache.FeatureVector[size];
            for (int i = 0; i < size; i++) {
                add(i);
            }
        }

        FeatureGenCache.AllFeatureCache.FeatureVector get(int i) {
            return edgeFeatureArray[i];
        }

        /**
         * Fills slot {@code i}. When edge features are x-independent every
         * slot after the first points at the cache's vector for edge (0,1);
         * otherwise the slot gets a fresh vector.
         */
        public void add(int i) {
            if (allFeatureCache.edgeFeaturesXIndependent && (i > 0)) {
                edgeFeatureArray[i] = allFeatureCache.edgeFeatures.getEdgeIds(0, 1, dataLen);
            } else {
                edgeFeatureArray[i] = allFeatureCache.newFeatureVector();
            }
        }

        /**
         * Records edge feature {@code f} at position {@code i}. In the
         * x-independent case only features reported at position 1 are
         * forwarded to the cache; all others are dropped.
         */
        public void addEdgeFeature(int i, BFeature f) {
            if (!allFeatureCache.edgeFeaturesXIndependent) {
                edgeFeatureArray[i].add(f);
            } else {
                if (i == 1) {
                    allFeatureCache.edgeFeatures.addEdgeFeature(allFeatureCache.add(f), i - 1, i, dataLen);
                }
            }
        }

        /** Clears the per-position vectors; a no-op in the x-independent case. */
        public void clear() {
            if (!allFeatureCache.edgeFeaturesXIndependent) {
                for (int i = 0; i < edgeFeatureArray.length; i++) {
                    edgeFeatureArray[i].clear();
                }
            }
        }
    }

    EdgeFeatures edgeFeatures;
    double DEFAULT_VALUE = RobustMath.LOG0;
    double lambda[];
    int dataLen;

    FeatureStore(boolean edgeFeatureXIndependent) {
        allFeatureCache = new FeatureGenCache.AllFeatureCache(edgeFeatureXIndependent);
    }

    FeatureStore(FeatureGenCache.AllFeatureCache allFeatureCache) {
        this.allFeatureCache = allFeatureCache;
    }

    /**
     * Scans all features of {@code data} and files them into this store.
     *
     * @param data   sequence to scan
     * @param fgen   generator supplying the features
     * @param lambda weight vector, passed on to each state-feature list
     * @param numY   number of labels
     */
    void init(DataSequence data, BFeatureGenerator fgen, double[] lambda, int numY) {
        bfgen = fgen;
        numLabels = numY;
        dataLen = data.length();
        this.lambda = lambda;
        allocateScratch(data.length());
        fgen.startScanFeaturesAt(data);
        boolean featuresFired = false;
        while (fgen.hasNext()) {
            BFeature f = fgen.nextFeature();
            if (f.yprev() >= 0) {
                edgeFeatures.addEdgeFeature(f.start(), f);
            } else {
                int type = (f.endOpen() ? 1 : 0) + (f.startOpen() ? 1 : 0) * 2;
                stateFeatures[type][f.end() - f.start()][f.start()].add(f, lambda);
                featuresFired = true;
            }
        }
        allFeatureCache.edgeFeatures.doneOneRoundEdges();
        assert (featuresFired);
    }

    /**
     * Makes this store share (not deep-copy) the feature arrays, weights and
     * generator of {@code fstore}. {@code allFeatureCache} is not copied.
     */
    void copy(FeatureStore fstore) {
        dataLen = fstore.dataLen;
        stateFeatures = fstore.stateFeatures;
        edgeFeatures = fstore.edgeFeatures;
        lambda = fstore.lambda;
        bfgen = fstore.bfgen;
        numLabels = fstore.numLabels;
    }

    /** Installs a new weight vector and recalculates every state-feature list in use. */
    void setLambda(double[] lambda) {
        this.lambda = lambda;
        for (int i = 0; i < dataLen; i++) {
            for (int type = 0; type < 4; type++) {
                for (int l = bfgen.maxBoundaryGap() - 1; l >= 0; l--) {
                    stateFeatures[type][l][i].calcMatrix(lambda);
                }
            }
        }
    }

    /**
     * Ensures the scratch arrays can hold a sequence of length {@code n}.
     * Existing arrays that are long enough are cleared and reused; otherwise
     * new ones of length {@code 2*n} are allocated.
     *
     * @param n length of the sequence about to be scanned
     */
    private void allocateScratch(int n) {
        if (stateFeatures == null) {
            int m = bfgen.maxBoundaryGap();
            stateFeatures = new FeatureGenCache.AllFeatureCache.Flist[4][m][0];
        }
        if ((stateFeatures[0][0] != null) && (stateFeatures[0][0].length >= n)) {
            for (int i = 0; i < n; i++) {
                for (int type = 0; type < 4; type++) {
                    for (int l = bfgen.maxBoundaryGap() - 1; l >= 0; l--) {
                        stateFeatures[type][l][i].clear();
                    }
                }
            }
            edgeFeatures.clear();
            return;
        }
        int size = 2 * n;
        for (int type = 0; type < 4; type++) {
            for (int l = bfgen.maxBoundaryGap() - 1; l >= 0; l--) {
                stateFeatures[type][l] = new FeatureGenCache.AllFeatureCache.Flist[size];
            }
        }
        edgeFeatures = new EdgeFeatures(size);
        for (int i = 0; i < size; i++) {
            for (int type = 0; type < 4; type++) {
                for (int l = bfgen.maxBoundaryGap() - 1; l >= 0; l--) {
                    stateFeatures[type][l][i] = allFeatureCache.newFlist(numLabels);
                }
            }
        }
    }

    /**
     * Cursor over all stored features. For each start position it walks the
     * lengths from {@code maxBoundaryGap()-1} down to 0, and within a length
     * the four state-feature types; at length 0 the edge features of that
     * position follow.
     */
    class Iter {
        int currentType;
        int currentLen;
        int s;
        Iterator<Feature> viter;
        FeatureImpl featureImpl = new FeatureImpl();

        public void init() {
            int m = bfgen.maxBoundaryGap();
            s = 0;
            currentLen = m - 1;
            currentType = -1;
            viter = null;
            advance();
        }

        /** Moves to the next non-empty list, or stops once {@code s >= dataLen}. */
        void advance() {
            while (true) {
                if ((viter != null) && viter.hasNext()) {
                    return;
                }
                currentType++;
                if (currentType < 4) {
                    viter = stateFeatures[currentType][currentLen][s].iterator();
                    continue;
                } else if ((currentType == 4) && (currentLen == 0)) {
                    // pseudo-type 4: the edge features of position s
                    viter = edgeFeatures.get(s).iterator();
                    continue;
                } else {
                    currentType = -1;
                    currentLen--;
                    if (currentLen < 0) {
                        s++;
                        currentLen = bfgen.maxBoundaryGap() - 1;
                        if (s >= dataLen) {
                            return;
                        }
                    }
                }
            }
        }

        /**
         * @return true while another feature is available
         */
        public boolean hasNext() {
            return (viter != null) && viter.hasNext();
        }

        /**
         * @return the next feature. The same {@link FeatureImpl} object is
         *         returned on every call and overwritten by the following
         *         call, so callers must copy it if they need to keep it.
         */
        public BFeature next() {
            if (currentType == 4) {
                featureImpl.init((Feature) viter.next(), s, s + currentLen);
            } else {
                featureImpl.init((Feature) viter.next(), s, s + currentLen, currentType);
            }
            advance();
            return featureImpl;
        }
    }

    /** Positions {@code iter} on the first stored feature. */
    public void scanFeaturesSorted(Iter iter) {
        iter.init();
    }

    /**
     * Fills {@code mi_YY} with the weighted edge-feature sums for position
     * {@code i}; cells with no feature are left at {@code RobustMath.LOG0}.
     *
     * Edge features are constrained to have "=s" and ">=s" as the start and
     * end boundaries.
     *
     * @param i     position whose edge features are read
     * @param mi_YY output matrix indexed by (yprev, y); overwritten
     */
    public void getLogMi(int i, DoubleMatrix2D mi_YY) {
        // Uses LOG0 directly rather than the mutable DEFAULT_VALUE field.
        double logZero = RobustMath.LOG0;
        mi_YY.assign(logZero);
        for (Iterator<Feature> iter = edgeFeatures.get(i).iterator(); iter.hasNext();) {
            Feature f = iter.next();
            double oldVal = mi_YY.get(f.yprev(), f.y());
            if (oldVal == logZero) {
                oldVal = 0;
            }
            mi_YY.set(f.yprev(), f.y(), oldVal + lambda[f.index()] * f.value());
        }
    }

    /**
     * Constraint on one boundary of a feature: which boundary ({@code 'S'}
     * selects the start, anything else the end), the comparison value, the
     * direction of the comparison, and whether only open boundaries qualify.
     */
    static class Condition {
        static int GE = 0;
        static int LE = 1;
        int val;
        char b;
        int op;
        boolean openOnly;

        void init(int val, char b, int op, boolean openOnly) {
            this.val = val;
            this.b = b;
            this.op = op;
            this.openOnly = openOnly;
        }

        /**
         * @param feature feature to test
         * @return false for an exact boundary when {@code openOnly} is set;
         *         for an exact boundary otherwise, whether it equals
         *         {@code val}; for an open boundary, whether it is
         *         {@code >= val} (GE) or {@code <= val} (any other op)
         */
        public boolean satisfies(BFeature feature) {
            int fval = ((b == 'S') ? feature.start() : feature.end());
            boolean open = ((b == 'S') ? feature.startOpen() : feature.endOpen());
            if (!open && openOnly) {
                return false;
            }
            if (!open) {
                return (val == fval);
            }
            return (op == GE) ? (fval >= val) : (fval <= val);
        }
    }

    /** Adds the selected features to {@code mat}; see the five-argument overload. */
    private void addFeatures(DoubleMatrix1D mat, int type, int index, Condition predicate) {
        addFeatures(mat, type, index, predicate, true);
    }

    /**
     * Adds to (or subtracts from) {@code mat} the precomputed scores of every
     * state-feature list selected by {@code type}, {@code index} and
     * {@code predicate}.
     *
     * <p>For {@code endExact}/{@code endOpen}, {@code index} is the end
     * position and {@code predicate} constrains the start: lists with an open
     * start are visited for each start in the permitted range and, unless
     * {@code predicate.openOnly}, the list with an exact start at
     * {@code predicate.val} is visited too. For {@code startExact}/
     * {@code startOpen} the roles of start and end are mirrored.
     *
     * @param mat       per-label vector to update
     * @param type      one of endExact, endOpen, startExact, startOpen
     * @param index     position of the boundary named by {@code type}
     * @param predicate constraint on the other boundary
     * @param add       true to add the scores, false to subtract them
     */
    private void addFeatures(DoubleMatrix1D mat, int type, int index, Condition predicate, boolean add) {
        if ((type == endExact) || (type == endOpen)) {
            // open start (2) plus the end-open bit
            int t = 2 + ((type == endOpen) ? 1 : 0);
            int startLB = Math.max(index - bfgen.maxBoundaryGap() + 1, 0);
            int startUB = Math.min(dataLen - 1, index);
            if (predicate.op == Condition.GE) {
                startLB = Math.max(predicate.val, startLB);
            } else {
                startUB = Math.min(predicate.val, startUB);
            }
            for (int s = startLB; s <= startUB; s++) {
                if (index - s >= 0) {
                    addFeatures(stateFeatures[t][index - s][s], mat, add);
                }
            }
            if (!predicate.openOnly) {
                // exact start plus the end-open bit
                t = (type == endOpen) ? 1 : 0;
                int startB = predicate.val;
                // The lower-bound check mirrors the start branch below: a
                // start beyond the end would otherwise index with a negative
                // length.
                if ((index - startB < bfgen.maxBoundaryGap()) && (index - startB >= 0)
                        && (startB >= 0) && (startB < dataLen)) {
                    addFeatures(stateFeatures[t][index - startB][startB], mat, add);
                }
            }
        } else {
            // open end (1) plus the start-open bit
            int t = 1 + 2 * ((type == startOpen) ? 1 : 0);
            int endUB = Math.min(index + bfgen.maxBoundaryGap() - 1, dataLen - 1);
            int endLB = index;
            if (predicate.op == Condition.GE) {
                endLB = predicate.val;
            } else {
                endUB = Math.min(predicate.val, endUB);
            }
            for (int e = endLB; e <= endUB; e++) {
                if (e - index >= 0) {
                    addFeatures(stateFeatures[t][e - index][index], mat, add);
                }
            }
            if (!predicate.openOnly) {
                // exact end plus the start-open bit
                t = 2 * ((type == startOpen) ? 1 : 0);
                int endB = predicate.val;
                if ((endB - index < bfgen.maxBoundaryGap()) && (endB - index >= 0) && (index < dataLen)) {
                    addFeatures(stateFeatures[t][endB - index][index], mat, add);
                }
            }
        }
    }

    /** When true, the addFeatures helpers print the features they visit. */
    boolean printFeatures = false;

    /**
     * Feature-by-feature variant of
     * {@link #addFeatures(FeatureGenCache.AllFeatureCache.Flist, DoubleMatrix1D, boolean)}
     * that recomputes {@code lambda[f] * value} instead of reading the list's
     * precomputed vector. Not called from within this class.
     *
     * @param vector list of features to apply
     * @param mat    per-label vector to update
     * @param add    true to add, false to subtract
     */
    private void addFeatures3(FeatureGenCache.AllFeatureCache.Flist vector, DoubleMatrix1D mat, boolean add) {
        for (Iterator<Feature> iter = vector.iterator(); iter.hasNext();) {
            Feature feature = iter.next();
            if (printFeatures) {
                System.out.println(feature);
            }
            int f = feature.index();
            double oldVal = mat.get(feature.y());
            if (add) {
                if (oldVal == DEFAULT_VALUE) {
                    oldVal = 0;
                }
                mat.set(feature.y(), oldVal + lambda[f] * feature.value());
            } else {
                mat.set(feature.y(), oldVal - lambda[f] * feature.value());
            }
        }
    }

    /**
     * Applies the per-label vector {@code vector.mat} to {@code mat}. Labels
     * whose entry equals {@code DEFAULT_VALUE} are skipped; when adding, a
     * {@code DEFAULT_VALUE} entry in {@code mat} is treated as 0 first.
     *
     * @param vector list whose {@code mat} vector is applied; empty lists are ignored
     * @param mat    per-label vector to update
     * @param add    true to add, false to subtract
     */
    private void addFeatures(FeatureGenCache.AllFeatureCache.Flist vector, DoubleMatrix1D mat, boolean add) {
        if (vector.size() == 0) {
            if (printFeatures) {
                System.out.println("No features");
            }
            return;
        }
        DoubleMatrix1D precomputedMat = vector.mat;
        for (int y = (int) (mat.size() - 1); y >= 0; y--) {
            double val = precomputedMat.get(y);
            if (val == DEFAULT_VALUE) {
                continue;
            }
            double oldVal = mat.get(y);
            if (add) {
                if (oldVal == DEFAULT_VALUE) {
                    oldVal = 0;
                }
                mat.set(y, oldVal + val);
            } else {
                mat.set(y, oldVal - val);
            }
        }
        if (printFeatures) {
            for (Iterator<Feature> iter = vector.iterator(); iter.hasNext();) {
                Feature feature = iter.next();
                System.out.println(bfgen.featureName(feature.index()));
            }
        }
    }

    /**
     * Subtracts the selected features from {@code mat}; the inverse of
     * {@link #addFeatures(DoubleMatrix1D, int, int, Condition)}.
     */
    private void removeFeatures(DoubleMatrix1D mat, int type, int index, Condition predicate) {
        addFeatures(mat, type, index, predicate, false);
    }

    /** Shared scratch condition, re-initialised by every public query method. */
    Condition cond = new Condition();

    public void incrementRightB(DoubleMatrix1D ri_Y, MatrixWithRange openRi) {
        incrementRightB(ri_Y, openRi, false);
    }

    /**
     * Advances {@code openRi.end} by one and adds the open-end features for
     * the new end to {@code openRi.mat}. If {@code ri_Y} is non-null it is
     * set to a copy of {@code openRi.mat} plus the exact-end features.
     *
     * @param ri_Y     receives the exact-end result; may be null
     * @param openRi   running open-end matrix and its range; modified
     * @param openOnly if true, only features with an open start are used
     */
    public void incrementRightB(DoubleMatrix1D ri_Y, MatrixWithRange openRi, boolean openOnly) {
        openRi.end++;
        cond.init(openRi.start, 'S', Condition.GE, openOnly);
        // add these to openRi
        addFeatures(openRi.mat, endOpen, openRi.end, cond);
        if (ri_Y != null) {
            ri_Y.assign(openRi.mat);
            addFeatures(ri_Y, endExact, openRi.end, cond);
            //if (!openOnly) checkMatrix(ri_Y,openRi);
        }
    }

    /**
     * Debug check: asserts that {@code ri_Y} equals a from-scratch
     * {@link #getExactR(int, int, DoubleMatrix1D)} over {@code openRi}'s
     * range. Re-initialises {@link #cond} as a side effect.
     */
    public void checkMatrix(DoubleMatrix1D ri_Y, MatrixWithRange openRi) {
        if (ri_Y == null) {
            return;
        }
        if ((openRi.end < 0) || (openRi.start < 0)) {
            return;
        }
        assert (getExactR(openRi.start, openRi.end,
                new LogSparseDoubleMatrix1D((int) openRi.mat.size())).equals(ri_Y));
    }

    /**
     * @param ri_Y
     * @param openRi
     * get all features with end boundary LE openRi.end and start boundary = openRi.start
     */
    public void decrementLeftB(DoubleMatrix1D ri_Y, MatrixWithRange openRi) {
        decrementLeftB(ri_Y, openRi, false);
    }

    /**
     * Moves {@code openRi.start} one position left and adds the open-start
     * features for the new start to {@code openRi.mat}. If {@code ri_Y} is
     * non-null it is set to a copy of {@code openRi.mat} plus the
     * exact-start features.
     *
     * @param ri_Y    receives the exact-start result; may be null
     * @param openRi  running open-start matrix and its range; modified
     * @param endOpen if true, only features with an open end are used
     *                (this parameter shadows the static {@code endOpen} code)
     * get all features with end boundary LE openRi.end and start boundary = openRi.start
     */
    public void decrementLeftB(DoubleMatrix1D ri_Y, MatrixWithRange openRi, boolean endOpen) {
        openRi.start--;
        cond.init(openRi.end, 'E', Condition.LE, endOpen);
        addFeatures(openRi.mat, startOpen, openRi.start, cond);
        if (ri_Y != null) {
            ri_Y.assign(openRi.mat);
            addFeatures(ri_Y, startExact, openRi.start, cond);
        }
        // if (!endOpen) checkMatrix(ri_Y,openRi);
    }

    /**
     * @param leftB
     * @param rightB
     * @param deltaRi     receives the open-end plus exact-end contributions
     * @param openDeltaRi receives the open-end contributions only
     *
     * Get all features which are applicable for segments with left boundary <= leftB and right boundary = rightB
     */
    public void deltaR_RShift(int leftB, int rightB, DoubleMatrix1D deltaRi, DoubleMatrix1D openDeltaRi) {
        // feature should have open start boundary with start() at >= leftB
        cond.init(leftB, 'S', Condition.GE, true);
        openDeltaRi.assign(0);
        addFeatures(openDeltaRi, endOpen, rightB, cond);
        deltaRi.assign(openDeltaRi);
        addFeatures(deltaRi, endExact, rightB, cond);
    }

    /**
     * @param leftB
     * @param rightB
     * @param deltaRi     receives the open-start (if requested) plus exact-start contributions
     * @param openDeltaRi receives the open-start contributions only; may be null
     *
     * Get all features with left boundary = leftB, right boundary open with a value <= rightB
     */
    public void deltaR_LShift(int leftB, int rightB, DoubleMatrix1D deltaRi, DoubleMatrix1D openDeltaRi) {
        // TODO -- default value here should be set so as not to undo positions that are already enabled in full R.
        // current code will only work for the case of no restrict constraint.
        cond.init(rightB, 'E', Condition.LE, true);
        if (openDeltaRi != null) {
            openDeltaRi.assign(0);
            addFeatures(openDeltaRi, startOpen, leftB, cond);
            deltaRi.assign(openDeltaRi);
        } else {
            deltaRi.assign(0);
        }
        addFeatures(deltaRi, startExact, leftB, cond);
    }

    /**
     * @return a new, uninitialised iterator; call
     *         {@link #scanFeaturesSorted(Iter)} before using it
     */
    public Iter getIterator() {
        return new Iter();
    }

    /**
     * Subtracts from {@code ri_Y} the exact-end features ending at
     * {@code rightB} whose start satisfies the {@code >= leftB} condition.
     * Does nothing when {@code rightB < 0}.
     *
     * @param ri_Y   per-label vector to update
     * @param leftB  start-boundary bound
     * @param rightB end position
     */
    public void removeExactEndFeatures(DoubleMatrix1D ri_Y, int leftB, int rightB) {
        if (rightB < 0) {
            return;
        }
        cond.init(leftB, 'S', Condition.GE, false);
        removeFeatures(ri_Y, endExact, rightB, cond);
        // assert(getExactR(leftB,rightB,new LogSparseDoubleMatrix1D(ri_Y.size()),false).equals(ri_Y));
    }

    /**
     * Subtracts from {@code ri_Y} the exact-start features starting at
     * {@code leftB} whose end satisfies the {@code <= rightB} condition.
     *
     * @param ri_Y   per-label vector to update
     * @param leftB  start position
     * @param rightB end-boundary bound
     */
    public void removeExactStartFeatures(DoubleMatrix1D ri_Y, int leftB, int rightB) {
        cond.init(rightB, 'E', Condition.LE, false);
        removeFeatures(ri_Y, startExact, leftB, cond);
    }

    /**
     * @param s    start position
     * @param e    end position
     * @param ri_Y output vector; overwritten and returned
     */
    public DoubleMatrix1D getExactR(int s, int e, DoubleMatrix1D ri_Y) {
        return getExactR(s, e, ri_Y, true);
    }

    /**
     * Recomputes from scratch the vector for the range {@code [s, e]}: the
     * exact-end features at {@code e} (if requested) plus the open-end
     * features at every position from {@code e} down to {@code s}.
     *
     * @param s          start position
     * @param e          end position
     * @param ri_Y       output vector; reset to {@code DEFAULT_VALUE} first
     * @param endIsExact whether to include the exact-end features at {@code e}
     * @return {@code ri_Y}
     */
    public DoubleMatrix1D getExactR(int s, int e, DoubleMatrix1D ri_Y, boolean endIsExact) {
        ri_Y.assign(DEFAULT_VALUE);
        cond.init(s, 'S', Condition.GE, false);
        if (endIsExact) {
            addFeatures(ri_Y, endExact, e, cond);
        }
        for (int i = e; i >= s; i--) {
            addFeatures(ri_Y, endOpen, i, cond);
        }
        return ri_Y;
    }

    /**
     * Moves {@code openRi.end} one position left, subtracting the open-end
     * features of the position just dropped from {@code openRi.mat}. If
     * {@code ri_Y} is non-null it is set to a copy of {@code openRi.mat}
     * plus the exact-end features at the new end.
     *
     * @param ri_Y   receives the exact-end result; may be null
     * @param openRi running open-end matrix and its range; modified
     */
    public void decrementRightB(DoubleMatrix1D ri_Y, MatrixWithRange openRi) {
        openRi.end--;
        cond.init(openRi.start, 'S', Condition.GE, false);
        // remove the open-end features of the old end position from openRi
        removeFeatures(openRi.mat, endOpen, openRi.end + 1, cond);
        if (ri_Y != null) {
            ri_Y.assign(openRi.mat);
            addFeatures(ri_Y, endExact, openRi.end, cond);
            // checkMatrix(ri_Y,openRi);
        }
    }
}