/* * Created on Nov 9, 2008 * @author sunita * * Feature that can be used to decide if an edge should be added between two words. */ package iitb.Model; import java.util.Random; import iitb.CRF.DataSequence; import iitb.CRF.KeyedDataSequence; public class RandomEdgeSelector extends EdgeSelector { Random rand; public RandomEdgeSelector(FeatureGenImpl fgen, int width, String patternFile, int histSize) { super(fgen,width,patternFile,histSize,0); windowSize=width; this.histSize = histSize; assert(histSize >= 1); rand = new Random(); } public RandomEdgeSelector(FeatureGenImpl fgen,String patternFile) { this(fgen,0,patternFile,1); } public RandomEdgeSelector(FeatureGenImpl fgen,String patternFile, int histSize) { this(fgen,0,patternFile, histSize); } public RandomEdgeSelector(FeatureGenImpl fgen) { this(fgen,0,null,1); } @Override public boolean hasNext() { return (index < patternOccurence.length) && super.hasNext(); } @Override public void next(FeatureImpl f) { f.val = 1; f.strId.id = index*histSize+(currentHistSize-1); f.id = f.strId.id; f.ystart = -1; if(featureCollectMode()){ f.strId.name = featureName(f.id); } advance(); } @Override protected boolean advance() { index += rand.nextInt(2); return hasNext(); } @Override public boolean startScanFeaturesAt(DataSequence data, int prevPos, int pos) { currentHistSize = pos-prevPos; assert(currentHistSize >=1); assert(currentHistSize <= histSize); segLen = Math.min(pos+windowSize,data.length()-1)-Math.max(pos-windowSize-histSize,0)+1; index = 0; rand.setSeed(((KeyedDataSequence)data).getKey()*data.length()+pos); return advance(); } @Override public int labelIndependentId(FeatureImpl f) { return f.id; } @Override public int maxFeatureId() { return patternString.length*histSize; } @Override public String name() { return "RandomEdgeSel"; } public String featureName(int index) { return name()+"_"+patternString[index/histSize][0]+((histSize > 1)?("_H"+histSize):""); } }