/**
* Created on Nov 9, 2008
* @author Sunita Sarawagi
* @since 1.2
* @version 1.3
*
* Feature that can be used to decide if an edge should be added between two words.
*/
package iitb.Model;
import iitb.CRF.DataSequence;
/**
 * Feature type built on {@link RegexCountFeatures} whose feature ids combine the
 * superclass pattern index with the gap between the two positions being scanned.
 *
 * <p>For a scan at {@code (prevPos, pos)} the superclass is asked to scan the window
 * running from {@code max(pos - windowSize - histSize, 0)} to
 * {@code min(pos + windowSize, data.length() - 1)}. Each emitted feature value is the
 * superclass's {@code patternOccurence[index]} entry divided by the length of that
 * window, and the feature id is {@code index * histSize + (pos - prevPos - 1)}.
 */
public class EdgeSelector extends RegexCountFeatures {
    /** Serialization version identifier. */
    private static final long serialVersionUID = 3669485110606844964L;

    /** Number of positions added on each side of the scanned span. */
    int windowSize=0;
    /** Length of the window handed to the superclass by the latest scan. */
    int segLen;
    /** Largest permitted value of {@code pos - prevPos}; also the id multiplier. */
    int histSize;
    /** Value of {@code pos - prevPos} for the latest scan. */
    int currentHistSize;
    /** Minimum history value supplied at construction; not read inside this class. */
    int minHist;

    /**
     * Creates an edge selector.
     *
     * @param fgen        feature generator forwarded to the superclass
     * @param width       window half-width; the superclass receives {@code 2*width+2}
     * @param patternFile pattern file forwarded to the superclass (the other
     *                    constructors pass {@code null} or a caller-supplied name)
     * @param histSize    maximum gap between {@code prevPos} and {@code pos}; asserted
     *                    to be at least 1
     * @param minHist     minimum history value, stored in {@link #minHist}
     */
    public EdgeSelector(FeatureGenImpl fgen, int width, String patternFile, int histSize, int minHist) {
        // NOTE(review): the scanned window can span 2*windowSize + histSize + 1 positions,
        // which exceeds 2*width+2 when histSize > 1 -- confirm what the superclass expects here.
        super(fgen,2*width+2,patternFile);
        windowSize=width;
        this.histSize = histSize;
        // Previously this argument was silently discarded, leaving the field at 0.
        this.minHist = minHist;
        assert(histSize >= 1);
    }

    /** Equivalent to {@code EdgeSelector(fgen, 0, patternFile, 1, 0)}. */
    public EdgeSelector(FeatureGenImpl fgen,String patternFile) {
        this(fgen,0,patternFile,1,0);
    }

    /** Equivalent to {@code EdgeSelector(fgen, 0, patternFile, histSize, 0)}. */
    public EdgeSelector(FeatureGenImpl fgen,String patternFile, int histSize) {
        this(fgen,0,patternFile, histSize,0);
    }

    /** Equivalent to {@code EdgeSelector(fgen, 0, null, 1, 0)}. */
    public EdgeSelector(FeatureGenImpl fgen) {
        this(fgen,0,null,1,0);
    }

    /**
     * @return {@code true} only when the current window is longer than one position
     *         and the superclass still has a feature to emit
     */
    @Override
    public boolean hasNext() {
        return (segLen > 1) && super.hasNext();
    }

    /**
     * Fills {@code f} with the current feature and then advances the superclass cursor.
     *
     * @param f feature object to populate
     */
    @Override
    public void next(FeatureImpl f) {
        // Normalise the count by window length; the cast makes the division floating point.
        f.val = (float)patternOccurence[index]/segLen;
        assert(f.val>0);
        // One id slot per (pattern index, gap) pair; gaps are 1-based, slots 0-based.
        f.strId.id = index*histSize+(currentHistSize-1);
        f.id = f.strId.id;
        f.ystart = -1;
        if(featureCollectMode()){
            f.strId.name = featureName(f.id);
        }
        advance();
    }

    /**
     * Records the gap {@code pos - prevPos}, computes the widened window and delegates
     * the scan of that window to the superclass.
     *
     * @param data    sequence being scanned
     * @param prevPos previous position; {@code pos - prevPos} is asserted to lie in
     *                {@code [1, histSize]}
     * @param pos     current position
     * @return the result of the superclass scan over the widened window
     */
    @Override
    public boolean startScanFeaturesAt(DataSequence data, int prevPos, int pos) {
        currentHistSize = pos-prevPos;
        assert(currentHistSize >=1);
        assert(currentHistSize <= histSize);
        // Window bounds, clamped to the valid index range of the sequence.
        int windowStart = Math.max(pos-windowSize-histSize,0);
        int windowEnd = Math.min(pos+windowSize,data.length()-1);
        segLen = windowEnd-windowStart+1;
        // The superclass is given windowStart-1 as its "previous" position.
        return super.startScanFeaturesAt(data, windowStart-1, windowEnd);
    }

    /** @return the id already stored in {@code f} */
    @Override
    public int labelIndependentId(FeatureImpl f) {
        return f.id;
    }

    /** @return {@code patternString.length * histSize} */
    @Override
    public int maxFeatureId() {
        return patternString.length*histSize;
    }

    /** @return the fixed name {@code "EdgeSel"} */
    @Override
    public String name() {
        return "EdgeSel";
    }

    /**
     * Builds the display name for a feature id produced by {@link #next(FeatureImpl)}.
     *
     * @param index feature id, i.e. pattern index times {@code histSize} plus gap slot
     * @return {@code name()}, an underscore and the first entry of the pattern's row in
     *         {@code patternString}, followed by {@code "_H" + slot} when
     *         {@code histSize > 1}
     */
    public String featureName(int index) {
        return name()+"_"+patternString[index/histSize][0]+((histSize > 1)?("_H"+(index % histSize)):"");
    }

    /** @return the history size given at construction */
    public int historySize() {
        return histSize;
    }
}