/*
* Created on Apr 13, 2005
*
*/
package iitb.BSegment;
import iitb.BSegmentCRF.BFeature;
import iitb.BSegmentCRF.BFeatureGenerator;
import iitb.CRF.DataIter;
import iitb.CRF.DataSequence;
import iitb.CRF.Segmentation;
import iitb.Model.ClassPriorFeature;
import iitb.Model.ConcatRegexFeatures;
import iitb.Model.EdgeFeatures;
import iitb.Model.EndFeatures;
import iitb.Model.FeatureGenImpl;
import iitb.Model.FeatureImpl;
import iitb.Model.FeatureTypes;
import iitb.Model.Model;
import iitb.Model.StartFeatures;
import iitb.Model.WindowFeatures;
import iitb.Model.WordFeatures;
import iitb.Model.WordsInTrain;
/**
 * A {@link FeatureGenImpl} that additionally implements {@link BFeatureGenerator}.
 * <p>
 * The generator works in one of two modes, selected by which scan method was
 * called last:
 * <ul>
 * <li>{@link #startScanFeaturesAt(DataSequence)} switches to "bfeature" mode, in
 * which every registered feature type is treated as a {@link BFeatureTypes} and
 * features are handed out as {@link BFeature} objects via {@link #nextFeature()};</li>
 * <li>{@link #startScanFeaturesAt(DataSequence, int, int)} switches back to the
 * plain superclass behaviour.</li>
 * </ul>
 * The class also tracks the largest boundary gap reported by the feature types
 * added through the {@code addFeature(BFeatureTypes...)} overloads.
 *
 * @author sunita
 * @since 1.2
 * @version 1.3
 */
public class BFeatureGenImpl extends FeatureGenImpl implements
        BFeatureGenerator {
    /**
     * Comment for <code>serialVersionUID</code>
     */
    private static final long serialVersionUID = 1L;

    /** True while scanning in bfeature mode, i.e. after {@link #startScanFeaturesAt(DataSequence)}. */
    boolean bfeatureMode = false;

    /**
     * <code>bfeatureToReturn</code> receives the next feature from the current
     * feature type; <code>bfeature</code> is the object handed out by
     * {@link #nextFeature()}.
     */
    BFeatureImpl bfeature = new BFeatureImpl(), bfeatureToReturn = new BFeatureImpl();

    /**
     * Value reported by {@link #maxBoundaryGap()}.
     * NOTE(review): this initializer runs after the superclass constructor. If a
     * superclass constructor invokes addFeatures() (not visible in this file),
     * any value computed there would be overwritten with 1 -- confirm.
     */
    int maxGap = 1;

    /**
     * Forwards both arguments unchanged to the two-argument superclass constructor.
     *
     * @param arg0 passed through to the superclass
     * @param arg1 passed through to the superclass
     * @throws java.lang.Exception if the superclass constructor fails
     */
    public BFeatureGenImpl(String arg0, int arg1) throws Exception {
        super(arg0, arg1);
    }

    /**
     * Constructs the generator with <code>false</code> as the third superclass
     * argument, reads the optional <code>MaxMemory</code> property into
     * <code>maxGap</code>, and then registers the features itself. The property
     * is read before {@link #addFeatures()} because that method passes
     * <code>maxGap</code> to the segment-length feature it creates.
     *
     * @param arg0 passed through to the superclass
     * @param arg1 passed through to the superclass
     * @param options configuration; may be <code>null</code> or lack
     *        <code>MaxMemory</code>, in which case <code>maxGap</code> keeps its default
     * @throws java.lang.Exception if the superclass constructor fails, or
     *         (as a NumberFormatException) if <code>MaxMemory</code> is not an integer
     */
    public BFeatureGenImpl(String arg0, int arg1, java.util.Properties options) throws Exception {
        super(arg0, arg1, false);
        // Look the property up once; a null Properties object means "no options".
        String maxMemory = (options == null) ? null : options.getProperty("MaxMemory");
        if (maxMemory != null) {
            maxGap = Integer.parseInt(maxMemory);
        }
        addFeatures();
    }

    /**
     * Forwards all arguments unchanged to the matching superclass constructor.
     *
     * @param arg0 passed through to the superclass
     * @param arg1 passed through to the superclass
     * @param arg2 passed through to the superclass
     * @throws java.lang.Exception if the superclass constructor fails
     */
    public BFeatureGenImpl(String arg0, int arg1, boolean arg2)
            throws Exception {
        super(arg0, arg1, arg2);
    }

    /**
     * Forwards all arguments unchanged to the matching superclass constructor.
     *
     * @param arg0 passed through to the superclass
     * @param arg1 passed through to the superclass
     * @param arg2 passed through to the superclass
     * @throws java.lang.Exception if the superclass constructor fails
     */
    public BFeatureGenImpl(Model arg0, int arg1, boolean arg2) throws Exception {
        super(arg0, arg1, arg2);
    }

    /**
     * Registers a feature type with the superclass and raises <code>maxGap</code>
     * to the type's boundary gap if that is larger.
     *
     * @param fType the feature type to add
     * @param retainThis forwarded unchanged to the superclass <code>addFeature</code>
     */
    public void addFeature(BFeatureTypes fType, boolean retainThis) {
        super.addFeature(fType, retainThis);
        maxGap = Math.max(maxGap, fType.maxBoundaryGap());
    }

    /**
     * Registers a feature type with the superclass and raises <code>maxGap</code>
     * to the type's boundary gap if that is larger.
     *
     * @param fType the feature type to add
     */
    public void addFeature(BFeatureTypes fType) {
        super.addFeature(fType);
        maxGap = Math.max(maxGap, fType.maxBoundaryGap());
    }

    /* (non-Javadoc)
     * @see iitb.BSegmentCRF.BFeatureGenerator#maxBoundaryGap()
     */
    public int maxBoundaryGap() {
        return maxGap;
    }

    /**
     * Registers the default feature set: start, end, class-prior and edge
     * features, window features over words and over concatenated regex
     * features, and a segment-length feature built with the current
     * <code>maxGap</code>. Also replaces <code>dict</code> with a fresh
     * {@link WordsInTrain}.
     */
    protected void addFeatures() {
        addFeature(new BFeatureEachPosition(
                new BFeatureEachPosition.TypePosEndOpen(new StartFeatures(this))));
        addFeature(new BFeatureEachPosition(
                new BFeatureEachPosition.TypePosStartOpen(new EndFeatures(this))));
        addFeature(new BFeatureEachPosition(
                new BFeatureEachPosition.TypePosEndOpen(new ClassPriorFeature(this))), true);
        addFeature(new BFeatureEachPosition(
                new BFeatureEachPosition.TypePosEndOpen(new EdgeFeatures(this))), true);

        // Window specifications shared by the word-based and regex-based features below.
        WindowFeatures.Window windows[] = new WindowFeatures.Window[] {
                new WindowFeatures.Window(0, true, 0, true, "start", 2, Integer.MAX_VALUE),
                new WindowFeatures.Window(0, false, 0, false, "end", 2, Integer.MAX_VALUE),
                new WindowFeatures.Window(1, true, -1, false, "continue", 3, Integer.MAX_VALUE),
                new WindowFeatures.Window(0, true, 0, false, "Unique", 1, 1),
                new WindowFeatures.Window(-1, true, -1, true, "left-1"),
                new WindowFeatures.Window(1, false, 1, false, "right+1"),
        };

        dict = new WordsInTrain();
        addFeature(new BFeatureEachPosition(
                new BWindowFeatureMulti(windows, new WordFeatures(this, dict))));

        FeatureTypes features = new ConcatRegexFeatures(this, 0, 0);
        addFeature(new BFeatureEachPosition(new BFeatureTypesEachLabel(this,
                new BWindowFeatureMulti(windows, features))));
        addFeature(new BFeatureEachPosition(new BFeatureTypesEachLabel(this,
                new BSegmentLength(this, maxGap))));
    }

    /**
     * Starts a bfeature-mode scan over a whole sequence: switches
     * <code>bfeatureMode</code> on, asks every registered feature type (from the
     * last index down to 0) to start scanning <code>d</code>, and then lets the
     * superclass initialise its own scan state.
     *
     * @param d the sequence to scan
     * @see iitb.BSegmentCRF.BFeatureGenerator
     */
    public void startScanFeaturesAt(DataSequence d) {
        bfeatureMode = true;
        for (int i = numFeatureTypes() - 1; i >= 0; i--) {
            // Every feature type is expected to be a BFeatureTypes in this mode;
            // anything else fails here with a ClassCastException.
            ((BFeatureTypes) getFeature(i)).startScanFeaturesAt(d);
        }
        super.initScanFeaturesAt(d);
    }

    /**
     * Fetches the next feature from the current feature type. Outside bfeature
     * mode this defers to the superclass; in bfeature mode the feature is read
     * into <code>bfeatureToReturn</code> and then copied into
     * <code>featureToReturn</code>.
     *
     * @param featureToReturn destination for the fetched feature
     */
    protected void copyNextFeature(FeatureImpl featureToReturn) {
        if (!bfeatureMode) {
            super.copyNextFeature(featureToReturn);
            return;
        }
        ((BFeatureTypes) currentFeatureType).next(bfeatureToReturn);
        featureToReturn.copy(bfeatureToReturn);
    }

    /**
     * Accepts every feature: this override always returns <code>true</code>
     * and ignores all of its arguments.
     */
    @Override
    public boolean featureValid(DataSequence data, int cposStart, int cposEnd, FeatureImpl featureToReturn, Model model, boolean cacheEdgeFeatures) {
        return true;
    }

    /**
     * Returns the current feature and advances the scan. The returned object is
     * the shared <code>bfeature</code> instance, so it is overwritten by the
     * next call.
     *
     * @return the shared feature object, filled from <code>bfeatureToReturn</code>
     *         and then from <code>featureToReturn</code>
     * @see iitb.BSegmentCRF.BFeatureGenerator
     */
    public BFeature nextFeature() {
        bfeature.copy(bfeatureToReturn);
        bfeature.copy(featureToReturn);
        assert (featureToReturn.identifier().equals(bfeatureToReturn.identifier()));
        advance();
        return bfeature;
    }

    /**
     * Scans all features of one sequence in bfeature mode and counts them.
     *
     * @param seq the sequence to scan
     * @return the number of features produced by the scan
     */
    public int addTrainRecord(DataSequence seq) {
        int numF = 0;
        startScanFeaturesAt(seq);
        while (hasNext()) {
            // Only the side effects of the scan matter here; the feature itself is discarded.
            nextFeature();
            numF++;
        }
        return numF;
    }

    /**
     * Runs the superclass training pass with <code>false</code> as its third
     * argument, then scans every training sequence once in bfeature mode and
     * finally freezes the feature set.
     *
     * @param trainData the training sequences
     * @param cachedLabels forwarded unchanged to the superclass <code>train</code>
     * @return whatever the superclass <code>train</code> returned
     * @throws Exception if the superclass training pass fails
     */
    public boolean train(DataIter trainData, boolean cachedLabels) throws Exception {
        boolean retVal = super.train(trainData, cachedLabels, false);
        for (trainData.startScan(); trainData.hasNext();) {
            DataSequence seq = trainData.next();
            startScanFeaturesAt(seq);
            while (hasNext()) {
                nextFeature();
            }
        }
        freezeFeatures();
        return retVal;
    }

    /**
     * Decides whether the feature currently held in <code>bfeatureToReturn</code>
     * agrees with the segmentation of <code>seq</code>. The <code>f</code>
     * argument is not consulted.
     * <p>
     * The feature is kept only if its start and end fall in the same segment,
     * its label equals that segment's label, each closed boundary coincides
     * with the segment's boundary, and a non-negative previous label matches
     * the preceding segment (a feature in the first segment must have a
     * negative previous label).
     *
     * @param seq the sequence being scanned; must be a {@link Segmentation}
     * @param f unused
     * @return <code>true</code> if the feature agrees with the segmentation
     */
    protected boolean retainFeature(DataSequence seq, FeatureImpl f) {
        Segmentation data = (Segmentation) seq;
        BFeature feature = bfeatureToReturn;
        int segNum = data.getSegmentId(feature.start());
        if (segNum != data.getSegmentId(feature.end()))
            return false;
        if (data.segmentLabel(segNum) != feature.y())
            return false;
        if (!feature.startOpen() && (data.segmentStart(segNum) != feature.start()))
            return false;
        if (!feature.endOpen() && (data.segmentEnd(segNum) != feature.end()))
            return false;
        if ((segNum == 0) && (feature.yprev() >= 0))
            return false;
        if ((segNum > 0) && (feature.yprev() >= 0) && (data.segmentLabel(segNum - 1) != feature.yprev()))
            return false;
        return true;
    }

    /**
     * Starts a scan through the superclass and leaves bfeature mode, so that
     * {@link #copyNextFeature(FeatureImpl)} defers to the superclass.
     *
     * @param d the sequence to scan
     * @param prev forwarded unchanged to the superclass
     * @param p forwarded unchanged to the superclass
     */
    public void startScanFeaturesAt(DataSequence d, int prev, int p) {
        bfeatureMode = false;
        super.startScanFeaturesAt(d, prev, p);
    }
}