package iitb.Model; import iitb.CRF.DataSequence; import iitb.CRF.FeatureGeneratorNested; import iitb.CRF.SegmentDataSequence; /** * * @author Sunita Sarawagi * @since 1.2 * @version 1.3 */ public class NestedFeatureGenImpl extends FeatureGenImpl implements FeatureGeneratorNested { /** * */ private static final long serialVersionUID = 6957722060399675011L; /* protected boolean holdsInData(DataSequence seq, FeatureImpl f) { return (cposEnd == ((SegmentDataSequence)seq).getSegmentEnd(cposStart)) && ((cposStart == 0) || (cposStart-1 == ((SegmentDataSequence)seq).getSegmentEnd(cposStart-1))) && super.holdsInData(seq, f); } */ int maxMem[]; int maxMemOverall=1; public int addTrainRecord(DataSequence data) { int numF = 0; if (addOnlyTrainFeatures) { SegmentDataSequence seq = (SegmentDataSequence)data; int segEnd; for (int l = 0; l < seq.length(); l = segEnd+1) { segEnd = seq.getSegmentEnd(l); for (startScanFeaturesAt(seq,l-1,segEnd); hasNext(); next(),numF++); } } else { for (int l = 0; l < data.length(); l++) { for (int m = 1; (m <= maxMemOverall) && (l-m >= -1); m++) { for (startScanFeaturesAt(data,l-m,l); hasNext(); ) { next(); numF++; } } } } return numF; } /** * @param featureType * @param seq */ protected void trainFeatureType(FeatureTypes featureType, DataSequence data) { SegmentDataSequence seq = (SegmentDataSequence)data; int segEnd; for (int l = 0; l < seq.length(); l = segEnd+1) { segEnd = seq.getSegmentEnd(l); featureType.train(seq,l,segEnd); } } public NestedFeatureGenImpl(int numLabels,java.util.Properties options, boolean addFeatureNow) throws Exception { super("naive",numLabels,false); if (options.getProperty("MaxMemory") != null) { maxMemOverall = Integer.parseInt(options.getProperty("MaxMemory")); } if (addFeatureNow) { addFeature(new EdgeFeatures(this)); addFeature(new StartFeatures(this)); addFeature(new EndFeatures(this)); dict = new WordsInTrain(); addFeature(new FeatureTypesMulti(new UnknownFeature(this,dict))); addFeature(new FeatureTypesMulti(new WordFeatures(this, dict))); addFeature(new FeatureTypesEachLabel(this, new FeatureTypesSegmentLength(this))); WindowFeatures.Window windows[] = new WindowFeatures.Window[] { new WindowFeatures.Window(0,true,0,true,"start"), new WindowFeatures.Window(0,false,0,false,"end"), new WindowFeatures.Window(1,true,-1,false,"continue"), new WindowFeatures.Window(-1,true,-1,true,"left-1"), new WindowFeatures.Window(1,false,1,false,"right+1"), }; /* addFeature(new FeatureTypesEachLabel(model, new WindowFeatures(windows, new FeatureTypesConcat(model, new ConcatRegexFeatures(model,0,0), maxMemOverall)))); */ addFeature(new FeatureTypesEachLabel(this, new WindowFeatures(windows, new FeatureTypesMulti( new ConcatRegexFeatures(this,0,0))))); } } public NestedFeatureGenImpl(int numLabels,java.util.Properties options) throws Exception { this(numLabels,options,true); } /** * @param modelSpecs * @param numLabels * @param addFeatureNow */ public NestedFeatureGenImpl(String modelSpecs, int numLabels, boolean addFeatureNow) throws Exception { super(modelSpecs,numLabels,addFeatureNow); } public void startScanFeaturesAt(DataSequence data, int pos) { startScanFeaturesAt(data,pos-1,pos); } public int maxMemory() { return maxMemOverall; } public void setMaxMemory(int i) { maxMemOverall = i; } // we assume each label is associated with a maximum length for which it // is willing to output a grouped probability. That is, different y-s // have different value of maxMem. public void startScanFeaturesAt(DataSequence d, int prevPos, int pos) { data = d; cposEnd = pos; cposStart = prevPos+1; for (int i = 0; i < features.size(); i++) { getFeature(i).startScanFeaturesAt(data,prevPos,cposEnd); } currentFeatureType = null; featureIter = features.iterator(); advance(); // if no word features activated, do not send the edge and // start/end features. /* if ((currentFeatureType != getFeature(0)) && (cpos-prevPos > 1)) { featureToReturn.id = -1; } */ } // TODO do not send any features where the maxMem property of the y // is violated. };