/** WordFeatures.java * * @author Sunita Sarawagi * @since 1.1 * @version 1.3 */ package iitb.Model; import iitb.CRF.DataSequence; public class WordFeatures extends FeatureTypes { /** * */ private static final long serialVersionUID = -202366673127245027L; protected int stateId; int statePos; Object token; int tokenId; protected WordsInTrain dict; int _numWordStatePairs; public static int RARE_THRESHOLD=0; protected int frequency_cutOff; boolean assignStateIds=true; int numStates; public WordFeatures(FeatureGenImpl m, WordsInTrain d) { super(m); dict = d; frequency_cutOff=RARE_THRESHOLD; numStates = m.numStates(); } public WordFeatures(FeatureGenImpl m, WordsInTrain d, int freqCuttOff) { this(m,d); if (freqCuttOff >= 0) frequency_cutOff=freqCuttOff; } public WordFeatures(FeatureGenImpl m, WordsInTrain d, int freqCuttOff, boolean assignStateIds) { this(m,d); if (freqCuttOff >= 0) frequency_cutOff=freqCuttOff; this.assignStateIds=assignStateIds; if (assignStateIds==false) numStates=1; } private void nextStateId() { stateId = dict.nextStateWithWord(token, stateId); statePos++; } public boolean startScanFeaturesAt(DataSequence data, int prevPos, int pos) { stateId = -1; if (dict.count(data.x(pos)) > frequency_cutOff) { tokenId = dict.getIndex(data.x(pos)); token = data.x(pos); if (assignStateIds) { statePos = -1; nextStateId(); } else { stateId=0; } return true; } return false; } public boolean hasNext() { return (stateId != -1); } public void next(FeatureImpl f) { if (assignStateIds) { if (featureCollectMode()) setFeatureIdentifier(tokenId*numStates+stateId,stateId,name()+dict.getKey(token),f); else setFeatureIdentifier(tokenId*numStates+stateId,stateId,null,f); f.yend = stateId; nextStateId(); } else { f.yend = 0; if (featureCollectMode()) f.strId.name = name()+dict.getKey(token); f.strId.id = tokenId; stateId=-1; } f.ystart = -1; f.val = 1; } /* (non-Javadoc) * @see iitb.Model.FeatureTypes#maxFeatureId() */ public int maxFeatureId() { return dict.dictionaryLength()*numStates; } public String name() { return "W_"; } };