package iitb.Model;
import iitb.CRF.DataSequence;
/**
 * Feature type that scores the word at the current position against the
 * states it was seen with in training.
 *
 * For every state returned by {@code dict.nextStateWithWord} this class emits
 * {@code numScoreType} features in a row, one per score type, before moving
 * on to the following state. No features are emitted for a word whose
 * training count does not exceed {@code WordFeatures.RARE_THRESHOLD}.
 *
 * @author Sunita Sarawagi
 * @since 1.2
 * @version 1.3
 */
public class WordScoreFeatures extends FeatureTypes {
    /** Serialization version identifier. */
    private static final long serialVersionUID = 5855042861074710317L;

    /** State of the feature emitted by the next call to {@link #next}; -1 before a scan finds one. */
    int stateId;
    /** Index of the current word as returned by {@code dict.getIndex}. */
    int wordPos;
    /** Value of {@code dict.count} for the current word. */
    int wordCnt;
    /** Score variant the next emitted feature will carry; cycles from 0 up to numScoreType-1. */
    int scoreType;
    /** Number of score variants emitted per state. */
    int numScoreType = 2;
    /** Training-word statistics used for all counts and state lookups. */
    WordsInTrain dict;

    /**
     * @param m feature generator handed to the superclass constructor
     * @param d word statistics used for counts and state iteration
     */
    public WordScoreFeatures(FeatureGenImpl m, WordsInTrain d) {
        super(m);
        dict = d;
    }

    /** Moves stateId to whatever {@code dict.nextStateWithWord} reports after the current one. */
    private void nextStateId() {
        stateId = dict.nextStateWithWord(wordPos, stateId);
    }

    /**
     * Prepares iteration over the features of the token at {@code pos}.
     *
     * @param data    sequence being scanned
     * @param prevPos not used by this feature type
     * @param pos     position of the token to score
     * @return true when the token's count exceeds
     *         {@code WordFeatures.RARE_THRESHOLD} and a scan was started,
     *         false otherwise
     */
    public boolean startScanFeaturesAt(DataSequence data, int prevPos, int pos) {
        scoreType = 0;
        stateId = -1;
        wordCnt = dict.count(data.x(pos));
        if (wordCnt <= WordFeatures.RARE_THRESHOLD) {
            // Rare word: stateId stays at -1, so hasNext() reports false.
            return false;
        }
        wordPos = dict.getIndex(data.x(pos));
        // Step from the -1 sentinel to the first state recorded for this word.
        nextStateId();
        return true;
    }

    /**
     * @return true while stateId lies in the range [0, model.numStates())
     */
    public boolean hasNext() {
        boolean belowStateCount = stateId < model.numStates();
        return belowStateCount && stateId >= 0;
    }

    /**
     * Fills {@code f} with the feature for the current (stateId, scoreType)
     * pair and then advances to the following pair.
     *
     * @param f feature object that receives the value, identifier and the
     *          yend/ystart labels
     */
    public void next(FeatureImpl f) {
        f.val = currentScore();

        int featureId = stateId * numScoreType + scoreType;
        if (featureCollectMode()) {
            // A readable name is only built while features are being collected.
            setFeatureIdentifier(featureId, stateId, "WS_" + scoreType + "_" + stateId, f);
        } else {
            setFeatureIdentifier(featureId, stateId, null, f);
        }
        f.yend = stateId;
        f.ystart = -1;

        advance();
    }

    /**
     * Computes the feature value for the current word, state and score type.
     * With numScoreType left at its initial value of 2, this class's own
     * stepping only ever selects score types 0 and 1.
     */
    private float currentScore() {
        double pairCount = dict.count(wordPos, stateId);
        switch (scoreType) {
        case 0:
            // Pr(stateId | w), passed through log(1 + x)
            return (float) Math.log(1 + pairCount / (double) wordCnt);
        case 1:
            // Pr(stateId | w), smoothed by adding one per state
            return (float) ((1 + pairCount) / (double) (wordCnt + model.numStates()));
        case 2:
            // Pr(w | stateId), passed through log(1 + x)
            return (float) Math.log(1 + pairCount / dict.count(stateId));
        default:
            // Doubly log-damped raw count
            return (float) (1 + Math.log(1 + Math.log(pairCount)));
        }
    }

    /** Steps to the next score type, wrapping to 0 and the next state after the last one. */
    private void advance() {
        scoreType++;
        if (scoreType >= numScoreType) {
            scoreType = 0;
            nextStateId();
        }
    }
}