package edu.stanford.nlp.parser.shiftreduce;
import java.util.List;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.tagger.maxent.Distsim;
/**
* Featurizes words based only on their distributional similarity classes.
* Borrows the Distsim class from the tagger.
*
* @author John Bauer
*/
public class DistsimFeatureFactory extends FeatureFactory {
  private static final long serialVersionUID = -396152777907151063L;

  /** Lexicon queried for the mapping of each head word. */
  private final Distsim distsim;

  /**
   * Always fails: this factory cannot be built without a cluster file.
   *
   * @throws UnsupportedOperationException unconditionally
   */
  DistsimFeatureFactory() {
    throw new UnsupportedOperationException("Illegal construction of DistsimFeatureFactory. It must be created with a path to a cluster file");
  }

  /**
   * Builds a factory whose lexicon is obtained from {@link Distsim#initLexicon}.
   *
   * @param path path to the cluster file handed to {@code Distsim.initLexicon}
   */
  DistsimFeatureFactory(String path) {
    this.distsim = Distsim.initLexicon(path);
  }

  /**
   * Appends two features describing {@code label} to {@code features}:
   * {@code <featureName>dis-<cluster>} and
   * {@code <featureName>disT-<cluster>-<tag>}, where the cluster is the
   * lexicon mapping of the label's head word and the tag is its head tag.
   * A {@code null} label contributes nothing.
   *
   * @param features    list that receives the new features
   * @param label       label to describe; may be {@code null}
   * @param featureName prefix identifying which position the label came from
   */
  public void addDistsimFeatures(List<String> features, CoreLabel label, String featureName) {
    if (label != null) {
      final String headWord = getFeatureFromCoreLabel(label, FeatureComponent.HEADWORD);
      final String headTag = getFeatureFromCoreLabel(label, FeatureComponent.HEADTAG);
      final String distsimClass = distsim.getMapping(headWord);

      final String clusterOnly = featureName + "dis-" + distsimClass;
      features.add(clusterOnly);

      final String clusterWithTag = featureName + "disT-" + distsimClass + "-" + headTag;
      features.add(clusterWithTag);
    }
  }

  /**
   * Adds distsim features for the top two stack items and the current queue
   * item, in that order, and returns the same list that was passed in.
   *
   * @param state    parser state supplying the stack, sentence and token position
   * @param features list that receives the new features
   * @return {@code features}, after the additions
   */
  @Override
  public List<String> featurize(State state, List<String> features) {
    // All three labels are looked up before any feature is appended.
    final CoreLabel[] labels = {
      getStackLabel(state.stack, 0),                              // top of the stack
      getStackLabel(state.stack, 1),                              // item just below the top
      getQueueLabel(state.sentence, state.tokenPosition, 0),      // current queue position
    };
    final String[] prefixes = {"S0", "S1", "Q0"};

    for (int i = 0; i < labels.length; i++) {
      addDistsimFeatures(features, labels[i], prefixes[i]);
    }
    return features;
  }
}