package edu.stanford.nlp.international.arabic.process;
import java.util.Collection;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.sequences.SeqClassifierFlags;
import edu.stanford.nlp.util.PaddedList;
/**
 * Feature factory for the IOB clitic segmentation model described by
 * Green and DeNero (2012).
 * <p>
 * On top of the features produced by {@link ArabicSegmenterFeatureFactory},
 * this variant adds the characters found three positions before and three
 * positions after the current location, giving a 7 character window
 * instead of a 5 character window.
 *
 * @author Spence Green
 *
 * @param <IN> the element type of the sequence being featurized; must be
 *             {@link CoreLabel} or a subclass of it
 */
public class BigWindowArabicSegmenterFeatureFactory<IN extends CoreLabel> extends ArabicSegmenterFeatureFactory<IN> {

  private static final long serialVersionUID = 6864940988019110930L;

  /**
   * Initializes this factory by delegating to the superclass.
   *
   * @param flags the classifier flags handed on to the superclass
   */
  public void init(SeqClassifierFlags flags) {
    super.init(flags);
  }

  /**
   * Builds the superclass features for position {@code loc} and appends two
   * more: the character at {@code loc + 3} suffixed with {@code "-n3"} and
   * the character at {@code loc - 3} suffixed with {@code "-p3"}.
   *
   * @param cInfo the sequence the features are extracted from
   * @param loc the position in {@code cInfo} being featurized
   * @return the collection returned by the superclass, with the two extra
   *         features added to it
   */
  protected Collection<String> featuresC(PaddedList<IN> cInfo, int loc) {
    final Collection<String> windowFeatures = super.featuresC(cInfo, loc);

    // NOTE(review): no bounds check on loc +/- 3 here; presumably PaddedList
    // returns a padding element for out-of-range positions -- confirm.
    final CoreLabel thirdAhead = cInfo.get(loc + 3);
    final CoreLabel thirdBehind = cInfo.get(loc - 3);

    final String aheadChar = thirdAhead.get(CoreAnnotations.CharAnnotation.class);
    final String behindChar = thirdBehind.get(CoreAnnotations.CharAnnotation.class);

    // Widen the character context to 7 characters (the superclass covers 5).
    windowFeatures.add(aheadChar + "-n3");
    windowFeatures.add(behindChar + "-p3");

    return windowFeatures;
  }
}