package edu.fudan.nlp.pipe.templet;
import edu.fudan.ml.types.Instance;
import edu.fudan.ml.types.alphabet.IFeatureAlphabet;
import edu.fudan.ml.types.alphabet.LabelAlphabet;
import edu.fudan.ml.types.sv.BinarySparseVector;
import edu.fudan.nlp.pipe.Pipe;
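/**
 * Converts a character sequence into a feature sequence. Because all
 * features are binary (0/1), only the feature index numbers are stored.
 *
 * @author xpqiu
 *
 */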
public class Sequence2SVWithTemplate extends Pipe{
    private static final long serialVersionUID = -4782249062779216625L;

    /** Template group; every template in it is queried once per instance. */
    TempletGroup templets;

    /** Feature alphabet passed to each template's {@code generateAt} call. */
    public IFeatureAlphabet features;

    /** Label alphabet; stored by the constructor but not read anywhere in this class. */
    LabelAlphabet labels;

    /**
     * Creates the pipe and stores the supplied collaborators as-is.
     *
     * @param templets group of templates used to generate feature indices
     * @param features feature alphabet handed to the templates
     * @param labels   label alphabet kept alongside the features
     */
    public Sequence2SVWithTemplate(TempletGroup templets,
            IFeatureAlphabet features, LabelAlphabet labels) {
        this.labels = labels;
        this.features = features;
        this.templets = templets;
    }

    /**
     * Replaces the instance's data with a {@link BinarySparseVector} holding
     * the indices produced by every template in the group.
     *
     * @param instance instance whose data is read and then overwritten
     * @throws ClassCastException if the instance's data is not a {@code String[][]}
     * @throws Exception declared by the signature; anything raised by the
     *         templates propagates unchanged
     */
    public void addThruPipe(Instance instance) throws Exception {
        // The cast result is not used afterwards; the cast is retained so that
        // data of any other type still fails here with a ClassCastException.
        String[][] typedData = (String[][]) instance.getData();

        BinarySparseVector vector = new BinarySparseVector();
        int templetIndex = 0;
        while (templetIndex < this.templets.size()) {
            // The third argument is the constant 1; its meaning is defined by
            // the template implementation, which is not visible here.
            vector.put(this.templets.get(templetIndex).generateAt(instance,
                    this.features, 1));
            templetIndex++;
        }
        instance.setData(vector);
    }
}