package edu.fudan.nlp.pipe.seq;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import edu.fudan.ml.types.Instance;
import edu.fudan.ml.types.alphabet.IFeatureAlphabet;
import edu.fudan.ml.types.alphabet.LabelAlphabet;
import edu.fudan.nlp.pipe.Pipe;
import edu.fudan.nlp.pipe.seq.templet.TempletGroup;
import edu.fudan.util.exception.UnsupportedDataTypeException;
/**
 * Converts a character sequence into a feature sequence.
 * Because all features are binary (0/1) features, only their index numbers
 * are stored here.
 *
 * @author xpqiu
 *
 */
public class Sequence2FeatureSequence extends Pipe{
    private static final long serialVersionUID = -6481304918657094682L;
    /** Templates evaluated at every position of the sequence. */
    TempletGroup templets;
    /** Feature alphabet handed to each template when a value is generated. */
    public IFeatureAlphabet features;
    /** Label alphabet; this class only passes its size to the templates. */
    LabelAlphabet labels;

    /**
     * Creates the pipe.
     *
     * @param templets templates applied at each sequence position
     * @param features feature alphabet passed to every template
     * @param labels   label alphabet whose size is passed to every template
     */
    public Sequence2FeatureSequence(TempletGroup templets,
            IFeatureAlphabet features, LabelAlphabet labels) {
        this.templets = templets;
        this.features = features;
        this.labels = labels;
    }

    /**
     * Replaces the instance data with an {@code int[len][templets.size()]}
     * matrix holding, for each position and each template, the value returned
     * by that template's {@code generateAt}. The normalized {@code String[][]}
     * form of the original data is stored as the instance source.
     *
     * <p>Accepted data types are {@code String[]} (wrapped as a single row),
     * {@code String[][]}, and a {@code List} whose elements are {@code List}s
     * of strings. The sequence length {@code len} is the length of the first
     * row; when there are no rows at all it is 0, so an empty feature matrix
     * is produced instead of failing on the missing first row.
     *
     * @param instance instance whose data is converted in place
     * @throws UnsupportedDataTypeException if the data is {@code null} or of
     *         any other type
     * @throws Exception declared for whatever the templates may raise
     */
    public void addThruPipe(Instance instance) throws Exception {
        Object sdata = instance.getData();
        String[][] data;
        if (sdata instanceof String[]) {
            data = new String[][] { (String[]) sdata };
        } else if (sdata instanceof String[][]) {
            data = (String[][]) sdata;
        } else if (sdata instanceof List) {
            List<?> rows = (List<?>) sdata;
            data = new String[rows.size()][];
            int r = 0;
            for (Object row : rows) {
                // toArray raises ArrayStoreException if a row holds non-String elements.
                data[r++] = ((List<?>) row).toArray(new String[0]);
            }
        } else {
            // instanceof is false for null, so null lands here too; report it
            // with the same exception type instead of a NullPointerException.
            String type = (sdata == null) ? "null" : sdata.getClass().toString();
            throw new UnsupportedDataTypeException(type);
        }

        // generateAt receives the instance, so the normalized matrix is stored
        // on it first (presumably the templates read it back via getData()).
        instance.setData(data);

        // No rows at all means a zero-length sequence, same as an empty first row.
        int len = (data.length == 0) ? 0 : data[0].length;
        int templetCount = templets.size();
        int[][] newData = new int[len][templetCount];
        for (int i = 0; i < len; i++) {
            for (int j = 0; j < templetCount; j++) {
                newData[i][j] = templets.get(j).generateAt(instance,
                        this.features, i, labels.size());
            }
        }
        instance.setData(newData);
        instance.setSource(data);
    }
}