package edu.fudan.nlp.pipe.seq.templet;
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import edu.fudan.ml.types.Instance;
import edu.fudan.ml.types.alphabet.IFeatureAlphabet;
import edu.fudan.ontology.Dictionary;
/**
 * Generates features through a dictionary lookup.
 *
 * <p>For a position in a sequence, the templet concatenates the tokens found at a
 * fixed set of relative offsets (read from row 0 of the instance data) behind a
 * templet-specific prefix of the form {@code id:dict:off1:off2:...:}. If the
 * dictionary contains the resulting string, the dictionary name is appended and
 * the result is looked up in the feature alphabet.
 *
 * @author xpqiu
 */
public class DictionaryTemplet implements Templet, Serializable {
    private static final long serialVersionUID = -4516243129442692024L;

    /** Dictionary consulted for every candidate string. */
    private final Dictionary d;
    /** Relative offsets, sorted ascending; a private copy of the constructor argument. */
    private final int[] args;
    /** Templet identifier; also the first component of {@link #text}. */
    private final int id;
    /** Precomputed key prefix: {@code id:dict:off1:...:offN:}. */
    private final String text;

    /**
     * Creates a templet over the given dictionary.
     *
     * @param d    dictionary used to test candidate strings
     * @param id   templet identifier, embedded in the feature key prefix
     * @param args relative offsets (with respect to the current position) of the
     *             tokens to concatenate; they are used in ascending order
     */
    public DictionaryTemplet(Dictionary d, int id, int ... args) {
        this.d = d;
        this.id = id;
        // Copy before sorting: a caller may pass an explicit int[] for the varargs
        // parameter, and that array must neither be reordered here nor be able to
        // change this templet's offsets later (the cached prefix would go stale).
        this.args = args.clone();
        Arrays.sort(this.args);

        StringBuilder sb = new StringBuilder();
        sb.append(id);
        sb.append(":dict");
        for (int offset : this.args) {
            sb.append(':');
            sb.append(offset);
        }
        sb.append(':');
        this.text = sb.toString();
    }

    /**
     * Builds the candidate string for position {@code pos} and, if the dictionary
     * contains it, returns its index in the feature alphabet.
     *
     * @param instance  instance whose data is cast to {@code String[][]}; only row 0 is read
     * @param features  feature alphabet in which the feature string is looked up
     * @param pos       current position in the sequence
     * @param numLabels expected to hold exactly one value (checked by an assertion);
     *                  its first element is passed on to the alphabet lookup
     * @return the value returned by the alphabet lookup, or {@code -1} when the
     *         dictionary does not contain the candidate string
     */
    @Override
    public int generateAt(Instance instance, IFeatureAlphabet features, int pos, int ... numLabels) {
        assert(numLabels.length == 1);
        String[][] data = (String[][]) instance.getData();
        int len = data[0].length;

        StringBuilder sb = new StringBuilder(text);
        for (int offset : args) {
            int idx = pos + offset;
            // Offsets that fall outside the sequence contribute nothing to the key.
            if (idx >= 0 && idx < len) {
                sb.append(data[0][idx]);
            }
        }

        // NOTE(review): the string tested against the dictionary includes the
        // templet prefix (id:dict:...:), not just the concatenated tokens --
        // confirm that dictionary entries are stored in that form.
        int index = -1;
        if (d.contains(sb.toString())) {
            sb.append(d.name);
            index = features.lookupIndex(sb.toString(), numLabels[0]);
        }
        return index;
    }

    /**
     * @return always {@code 0}
     */
    public int getOrder() { return 0; }

    /**
     * @return a new single-element array containing {@code 0}
     */
    public int[] getVars() { return new int[]{0}; }

    /**
     * @param curs ignored by this templet
     * @return always {@code 0}
     */
    public int offset(int... curs) {
        return 0;
    }
}