package edu.fudan.nlp.cn.tag.format;
import java.util.ArrayList;
import java.util.List;
import edu.fudan.ml.types.Instance;
import edu.fudan.ml.types.InstanceSet;
/**
 * Converts a per-character sequence labeling result into arrays of words and
 * their tags.
 *
 * <p>Each label is split at its first {@code '-'}: the part before it is the
 * position marker and the part after it is the tag. A word is closed whenever
 * the position marker equals {@code "E"} or {@code "S"}.
 *
 * @author xpqiu
 */
public class Seq2ArrayWithTag {

    /** Separator placed between a word and its tag in the text rendering. */
    private static final String TAG_SEPARATOR = "/";

    /** Separator placed between consecutive {@code word/tag} tokens. */
    private static final String TOKEN_SEPARATOR = " ";

    /**
     * Renders every instance of a set as one line of space-separated
     * {@code word/tag} tokens.
     *
     * <p>The words and tags of each instance are obtained from
     * {@link #format(Instance, String[])}; each instance's line is terminated
     * by {@code '\n'}.
     *
     * @param testSet   instances to render; {@code labelsSet[i]} must hold the
     *                  labels of {@code testSet.getInstance(i)}
     * @param labelsSet labels for each instance, indexed like {@code testSet}
     * @return the concatenated lines, empty if the set has no instances
     */
    @SuppressWarnings("rawtypes")
    public static String format(InstanceSet testSet, String[][] labelsSet) {
        StringBuilder out = new StringBuilder();
        for (int i = 0; i < testSet.size(); i++) {
            Instance inst = testSet.getInstance(i);
            // Appending the List[] directly would only print the array's
            // identity string, so the two lists are rendered explicitly.
            List[] wordsAndTags = format(inst, labelsSet[i]);
            List<?> words = wordsAndTags[0];
            List<?> tags = wordsAndTags[1];
            for (int k = 0; k < words.size(); k++) {
                if (k > 0) {
                    out.append(TOKEN_SEPARATOR);
                }
                out.append(words.get(k)).append(TAG_SEPARATOR).append(tags.get(k));
            }
            out.append('\n');
        }
        return out.toString();
    }

    /**
     * Groups the characters of one instance into words and collects the tag
     * of each word.
     *
     * <p>The instance source is cast to {@code String[][]}; the first column
     * of row {@code j} is appended to the word being built. When the position
     * marker of {@code labels[j]} is {@code "E"} or {@code "S"} the word is
     * closed and stored together with the tag of that label. Characters that
     * follow the last {@code "E"}/{@code "S"} marker are not emitted.
     *
     * <p>A label that contains no {@code '-'} is treated as a bare position
     * marker with an empty tag.
     *
     * @param inst   instance whose source is a {@code String[][]} with one row
     *               per character
     * @param labels one label per row of the instance source
     * @return a two-element array: index 0 is the list of words, index 1 the
     *         list of tags, both of the same length
     */
    @SuppressWarnings("rawtypes")
    public static List[] format(Instance inst, String[] labels) {
        String[][] data = (String[][]) inst.getSource();
        List<String> words = new ArrayList<String>();
        List<String> tags = new ArrayList<String>();
        StringBuilder current = new StringBuilder();
        for (int j = 0; j < data.length; j++) {
            String label = labels[j];
            int sep = label.indexOf('-');
            // Without a '-' there is no tag part; substring(0, -1) would throw.
            String position = sep < 0 ? label : label.substring(0, sep);
            String tag = sep < 0 ? "" : label.substring(sep + 1);
            current.append(data[j][0]);
            if ("E".equals(position) || "S".equals(position)) {
                words.add(current.toString());
                tags.add(tag);
                current.setLength(0);
            }
        }
        return new List[] { words, tags };
    }
}