package edu.fudan.nlp.cn.tag.format; import java.util.List; import edu.fudan.ml.types.Instance; import edu.fudan.ml.types.InstanceSet; import edu.fudan.nlp.cn.Chars; /** * 将序列标注转换成字符串标签 * * @author xpqiu * */ public class Seq2StrWithTag { public static String format(InstanceSet testSet, String[][] labelsSet) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < testSet.size(); i++) { Instance inst = testSet.getInstance(i); String[] labels = labelsSet[i]; sb.append(format(inst, labels)); } return sb.toString(); } public static String format(Instance inst, String[] labels) { String[][] data = (String[][]) inst.getSource(); StringBuilder sb = new StringBuilder(); for (int j = 0; j < data.length; j++) { String label = labels[j]; String w = data[j][0]; // 处理连接在一起的英文字符串 if(Chars.getType(w).equals("E")){ if(label.contains("B-")) w = "[" + w + " "; else if(label.contains("M-")) w = w + " "; else if(label.contains("E-")) w = w + "]"; } sb.append(w); int tagidx = label.indexOf("-"); if (tagidx != -1) { String tag = label.substring(tagidx + 1); label = label.substring(0, tagidx); if (label.equals("E") || label.equals("S")) { sb.append("/" + tag + " "); } }else { if (label.equals("E") || label.equals("S")) sb.append(" "); } } return sb.toString(); } }