package edu.fudan.nlp.pipe.seq.templet; import java.util.ArrayList; import java.util.LinkedList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import edu.fudan.ml.types.DynamicInfo; import edu.fudan.ml.types.Instance; import edu.fudan.ml.types.alphabet.IFeatureAlphabet; public class DynamicTemplet implements Templet { private static final long serialVersionUID = -965487786100531480L; Pattern parser = Pattern.compile("(?:%(x|y|pos|word|len)\\[(-?\\d+)(?:,(\\d+))?\\])"); String templet; int order; int id; int[][] dims; int[] vars; int[] dynamic_pos, dynamic_word, dynamic_len; public static int MAX = 0; public static int MIN = 0; public DynamicTemplet(int id, String templet) { this.id = id; this.templet = templet; Matcher matcher = parser.matcher(this.templet); /** * 解析y的位置 */ List<String> y = new ArrayList<String>(); List<String> x = new ArrayList<String>(); List<String> pos = new ArrayList<String>(); List<String> word = new ArrayList<String>(); List<String> len = new ArrayList<String>(); while (matcher.find()) { if (matcher.group(1).equalsIgnoreCase("y")) { y.add(matcher.group(2)); } else if (matcher.group(1).equalsIgnoreCase("x")) { x.add(matcher.group(2)); x.add(matcher.group(3)); } else if (matcher.group(1).equalsIgnoreCase("pos")) { pos.add(matcher.group(2)); } else if (matcher.group(1).equalsIgnoreCase("word")) { word.add(matcher.group(2)); } else if (matcher.group(1).equalsIgnoreCase("len")) { len.add(matcher.group(2)); } } if(y.size()==0){//兼容CRF++模板 vars = new int[]{0}; }else{ vars = new int[y.size()]; for (int j = 0; j < y.size(); j++) { vars[j] = Integer.parseInt(y.get(j)); } } order = vars.length - 1; dynamic_pos = getDynamic(pos); dynamic_word = getDynamic(word); dynamic_len = getDynamic(len); dims = new int[x.size() / 2][2]; for (int i = 0; i < x.size(); i += 2) { dims[i / 2][0] = Integer.parseInt(x.get(i)); dims[i / 2][1] = Integer.parseInt(x.get(i + 1)); } } private int[] getDynamic(List<String> d) { int[] dynamic = new int[d.size()]; for (int j = 0; j < d.size(); j++) { dynamic[j] = -(Integer.parseInt(d.get(j))); //dynamic[j] = -(Integer.parseInt(d.get(j)) + 1);//DynamicDoubleViterbi if(dynamic[j] > MAX) MAX = dynamic[j]; if(dynamic[j] < MIN) MIN = dynamic[j]; } return dynamic; } @Override public int getOrder() { return this.order; } public String toString() { return this.templet; } @Override public int generateAt(Instance instance, IFeatureAlphabet features, int pos, int... numLabels) throws Exception { assert (numLabels.length == 1); String[][] data = (String[][]) instance.getSource(); ArrayList<String> preLabel1 = null; ArrayList<String> preLabel2 = null; if(instance.getTempData() instanceof LinkedList) { LinkedList<ArrayList<String>> al = (LinkedList<ArrayList<String>>) instance.getTempData(); preLabel1 = al.get(0); preLabel2 = al.get(1); } ArrayList<DynamicInfo> preLabel = null; if(instance.getTempData() instanceof ArrayList) { preLabel = (ArrayList<DynamicInfo>)instance.getTempData(); } for(int i = 0; i < vars.length; i++) { int j = vars[i]; if (pos+j < 0 || pos+j >= data.length) return -1; } StringBuffer sb = new StringBuffer(); sb.append(id); sb.append(':'); for (int i = 0; i < dims.length; i++) { String rp = ""; int j = dims[i][0]; int k = dims[i][1]; if (pos + j < 0 || pos + j >= data.length) { if (pos + j < 0) rp = "B_" + String.valueOf(-(pos + j) - 1); if (pos + j >= data.length) rp = "E_" + String.valueOf(pos + j - data.length); } else { rp = data[pos + j][k]; } if (-1 != rp.indexOf('$')) rp = rp.replaceAll("\\$", "\\\\\\$"); sb.append(rp); sb.append("//"); } if(preLabel != null) { setDynamicFeature(sb, dynamic_pos, preLabel, 0); setDynamicFeature(sb, dynamic_word, preLabel, 1); setDynamicFeature(sb, dynamic_len, preLabel, 2); } else setDynamicFeature2(sb, dynamic_pos, preLabel1, preLabel2); // System.out.println(sb.toString()); int index = features.lookupIndex(sb.toString(), (int) Math.pow(numLabels[0], order + 1)); return index; } private void setDynamicFeature(StringBuffer sb, int[] dynamic, ArrayList<DynamicInfo> preLabel, int choice) { for (int i = 0; i < dynamic.length; i++) { String rp = ""; int j = dynamic[i]; if(j < 0) { continue; } else if (j >= preLabel.size()) { rp = "B_" + (j - preLabel.size()); } else { if(choice == 0) rp = preLabel.get(j).getPos(); else if(choice == 1) rp = preLabel.get(j).getWord(); else if(choice == 2) rp = preLabel.get(j).getLen() + ""; else System.out.println("DynamicTemplet.setDynamicFeature Error!"); } sb.append(rp); sb.append("//"); } } private void setDynamicFeature2(StringBuffer sb, int[] dynamic, ArrayList<String> preLabel, ArrayList<String> preLabel2) { for (int i = 0; i < dynamic.length; i++) { String rp = ""; int j = dynamic[i] - 1; if(j < 0) { if(preLabel2 == null) continue; else { int k = -j - 1; if(k >= preLabel2.size()) rp = "E_" + (k - preLabel2.size()); else rp = preLabel2.get(k); } } else if (j >= preLabel.size()) { rp = "B_" + (j - preLabel.size()); } else { rp = preLabel.get(j); } sb.append(rp); sb.append("//"); } } @Override public int[] getVars() { // TODO Auto-generated method stub return null; } }