package edu.fudan.nlp.pipe.seq.templet;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import edu.fudan.ml.types.Instance;
import edu.fudan.ml.types.alphabet.IFeatureAlphabet;

/**
 * Templet class for hybrid sequence models.
 *
 * <p>A pattern is a string containing tokens of the form {@code %x[row,col]},
 * {@code %s[offset]} and {@code %t[offset]}. The {@code s} and {@code t}
 * tokens determine the order (type) of the templet:
 * <pre>
 *  0 : s[i], t[i]
 * -1 : s[i]t[i], t[i-1]s[i]
 *  1 : t[i-1]t[i], s[i-1]s[i]
 *  2 : t[i-1]s[i]t[i], s[i-1]t[i-1]s[i]
 * </pre>
 *
 * @author Feng Ji
 */
public class HybridTemplet implements Templet {

    private static final long serialVersionUID = 3519288368823314632L;

    /** Order/type of the templet, derived from the number of s and t tokens. */
    private int order = 0;
    /** Text placed in front of every generated feature string. */
    private String prefix;
    /** The raw pattern this templet was built from. */
    private String pattern;
    /** {@code vars[0]}: offsets of the %s tokens; {@code vars[1]}: offsets of the %t tokens. */
    private int[][] vars;
    /** One {@code {row, col}} pair per %x token, in pattern order. */
    private int[][] dims;

    /** Recognises {@code %x[row,col]}, {@code %s[offset]} and {@code %t[offset]} tokens. */
    static Pattern parser = Pattern
            .compile("(?:%(x|s|t)\\[(-?\\d+)(?:,(\\d+))?\\])");

    /**
     * Builds a templet by scanning {@code pattern} for %s, %t and %x tokens.
     *
     * @param prefix  text prepended (followed by ':') to every generated feature string
     * @param pattern templet pattern to parse
     * @throws NumberFormatException if a number in a token does not fit an int, or
     *         if a %x token has no column part (the column is optional in the
     *         regular expression, but is always parsed for %x)
     */
    public HybridTemplet(String prefix, String pattern) {
        this.prefix = prefix;
        this.pattern = pattern;

        Matcher tokens = parser.matcher(pattern);
        int[] sOffsets = offsetsOf(tokens, "s");
        int[] tOffsets = offsetsOf(tokens, "t");
        vars = new int[][] { sOffsets, tOffsets };

        // Order is |#s + #t - 1|, negated when there is exactly one s and one t.
        int magnitude = Math.abs(sOffsets.length + tOffsets.length - 1);
        boolean oneOfEach = sOffsets.length == 1 && tOffsets.length == 1;
        order = oneOfEach ? -magnitude : magnitude;

        dims = cellsOf(tokens);
    }

    /** Rescans the pattern and returns the first number of every token of the given kind. */
    private static int[] offsetsOf(Matcher tokens, String kind) {
        tokens.reset();
        List<Integer> found = new ArrayList<Integer>();
        while (tokens.find()) {
            if (kind.equals(tokens.group(1))) {
                found.add(Integer.parseInt(tokens.group(2)));
            }
        }
        int[] offsets = new int[found.size()];
        for (int slot = 0; slot < offsets.length; slot++) {
            offsets[slot] = found.get(slot);
        }
        return offsets;
    }

    /** Rescans the pattern and returns the {row, col} pair of every %x token. */
    private static int[][] cellsOf(Matcher tokens) {
        tokens.reset();
        List<int[]> cells = new ArrayList<int[]>();
        while (tokens.find()) {
            if ("x".equals(tokens.group(1))) {
                int row = Integer.parseInt(tokens.group(2));
                // group(3) is null when the column is omitted; parseInt then throws.
                int col = Integer.parseInt(tokens.group(3));
                cells.add(new int[] { row, col });
            }
        }
        return cells.toArray(new int[cells.size()][]);
    }

    /**
     * Generates the feature for position {@code cur} and looks it up in the
     * feature alphabet.
     *
     * <p>The feature string is {@code prefix + ':'} followed by one entry per
     * %x token, each terminated by {@code "//"}. An entry is
     * {@code data[cur + row][col]}, or {@code "B_n"} / {@code "E_n"} when
     * {@code cur + row} lies before the start / past the end of {@code data}.
     * Every '$' in an entry is prefixed with a backslash.
     *
     * @param instance instance whose data is cast to {@code String[][]}
     * @param features alphabet in which the feature string is looked up
     * @param cur      current position
     * @param labels   two values; {@code labels[0]} is raised to the number of %s
     *                 tokens and {@code labels[1]} to the number of %t tokens, and
     *                 the product is passed to the alphabet lookup (presumably
     *                 label-set sizes -- confirm against callers)
     * @return the index returned by the alphabet, or -1 if any %s or %t offset
     *         points outside {@code data}
     */
    public int generateAt(Instance instance, IFeatureAlphabet features,
            int cur, int... labels) throws Exception {
        assert (labels.length == 2);
        String[][] data = (String[][]) instance.getData();

        // No feature when any s/t offset falls outside the sequence.
        for (int[] offsets : vars) {
            for (int offset : offsets) {
                int pos = cur + offset;
                if (pos < 0 || pos >= data.length) {
                    return -1;
                }
            }
        }

        StringBuilder key = new StringBuilder();
        key.append(prefix).append(':');
        for (int[] cell : dims) {
            int pos = cur + cell[0];
            int col = cell[1];
            String token;
            if (pos >= 0 && pos < data.length) {
                token = data[pos][col];
            } else {
                // Boundary placeholders: distance before the start or past the end.
                token = "";
                if (pos < 0) {
                    token = "B_" + (-pos - 1);
                }
                if (pos >= data.length) {
                    token = "E_" + (pos - data.length);
                }
            }
            if (token.indexOf('$') != -1) {
                token = token.replaceAll("\\$", "\\\\\\$");
            }
            key.append(token).append("//");
        }

        int indent = (int) Math.pow(labels[0], vars[0].length);
        for (int i = 1; i < labels.length; i++) {
            indent *= (int) Math.pow(labels[i], vars[i].length);
        }
        return features.lookupIndex(key.toString(), indent);
    }

    /**
     * @return the order/type of this templet as computed in the constructor
     */
    public int getOrder() {
        return order;
    }

    /**
     * @return the prefix and the raw pattern joined by ':'
     */
    @Override
    public String toString() {
        return prefix + ":" + pattern;
    }

    /**
     * Not implemented for this templet.
     *
     * @return always {@code null}
     */
    @Override
    public int[] getVars() {
        // TODO Auto-generated method stub
        return null;
    }
}