/* * 文件名:QuestionTemplate.java * 版权:Copyright 2008-20012 复旦大学 All Rights Reserved. * 描述:程序总入口 * 修改人:xpqiu * 修改时间:Nov 30, 2008 * 修改内容:新增 * * 修改人:〈修改人〉 * 修改时间:YYYY-MM-DD * 跟踪单号:〈跟踪单号〉 * 修改单号:〈修改单号〉 * 修改内容:〈修改内容〉 */ package edu.fudan.nlp.pipe.templet; import java.io.Serializable; import java.util.ArrayList; import java.util.Iterator; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * 记录模板 * 模板格式:#dd# * @author xpqiu * @version 1.0 */ public class RETemplate implements Serializable{ private static final long serialVersionUID = 7528628307437160316L; private static final double BASE = 2; ArrayList<String> templates; ArrayList<Integer> lens; ArrayList<Float> weights; ArrayList<Pattern> patterns; int minlen = 2; /** * 保存捕获分组结果 */ ArrayList<ArrayList<String>> matchGroup; String comment = ""; public RETemplate(){ templates = new ArrayList<String>(); patterns = new ArrayList<Pattern>(); lens = new ArrayList<Integer>(); weights = new ArrayList<Float>(); matchGroup = new ArrayList<ArrayList<String>>(); } /** * @param qm * @throws Exception */ public void addTemplates(ArrayList<String> templatesList) throws Exception { Iterator<String> it = templatesList.iterator(); while(it.hasNext()){ String temp = it.next(); addTemplate(temp,1); } } /** * 将问题模板转换成Pattern * @param strTemplate * @param weight * @throws Exception */ public void addTemplate(String strTemplate, int weight) throws Exception { strTemplate = strTemplate.replaceAll("[\\?\\*\\[\\]\\{\\}\\,\\|]", ""); // if(strTemplate.contains(".")) // System.out.println(strTemplate); strTemplate = strTemplate.replaceAll("\\.", "\\\\."); Pattern p = Pattern.compile("(#[^#]+#)+"); Matcher m = p.matcher(strTemplate); if(m.matches()) return; // strTemplate = "^"+strTemplate; // strTemplate += "$"; String str = new String(strTemplate); ArrayList<String> matchs = new ArrayList<String>(); int len = 0; int patternNum = 0; while(m.find()) { patternNum++; String target = m.group(); len+=target.length(); matchs.add(target); str = str.replace(target, "(.{"+minlen+",})"); } len = str.length()-len; if(len<2) return; templates.add(strTemplate); lens.add(len); weights.add((float) (weight*Math.pow(BASE, Math.max(0,2-patternNum)))); matchGroup.add(matchs); //System.out.println(str); try { patterns.add(Pattern.compile(str)); } catch (Exception e) { System.out.println(str); e.printStackTrace(); throw e; } } public static void main(String[] args) throws Exception{ RETemplate qt = new RETemplate(); qt.addTemplate("最近的#poi#",1); float a = qt.matches("最近的加油站"); System.out.println(a); } /** * @param str * @return */ public float matches(String str) { Iterator<Pattern> it = patterns.iterator(); float w = 0; while(it.hasNext()){ Pattern p = it.next(); Matcher m = p.matcher(str); if(m.find()) {//匹配 int idx = patterns.indexOf(p); // System.out.print("模板:"+ templates.get(idx)); // System.out.print("\t"); ArrayList<String> matchs = matchGroup.get(idx); String[] matchedName = new String[matchs.size()]; int len=0; for(int i=1;i<=matchs.size();i++){ matchedName[i-1] = m.group(i); len +=matchedName[i-1].length(); // System.out.print("匹配:"+matchedName[i-1]); // System.out.print("\t"); } // System.out.print("类别:"+comment); // System.out.print("\t"); int mlen = str.length(); float ww = Math.min(mlen, lens.get(idx)); ww /= Math.max(mlen, lens.get(idx)); ww *= weights.get(idx); w +=ww; // System.out.println("权重:"+w); } } return w; } public String toString(){ return comment; } }