package edu.harvard.wcfia.yoshikoder.dictionary; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; /** * Generates a regular expression corresponding to simple pattern language. * Patterns generate exact matches, unless an asterisk is present, when * any number of non-whitespace characters are permitted (regexp: \S*). * The asterisk cannot be escaped. * * @author will */ public class SubstringPatternEngine implements PatternEngine { protected int reFlags; public SubstringPatternEngine(){ reFlags = Pattern.CASE_INSENSITIVE; } public String getType(){ return PatternEngine.SUBSTRING; } public int getReFlags(){ return reFlags; } public void setReFlags(int ref){ reFlags = ref; } /** * Converts a candidate string into regular expression that looks for * case-insensitive word-internal exact matches, or substring matches * if * is applied in the pattern. */ public Pattern makeRegexp(String pstring) throws PatternSyntaxException{ String escaped = escape(pstring); return Pattern.compile(escaped, reFlags); } private String escape(String pstring){ StringBuffer sb = new StringBuffer(); char[] pchar = pstring.toCharArray(); for (int ii=0; ii<pchar.length; ii++){ if (pchar[ii] == '*'){ if (ii == 0) sb.append("\\S*\\Q"); else if (ii == pchar.length-1) sb.append("\\E\\S*"); else sb.append("\\E\\S*\\Q"); // end quot \S* start quot } else { if (ii == 0) sb.append("\\b\\Q"); // add prefix if at start sb.append(pchar[ii]); // add letter if (ii == pchar.length-1) // add suffix if at end sb.append("\\E\\b"); } } return sb.toString(); } public static void main(String[] args) { SubstringPatternEngine engine = new SubstringPatternEngine(); Pattern p = engine.makeRegexp("*f*k*"); System.out.println(p.pattern()); Matcher m = p.matcher("fgk"); System.out.println(m.matches()); } }