/******************************************************************************* * Copyright (c) 2005-2011, G. Weirich and Elexis * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * G. Weirich - initial implementation * *******************************************************************************/ package ch.rgw.tools; import java.util.logging.Level; import java.util.logging.Logger; /** * <p> * Title: Toolbox * </p> * <p> * Description: * </p> * <p> * Copyright: Copyright (c) 2002 * </p> * <p> * Company: rgw * </p> * * @author G. Weirich * @version 1.0 */ public class FuzzyMatcher { public static String Version(){ return "1.1.1"; } static final public int LITERAL = 0; static final public int LEVENSHTEIN = 1; static final public int SHIFT_AND = 2; static final public int REGEX = 3; static final public int SYNPHON = 4; static final public int EXACT = 0; static final public int SHARP = 1; static final public int MEDIUM = 2; static final public int BLURRED = 3; String pattern; int type; int level; boolean emptyMatches; private static Logger log; static { log = log.getLogger("Matcher"); } private FuzzyMatcher(){ emptyMatches = false; } public static FuzzyMatcher createWLDMatcher(String pattern, int level){ FuzzyMatcher ret = new FuzzyMatcher(); ret.type = LEVENSHTEIN; ret.pattern = pattern; ret.level = level; if (level > 2) ret.emptyMatches = true; log.log(Level.FINE, "create WLDMatcher level " + ret.level); return ret; } public static FuzzyMatcher createSynphonMatcher(String pattern, int level){ FuzzyMatcher ret = new FuzzyMatcher(); ret.type = SYNPHON; if (level > 2) ret.emptyMatches = true; switch (level) { case 1: ret.level = 5; break; case 2: ret.level = 3; break; default: ret.level = 2; } ret.pattern = SYPH_compile(pattern.trim(), ret.level); log.log(Level.FINE, "create SYPHMatcher level " + ret.level); log.log(Level.FINE, "pattern: " + ret.pattern); return ret; } public static FuzzyMatcher createLiteralMatcher(String pattern, int level){ FuzzyMatcher ret = new FuzzyMatcher(); ret.type = LITERAL; ret.level = level; ret.pattern = pattern; return ret; } public String getPattern(){ return pattern; } public boolean match(String w1){ if (StringTool.isNothing(w1)) { return emptyMatches; } String wort = w1.trim(); if (level == EXACT) { return wort.equals(pattern); } String[] pat = wort.split("[\\s,\\.]"); if ((pat.length < 1) || (StringTool.isNothing(pat[0])) || pat[0].equals(" ")) { return emptyMatches; } switch (type) { case LEVENSHTEIN: return (WLD(pat[0], pattern, '*', level) <= level); case SYNPHON: return SynPhon(pat[0], pattern, level); case LITERAL: return pat[0].equalsIgnoreCase(pattern); default: return false; } } /* Levenshtein */ private static String formatierung(String wort, char modus){ String res = wort.toUpperCase(); res = res.replaceAll("ä", "ae"); res = res.replaceAll("Ä", "Ae"); res = res.replaceAll("ö", "oe"); res = res.replaceAll("Ö", "Oe"); res = res.replaceAll("ü", "ue"); res = res.replaceAll("Ü", "Ue"); if (modus == '*') { res = res.replaceAll("\\**", "\\*"); } return res; } /** * weighted levenshtein distance Gibt "Distanz" zwischen Wort und Muster */ public static int WLD(String wort, String muster, char modus, int limit){ final int maxlen = 100; int spmin, p, q, r, d1, d2, i, k, x1, x2, x3; char c; String ww, mm; int[] d = new int[maxlen]; if (modus == '+' || modus == '*') { // lw = formatierung (ww, wort, maxlen,modus); // lm = formatierung (mm,muster,maxlen,modus); ww = formatierung(wort, modus); mm = formatierung(muster, modus); if ((modus == '*') && (ww.length() < mm.length() - 1) && (ww.indexOf('*') != -1)) { /**** Wort und Muster tauschen ****/ i = ww.length(); wort = mm; muster = ww + "*"; /**** Limit neu setzen ****/ i = (i / 3); if (i < limit) { limit = i; } } else { wort = ww; muster = mm; } } // modus='*' /**** Anfangswerte berechnen ****/ if (muster.charAt(0) == '*') { for (k = 0; k <= wort.length(); k++) { d[k] = 0; } } else { d[0] = (muster.equals("")) ? 0 : 1; i = (muster.charAt(0) == '?') ? 0 : 1; for (k = 1; k <= wort.length(); k++) { if (muster.charAt(0) == wort.charAt(k - 1)) { i = 0; } d[k] = k - 1 + i; } } spmin = (d[0] == 0 || wort.length() == 0) ? d[0] : d[1]; if (spmin > limit) { return (maxlen); } /**** Distanzmatrix durchrechnen ****/ for (i = 2; i <= muster.length(); i++) { c = muster.charAt(i - 1); p = (c == '*' || c == '?') ? 0 : 1; q = (c == '*') ? 0 : 1; r = (c == '*') ? 0 : 1; d2 = d[0]; d[0] = d2 + q; spmin = d[0]; for (k = 1; k <= wort.length(); k++) { /**** d[k] = Minimum dreier Zahlen ****/ d1 = d2; d2 = d[k]; x1 = d1 + ((c == wort.charAt(k - 1)) ? 0 : p); x2 = d2 + q; x3 = d[k - 1] + r; if (x1 < x2) { x2 = x1; } d[k] = (x2 < x3) ? x2 : x3; if (d[k] < spmin) { spmin = d[k]; } } if (spmin > limit) { return (maxlen); } } return ((d[wort.length()] <= limit) ? d[wort.length()] : maxlen); } // SynPhon-Algorithmus: �hnlicher Klang; f�r deutsche SPrache optimiert private static final String Vokale = "[aeiou���yh]"; private static final String[] grps = { "1,ei,ey,ay,ai,eu", "2,au,aw", "x,ks,gs", "b,p,mb,mp", "g,ck,k,q", "s,sch,sz,ts,cz,ch,c,z", "d,t,mt,md", "f,ph,v,w" }; public static String SYPH_compile(String wort, int l){ String r1 = (wort.toLowerCase()); // 1. Gruppen ersetzen for (int i = 0; i < grps.length; i++) { String[] px = grps[i].split(","); for (int j = 1; j < px.length; j++) { r1 = r1.replaceAll(px[j], px[0]); } } // 2. verbleibende Vokale und h entfernen String r2 = r1.replaceAll(Vokale, ""); // 3. Doppelzeichen entfernen String r3 = r2.replaceAll("(.)\\1", "$1"); if (r3.length() > l) { return r3.substring(0, l); } return r3; } public static boolean SynPhon(String wort, String pattern, int l){ String cw = SYPH_compile(wort, l); if (cw.equals(pattern)) { return true; } return false; } }