/*
* Freeplane - mind map editor
* Copyright (C) 2012 Dimitry Polivaev
*
* This file's author is Felix Natter
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.freeplane.features.filter;
import java.util.Arrays;
/**
* Damerau-Levenshtein implementation, computes the edit distance (ins/del/subst/transpos)
* between a search term and a text to search against.
* see http://en.wikipedia.org/wiki/Damerau–Levenshtein_distance
* The basic algorithm is orignally from wikipedia, and was extended for semi-global alignments.
*
* Optionally the edit distance of a semi-global alignment is computed which
* allows the search term to be shifted free-of-cost (i.e. dist("file", "a file is")==0).
*
* Some properties are explained in the unit test, {@link org.freeplane.features.filter.EditDistanceStringMatchingStrategiesTest}.
*
* TODO: use unicode code points instead of chars !!
*
* @author Felix Natter <fnatter@gmx.net>
*
*/
public class DamerauLevenshtein implements EditDistanceStringMatchingStrategy {
private String searchTerm;
private String searchText;
private Type type;
private int alphabetLength;
public int distance()
{
final int INFINITY = searchTerm.length() + searchText.length();
int[][] H = new int[searchTerm.length()+2][searchText.length()+2];
H[0][0] = INFINITY;
for(int i = 0; i<=searchTerm.length(); i++) {
H[i+1][1] = i;
H[i+1][0] = INFINITY;
}
for(int j = 0; j<=searchText.length(); j++) {
H[1][j+1] = (type == Type.Global) ? j : 0;
H[0][j+1] = INFINITY;
}
int[] DA = new int[alphabetLength];
Arrays.fill(DA, 0);
for(int i = 1; i<=searchTerm.length(); i++) {
int DB = 0;
for(int j = 1; j<=searchText.length(); j++) {
int i1 = DA[searchText.charAt(j-1)];
int j1 = DB;
int d = ((searchTerm.charAt(i-1)==searchText.charAt(j-1))?0:1);
if(d==0) DB = j;
H[i+1][j+1] =
min(H[i][j]+d,
H[i+1][j] + 1,
H[i][j+1]+1,
H[i1][j1] + (i-i1-1) + 1 + (j-j1-1));
}
DA[searchTerm.charAt(i-1)] = i;
}
//writeMatrix(H);
if (type == Type.Global)
{
return H[searchTerm.length()+1][searchText.length()+1];
}
else
{
int min = Integer.MAX_VALUE;
for (int j = 1; j <= searchText.length() + 1; j++)
{
min = Math.min(min, H[searchTerm.length()+1][j]);
}
return min;
}
}
private void writeMatrix(int[][] H)
{
for (int i = 0; i < H.length; i++)
{
for (int j = 0; j < H[0].length; j++)
{
System.out.format(" %3d", H[i][j]);
}
System.out.println();
}
}
private static int min(int ... nums)
{
int min = Integer.MAX_VALUE;
for (int num : nums) {
min = Math.min(min, num);
}
return min;
}
public float matchProb()
{
if (type == Type.SemiGlobal)
{
return 1.0F - ((float)distance() / searchTerm.length());
}
else
{
return 1.0F - ((float)distance() / Math.min(searchTerm.length(), searchText.length()));
}
}
/*
public DamerauLevenshtein(final String searchTerm, final String searchText,
final Type type, final boolean caseSensitive)
{
if (caseSensitive)
{
this.searchTerm = searchTerm;
this.searchText = searchText;
}
else
{
this.searchTerm = searchTerm.toLowerCase();
this.searchText= searchText.toLowerCase();
}
this.type = type;
int maxCodePoint = 0;
for (int i = 0; i < searchTerm.length(); i++)
{
maxCodePoint = Math.max(maxCodePoint, searchTerm.charAt(i));
}
for (int i = 0; i < searchText.length(); i++)
{
maxCodePoint = Math.max(maxCodePoint, searchText.charAt(i));
}
alphabetLength = maxCodePoint + 1;
}
*/
public DamerauLevenshtein() {
}
public void init(final String searchTerm, final String searchText, final boolean subStringMatch,
final boolean caseSensitive)
{
if (searchTerm == null || searchText == null)
{
throw new IllegalArgumentException("Null searchText/searchTerm!");
}
if (caseSensitive)
{
this.searchTerm = searchTerm;
this.searchText = searchText;
}
else
{
this.searchTerm = searchTerm.toLowerCase();
this.searchText= searchText.toLowerCase();
}
this.type = subStringMatch ? Type.SemiGlobal : Type.Global;
int maxCodePoint = 0;
for (int i = 0; i < this.searchTerm.length(); i++)
{
maxCodePoint = Math.max(maxCodePoint, this.searchTerm.charAt(i));
}
for (int i = 0; i < this.searchText.length(); i++)
{
maxCodePoint = Math.max(maxCodePoint, this.searchText.charAt(i));
}
alphabetLength = maxCodePoint + 1;
}
public boolean matches(final String searchTerm, final String searchText, final boolean subStringMatch,
final boolean caseSensitive)
{
//LogUtils.severe(String.format("DL(%s,%s)\n", searchTerm, searchText));
init(searchTerm, searchText, subStringMatch, caseSensitive);
return matchProb() > StringMatchingStrategy.APPROXIMATE_MATCHING_MINPROB;
}
}