package ruc.irm.similarity.util;
/**
*
* This class computes the edit distance between two strings using dynamic
* programming. The dynamic programming part is in the method
* printEditDistance().
*
* @author <a href="mailto:iamxiatian@gmail.com">夏天</a>
* @organization 中国人民大学信息资源管理学院 知识工程实验室
*/
public class EditDistance {
/**
* 获取删除代价
*
* @return
*/
public int getDeletionCost() {
return 1;
}
/**
* 获取插入代价
*
* @return
*/
public int getInsertionCost() {
return 1;
}
/**
* 获取替换代价
*
* @return
*/
public int getSubstitutionCost(char a, char b) {
return (a == b) ? 0 : 1;
}
public int getEditDistance(String S, String T) {
int[][] D = null;
if (S == null)
S = "";
if (T == null)
T = "";
char[] a = S.toCharArray();
char[] b = T.toCharArray();
int n = a.length; // 字符串S的长度
int m = b.length; // 字符串T的长度
if (a.length == 0) {
return b.length;
} else if (b.length == 0) {
return a.length;
}
D = new int[a.length + 1][b.length + 1];
/** 初始化D[i][0] */
for (int i = 1; i <= n; i++) {
D[i][0] = D[i - 1][0] + getDeletionCost();
}
/** 初始化D[0][j] */
for (int j = 1; j <= m; j++) {
D[0][j] = D[0][j - 1] + getInsertionCost();
}
for (int i = 1; i <= n; i++) {
for (int j = 1; j <= m; j++) {
D[i][j] = MathUtils.min(D[i - 1][j] + getDeletionCost(),
D[i][j - 1] + getInsertionCost(), D[i - 1][j - 1]
+ getSubstitutionCost(a[i - 1], b[j - 1]));
}
}
return D[n][m];
}
/**
* 应与getEditDistance(S, T)等同
* @param s
* @param t
* @return
*/
public static int getLevenshteinDistance(String s, String t) {
if (s == null || t == null) {
throw new IllegalArgumentException("Strings must not be null");
}
int d[][]; // matrix
int n; // length of s
int m; // length of t
int i; // iterates through s
int j; // iterates through t
char s_i; // ith character of s
char t_j; // jth character of t
int cost; // cost
// Step 1
n = s.length();
m = t.length();
if (n == 0) {
return m;
}
if (m == 0) {
return n;
}
d = new int[n + 1][m + 1];
// Step 2
for (i = 0; i <= n; i++) {
d[i][0] = i;
}
for (j = 0; j <= m; j++) {
d[0][j] = j;
}
// Step 3
for (i = 1; i <= n; i++) {
s_i = s.charAt(i - 1);
// Step 4
for (j = 1; j <= m; j++) {
t_j = t.charAt(j - 1);
// Step 5
if (s_i == t_j) {
cost = 0;
} else {
cost = 1;
}
// Step 6
d[i][j] = MathUtils.min(d[i - 1][j] + 1, d[i][j - 1] + 1,
d[i - 1][j - 1] + cost);
}
}
// Step 7
return d[n][m];
}
}