package com.freetmp.mbg.merge;
import info.debatty.java.stringsimilarity.*;
import org.junit.Rule;
import org.junit.Test;
import org.junit.contrib.java.lang.system.SystemOutRule;
/**
 * Smoke tests for the string distance / similarity measures provided by
 * {@code info.debatty.java.stringsimilarity}.
 *
 * <p>Every test simply evaluates a measure on fixed inputs and prints the
 * result; nothing is asserted, so a test only fails if the call throws.
 * Numeric values quoted in the comments below are the original author's
 * notes and are not verified by this class.
 *
 * <p>Created by LiuPin on 2015/5/15.
 */
public class SimilarityTest {

    /** Suppresses everything written to {@code System.out} unless the test fails. */
    @Rule
    public final SystemOutRule systemOutRule = new SystemOutRule().muteForSuccessfulTests();

    /** Prints the Levenshtein distance between two strings that differ in one character. */
    @Test
    public void testLevenshtein() {
        final String original = "My string";
        final String altered = "My $tring";
        System.out.println(new Levenshtein().distance(original, altered));
    }

    /** Prints a Levenshtein distance computed with a custom substitution cost. */
    @Test
    public void testWeightedLevenshtein() {
        // 't' and 'r' are next to each other, so replacing 't' with 'r' is
        // charged 0.5; every other substitution keeps the full cost of 1.0.
        WeightedLevenshtein weighted = new WeightedLevenshtein(
                (from, to) -> (from == 't' && to == 'r') ? 0.5 : 1.0);
        System.out.println(weighted.distance("String1", "Srring2"));
    }

    /** Prints the Damerau distance for three variations of the same reference string. */
    @Test
    public void testDamerau() {
        final String reference = "ABCDEF";
        Damerau damerau = new Damerau();

        // A single transposition of adjacent characters.
        System.out.println(damerau.distance(reference, "ABDCEF"));

        // The two moved characters are far from each other,
        // which amounts to 1 deletion + 1 insertion.
        System.out.println(damerau.distance(reference, "BCDAEF"));

        // NOTE(review): the original comment stated that distance and
        // similarity always produce a result between 0 and 1; whether that
        // holds depends on the library version in use -- confirm.
        System.out.println(damerau.distance(reference, "GHABCDE"));
    }

    /** Prints both the Jaro-Winkler distance and similarity for the same pair of strings. */
    @Test
    public void testJaroWinkler() {
        final String original = "My string";
        final String altered = "My $tring";
        JaroWinkler jaroWinkler = new JaroWinkler();
        System.out.println(jaroWinkler.distance(original, altered));
        System.out.println(jaroWinkler.similarity(original, altered));
    }

    /** Prints the longest-common-subsequence distance between two short sequences. */
    @Test
    public void testLongestCommonSubsequence() {
        System.out.println(new LongestCommonSubsequence().distance("AGCAT", "GAC"));
    }

    /** Prints the N-Gram distance (n = 2) between two strings sharing a prefix. */
    @Test
    public void testNGram() {
        NGram bigram = new NGram(2);
        // Original author's note: the result should be 0.41666 (not asserted here).
        System.out.println(bigram.distance("ABCD", "ABTUIO"));
    }

    /** Prints the Q-Gram distance (q = 2) between two strings differing in the last character. */
    @Test
    public void testQGram() {
        QGram bigramProfile = new QGram(2);
        // 2-gram occurrence counts:
        //          AB BC CD CE
        //   "ABCD"  1  1  1  0
        //   "ABCE"  1  1  0  1
        // Original author's note: 2 / (3 + 3) = 0.33333 (not asserted here;
        // the actual scale depends on the library version -- confirm).
        System.out.println(bigramProfile.distance("ABCD", "ABCE"));
    }
}