package org.gbif.nub.lookup.similarity;
import java.util.List;
import com.google.common.collect.ImmutableList;
import org.apache.commons.lang3.time.StopWatch;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Logs the similarity score and a rough timing for several {@link StringSimilarity}
 * implementations across a fixed list of name pairs.
 *
 * <p>The test makes no assertions; it only writes scores and timings to the log so the
 * implementations can be compared side by side.
 */
public class DistanceComparisonTest {
  private static final Logger LOG = LoggerFactory.getLogger(DistanceComparisonTest.class);

  /** Number of timed {@code getSimilarity} invocations per implementation and name pair. */
  private static final int REPEAT = 5000;

  private final StringSimilarity DL = new DamerauLevenshtein();
  private final StringSimilarity MDL2 = new ModifiedDamerauLevenshtein(2);
  private final StringSimilarity MDL3 = new ModifiedDamerauLevenshtein(3);
  private final StringSimilarity JW = new JaroWinkler();
  private final StringSimilarity MJW = new ModifiedJaroWinkler();

  /** Pairs of strings to compare; each array holds exactly two names. */
  private static final List<String[]> NAMES = ImmutableList.of(
      new String[] {"Helga", "Markus"},
      new String[] {"Abies", "Apis"},
      new String[] {"Aneplus", "Anelus"},
      new String[] {"Aneplus vulgaris", "Anelus vulgaris"},
      new String[] {"Aneplus", "Anephlus"},
      new String[] {"Anelus", "Anephlus"},
      new String[] {"Abies alba", "Abies alba"},
      new String[] {"Apies alba", "Abies alba"},
      new String[] {"Apbies alba", "Abies alba"},
      new String[] {"Abbies alba", "Abies alba"},
      new String[] {"Obies alba", "Abies alba"},
      new String[] {"Abies allba", "Abies alba"},
      new String[] {"Abies ahlba", "Abies alba"},
      new String[] {"Abbies ahlba", "Abies alba"},
      new String[] {"Puma concolor", "Abies alba"},
      new String[] {"Pumac oncolor", "Puma concolor"},
      new String[] {"Pumaco color", "Puma concolor"},
      new String[] {"Pumae concolour", "Puma concolor"},
      new String[] {"Cnaemidophorus rhododactyla", "Cnaemidophorus rhododactyla"},
      new String[] {"Cnaemidophorus rododactyla", "Cnaemidophorus rhododactyla"},
      new String[] {"Caemidophorus rhododactyla", "Cnaemidophorus rhododactyla"},
      new String[] {"Cnamidophorus rhododactyla", "Cnaemidophorus rhododactyla"},
      new String[] {"Cnaemidophora rhododactyla", "Cnaemidophorus rhododactyla"},
      new String[] {"Cnaemidophorhus rhododactyla", "Cnaemidophorus rhododactyla"},
      new String[] {"Cnaemydophorus rhododactyla", "Cnaemidophorus rhododactyla"},
      new String[] {"Cneamidophorus rhododactyla", "Cnaemidophorus rhododactyla"},
      new String[] {"Cnaemidophorus rododactyla", "Cnaemidophorus rhododactyla"},
      new String[] {"Cnaemidophorus rhododatyla", "Cnaemidophorus rhododactyla"},
      new String[] {"Cnaemidophorus rhododactula", "Cnaemidophorus rhododactyla"},
      new String[] {"Cnaemidophorus rhododactulla", "Cnaemidophorus rhododactyla"},
      new String[] {"Yigoga forcipula", "Yigoga forcipula"},
      new String[] {"Igoga forcipula", "Yigoga forcipula"},
      new String[] {"Yiogoga forcipula", "Yigoga forcipula"},
      new String[] {"Yigoga forzipula", "Yigoga forcipula"},
      new String[] {"Agrotis ripae", "Agrotis ripae"},
      new String[] {"Agrostis ripae", "Agrotis ripae"},
      new String[] {"Agrotis ripa", "Agrotis ripae"},
      new String[] {"Agrotis ripea", "Agrotis ripae"},
      new String[] {"Lasionycta leucocycla", "Lasionycta leucocycla"},
      new String[] {"Lasionicta leucocycla", "Lasionycta leucocycla"},
      new String[] {"Lasionykta leucocycla", "Lasionycta leucocycla"},
      new String[] {"Lasionycta leucocicla", "Lasionycta leucocycla"},
      new String[] {"Ophthalmis lincea", "Ophthalmis lincea"},
      new String[] {"Oftalmis lincea", "Ophthalmis lincea"},
      new String[] {"Ophthalmis linzea", "Ophthalmis lincea"}
  );

  /**
   * Runs every similarity implementation against every name pair and logs the score
   * together with the time taken for {@link #REPEAT} invocations.
   */
  @Test
  public void testGetSimilarity() throws Exception {
    for (String[] pair : NAMES) {
      LOG.info("{} x {}", pair[0], pair[1]);
      doitTime("DL  ", DL, pair[0], pair[1]);
      doitTime("MDL2", MDL2, pair[0], pair[1]);
      doitTime("MDL3", MDL3, pair[0], pair[1]);
      doitTime("JW  ", JW, pair[0], pair[1]);
      doitTime("MJW ", MJW, pair[0], pair[1]);
    }
  }

  /**
   * Prints, for a fixed set of integer inputs, the value of {@code log10((s - 80) * 1.1)}
   * and a derived integer: {@code s * 10} for inputs up to 90, otherwise
   * {@code 900 + (int) (100 * (log10((s - 80) * 1.1) - 1))}.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    int[] sim = new int[] {60, 70, 80, 85, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102,
        105, 110, 115, 120, 130, 140, 150, 160, 175};
    for (int s : sim) {
      // Computed once; used both for the printed value and for the mapping above 90.
      double log = Math.log10((s - 80d) * 1.1);
      int ns = s <= 90 ? s * 10 : 900 + (int) (100d * (log - 1));
      System.out.println(s + " => " + log);
      System.out.println(s + " => " + ns);
    }
  }

  /**
   * Computes the similarity of two strings once and logs it at debug level.
   *
   * @param name label used in the log line
   * @param sim similarity implementation to invoke
   * @param x1 first string
   * @param x2 second string
   * @return the value returned by {@code sim.getSimilarity(x1, x2)}
   */
  private double doit(String name, StringSimilarity sim, String x1, String x2) {
    double s = sim.getSimilarity(x1, x2);
    LOG.debug(" {}={}", name, s);
    return s;
  }

  /**
   * Computes the similarity of two strings, then times {@link #REPEAT} further invocations
   * and logs the score, the invocation count and the elapsed time in microseconds.
   *
   * @param name label used in the log line
   * @param sim similarity implementation to invoke
   * @param x1 first string
   * @param x2 second string
   * @return the value returned by the first {@code sim.getSimilarity(x1, x2)} call
   */
  private double doitTime(String name, StringSimilarity sim, String x1, String x2) {
    // Untimed call: yields the score to report, so the timed loop covers exactly REPEAT calls.
    double s = sim.getSimilarity(x1, x2);

    StopWatch watch = new StopWatch();
    watch.start();
    for (int i = 0; i < REPEAT; i++) {
      sim.getSimilarity(x1, x2);
    }
    watch.stop();

    // Report the real iteration count; the label was previously hard-coded and wrong.
    LOG.info(" {}={} {}x in {} microsec", name, s, REPEAT, watch.getNanoTime() / 1000);
    return s;
  }
}