package edu.hawaii.jmotif.sax;
import edu.hawaii.jmotif.text.TextUtils;
import edu.hawaii.jmotif.text.WordBag;
import static org.junit.Assert.assertEquals;
import java.util.HashMap;
import java.util.Map.Entry;
import org.junit.Test;
/**
* Test the cosine similarity implementation.
*
* @author psenin
*
*/
public class TestCosineSimilarity {

  /**
   * Expected result for the two fixture bags. With the counts used below the dot product of the
   * two count vectors is 9 and their Euclidean norms are sqrt(12) and sqrt(10), so this constant
   * equals 9 / sqrt(120) (assuming each addWord call records the given count for that word).
   */
  private static final double TEST_VALUE = 0.8215838362577491D;

  /** Absolute tolerance handed to assertEquals when comparing against {@link #TEST_VALUE}. */
  private static final double TEST_PASS_PRECISION = 0.000000000000001;

  /**
   * Checks the value computed for two small hand-made word bags against the precomputed
   * expectation.
   */
  @Test
  public void testCosineSimilarity() {
    WordBag wb1 = buildFirstBag();
    WordBag wb2 = buildSecondBag();

    double cosine = TextUtils.cosineDistance(wb1.getWordsAsDoubles(), wb2.getWordsAsDoubles());
    assertEquals("Testing cosine similarity", TEST_VALUE, cosine, TEST_PASS_PRECISION);
  }

  /**
   * Makes sure that neither scaling one vector nor normalizing both vectors to unit length
   * changes the computed value, i.e. the result is expected to stay at {@link #TEST_VALUE}.
   */
  @Test
  public void testCosineSimilarityNorm() {
    WordBag wb1 = buildFirstBag();
    WordBag wb2 = buildSecondBag();

    // baseline: the untouched vectors
    double cosine = TextUtils.cosineDistance(wb1.getWordsAsDoubles(), wb2.getWordsAsDoubles());
    assertEquals("Testing cosine similarity", TEST_VALUE, cosine, TEST_PASS_PRECISION);

    // grow the first vector by multiplying every component with the same factor
    HashMap<String, Double> wbLong = wb1.getWordsAsDoubles();
    double multiplier = 8.24864813846848348486;
    for (Entry<String, Double> e : wbLong.entrySet()) {
      // write through the entry instead of calling put() on the map that is being iterated
      e.setValue(e.getValue() * multiplier);
    }
    double distLong = TextUtils.cosineDistance(wbLong, wb2.getWordsAsDoubles());
    assertEquals("Testing cosine similarity", TEST_VALUE, distLong, TEST_PASS_PRECISION);

    // normalize both vectors (the scaled first one and the original second one)
    HashMap<String, HashMap<String, Double>> vectors =
        new HashMap<String, HashMap<String, Double>>();
    vectors.put("first", wbLong);
    vectors.put("second", wb2.getWordsAsDoubles());
    vectors = TextUtils.normalizeToUnitVectors(vectors);
    double distNorm = TextUtils.cosineDistance(vectors.get("first"), vectors.get("second"));
    assertEquals("Testing cosine similarity", TEST_VALUE, distNorm, TEST_PASS_PRECISION);
  }

  /**
   * Builds the bag labeled "first" that both tests use as the left-hand operand.
   *
   * @return a new bag with the counts me=2, Julie=1, likes=0, loves=2, Jane=0, Linda=1, than=1,
   *         more=1.
   */
  private static WordBag buildFirstBag() {
    WordBag bag = new WordBag("first");
    bag.addWord("me", 2);
    bag.addWord("Julie", 1);
    bag.addWord("likes", 0);
    bag.addWord("loves", 2);
    bag.addWord("Jane", 0);
    bag.addWord("Linda", 1);
    bag.addWord("than", 1);
    bag.addWord("more", 1);
    return bag;
  }

  /**
   * Builds the bag labeled "second" that both tests use as the right-hand operand.
   *
   * @return a new bag with the counts me=2, Julie=1, likes=1, loves=1, Jane=1, Linda=0, than=1,
   *         more=1.
   */
  private static WordBag buildSecondBag() {
    WordBag bag = new WordBag("second");
    bag.addWord("me", 2);
    bag.addWord("Julie", 1);
    bag.addWord("likes", 1);
    bag.addWord("loves", 1);
    bag.addWord("Jane", 1);
    bag.addWord("Linda", 0);
    bag.addWord("than", 1);
    bag.addWord("more", 1);
    return bag;
  }
}