package edu.hawaii.jmotif.sax;
import edu.hawaii.jmotif.text.TextUtils;
import edu.hawaii.jmotif.text.WordBag;
import static org.junit.Assert.assertTrue;
import java.util.HashMap;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
/**
 * Tests the {@link TextUtils} term statistics (TF, DF, IDF and TF-IDF) against three small,
 * hand-built word bags.
 *
 * <p>NOTE(review): every test in this class is currently disabled with {@code @Ignore}; the reason
 * is not recorded in this file.
 *
 * @author psenin
 *
 */
public class TestTextUtils {

  /** Absolute tolerance used when comparing floating-point statistics. */
  private static final double DELTA = 1e-12;

  /** Fixture for the first bag; each row is {word, occurrence count as a decimal string}. */
  private static final String[][] BAG1 = {
      { "the", "3" },
      { "brown", "5" },
      { "cow", "2" } };

  /** Fixture for the second bag; same row layout as {@link #BAG1}. */
  private static final String[][] BAG2 = {
      { "the", "3" },
      { "green", "2" },
      { "hill", "3" },
      { "cow", "2" },
      { "grass", "4" } };

  /** Fixture for the third bag; same row layout as {@link #BAG1}. */
  private static final String[][] BAG3 = {
      { "the", "3" },
      { "hill", "2" },
      { "meadow", "4" },
      { "cow", "4" },
      { "air", "2" } };

  private WordBag bag1;
  private WordBag bag2;
  private WordBag bag3;

  /** All three bags, keyed by the label each bag reports. */
  private HashMap<String, WordBag> bags;

  /**
   * Test set-up: rebuilds the three bags from the fixtures and indexes them by label.
   */
  @Before
  public void setUp() {
    bag1 = buildBag("bag1", BAG1);
    bag2 = buildBag("bag2", BAG2);
    bag3 = buildBag("bag3", BAG3);
    bags = new HashMap<String, WordBag>();
    bags.put(bag1.getLabel(), bag1);
    bags.put(bag2.getLabel(), bag2);
    bags.put(bag3.getLabel(), bag3);
  }

  /**
   * Test the term frequency method.
   *
   * <p>Each expected value is the word's count over the largest count in the same fixture, e.g.
   * 3/5 for "the" in bag1, where "brown" has the largest count of 5.
   */
  @Ignore
  @Test
  public void testTF() {
    assertClose("normalized TF of '" + BAG1[0][0] + "' in bag1", 3.0D / 5.0D,
        TextUtils.normalizedTF(bag1, BAG1[0][0]));
    assertClose("normalized TF of '" + BAG2[1][0] + "' in bag2", 2.0D / 4.0D,
        TextUtils.normalizedTF(bag2, BAG2[1][0]));
    assertClose("normalized TF of '" + BAG3[3][0] + "' in bag3", 4.0D / 4.0D,
        TextUtils.normalizedTF(bag3, BAG3[3][0]));
  }

  /**
   * Test the document frequency method: "the" appears in all three fixtures, "meadow" only in
   * the third.
   */
  @Ignore
  @Test
  public void testDF() {
    assertTrue("DF of 'the' should be 3", 3 == TextUtils.df(bags, "the"));
    assertTrue("DF of 'meadow' should be 1", 1 == TextUtils.df(bags, "meadow"));
  }

  /**
   * Test inverse document frequency method.
   *
   * <p>Each finite expected value is the number of bags (3) over the number of fixtures that
   * contain the word; a word found in no bag is expected to yield positive infinity.
   */
  @Ignore
  @Test
  public void testIDF() {
    assertTrue("IDF of a word absent from every bag should be +Infinity",
        Double.POSITIVE_INFINITY == TextUtils.idf(bags, "non"));
    assertClose("IDF of 'the'", 1.0D, TextUtils.idf(bags, "the"));
    assertClose("IDF of 'hill'", 3.0D / 2.0D, TextUtils.idf(bags, "hill"));
    assertClose("IDF of 'air'", 3.0D / 1.0D, TextUtils.idf(bags, "air"));
  }

  /**
   * Test that the tf-idf weight of "the" in bag1 is expected to be zero.
   */
  @Ignore
  @Test
  public void testTFIDF_bag1_the() {
    HashMap<String, HashMap<String, Double>> tfidf = TextUtils.computeTFIDF(bags.values());
    assertClose("tf-idf of 'the' in bag1", 0.0D, tfidfOf(tfidf, "bag1", "the"));
  }

  /**
   * Test tf-idf statistics: the computed weight of "hill" in bag2 must equal
   * {@code logTF * log10(idf)} assembled from the individual helpers.
   */
  @Ignore
  @Test
  public void testTFIDF() {
    HashMap<String, HashMap<String, Double>> tfidf = TextUtils.computeTFIDF(bags.values());
    double tfHill2 = TextUtils.logTF(bag2, "hill");
    double idfHill = TextUtils.idf(bags, "hill");
    double tfidfHill2 = tfHill2 * Math.log10(idfHill);
    // The failure message carries the intermediate values, so no console output is needed.
    assertClose("tf-idf of 'hill' in bag2 (logTF=" + tfHill2 + ", idf=" + idfHill + ")",
        tfidfHill2, tfidfOf(tfidf, "bag2", "hill"));
  }

  /**
   * Test tf-idf statistics: the computed weight of "hill" in bag3 must equal
   * {@code logTF * log10(idf)} assembled from the individual helpers.
   */
  @Ignore
  @Test
  public void testTFIDF_bag3_hill() {
    HashMap<String, HashMap<String, Double>> tfidf = TextUtils.computeTFIDF(bags.values());
    double idfHill = TextUtils.idf(bags, "hill");
    double tfHill3 = TextUtils.logTF(bag3, "hill");
    double tfidfHill3 = tfHill3 * Math.log10(idfHill);
    // The failure message carries the intermediate values, so no console output is needed.
    assertClose("tf-idf of 'hill' in bag3 (logTF=" + tfHill3 + ", idf=" + idfHill + ")",
        tfidfHill3, tfidfOf(tfidf, "bag3", "hill"));
  }

  /**
   * Asserts that two doubles are identical or differ by no more than {@link #DELTA}, reporting
   * both values on failure.
   *
   * @param what Description of the quantity under test, used as the failure-message prefix.
   * @param expected The expected value.
   * @param actual The value produced by the code under test.
   */
  private static void assertClose(String what, double expected, double actual) {
    // Double.compare accepts identical infinities/NaNs, for which the subtraction yields NaN.
    boolean close = Double.compare(expected, actual) == 0
        || Math.abs(expected - actual) <= DELTA;
    assertTrue(what + ": expected <" + expected + "> but was <" + actual + ">", close);
  }

  /**
   * Looks up one tf-idf weight, failing the test with a descriptive message (rather than a
   * NullPointerException) when the bag or the word has no entry.
   *
   * @param tfidf The per-bag, per-word weights to search.
   * @param bagLabel The label of the bag.
   * @param word The word whose weight is wanted.
   * @return The stored weight.
   */
  private static double tfidfOf(HashMap<String, HashMap<String, Double>> tfidf, String bagLabel,
      String word) {
    HashMap<String, Double> weights = tfidf.get(bagLabel);
    assertTrue("no tf-idf entries for bag '" + bagLabel + "'", weights != null);
    Double weight = weights.get(word);
    assertTrue("no tf-idf entry for '" + word + "' in bag '" + bagLabel + "'", weight != null);
    return weight.doubleValue();
  }

  /**
   * private method for building test bag objects.
   *
   * @param name The bag name.
   * @param data The test data; each row is {word, occurrence count as a decimal string}.
   * @return The wordBag class.
   */
  private WordBag buildBag(String name, String[][] data) {
    WordBag res = new WordBag(name);
    for (String[] d : data) {
      res.addWord(d[0], Integer.valueOf(d[1]));
    }
    return res;
  }
}