package com.formulasearchengine.mathosphere.mlp.text;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Multiset;
import com.google.common.collect.Multisets;
import java.io.InputStream;
import junit.framework.TestCase;
import org.apache.flink.api.java.tuple.Tuple2;
/**
 * Tests for {@link TokenCounter}, run against JSON fixtures that are loaded from the
 * test classpath via the class loader.
 */
public class TokenCounterTest extends TestCase {

  /**
   * Counts the tokens in {@code tokens.json}, then checks the count of the
   * {@code ("TEX_ONLY", "H")} tuple and the size of the resulting multiset.
   * All entries are also printed to standard output, highest count first.
   *
   * @throws Exception if loading the fixture or counting fails
   */
  public void testCountTokens() throws Exception {
    TokenCounter ct = new TokenCounter();
    // try-with-resources: the fixture stream is closed even when an assertion below fails.
    try (InputStream in = getClass().getClassLoader().getResourceAsStream("tokens.json")) {
      // getResourceAsStream returns null for a missing resource; fail with a clear message.
      assertNotNull("tokens.json not found on the test classpath", in);
      Multiset<Tuple2<String, String>> count = ct.countTokens(in);
      assertEquals(11, count.count(new Tuple2<>("TEX_ONLY", "H")));
      assertEquals(9788, count.size());
      ImmutableSet<Multiset.Entry<Tuple2<String, String>>> entries =
          Multisets.copyHighestCountFirst(count).entrySet();
      for (Multiset.Entry<Tuple2<String, String>> tuple2Entry : entries) {
        System.out.println(tuple2Entry.getElement().toString() + ":" + tuple2Entry.getCount());
      }
    }
  }

  /**
   * Counts the identifiers in {@code identifier.json}, then checks the count of {@code "H"}
   * and the size of the resulting multiset. All entries are also printed to standard
   * output, highest count first.
   *
   * @throws Exception if loading the fixture or counting fails
   */
  public void testCountIdentifier() throws Exception {
    TokenCounter ct = new TokenCounter();
    // try-with-resources: the fixture stream is closed even when an assertion below fails.
    try (InputStream in = getClass().getClassLoader().getResourceAsStream("identifier.json")) {
      // getResourceAsStream returns null for a missing resource; fail with a clear message.
      assertNotNull("identifier.json not found on the test classpath", in);
      Multiset<String> count = ct.countIdentifer(in);
      assertEquals(5, count.count("H"));
      assertEquals(1443, count.size());
      ImmutableSet<Multiset.Entry<String>> entries =
          Multisets.copyHighestCountFirst(count).entrySet();
      for (Multiset.Entry<String> tuple2Entry : entries) {
        System.out.println(tuple2Entry.getElement() + ":" + tuple2Entry.getCount());
      }
    }
  }
}