package edu.stanford.nlp.util;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Random;
import edu.stanford.nlp.ling.CoreAnnotations;
import junit.framework.TestCase;
import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.math.ArrayMath;
/**
* Tests that the CoreMap TypesafeMap works as expected.
*
* @author dramage
*/
public class CoreMapTest extends TestCase {
private static class StringA implements CoreAnnotation<String> {
public Class<String> getType() { return String.class; } }
private static class StringB implements CoreAnnotation<String> {
public Class<String> getType() { return String.class; } }
/** This class is used in CoreMapsTest, so it can't be private. */
static class IntegerA implements CoreAnnotation<Integer> {
public Class<Integer> getType() { return Integer.class; } }
@SuppressWarnings("unchecked")
public void testCoreMap() {
CoreMap object = new ArrayCoreMap(0);
assertFalse(object.containsKey(StringA.class));
object.set(StringA.class, "stem");
assertTrue(object.containsKey(StringA.class));
assertEquals("stem", object.get(StringA.class));
object.set(StringA.class, "hi");
assertEquals("hi", object.get(StringA.class));
assertEquals(null, object.get(IntegerA.class));
object.set(IntegerA.class, 4);
assertEquals(Integer.valueOf(4), object.get(IntegerA.class));
object.set(StringB.class, "Yes");
assertEquals("Wrong # objects", 3, object.keySet().size());
assertEquals("Wrong keyset",
new HashSet<Class<?>>(
Arrays.asList(StringA.class, IntegerA.class, StringB.class)),
object.keySet());
assertEquals("Wrong remove value", Integer.valueOf(4),
object.remove(IntegerA.class));
assertEquals("Wrong # objects", 2, object.keySet().size());
assertEquals("Wrong keyset",
new HashSet<Class<?>>(Arrays.asList(StringA.class, StringB.class)),
object.keySet());
assertEquals("Wrong value", "hi", object.get(StringA.class));
assertEquals("Wrong value", "Yes", object.get(StringB.class));
assertEquals(null, object.set(IntegerA.class, 7));
assertEquals(Integer.valueOf(7), object.get(IntegerA.class));
assertEquals(Integer.valueOf(7), object.set(IntegerA.class, 3));
assertEquals(Integer.valueOf(3), object.get(IntegerA.class));
}
public void testToShorterString() {
ArrayCoreMap a = new ArrayCoreMap();
a.set(CoreAnnotations.TextAnnotation.class, "Australia");
a.set(CoreAnnotations.NamedEntityTagAnnotation.class, "LOCATION");
a.set(CoreAnnotations.BeforeAnnotation.class, " ");
a.set(CoreAnnotations.PartOfSpeechAnnotation.class, "NNP");
a.set(CoreAnnotations.ShapeAnnotation.class, "Xx");
assertEquals("Incorrect toShorterString()", "[Text=Australia NamedEntityTag=LOCATION]",
a.toShorterString("Text", "NamedEntityTag"));
assertEquals("Incorrect toShorterString()", "[Text=Australia]",
a.toShorterString("Text"));
assertEquals("Incorrect toShorterString()",
"[Text=Australia NamedEntityTag=LOCATION Before= PartOfSpeech=NNP Shape=Xx]",
a.toShorterString());
}
public void testEquality() {
CoreMap a = new ArrayCoreMap();
CoreMap b = new ArrayCoreMap();
assertTrue(a.equals(b));
assertTrue(a.hashCode() == b.hashCode());
a.set(StringA.class, "hi");
assertFalse(a.equals(b));
assertFalse(a.hashCode() == b.hashCode());
b.set(StringA.class, "hi");
assertTrue(a.equals(b));
assertTrue(a.hashCode() == b.hashCode());
a.remove(StringA.class);
assertFalse(a.equals(b));
assertFalse(a.hashCode() == b.hashCode());
}
/**
* This method is for comparing the speed of the ArrayCoreMap family and
* HashMap. It tests random access speed for a fixed number of accesses, i,
* for both a CoreLabel (can be swapped out for an ArrayCoreMap) and a
* HashMap. Switching the order of testing (CoreLabel first or second) shows
* that there's a slight advantage to the second loop, especially noticeable
* for small i - this is due to some background java funky-ness, so we now
* run 50% each way.
*/
@SuppressWarnings({"StringEquality"})
public static void main(String[] args) {
@SuppressWarnings("unchecked")
Class<CoreAnnotation<String>>[] allKeys = new Class[]{CoreAnnotations.TextAnnotation.class, CoreAnnotations.LemmaAnnotation.class,
CoreAnnotations.PartOfSpeechAnnotation.class, CoreAnnotations.ShapeAnnotation.class, CoreAnnotations.NamedEntityTagAnnotation.class,
CoreAnnotations.DocIDAnnotation.class, CoreAnnotations.ValueAnnotation.class, CoreAnnotations.CategoryAnnotation.class,
CoreAnnotations.BeforeAnnotation.class, CoreAnnotations.AfterAnnotation.class, CoreAnnotations.OriginalTextAnnotation.class,
CoreAnnotations.ArgumentAnnotation.class, CoreAnnotations.MarkingAnnotation.class
};
// how many iterations
final int numBurnRounds = 10;
final int numGoodRounds = 60;
final int numIterations = 2000000;
final int maxNumKeys = 12;
double gains = 0.0;
for (int numKeys = 1; numKeys <= maxNumKeys; numKeys++) {
// the HashMap instance
HashMap<String,String> hashmap = new HashMap<String, String>(numKeys);
// the CoreMap instance
CoreMap coremap = new ArrayCoreMap(numKeys);
// the set of keys to use
String[] hashKeys = new String[numKeys];
@SuppressWarnings("unchecked")
Class<CoreAnnotation<String>>[] coreKeys = new Class[numKeys];
for (int key = 0; key < numKeys; key++) {
hashKeys[key] = allKeys[key].getSimpleName();
coreKeys[key] = allKeys[key];
}
// initialize with default values
for (int i = 0; i < numKeys; i++) {
coremap.set(coreKeys[i], String.valueOf(i));
hashmap.put(hashKeys[i], String.valueOf(i));
}
assert coremap.size() == numKeys;
assert hashmap.size() == numKeys;
// for storing results
double[] hashTimings = new double[numGoodRounds];
double[] coreTimings = new double[numGoodRounds];
final Random rand = new Random(0);
boolean foundEqual = false;
for (int round = 0; round < numBurnRounds + numGoodRounds; round++) {
System.err.print(".");
if (round % 2 == 0) {
// test timings on hashmap first
final long hashStart = System.nanoTime();
final int length = hashKeys.length;
String last = null;
for (int i = 0; i < numIterations; i++) {
int key = rand.nextInt(length);
String val = hashmap.get(hashKeys[key]);
if (val == last) {
foundEqual = true;
}
last = val;
}
if (round >= numBurnRounds) {
hashTimings[round-numBurnRounds] = (System.nanoTime() - hashStart) / 1000000000.0;
}
}
{ // test timings on coremap
final long coreStart = System.nanoTime();
final int length = coreKeys.length;
String last = null;
for (int i = 0; i < numIterations; i++) {
int key = rand.nextInt(length);
String val = coremap.get(coreKeys[key]);
if (val == last) {
foundEqual = true;
}
last = val;
}
if (round >= numBurnRounds) {
coreTimings[round-numBurnRounds] = (System.nanoTime() - coreStart) / 1000000000.0;
}
}
if (round % 2 == 1) {
// test timings on hashmap second
final long hashStart = System.nanoTime();
final int length = hashKeys.length;
String last = null;
for (int i = 0; i < numIterations; i++) {
int key = rand.nextInt(length);
String val = hashmap.get(hashKeys[key]);
if (val == last) {
foundEqual = true;
}
last = val;
}
if (round >= numBurnRounds) {
hashTimings[round-numBurnRounds] = (System.nanoTime() - hashStart) / 1000000000.0;
}
}
}
if (foundEqual) { System.err.print(" [found equal]"); }
System.err.println();
double hashMean = ArrayMath.mean(hashTimings);
double coreMean = ArrayMath.mean(coreTimings);
double percentDiff = (hashMean - coreMean) / hashMean * 100.0;
NumberFormat nf = new DecimalFormat("0.00");
System.out.println("HashMap @ " + numKeys + " keys: "+ hashMean +
" secs/2million gets");
System.out.println("CoreMap @ " + numKeys + " keys: "+ coreMean +
" secs/2million gets (" + nf.format(Math.abs(percentDiff)) + "% " +
(percentDiff >= 0.0 ? "faster" : "slower") + ")");
gains += percentDiff;
}
System.out.println();
gains = gains / maxNumKeys;
System.out.println("Average: " + Math.abs(gains) + "% " +
(gains >= 0.0 ? "faster" : "slower") + ".");
}
}