package edu.stanford.nlp.ie;
import java.util.List;
import edu.stanford.nlp.ie.regexp.NumberSequenceClassifier;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.CoreUtilities;
import junit.framework.TestCase;
/** These tests focus on whether the new framework delivers results that
* the old framework did.
*
* @author Christopher Manning
*/
public class NumberSequenceClassifierExpectedOutputITest extends TestCase {
private NumberSequenceClassifier nscOld = new NumberSequenceClassifier(false);
private NumberSequenceClassifier nscNew = new NumberSequenceClassifier(true);
private String[][] w1 = {
{ "\u20AC", "30" },
{ "500", "US$" },
{ "forty", "three", "cents" },
{ "weighs", "almost", "192", "pounds" },
};
private String[][] t1 = {
{ "SYM", "CD" },
{ "CD", "$" },
{ "CD", "CD", "NNS" },
{ "VBZ", "RB", "CD", "NNS" },
};
private int[] i1 = {
0,
1,
0,
2,
};
private String[] a1 = {
"MONEY",
"MONEY",
"MONEY",
"NUMBER",
};
public void testCurrencyOld() {
assert w1.length == t1.length;
assert w1.length == i1.length;
assert w1.length == a1.length;
for (int i = 0; i < w1.length; i++) {
List<CoreLabel> cl = CoreUtilities.toCoreLabelList(w1[i], t1[i]);
cl = nscOld.classify(cl);
assertEquals("Failed on " + w1[i][i1[i]], a1[i], cl.get(i1[i]).get(CoreAnnotations.AnswerAnnotation.class));
}
}
public void testCurrencyNew() {
assert w1.length == t1.length;
assert w1.length == i1.length;
assert w1.length == a1.length;
for (int i = 0; i < w1.length; i++) {
List<CoreLabel> cl = CoreUtilities.toCoreLabelList(w1[i], t1[i]);
cl = nscNew.classify(cl);
assertEquals("Failed on " + w1[i][i1[i]], a1[i], cl.get(i1[i]).get(CoreAnnotations.AnswerAnnotation.class));
}
}
// notes:
// SUTime is regarding 1929 or even 1132 by itself as a DATE. Too broad?
// SUTime shouldn't exception on "11/31/1986" even though it isn't a valid date
private String[][] w2 = {
{ "1:43" },
{ "42:76" },
{ "22:14:12" },
{ "02:96:15" },
{ "12/31/1986" },
{ "5/18/1986" },
{ "11\\/3\\/1986" },
{ "2011-08-18"},
{ "13", "Oct"},
{ "December", "7"},
{ "18", "September", "2001" },
{ "3rd", "October", "1952" },
{ "5th", "of", "January", ",", "2011" },
{ "5:45", "a.m." },
{ "7:50", "PM"},
{ "March", "2001"},
{ "2011", "November", "18"},
{ "four", "hundred", "and", "two"},
{ "11th", "of", "February"},
{ "31st", "of", "December"},
{ "11th", "time"},
{ "First", "of", "April"},
};
private String[][] t2 = {
{ "CD" },
{ "CD" },
{ "CD" },
{ "CD" },
{ "CD" },
{ "CD" },
{ "CD" },
{ "CD" },
{ "CD", "NNP" },
{ "NNP", "CD" },
{ "CD", "NNP", "CD" },
{ "JJ", "NNP", "CD" },
{ "JJ", "IN", "NNP", ",", "CD" },
{ "CD", "NN"},
{ "CD", "NN"},
{ "NNP", "CD"},
{ "CD", "NNP", "CD"},
{ "CD", "CD", "CC", "CD"},
{ "JJ", "IN", "NNP"},
{ "JJ", "IN", "NNP"},
{ "JJ", "NN"},
{ "JJ", "IN", "NNP"},
};
private int[] i2 = {
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
2,
0,
1,
1,
1,
1,
1,
2,
0,
1,
0,
0,
};
private String[] a2 = {
"TIME",
"NUMBER",
"TIME",
"NUMBER",
"DATE",
"DATE",
"DATE",
"DATE",
"DATE",
"DATE",
"DATE",
"DATE",
"DATE",
"TIME",
"TIME",
"DATE",
"DATE",
"NUMBER",
"DATE",
"DATE",
"ORDINAL",
"DATE",
};
public void testCdOld() {
assert w2.length == t2.length;
assert w2.length == i2.length;
assert w2.length == a2.length;
for (int i = 0; i < w2.length; i++) {
List<CoreLabel> cl = CoreUtilities.toCoreLabelList(w2[i], t2[i]);
cl = nscOld.classify(cl);
assertEquals("Failed on " + w2[i][i2[i]], a2[i], cl.get(i2[i]).get(CoreAnnotations.AnswerAnnotation.class));
}
}
public void testCdNew() {
assert w2.length == t2.length;
assert w2.length == i2.length;
assert w2.length == a2.length;
for (int i = 0; i < w2.length; i++) {
List<CoreLabel> cl = CoreUtilities.toCoreLabelList(w2[i], t2[i]);
//System.err.println("CHECKING: " + cl);
cl = nscNew.classify(cl);
assertEquals("Failed on " + w2[i][i2[i]], a2[i], cl.get(i2[i]).get(CoreAnnotations.AnswerAnnotation.class));
}
}
}