package edu.stanford.nlp.ie;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.util.CoreMap;
import org.junit.*;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
/**
* Test cases for number normalizer.
*
* @author Angel Chang
*/
public class NumberNormalizerITest {
private static AnnotationPipeline pipeline; // = null;
private static final boolean VERBOSE = false;
@BeforeClass
public static void runOnceBeforeClass() {
if (VERBOSE) {
System.err.println("Setting up pipeline in @BeforeClasss");
}
pipeline = new AnnotationPipeline();
pipeline.addAnnotator(new TokenizerAnnotator(false, "en"));
pipeline.addAnnotator(new WordsToSentencesAnnotator(false));
pipeline.addAnnotator(new POSTaggerAnnotator(DefaultPaths.DEFAULT_POS_MODEL, false));
}
@AfterClass
public static void runOnceAfterClass() {
System.err.println("Nulling pipeline in @AfterClass");
pipeline = null;
}
@Test
public void testNumbers() throws IOException {
// Set up test text
String testText =
"two dozen\n" +
"six hundred,\n" +
"four hundred, and twelve.\n" +
"4 million six hundred fifty thousand, two hundred and eleven.\n" +
"6 hundred billion, five million six hundred fifty thousand, three hundred and seventy six\n" +
"5,786,345\n" +
"twenty-five.\n" +
// "one and a half million\n" +
"1.3 million.\n" +
"one thousand two hundred and twenty four\n" +
"10 thousand million.\n"+
"3.625\n" +
"zero\n" +
"-15\n" +
"one two three four.\n" +
"one hundred and fifty five\n" +
"a hundred and one\n" +
// "five oh four\n"
"four score.\n" +
"a dozen bagels\n" +
"five dozen\n" +
"An IQ score of 161.\n" + // only 161, not 20 for score
"thirty two\n"
;
// set up expected results
Iterator<? extends Number> expectedNumbers = Arrays.asList(
24.0, 600.0, 412.0, 4650211.0, 600005650376.0, 5786345, 25.0,
/* 1500000.0, */
1300000.0, 1224.0, 10000000000.0, 3.625,
0, -15.0, 1, 2, 3, 4, 155.0, 101.0 /*504.0, */, 80.0, 12, 60.0, 161, 32.0 ).iterator();
Iterator<String> expectedTexts = Arrays.asList(
"two dozen", "six hundred", "four hundred, and twelve",
"4 million six hundred fifty thousand, two hundred and eleven",
"6 hundred billion, five million six hundred fifty thousand, three hundred and seventy six",
"5,786,345",
"twenty-five",
// "one and half million",
"1.3 million",
"one thousand two hundred and twenty four",
"10 thousand million",
"3.625",
"zero", "-15", "one", "two", "three", "four",
"one hundred and fifty five",
"hundred and one" /* "five oh four", */,
"four score",
"dozen",
"five dozen",
"161", "thirty two").iterator();
// create document
Annotation document = createDocument(testText);
// Annotate numbers
NumberNormalizer.findAndAnnotateNumericExpressions(document);
// Check answers
for (CoreMap num: document.get(CoreAnnotations.NumerizedTokensAnnotation.class)) {
if (num.containsKey(CoreAnnotations.NumericCompositeTypeAnnotation.class)) {
Number expectedNumber = expectedNumbers.next();
String expectedType = "NUMBER";
String expectedText = expectedTexts.next();
String text = document.get(CoreAnnotations.TextAnnotation.class).substring(
num.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
num.get(CoreAnnotations.CharacterOffsetEndAnnotation.class)
);
if (VERBOSE) {
System.err.printf("Found %s of type %s with value %s%n",
text,
num.get(CoreAnnotations.NumericCompositeTypeAnnotation.class),
num.get(CoreAnnotations.NumericCompositeValueAnnotation.class));
}
assertEquals(expectedText, text);
assertEquals(expectedType, num.get(CoreAnnotations.NumericCompositeTypeAnnotation.class));
assertEquals(expectedNumber.toString(), num.get(CoreAnnotations.NumericCompositeValueAnnotation.class).toString());
// This doesn't work as sometimes type is different
// assertEquals(expectedNumber, num.get(CoreAnnotations.NumericCompositeValueAnnotation.class));
// } else if (VERBOSE) {
// System.err.println("num is " + num.toShorterString());
}
}
assertFalse(expectedNumbers.hasNext());
}
@Test
public void testOrdinals() throws IOException {
// Set up test text
String testText =
"0th, 1st, 2nd, 3rd, 4th, 5th, 6th, 7th, 8th, 9th, 10th\n" +
"zeroth, first, second, third, fourth, fifth, sixth, seventh, eighth, ninth, tenth\n" +
"11th, 12th, 13th, 14th, 15th, 16th, 17th, 18th, 19th, 20th\n" +
"Eleventh, twelfth, thirteenth, Fourteenth, fifteenth, Sixteenth, seventeenth, eighteenth, nineteenth, twentieth\n" +
"Twenty-first, twenty first, twenty second, twenty third, twenty fourth\n" +
"thirtieth, thirty first, thirty-second," +
"fortieth, one hundredth, two hundredth, one hundred and fifty first, one hundred fifty first";
// TODO: Fix consistency of number representation
// set up expected results
Iterator<? extends Number> expectedNumbers =
Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21.0, 21.0, 22.0, 23.0, 24.0, 30, 31.0, 32.0, 40, 100.0, 200.0, 151.0, 151.0).iterator();
Iterator<String> expectedTexts = Arrays.asList(testText.split("\\s*[,\\n]+\\s*")).iterator();
// create document
Annotation document = createDocument(testText);
// Annotate numbers
NumberNormalizer.findAndAnnotateNumericExpressions(document);
// Check answers
for (CoreMap num: document.get(CoreAnnotations.NumerizedTokensAnnotation.class)) {
if (num.containsKey(CoreAnnotations.NumericCompositeTypeAnnotation.class)) {
Number expectedNumber = expectedNumbers.next();
String expectedType = "ORDINAL";
String expectedText = expectedTexts.next();
String text = document.get(CoreAnnotations.TextAnnotation.class).substring(
num.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
num.get(CoreAnnotations.CharacterOffsetEndAnnotation.class)
);
if (VERBOSE) {
System.err.printf("Found %s of type %s with value %s%n",
text,
num.get(CoreAnnotations.NumericCompositeTypeAnnotation.class),
num.get(CoreAnnotations.NumericCompositeValueAnnotation.class));
}
assertEquals(expectedText, text);
assertEquals("Type for " + expectedText, expectedType, num.get(CoreAnnotations.NumericCompositeTypeAnnotation.class));
assertEquals(expectedNumber.toString(), num.get(CoreAnnotations.NumericCompositeValueAnnotation.class).toString());
}
}
assertFalse(expectedNumbers.hasNext());
}
private static Annotation createDocument(String text) {
Annotation annotation = new Annotation(text);
pipeline.annotate(annotation);
return annotation;
}
}