package edu.stanford.nlp.ie.qe;
import edu.stanford.nlp.ling.tokensregex.MatchedExpression;
import edu.stanford.nlp.pipeline.*;
import junit.framework.TestCase;
import java.util.List;
/**
* Test for quantifiable entity extractor.
*
* @author Angel Chang
*/
public class QuantifiableEntityExtractorITest extends TestCase {

  /** Shared tokenize / sentence-split / POS-tag pipeline; built on the first {@link #setUp()} and reused. */
  private static AnnotationPipeline pipeline; // = null;

  /** Extractor under test; replaced with a fresh instance on every {@link #setUp()}. */
  private static QuantifiableEntityExtractor extractor; // = null;

  /**
   * Intentionally empty placeholder test.
   * NOTE(review): presumably kept so the class still exposes one runnable
   * {@code test*} method while the real tests below are disabled - confirm.
   */
  public void test() throws Exception {
    // TODO: Enable tests after rules files are added to models
  }

  /**
   * Lazily builds the shared annotation pipeline (tokenizer, sentence splitter,
   * POS tagger) and creates a new extractor instance.
   *
   * <p>Initialization is guarded by a lock on this class so the static pipeline
   * is constructed at most once.
   *
   * @throws Exception if the superclass set-up or annotator construction fails
   */
  @Override
  public void setUp() throws Exception {
    super.setUp();
    synchronized(QuantifiableEntityExtractorITest.class) {
      if (pipeline == null) {
        pipeline = new AnnotationPipeline();
        pipeline.addAnnotator(new TokenizerAnnotator(false, "en"));
        pipeline.addAnnotator(new WordsToSentencesAnnotator(false));
        pipeline.addAnnotator(new POSTaggerAnnotator(DefaultPaths.DEFAULT_POS_MODEL, false));
        //pipeline.addAnnotator(new QuantifiableEntityNormalizingAnnotator(false, false));
      }
      extractor = new QuantifiableEntityExtractor();
      //extractor.init(new Options());
    }
  }

  /**
   * Wraps {@code text} in an {@link Annotation} and runs the shared pipeline over it.
   *
   * @param text raw text to annotate
   * @return the annotation after the pipeline has processed it
   */
  protected static Annotation createDocument(String text) {
    Annotation annotation = new Annotation(text);
    pipeline.annotate(annotation);
    return annotation;
  }

  /**
   * Expected result for a single extracted quantity: the matched surface text,
   * the string form of the extracted entity, and the type of its unit.
   */
  public static class ExpectedQuantity {
    /** Expected matched text, compared with {@code MatchedExpression.getText()}. */
    String text;
    /** Expected string form, compared with the extracted entity's {@code toString()}. */
    String normalizedValue;
    /** Expected unit type, compared with the extracted entity's {@code getUnit().type}. */
    String type;

    public ExpectedQuantity(String text, String normalizedValue, String type) {
      this.text = text;
      this.normalizedValue = normalizedValue;
      this.type = type;
    }
  }

  /**
   * Runs the extractor over each sentence and compares the matched expressions,
   * in order, with the expected quantities for that sentence.
   *
   * <p>If {@code expected} is {@code null}, the matches found for the first
   * sentence are printed to standard output and the test then fails; this is a
   * convenience for discovering what the extractor currently produces.
   *
   * @param prefix    label prepended to every assertion message
   * @param sentences input sentences, each annotated and extracted independently
   * @param expected  one row of expected quantities per sentence, or {@code null}
   * @throws Exception if annotation or extraction fails
   */
  private static void runAndCheck(String prefix, String[] sentences, ExpectedQuantity[][] expected) throws Exception {
    if (expected != null) {
      // Fail with a labelled message when the fixture is malformed, instead of
      // hitting ArrayIndexOutOfBoundsException (too few rows) or silently
      // ignoring trailing expectations (too many rows).
      assertEquals(prefix + ".expected.length", sentences.length, expected.length);
    }
    for (int si = 0; si < sentences.length; si++) {
      String sentence = sentences[si];
      Annotation annotation = createDocument(sentence);
      List<MatchedExpression> matchedExpressions = extractor.extract(annotation);

      if (expected == null) {
        // Print out matched text and value
        for (MatchedExpression matchedExpression : matchedExpressions) {
          String text = matchedExpression.getText();
          Object value = matchedExpression.getValue();
          System.out.println(prefix + ": Got expression " + text + " with value " + value);
        }
        assertTrue(prefix + ": No expected provided", false);
      } else {
        ExpectedQuantity[] expectedForSentence = expected[si];
        // Compare the overlapping prefix first so a content mismatch is reported
        // before the (less informative) count mismatch below.
        int minMatchable = Math.min(expectedForSentence.length, matchedExpressions.size());
        for (int i = 0; i < minMatchable; i++) {
          ExpectedQuantity expectedQuantity = expectedForSentence[i];
          MatchedExpression matched = matchedExpressions.get(i);
          SimpleQuantifiableEntity actualQuantity = (SimpleQuantifiableEntity) matched.getValue().get();
          String label = prefix + ".matched." + si + "." + i;
          assertEquals(label + ".text", expectedQuantity.text, matched.getText());
          assertEquals(label + ".normalizedValue", expectedQuantity.normalizedValue, actualQuantity.toString());
          assertEquals(label + ".type", expectedQuantity.type, actualQuantity.getUnit().type);
        }
        assertEquals(prefix + ".length." + si, expectedForSentence.length, matchedExpressions.size());
      }
    }
  }

  /**
   * Money expressions.
   * NOTE(review): the leading underscore presumably keeps this out of JUnit's
   * {@code test*} discovery until the rules files are available - confirm.
   */
  public static void _testMoney() throws Exception {
    String[] sentences = {
      "I have 1 dollar and 2 cents.",
      "It cost 10 thousand million dollars."
    };
    // TODO: merge the 1 dollar and 2 cents
    ExpectedQuantity[][] expected = {
      {new ExpectedQuantity("1 dollar", "$1.00", "MONEY"), new ExpectedQuantity("2 cents", "$0.02", "MONEY")},
      {new ExpectedQuantity("10 thousand million dollars", "$10000000000.00", "MONEY")}
    };
    runAndCheck("testMoney", sentences, expected);
  }

  /**
   * Length expressions in metric and imperial units.
   * NOTE(review): the leading underscore presumably keeps this out of JUnit's
   * {@code test*} discovery until the rules files are available - confirm.
   */
  public static void _testLength() throws Exception {
    String[] sentences = {
      "We are 2 kilometer away.",
      "We are 2 kilometers away.",
      "We turn after 5 miles.",
      "The box is 100 centimeters tall.",
      "The box is 10cm wide.",
      "The box is over 1000 mm long.",
      "The box is 2ft long."
    };
    ExpectedQuantity[][] expected = {
      {new ExpectedQuantity("2 kilometer", "2000.0m", "LENGTH")},
      {new ExpectedQuantity("2 kilometers", "2000.0m", "LENGTH")},
      {new ExpectedQuantity("5 miles", "5.0mi", "LENGTH")},
      {new ExpectedQuantity("100 centimeters", "1.0m", "LENGTH")},
      {new ExpectedQuantity("10cm", "0.1m", "LENGTH")},
      {new ExpectedQuantity("1000 mm", "1.0m", "LENGTH")},
      {new ExpectedQuantity("2ft", "2.0'", "LENGTH")}
    };
    runAndCheck("testLength", sentences, expected);
  }

  // We do weight instead of mass since in typical natural language
  // kilograms are used to refer to weight vs mass (in scientific usage)
  /**
   * Weight expressions.
   * NOTE(review): the leading underscore presumably keeps this out of JUnit's
   * {@code test*} discovery until the rules files are available - confirm.
   */
  public static void _testWeight() throws Exception {
    String[] sentences = {
      "The ball is 2 kilograms in weight.",
      "There are five grams.",
      "How much is seven pounds?"
    };
    ExpectedQuantity[][] expected = {
      {new ExpectedQuantity("2 kilograms", "2.0kg", "WEIGHT")},
      {new ExpectedQuantity("five grams", "0.005kg", "WEIGHT")},
      {new ExpectedQuantity("seven pounds", "7.0lb", "WEIGHT")}
    };
    runAndCheck("testWeight", sentences, expected);
  }
}