package edu.uncc.cs.watsonsim;
import static org.junit.Assert.*;
import java.util.ArrayList;
import org.junit.Test;
import edu.stanford.nlp.trees.Tree;
import edu.uncc.cs.watsonsim.Phrase;
import edu.uncc.cs.watsonsim.nlp.Trees;
import edu.uncc.cs.watsonsim.scorers.CommonConstituents;
/**
 * Regression tests for {@link Trees#parse} and
 * {@link CommonConstituents#getCommonSubtreeCount}.
 *
 * <p>The expected values below pin the currently observed output so that an
 * accidental change in parsing or scoring is noticed.
 */
public class CoreNLPSentenceSimilarityTest {

    /** Allowed absolute difference when comparing subtree counts. */
    private static final double COUNT_TOLERANCE = 0.01;

    /** Text containing two sentences, used to check per-sentence parsing. */
    private static final String TWO_SENTENCES = "I am tall. You are short.";

    /**
     * Checks the trees produced by {@code Trees.parse} for empty input, a
     * single word, a full sentence, and a two-sentence text.
     */
    @Test
    public void testParseToTree() {
        // Empty input: the result must equal an empty list.
        assertEquals(new ArrayList<Tree>(), Trees.parse(""));

        // A single word.
        Tree expectedWord = Tree.valueOf("(ROOT (NP (NN Example)))");
        Tree actualWord = Trees.parse("Example").get(0);
        assertEquals(expectedWord, actualWord);

        // A harder sentence.
        // Not asserted because it fails:
        //   "Buffalo buffalo Buffalo buffalo buffalo buffalo Buffalo buffalo."
        // This one succeeds, and the tree looks generally right on inspection.
        String moonTree = "(ROOT (S (NP (NNP Niel) (NNP Armstrong)) "
                + "(VP (VBD was) (NP (DT the) (JJ first) (NN man)"
                + "(S (VP (TO to) (VP (VB walk) "
                + "(PP (IN on) (NP (DT the) (NN moon)))))))) (. .)))";
        Tree expectedMoon = Tree.valueOf(moonTree);
        Tree actualMoon =
                Trees.parse("Niel Armstrong was the first man to walk on the moon.").get(0);
        assertEquals(expectedMoon, actualMoon);

        // Two sentences in one text: index 0 is the first sentence's tree.
        Tree expectedFirst =
                Tree.valueOf("(ROOT (S (NP (PRP I)) (VP (VBP am) (ADJP (JJ tall))) (. .)))");
        Tree actualFirst = Trees.parse(TWO_SENTENCES).get(0);
        assertEquals(expectedFirst, actualFirst);

        // ... and index 1 is the second sentence's tree.
        Tree expectedSecond =
                Tree.valueOf("(ROOT (S (NP (PRP You)) (VP (VBP are) (ADJP (JJ short))) (. .)))");
        Tree actualSecond = Trees.parse(TWO_SENTENCES).get(1);
        assertEquals(expectedSecond, actualSecond);
    }

    /**
     * Pins the common-subtree counts reported for two pairs of phrases.
     *
     * <p>The expected numbers mainly guard against the score changing by
     * accident.
     */
    @Test
    public void testScorePhrases() {
        CommonConstituents scorer = new CommonConstituents();

        // Disabled assertion kept for reference (identical single-word phrases):
        //   assertEquals(
        //       1.0,
        //       scorer.getCommonSubtreeCount(new Phrase("this"), new Phrase("this")),
        //       0.01);

        // Two unrelated sentences.
        Phrase goat = new Phrase("My goat knows the bowling score.");
        Phrase boat = new Phrase("Michael rowed the boat ashore.");
        assertEquals(6.0, scorer.getCommonSubtreeCount(goat, boat), COUNT_TOLERANCE);

        // The same rhyme punctuated as three sentences versus one question.
        Phrase rhymeAsSentences =
                new Phrase("A tisket. A tasket. A green and yellow basket.");
        Phrase rhymeAsQuestion =
                new Phrase("A tisket, a tasket, what color is my basket?");
        assertEquals(
                12.0,
                scorer.getCommonSubtreeCount(rhymeAsSentences, rhymeAsQuestion),
                COUNT_TOLERANCE);
    }
}