package edu.stanford.nlp.trees;
import junit.framework.TestCase;
import java.io.IOException;
import java.io.StringReader;
import java.util.HashSet;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Unit tests for the GrammaticalStructure family of classes.
*
* @author dramage
* @author mcdm
*/
public class GrammaticalStructureTest extends TestCase {

  /**
   * Matches one printed dependency of the form {@code name(word-N, word-M)}.
   * Compiled once and shared, since the expression never changes between calls.
   */
  private static final Pattern DEPENDENCY_PATTERN =
      Pattern.compile("(\\S+\\(\\S+-\\d+, \\S+-\\d+\\))");

  /**
   * Turn token string into HashSet to abstract over ordering.
   * Every substring matching {@code name(word-N, word-M)} becomes one element;
   * repeated occurrences collapse into a single entry and any text between
   * matches is ignored.
   *
   * @param tokenString text containing zero or more printed dependencies
   * @return the set of matched dependency strings (empty if nothing matches)
   */
  public HashSet<String> tokenSet(String tokenString) {
    Matcher tpMatcher = DEPENDENCY_PATTERN.matcher(tokenString);
    HashSet<String> tokenSet = new HashSet<String>();
    while (tpMatcher.find()) {
      tokenSet.add(tpMatcher.group());
    }
    return tokenSet;
  }

  /**
   * Turn a list of dependencies into a HashSet of their {@code toString()}
   * forms, so that they can be compared without regard to ordering.
   *
   * @param ds the dependencies to convert
   * @return the set of string renderings of the given dependencies
   */
  public HashSet<String> tokenSet(List<TypedDependency> ds) {
    HashSet<String> tokenSet = new HashSet<String>();
    for (TypedDependency d : ds) {
      tokenSet.add(d.toString());
    }
    return tokenSet;
  }

  /**
   * Tests that we can extract dependency relations correctly from
   * some hard-coded trees. For each tree, the basic, collapsed and
   * cc-processed dependencies (all requested with
   * {@code GrammaticalStructure.Extras.MAXIMAL}) are compared, as unordered
   * sets of strings, against the corresponding expected answer.
   */
  public void testEnglishDependenciesByTree() {
    // the trees to test
    String[] testTrees = new String[]{
      "((S (NP (NNP Sam)) (VP (VBD died) (NP-TMP (NN today)))))",
      "(ROOT (S (NP (PRP I)) (VP (VBD saw) (NP (NP (DT the) (NN book)) (SBAR (WHNP (WDT which)) (S (NP (PRP you)) (VP (VBD bought)))))) (. .)))"
    };

    // the expected dependency answers (basic);
    // a repeated entry is harmless because answers are compared as sets
    String[] testAnswers = new String[] {
      "root(ROOT-0, died-2) nsubj(died-2, Sam-1) tmod(died-2, today-3)",
      "nsubj(saw-2, I-1) root(ROOT-0, saw-2) det(book-4, the-3) dobj(saw-2, book-4) dobj(bought-7, which-5) ref(book-4, which-5) dobj(bought-7, which-5) nsubj(bought-7, you-6) rcmod(book-4, bought-7)"
    };

    // the expected dependency answers (collapsed dependencies)
    String[] testAnswersCollapsed = new String[] {
      "root(ROOT-0, died-2) nsubj(died-2, Sam-1) tmod(died-2, today-3)",
      "nsubj(saw-2, I-1) root(ROOT-0, saw-2) det(book-4, the-3) dobj(saw-2, book-4) dobj(bought-7, book-4) nsubj(bought-7, you-6) rcmod(book-4, bought-7)"
    };

    // the expected dependency answers (conjunctions processed)
    String[] testAnswersCCProcessed = new String[] {
      "root(ROOT-0, died-2) nsubj(died-2, Sam-1) tmod(died-2, today-3)",
      "nsubj(saw-2, I-1) root(ROOT-0, saw-2) det(book-4, the-3) dobj(saw-2, book-4) dobj(bought-7, book-4) nsubj(bought-7, you-6) rcmod(book-4, bought-7)"
    };

    // The arrays are indexed in parallel below; fail with a clear message if
    // one of them is edited without the others instead of hitting an
    // ArrayIndexOutOfBoundsException or silently skipping expectations.
    assertEquals("Number of basic answers must match number of trees",
        testTrees.length, testAnswers.length);
    assertEquals("Number of collapsed answers must match number of trees",
        testTrees.length, testAnswersCollapsed.length);
    assertEquals("Number of cc-processed answers must match number of trees",
        testTrees.length, testAnswersCCProcessed.length);

    for (int i = 0; i < testTrees.length; i++) {
      String testTree = testTrees[i];

      HashSet<String> testAnswerTokens = tokenSet(testAnswers[i]);
      HashSet<String> testAnswerCollapsedTokens = tokenSet(testAnswersCollapsed[i]);
      HashSet<String> testAnswerCCProcessedTokens = tokenSet(testAnswersCCProcessed[i]);

      Tree tree = readTree(testTree);
      GrammaticalStructure gs = new EnglishGrammaticalStructure(tree);

      assertEquals("Unexpected basic dependencies for tree "+testTree,
          testAnswerTokens, tokenSet(gs.typedDependencies(GrammaticalStructure.Extras.MAXIMAL)));
      assertEquals("Unexpected collapsed dependencies for tree "+testTree,
          testAnswerCollapsedTokens, tokenSet(gs.typedDependenciesCollapsed(GrammaticalStructure.Extras.MAXIMAL)));
      assertEquals("Unexpected cc-processed dependencies for tree "+testTree,
          testAnswerCCProcessedTokens, tokenSet(gs.typedDependenciesCCprocessed(GrammaticalStructure.Extras.MAXIMAL)));
    }
  }

  /**
   * Reads a single tree from its bracketed string form using a
   * {@code PennTreeReader} backed by a {@code LabeledScoredTreeFactory}.
   *
   * @param treeString the bracketed tree to read
   * @return the result of {@code readTree()} on that input
   * @throws RuntimeException wrapping the {@code IOException} if reading fails
   */
  private Tree readTree(String treeString) {
    try {
      return new PennTreeReader(new StringReader(treeString),
          new LabeledScoredTreeFactory()).readTree();
    } catch (IOException e) {
      // these trees should all parse correctly
      throw new RuntimeException("Could not read test tree " + treeString, e);
    }
  }
}