package edu.stanford.nlp.trees.ud;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import junit.framework.TestCase;
import java.io.Reader;
import java.io.StringReader;
import java.util.Iterator;
/**
 * Tests reading CoNLL-U formatted input with {@link CoNLLUDocumentReader} and
 * writing the resulting graphs back out with {@link CoNLLUDocumentWriter}.
 *
 * @author Sebastian Schuster
 */
public class CoNLLUDocumentReaderWriterTest extends TestCase {

  /** Assertion message used whenever a fixture is expected to yield exactly one graph. */
  private static final String SINGLE_TREE_MESSAGE = "The input only contains one dependency tree.";

  /** One sentence that contains a multiword token range line ({@code 2-3 haven't}). */
  private static final String MULTIWORD_TEST_INPUT =
      "1 I I PRON PRP Case=Nom|Number=Sing|Person=1 2 nsubj _ _\n" +
      "2-3 haven't _ _ _ _ _ _ _ _\n" +
      "2 have have VERB VBP Number=Sing|Person=1|Tense=Pres 0 root _ _\n" +
      "3 not not PART RB Negative=Neg 2 neg _ _\n" +
      "4 a a DET DT Definite=Ind|PronType=Art 5 det _ _\n" +
      "5 clue clue NOUN NN Number=Sing 2 dobj _ _\n" +
      "6 . . PUNCT . _ 2 punct _ _\n\n";

  /** One sentence preceded by two {@code #} comment lines. */
  private static final String COMMENT_TEST_INPUT =
      "#comment line 1\n" +
      "#comment line 2\n" +
      "1 I I PRON PRP Case=Nom|Number=Sing|Person=1 2 nsubj _ _\n" +
      "2 have have VERB VBP Number=Sing|Person=1|Tense=Pres 0 root _ _\n" +
      "3 not not PART RB Negative=Neg 2 neg _ _\n" +
      "4 a a DET DT Definite=Ind|PronType=Art 5 det _ _\n" +
      "5 clue clue NOUN NN Number=Sing 2 dobj _ _\n" +
      "6 . . PUNCT . _ 2 punct _ _\n\n";

  /** One sentence whose ninth column lists additional {@code head:relation} pairs. */
  private static final String EXTRA_DEPS_TEST_INPUT =
      "1 They They PRON PRP _ 2 nsubj 4:nsubj _\n" +
      "2 buy buy VERB VBP _ 0 root _ _\n" +
      "3 and and CONJ CC _ 2 cc _ _\n" +
      "4 sell sell VERB VBP _ 5 conj _ _\n" +
      "5 books book NOUN NNS _ 2 dobj 4:dobj _\n" +
      "6 , , PUNCT , _ 5 punct _ _\n" +
      "7 newspapers newspaper NOUN NNS _ 5 conj 2:dobj|4:dobj _\n" +
      "8 and and CONJ CC _ 5 cc _ _\n" +
      "9 magazines magazine NOUN NNS _ 5 conj 2:dobj|4:dobj _\n" +
      "10 . . PUNCT . _ 2 punct _ _\n\n";

  /**
   * Parses {@code input} and returns its first graph, asserting that the graph
   * is non-null and that the reader's iterator yields nothing after it.
   *
   * @param input CoNLL-U text expected to hold exactly one sentence
   * @return the single graph read from {@code input}
   */
  private static SemanticGraph readSingleGraph(String input) {
    CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
    Reader stringReader = new StringReader(input);
    Iterator<SemanticGraph> graphs = reader.getIterator(stringReader);
    SemanticGraph graph = graphs.next();
    assertNotNull(graph);
    assertFalse(SINGLE_TREE_MESSAGE, graphs.hasNext());
    return graph;
  }

  /**
   * Checks the graph structure, the original text recorded on each token, and
   * the line number annotation for input with a multiword token range.
   */
  public void testMultiWords() {
    SemanticGraph sg = readSingleGraph(MULTIWORD_TEST_INPUT);
    assertEquals("[have/VBP nsubj>I/PRP neg>not/RB dobj>[clue/NN det>a/DT] punct>./.]", sg.toCompactString(true));

    // Only tokens 2 and 3 fall inside the "2-3 haven't" range; every other
    // token is expected to carry an empty original text.
    for (IndexedWord iw : sg.vertexListSorted()) {
      boolean insideRange = iw.index() == 2 || iw.index() == 3;
      String expectedText = insideRange ? "haven't" : "";
      assertEquals(expectedText, iw.originalText());
    }

    // Token 2 sits on the third input line because the range line precedes it.
    assertEquals(Integer.valueOf(3), sg.getNodeByIndex(2).get(CoreAnnotations.LineNumberAnnotation.class));
  }

  /**
   * Checks that leading comment lines are collected on the graph and are
   * counted when line numbers are assigned to tokens.
   */
  public void testComment() {
    SemanticGraph sg = readSingleGraph(COMMENT_TEST_INPUT);
    assertEquals("[have/VBP nsubj>I/PRP neg>not/RB dobj>[clue/NN det>a/DT] punct>./.]", sg.toCompactString(true));

    // Two comment lines come first, so token 1 is on the third input line.
    assertEquals(Integer.valueOf(3), sg.getNodeByIndex(1).get(CoreAnnotations.LineNumberAnnotation.class));
    assertEquals(2, sg.getComments().size());
    assertEquals("#comment line 1", sg.getComments().get(0));
  }

  /**
   * Tests whether extra dependencies are correctly parsed.
   */
  public void testExtraDependencies() {
    SemanticGraph sg = readSingleGraph(EXTRA_DEPS_TEST_INPUT);

    // "4:nsubj" on token 1.
    assertTrue(sg.containsEdge(sg.getNodeByIndex(4), sg.getNodeByIndex(1)));
    // "2:dobj|4:dobj" on token 7.
    assertTrue(sg.containsEdge(sg.getNodeByIndex(2), sg.getNodeByIndex(7)));
    assertTrue(sg.containsEdge(sg.getNodeByIndex(4), sg.getNodeByIndex(7)));
  }

  /**
   * Asserts that reading a semantic graph and printing it again reproduces the
   * input once every run of tabs/spaces in the input is normalised to a single tab.
   *
   * @param input CoNLL-U text holding one sentence
   */
  private void assertReadWriteRoundTrip(String input) {
    // The fixtures separate columns with spaces; the comparison is made
    // against a tab-separated version of the same text.
    String clean = input.replaceAll("[\\t ]+", "\t");
    SemanticGraph sg = readSingleGraph(clean);
    CoNLLUDocumentWriter writer = new CoNLLUDocumentWriter();
    String output = writer.printSemanticGraph(sg);
    assertEquals(clean, output);
  }

  /**
   * Runs the read/write round trip over every fixture in this class.
   */
  public void testReadingAndWriting() {
    assertReadWriteRoundTrip(COMMENT_TEST_INPUT);
    assertReadWriteRoundTrip(EXTRA_DEPS_TEST_INPUT);
    assertReadWriteRoundTrip(MULTIWORD_TEST_INPUT);
  }
}