//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.uima.grammar;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.util.Collections;
import java.util.Set;
import org.apache.uima.jcas.JCas;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import uk.gov.dstl.baleen.types.language.Dependency;
import uk.gov.dstl.baleen.types.language.Sentence;
import uk.gov.dstl.baleen.types.language.WordToken;
import uk.gov.dstl.baleen.uima.testing.JCasSingleton;
public class DependencyGraphTest {
private JCas jCas;
private Dependency dText;
private Dependency dOf;
private Dependency dSample;
private Dependency dA;
private WordToken a;
private WordToken sample;
private WordToken of;
private WordToken text;
@Before
public void setUp() throws Exception {
jCas = JCasSingleton.getJCasInstance();
jCas.setDocumentText("A sample of text.");
// Note this dependency grammar is not accurate!
a = new WordToken(jCas, 0, 1);
a.addToIndexes();
sample = new WordToken(jCas, 2, 8);
sample.addToIndexes();
of = new WordToken(jCas, 9, 11);
of.addToIndexes();
text = new WordToken(jCas, 12, 16);
text.addToIndexes();
dA = new Dependency(jCas, 0, 1);
dA.setDependent(a);
dA.setGovernor(sample);
dA.addToIndexes();
dSample = new Dependency(jCas, 2, 8);
dSample.setGovernor(sample);
dSample.setDependent(sample);
dSample.setDependencyType("ROOT");
dSample.addToIndexes();
dOf = new Dependency(jCas, 9, 11);
dOf.setGovernor(text);
dOf.setDependent(of);
dOf.addToIndexes();
dText = new Dependency(jCas, 12, 16);
dText.setGovernor(sample);
dText.setDependent(text);
dText.addToIndexes();
}
@Test
public void testExtractWordsMultiHop() {
final DependencyGraph graph = DependencyGraph.build(jCas);
final Set<WordToken> fromDependencies = graph.extractWords(3, dA);
Assert.assertEquals(4, fromDependencies.size());
}
@Test
public void testExtractWordsNone() {
final DependencyGraph graph = DependencyGraph.build(jCas);
final Set<WordToken> fromDependencies = graph.extractWords(-1, dA);
Assert.assertEquals(0, fromDependencies.size());
final Set<WordToken> fromWords = graph.nearestWords(-1, a);
Assert.assertEquals(0, fromWords.size());
}
@Test
public void testExtractWordsMissingWord() {
final DependencyGraph graph = DependencyGraph.build(jCas);
final Set<WordToken> fromWords = graph.nearestWords(10, new WordToken(jCas));
// We include the word itself (even though its not in...?)
Assert.assertEquals(1, fromWords.size());
}
@Test
public void testExtractWordsArray() {
final DependencyGraph graph = DependencyGraph.build(jCas);
final Set<WordToken> fromDependencies = graph.extractWords(1, dOf);
Assert.assertEquals(2, fromDependencies.size());
Assert.assertTrue(fromDependencies.contains(of));
Assert.assertTrue(fromDependencies.contains(text));
final Set<WordToken> fromWords = graph.nearestWords(1, sample);
Assert.assertEquals(3, fromWords.size());
Assert.assertFalse(fromWords.contains(of));
final Set<WordToken> fromTwo = graph.nearestWords(1, sample, of);
Assert.assertEquals(4, fromTwo.size());
}
@Test
public void testBuild() {
final DependencyGraph graph = DependencyGraph.build(jCas);
Assert.assertNotNull(graph);
graph.log();
assertEquals(4, graph.getWords().size());
assertEquals(1, graph.getDependents(a).size());
assertEquals(1, graph.getEdges(a).count());
assertEquals(0, graph.getGovernors(a).size());
// 0 as the root is not included in the graph
assertEquals(0, graph.getDependents(sample).size());
assertEquals(2, graph.getEdges(sample).count());
assertEquals(2, graph.getGovernors(sample).size());
}
@Test
public void testFilter() {
final DependencyGraph graph = DependencyGraph.build(jCas);
final DependencyGraph subgraph = graph
.filter(p -> p == a || p == sample);
subgraph.log();
assertEquals(2, subgraph.getWords().size());
assertEquals(1, subgraph.getGovernors(sample).size());
}
@Test
public void testBuildCovered() {
// Create a fake sub-sentence
final Sentence s = new Sentence(jCas);
s.setBegin(0);
s.setEnd(sample.getEnd());
final DependencyGraph graph = DependencyGraph.build(jCas, s);
Assert.assertNotNull(graph);
graph.log();
assertEquals(2, graph.getWords().size());
assertEquals(1, graph.getDependents(a).size());
assertEquals(1, graph.getEdges(a).count());
assertEquals(0, graph.getGovernors(a).size());
// 0 as the root is not included in the graph
assertEquals(0, graph.getDependents(sample).size());
assertEquals(1, graph.getEdges(sample).count());
assertEquals(1, graph.getGovernors(sample).size());
}
@Test
public void testShortestPath() {
final DependencyGraph graph = DependencyGraph.build(jCas);
assertTrue(graph.shortestPath(Collections.singletonList(a), Collections.singletonList(of), 1).isEmpty());
assertTrue(graph.shortestPath(Collections.singletonList(a), Collections.singletonList(of), 2).isEmpty());
assertFalse(graph.shortestPath(Collections.singletonList(a), Collections.singletonList(of), 5).isEmpty());
}
int traverseCount = 0;
@Test
public void testTravese() {
final DependencyGraph graph = DependencyGraph.build(jCas);
graph.traverse(1, Collections.singletonList(dA), (d, f, t, h) -> {
traverseCount++;
return true;
});
assertEquals(1, traverseCount);
}
@Test
public void testTraveseWithTerminate() {
final DependencyGraph graph = DependencyGraph.build(jCas);
traverseCount = 0;
graph.traverse(10, Collections.singletonList(dA), (d, f, t, h) -> {
traverseCount++;
return false;
});
// 1 as go in both direction (dA a -> sample)
assertEquals(1, traverseCount);
}
@Test
public void testTraveseMultiple() {
final DependencyGraph graph = DependencyGraph.build(jCas);
traverseCount = 0;
graph.traverse(10, Collections.singletonList(dA), (d, f, t, h) -> {
// TODO: Ideally test the content so that the history is correct, but its hard to
// predict
traverseCount++;
return true;
});
assertEquals(4, traverseCount);
}
}