/*
* Copyright 2011 Corpuslinguistic working group Humboldt University Berlin.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package annis.sqlgen;
import annis.test.CsvResultSetProvider;
import java.io.InputStream;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.corpus_tools.salt.common.SCorpusGraph;
import org.corpus_tools.salt.common.SDocumentGraph;
import org.corpus_tools.salt.common.SDominanceRelation;
import org.corpus_tools.salt.common.SPointingRelation;
import org.corpus_tools.salt.common.SSpanningRelation;
import org.corpus_tools.salt.common.STextualRelation;
import org.corpus_tools.salt.common.SaltProject;
import org.corpus_tools.salt.core.SLayer;
import org.corpus_tools.salt.core.SNamedElement;
import org.corpus_tools.salt.core.SNode;
import org.corpus_tools.salt.core.SRelation;
import org.junit.Before;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.MockitoAnnotations.initMocks;
/**
 * Tests {@link SaltAnnotateExtractor} against canned annotate results that are
 * loaded from CSV resources located next to this class.
 *
 * @author Thomas Krause &lt;krauseto@hu-berlin.de&gt;
 */
public class SaltAnnotateExtractorTest
{

  // test fixtures: canned result sets for the single-text and the multi-text sample
  private CsvResultSetProvider resultSetProviderSingleText;
  private CsvResultSetProvider resultSetProviderMultiText;

  // class under test
  private SaltAnnotateExtractor instance;

  // dependencies
  private final PostgreSqlArraySolutionKey<String> solutionKey = new PostgreSqlArraySolutionKey<>();

  /**
   * Loads both CSV fixtures and creates an extractor that always hands out the
   * shared {@link #solutionKey}.
   *
   * @throws SQLException declared for the fixture and extractor set-up calls
   */
  @Before
  public void setUp() throws SQLException
  {
    // this class declares no @Mock fields; the call is kept so that behavior
    // is unchanged and mocks added later are initialised
    initMocks(this);

    solutionKey.setKeyColumnName("key");

    resultSetProviderSingleText = loadResultSet("SampleAnnotateResult.csv");
    resultSetProviderMultiText = loadResultSet("SampleAnnotateResult_MultiText.csv");

    instance = new SaltAnnotateExtractor()
    {
      @Override
      protected SolutionKey<?> createSolutionKey()
      {
        return solutionKey;
      }
    };

    CorpusPathExtractor corpusPathExtractor = new ArrayCorpusPathExtractor();
    instance.setCorpusPathExtractor(corpusPathExtractor);
    TestAnnotateSqlGenerator.setupOuterQueryFactsTableColumnAliases(instance);
  }

  /**
   * The single-text sample must yield exactly one corpus graph containing the
   * corpus "pcc2" with the single document "4282".
   */
  @Test
  public void testCorpusGraph() throws SQLException
  {
    SaltProject project = instance.extractData(resultSetProviderSingleText.getResultSet());
    assertNotNull(project);

    assertEquals(1, project.getCorpusGraphs().size());
    SCorpusGraph corpusGraph = project.getCorpusGraphs().get(0);

    assertEquals(1, corpusGraph.getCorpora().size());
    assertEquals("pcc2", corpusGraph.getCorpora().get(0).getName());

    assertEquals(1, corpusGraph.getDocuments().size());
    assertEquals("4282", corpusGraph.getDocuments().get(0).getName());
  }

  /** Checks the names of all layers of the extracted document graph. */
  @Test
  public void testLayerNames() throws SQLException
  {
    SDocumentGraph g = extractFirstDocumentGraph(resultSetProviderSingleText);

    List<SLayer> layers = new ArrayList<>(g.getLayers());
    Collections.sort(layers, new NameComparator());

    assertNames(layers,
      "default_ns", "dep", "exmaralda", "mmax", "rst", "tiger");
  }

  /** Checks, layer by layer, the names of the nodes assigned to each layer. */
  @Test
  public void testLayerNodes() throws SQLException
  {
    SDocumentGraph g = extractFirstDocumentGraph(resultSetProviderSingleText);

    assertNames(sortedNodes(g, "exmaralda"),
      "Focus_newInfSeg_10", "Focus_newInfSeg_9",
      "Inf-StatSeg_29", "Inf-StatSeg_30",
      "NPSeg_29", "NPSeg_30",
      "PPSeg_7",
      "SentSeg_10", "SentSeg_9");

    assertNames(sortedNodes(g, "mmax"),
      "primmarkSeg_1000154", "primmarkSeg_60",
      "sentenceSeg_50010", "sentenceSeg_50011", "sentenceSeg_5009");

    assertNames(sortedNodes(g, "tiger"),
      "const_50", "const_52", "const_54", "const_55", "const_56",
      "const_57", "const_58", "const_59", "const_60", "const_61");

    assertNames(sortedNodes(g, "default_ns"),
      "tok_150", "tok_151", "tok_152", "tok_153", "tok_154", "tok_155",
      "tok_156", "tok_157", "tok_158", "tok_159", "tok_160", "tok_161");

    assertNames(sortedNodes(g, "rst"),
      "u0", "u10", "u11", "u12", "u20", "u23", "u24", "u27", "u28");

    // the "dep" layer is expected to hold relations only, no nodes
    assertEquals(0, g.getLayerByName("dep").get(0).getNodes().size());
  }

  /**
   * Checks the relations assigned to each layer, sorted with
   * {@link EdgeComparator}. "dep" and "exmaralda" are verified completely,
   * the remaining layers only by size and control samples.
   */
  @Test
  public void testLayerRelations() throws SQLException
  {
    SDocumentGraph g = extractFirstDocumentGraph(resultSetProviderSingleText);

    // dep //
    assertEdges(sortedRelations(g, "dep"), new String[][]
    {
      {"tok_150", "tok_151"},
      {"tok_152", "tok_153"},
      {"tok_156", "tok_154"},
      {"tok_156", "tok_155"},
      {"tok_156", "tok_157"},
      {"tok_157", "tok_158"},
      {"tok_158", "tok_160"},
      {"tok_160", "tok_159"},
      {"tok_160", "tok_161"}
    });

    // exmaralda //
    assertEdges(sortedRelations(g, "exmaralda"), new String[][]
    {
      {"Focus_newInfSeg_10", "tok_154"},
      {"Focus_newInfSeg_10", "tok_155"},
      {"Focus_newInfSeg_10", "tok_156"},
      {"Focus_newInfSeg_10", "tok_157"},
      {"Focus_newInfSeg_10", "tok_158"},
      {"Focus_newInfSeg_10", "tok_159"},
      {"Focus_newInfSeg_10", "tok_160"},
      {"Focus_newInfSeg_9", "tok_150"},
      {"Focus_newInfSeg_9", "tok_151"},
      {"Focus_newInfSeg_9", "tok_152"},
      {"Inf-StatSeg_29", "tok_150"},
      {"Inf-StatSeg_29", "tok_151"},
      {"Inf-StatSeg_30", "tok_155"},
      {"NPSeg_29", "tok_150"},
      {"NPSeg_29", "tok_151"},
      {"NPSeg_30", "tok_155"},
      {"PPSeg_7", "tok_150"},
      {"PPSeg_7", "tok_151"},
      {"SentSeg_10", "tok_154"},
      {"SentSeg_10", "tok_155"},
      {"SentSeg_10", "tok_156"},
      {"SentSeg_10", "tok_157"},
      {"SentSeg_10", "tok_158"},
      {"SentSeg_10", "tok_159"},
      {"SentSeg_10", "tok_160"},
      {"SentSeg_10", "tok_161"},
      {"SentSeg_9", "tok_150"},
      {"SentSeg_9", "tok_151"},
      {"SentSeg_9", "tok_152"},
      {"SentSeg_9", "tok_153"}
    });

    // mmax, only control samples //
    List<SRelation<SNode, SNode>> e = sortedRelations(g, "mmax");
    assertEquals(14, e.size());
    assertEdge(e, 1, "primmarkSeg_60", "tok_150");
    assertEdge(e, 7, "sentenceSeg_50010", "tok_158");

    // tiger, only control samples //
    e = sortedRelations(g, "tiger");
    assertEquals(17, e.size());
    assertEdge(e, 9, "const_59", "tok_160");
    assertEdge(e, 16, "const_61", "tok_156");

    // urml (stored in the layer named "rst"), only control samples //
    e = sortedRelations(g, "rst");
    assertEquals(20, e.size());
    assertEdge(e, 0, "u0", "u28");
    assertEdge(e, 5, "u11", "tok_153");
  }

  /**
   * Every non-textual relation must belong to exactly one layer, and for the
   * layers listed below the relation must have the Salt relation class that
   * is expected for that layer.
   */
  @Test
  public void testRelationType() throws SQLException
  {
    SDocumentGraph g = extractFirstDocumentGraph(resultSetProviderSingleText);

    for (SRelation<? extends SNode, ? extends SNode> r : g.getRelations())
    {
      if (r instanceof STextualRelation)
      {
        // textual relations are not subject to the checks below
        continue;
      }

      assertEquals(1, r.getLayers().size());
      String layerName = r.getLayers().iterator().next().getName();

      // NOTE(review): testLayerNames shows that this sample produces a layer
      // called "rst" but none called "urml", so the "urml" alternative
      // presumably never matches here and "rst" relations go unchecked.
      // Confirm which relation type(s) "rst" is supposed to contain before
      // extending this check.
      if ("exmaralda".equals(layerName) || "urml".equals(layerName)
        || "mmax".equals(layerName))
      {
        assertTrue("instance of SSpanningRelation",
          r instanceof SSpanningRelation);
      }
      else if ("dep".equals(layerName))
      {
        assertTrue("instance of SPointingRelation",
          r instanceof SPointingRelation);
      }
      else if ("tiger".equals(layerName))
      {
        assertTrue("instance of SDominanceRelation",
          r instanceof SDominanceRelation);
      }
    }
  }

  /** The multi-text sample must produce three textual data sources. */
  @Test
  public void testMultipleTextGeneration() throws SQLException
  {
    SDocumentGraph g = extractFirstDocumentGraph(resultSetProviderMultiText);

    assertEquals(3, g.getTextualDSs().size());
  }

  /**
   * Opens a CSV resource relative to this class and wraps it in a result set
   * provider. Fails with a message naming the resource if it cannot be found,
   * instead of passing {@code null} on to the provider.
   */
  private CsvResultSetProvider loadResultSet(String resourceName) throws SQLException
  {
    InputStream csv = getClass().getResourceAsStream(resourceName);
    assertNotNull("test resource not found: " + resourceName, csv);
    return new CsvResultSetProvider(csv);
  }

  /**
   * Runs the extractor on the given result set and returns the document graph
   * of the first document of the first corpus graph.
   */
  private SDocumentGraph extractFirstDocumentGraph(CsvResultSetProvider provider)
    throws SQLException
  {
    SaltProject project = instance.extractData(provider.getResultSet());
    assertNotNull(project);
    return project.getCorpusGraphs().get(0).getDocuments().get(0).getDocumentGraph();
  }

  /** Returns the nodes of the first layer with the given name, sorted by name. */
  private static List<SNode> sortedNodes(SDocumentGraph graph, String layerName)
  {
    List<SNode> nodes = new ArrayList<>(graph.getLayerByName(layerName).get(0).getNodes());
    Collections.sort(nodes, new NameComparator());
    return nodes;
  }

  /**
   * Returns the relations of the first layer with the given name, sorted with
   * {@link EdgeComparator}.
   */
  private static List<SRelation<SNode, SNode>> sortedRelations(SDocumentGraph graph,
    String layerName)
  {
    List<SRelation<SNode, SNode>> relations =
      new ArrayList<>(graph.getLayerByName(layerName).get(0).getRelations());
    Collections.sort(relations, new EdgeComparator());
    return relations;
  }

  /** Asserts that the elements have exactly the expected names, in order. */
  private static void assertNames(List<? extends SNamedElement> actual, String... expected)
  {
    assertEquals("number of elements", expected.length, actual.size());
    for (int i = 0; i < expected.length; i++)
    {
      assertEquals("name at index " + i, expected[i], actual.get(i).getName());
    }
  }

  /** Asserts source and target node name of the relation at the given index. */
  private static void assertEdge(List<SRelation<SNode, SNode>> edges, int index,
    String expectedSource, String expectedTarget)
  {
    SRelation<SNode, SNode> edge = edges.get(index);
    assertEquals("source at index " + index, expectedSource, edge.getSource().getName());
    assertEquals("target at index " + index, expectedTarget, edge.getTarget().getName());
  }

  /**
   * Asserts that the relations match the expected {source, target} name pairs
   * exactly, in order.
   */
  private static void assertEdges(List<SRelation<SNode, SNode>> edges, String[][] expected)
  {
    assertEquals("number of relations", expected.length, edges.size());
    for (int i = 0; i < expected.length; i++)
    {
      assertEdge(edges, i, expected[i][0], expected[i][1]);
    }
  }

  /** Orders named elements by their name using {@link String#compareTo}. */
  public static class NameComparator implements Comparator<SNamedElement>
  {

    @Override
    public int compare(SNamedElement arg0, SNamedElement arg1)
    {
      return arg0.getName().compareTo(arg1.getName());
    }
  }

  /**
   * Orders relations by source node name, then target node name, then
   * relation type. A {@code null} type sorts before any non-null type.
   */
  public static class EdgeComparator implements Comparator<SRelation<SNode, SNode>>
  {

    @Override
    public int compare(SRelation<SNode, SNode> arg0, SRelation<SNode, SNode> arg1)
    {
      int bySource = arg0.getSource().getName().compareTo(arg1.getSource().getName());
      if (bySource != 0)
      {
        return bySource;
      }

      int byTarget = arg0.getTarget().getName().compareTo(arg1.getTarget().getName());
      if (byTarget != 0)
      {
        return byTarget;
      }

      String t0 = arg0.getType();
      String t1 = arg1.getType();
      if (t0 == null)
      {
        return t1 == null ? 0 : -1;
      }
      if (t1 == null)
      {
        return 1;
      }
      return t0.compareTo(t1);
    }
  }
}