/**
* This file is part of General Entity Annotator Benchmark.
*
* General Entity Annotator Benchmark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* General Entity Annotator Benchmark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with General Entity Annotator Benchmark. If not, see <http://www.gnu.org/licenses/>.
*/
package org.aksw.gerbil.execute;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.aksw.gerbil.annotator.AnnotatorConfiguration;
import org.aksw.gerbil.annotator.TestAnnotatorConfiguration;
import org.aksw.gerbil.annotator.decorator.ErrorCountingAnnotatorDecorator;
import org.aksw.gerbil.database.SimpleLoggingResultStoringDAO4Debugging;
import org.aksw.gerbil.dataset.Dataset;
import org.aksw.gerbil.dataset.DatasetConfiguration;
import org.aksw.gerbil.dataset.impl.nif.NIFFileDatasetConfig;
import org.aksw.gerbil.datatypes.ExperimentTaskConfiguration;
import org.aksw.gerbil.datatypes.ExperimentType;
import org.aksw.gerbil.evaluate.EvaluatorFactory;
import org.aksw.gerbil.evaluate.impl.ConfidenceBasedFMeasureCalculator;
import org.aksw.gerbil.exceptions.GerbilException;
import org.aksw.gerbil.matching.Matching;
import org.aksw.gerbil.matching.impl.MatchingsCounterImpl;
import org.aksw.gerbil.semantic.kb.SimpleWhiteListBasedUriKBClassifier;
import org.aksw.gerbil.semantic.kb.UriKBClassifier;
import org.aksw.gerbil.transfer.nif.Document;
import org.aksw.gerbil.transfer.nif.data.DocumentImpl;
import org.aksw.gerbil.transfer.nif.data.NamedEntity;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
/**
 * Tests the entity linking (A2KB) evaluation with annotator responses that have
 * been stored in NIF files.
 *
 * <p>
 * Every parameter set names a file containing the response of one annotator for
 * the OKE Task 1 example data. The file is loaded, wrapped into a
 * {@link TestAnnotatorConfiguration} and evaluated against the gold standard.
 * Each parameter set is executed twice: once with the markings exactly as they
 * are stored in the file ({@link #test()}) and once with markings that have
 * been rebuilt from position, length and URIs only
 * ({@link #testWithoutConfidence()}).
 * </p>
 *
 * @author Michael Röder <roeder@informatik.uni-leipzig.de>
 *
 */
@RunWith(Parameterized.class)
public class FileBasedA2KBTest extends AbstractExperimentTaskTest {

    /** Experiment type used for the gold standard, the annotator files and the task. */
    private static final ExperimentType EXPERIMENT_TYPE = ExperimentType.A2KB;
    /** Gold standard all annotator responses are compared with. */
    private static final DatasetConfiguration GOLD_STD = new NIFFileDatasetConfig("OKE_Task1",
            "src/test/resources/OKE_Challenge/example_data/task1.ttl", false, EXPERIMENT_TYPE, null, null);
    /** Classifier handed to the {@link EvaluatorFactory}; white-lists the DBpedia resource namespace. */
    private static final UriKBClassifier URI_KB_CLASSIFIER = new SimpleWhiteListBasedUriKBClassifier(
            "http://dbpedia.org/resource/");
    /** Id under which every experiment task of this test is run and its results are looked up. */
    private static final int EXPERIMENT_TASK_ID = 1;

    /**
     * Switches on the debug output of the matchings counter, the confidence
     * based F-measure calculator and the error counting annotator decorator.
     */
    @BeforeClass
    public static void setMatchingsCounterDebugFlag() {
        MatchingsCounterImpl.setPrintDebugMsg(true);
        ConfidenceBasedFMeasureCalculator.setPrintDebugMsg(true);
        ErrorCountingAnnotatorDecorator.setPrintDebugMsg(true);
    }

    /**
     * Creates the parameter sets of this test. Each set comprises the annotator
     * response file, the gold standard, the matching and two arrays of expected
     * results (with and without confidence values).
     *
     * <p>
     * NOTE(review): the seven expected values are presumably macro
     * precision/recall/F1, micro precision/recall/F1 and the error count, in
     * that order -- confirm against {@code F1MeasureTestingObserver}.
     * </p>
     *
     * @return the constructor arguments, one array per test configuration
     */
    @Parameters
    public static Collection<Object[]> data() {
        List<Object[]> testConfigs = new ArrayList<Object[]>();
        // The results of the NERD-ML annotator
        testConfigs.add(new Object[] {
                "src/test/resources/annotator_examples/NERD_ML-OKE_2015_Task_1_example_set-w-A2KB.ttl", GOLD_STD,
                Matching.WEAK_ANNOTATION_MATCH,
                new double[] { 0.2666666667, 0.2222222222, 0.2424242424, 0.5714285714, 1.0 / 3.0, 0.4210526316, 0 },
                new double[] { 0.2222222222, 0.2222222222, 0.2222222222, 0.2857142857, 1.0 / 3.0, 0.3076923077,
                        0 } });
        // The results of the FOX annotator
        testConfigs.add(new Object[] {
                "src/test/resources/annotator_examples/FOX-OKE_2015_Task_1_example_set-w-A2KB.ttl", GOLD_STD,
                Matching.WEAK_ANNOTATION_MATCH,
                new double[] { 0.8333333333, 0.5555555556, 0.6555555556, 0.7777777778, 0.5833333333, 0.6666666667,
                        0 },
                new double[] { 0.8333333333, 0.5555555556, 0.6555555556, 0.7777777778, 0.5833333333, 0.6666666667,
                        0 } });
        // The results of the DBpedia Spotlight annotator
        testConfigs.add(new Object[] {
                "src/test/resources/annotator_examples/DBpedia_Spotlight-OKE_2015_Task_1_example_set-w-A2KB.ttl",
                GOLD_STD, Matching.WEAK_ANNOTATION_MATCH,
                new double[] { 0.75, 0.4722222222, 0.5722222222, 0.6666666667, 0.5, 0.5714285714, 0 },
                new double[] { 0.75, 0.4722222222, 0.5722222222, 0.6666666667, 0.5, 0.5714285714, 0 } });
        return testConfigs;
    }

    /** File containing the stored response of the annotator. */
    private final String annotatorFileName;
    /** Gold standard the annotator response is evaluated against. */
    private final DatasetConfiguration dataset;
    /** Expected results if the markings are used as they are stored in the file. */
    private final double[] expectedResults;
    /** Expected results if the markings have been rebuilt without confidence values. */
    private final double[] expectedResultWithoutConfidence;
    /** Matching used by the experiment task. */
    private final Matching matching;

    /**
     * Constructor called by the {@link Parameterized} runner with one of the
     * arrays returned by {@link #data()}.
     *
     * @param annotatorFileName
     *            file containing the stored annotator response
     * @param dataset
     *            gold standard the response is evaluated against
     * @param matching
     *            matching used by the experiment task
     * @param expectedResults
     *            expected results for the unchanged annotator response
     * @param expectedResultWithoutConfidence
     *            expected results for the response without confidence values
     */
    public FileBasedA2KBTest(String annotatorFileName, DatasetConfiguration dataset, Matching matching,
            double[] expectedResults, double[] expectedResultWithoutConfidence) {
        this.annotatorFileName = annotatorFileName;
        this.dataset = dataset;
        this.expectedResults = expectedResults;
        this.expectedResultWithoutConfidence = expectedResultWithoutConfidence;
        this.matching = matching;
    }

    /**
     * Evaluates the annotator response as it is stored in the file.
     *
     * @throws GerbilException
     *             if the annotator file can not be loaded
     */
    @Test
    public void test() throws GerbilException {
        evaluateAnnotatorFile(false, expectedResults);
    }

    /**
     * Evaluates the annotator response after its named entities have been
     * rebuilt without confidence values.
     *
     * @throws GerbilException
     *             if the annotator file can not be loaded
     */
    @Test
    public void testWithoutConfidence() throws GerbilException {
        evaluateAnnotatorFile(true, expectedResultWithoutConfidence);
    }

    /**
     * Runs a single experiment task for the annotator file of this test
     * instance and lets an {@link F1MeasureTestingObserver} compare the stored
     * results with the given expectation.
     *
     * @param eraseConfidenceValues
     *            forwarded to {@link #loadAnnotatorFile(String, boolean)}
     * @param expected
     *            the results the observer should find
     * @throws GerbilException
     *             if the annotator file can not be loaded
     */
    private void evaluateAnnotatorFile(boolean eraseConfidenceValues, double[] expected) throws GerbilException {
        SimpleLoggingResultStoringDAO4Debugging experimentDAO = new SimpleLoggingResultStoringDAO4Debugging();
        ExperimentTaskConfiguration configuration = new ExperimentTaskConfiguration(
                loadAnnotatorFile(annotatorFileName, eraseConfidenceValues), dataset, EXPERIMENT_TYPE, matching);
        runTest(EXPERIMENT_TASK_ID, experimentDAO, new EvaluatorFactory(URI_KB_CLASSIFIER), configuration,
                new F1MeasureTestingObserver(this, EXPERIMENT_TASK_ID, experimentDAO, expected));
    }

    /**
     * Loads the given NIF file and wraps its documents into an annotator
     * configuration that can be used by an experiment task.
     *
     * @param annotatorFileName
     *            file containing the stored annotator response
     * @param eraseConfidenceValues
     *            if {@code true}, the documents are replaced by copies whose
     *            named entities only carry start position, length and URIs;
     *            otherwise the loaded documents are used directly
     * @return an annotator configuration for the A2KB experiment type that is
     *         backed by the loaded documents
     * @throws GerbilException
     *             if the dataset can not be loaded from the file
     */
    public AnnotatorConfiguration loadAnnotatorFile(String annotatorFileName, boolean eraseConfidenceValues)
            throws GerbilException {
        // Named "annotatorResponse" to avoid shadowing the gold standard field "dataset".
        Dataset annotatorResponse = new NIFFileDatasetConfig("ANNOTATOR", annotatorFileName, false,
                EXPERIMENT_TYPE, null, null).getDataset(EXPERIMENT_TYPE);
        List<Document> instances = eraseConfidenceValues ? copyWithoutConfidence(annotatorResponse)
                : annotatorResponse.getInstances();
        return new TestAnnotatorConfiguration(instances, EXPERIMENT_TYPE);
    }

    /**
     * Creates copies of the documents of the given dataset. A copy has the URI
     * and the text of its original document. Only the markings returned by
     * {@code getMarkings(NamedEntity.class)} are taken over, and each of them is
     * rebuilt from its start position, length and URIs, so anything else the
     * original marking carries is not part of the copy.
     *
     * @param annotatorResponse
     *            the dataset whose documents should be copied
     * @return the list of copied documents in the order of the original ones
     */
    private static List<Document> copyWithoutConfidence(Dataset annotatorResponse) {
        List<Document> copies = new ArrayList<Document>(annotatorResponse.size());
        for (Document originalDoc : annotatorResponse.getInstances()) {
            Document copy = new DocumentImpl();
            copy.setDocumentURI(originalDoc.getDocumentURI());
            copy.setText(originalDoc.getText());
            for (NamedEntity ne : originalDoc.getMarkings(NamedEntity.class)) {
                copy.addMarking(new NamedEntity(ne.getStartPosition(), ne.getLength(), ne.getUris()));
            }
            copies.add(copy);
        }
        return copies;
    }
}