/**
* This file is part of General Entity Annotator Benchmark.
*
* General Entity Annotator Benchmark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* General Entity Annotator Benchmark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with General Entity Annotator Benchmark. If not, see <http://www.gnu.org/licenses/>.
*/
package org.aksw.gerbil.execute;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import org.aksw.gerbil.annotator.TestAnnotatorConfiguration;
import org.aksw.gerbil.annotator.decorator.ErrorCountingAnnotatorDecorator;
import org.aksw.gerbil.database.SimpleLoggingResultStoringDAO4Debugging;
import org.aksw.gerbil.dataset.TestDataset;
import org.aksw.gerbil.datatypes.ExperimentTaskConfiguration;
import org.aksw.gerbil.datatypes.ExperimentType;
import org.aksw.gerbil.evaluate.EvaluatorFactory;
import org.aksw.gerbil.evaluate.impl.ConfidenceBasedFMeasureCalculator;
import org.aksw.gerbil.matching.Matching;
import org.aksw.gerbil.matching.impl.MatchingsCounterImpl;
import org.aksw.gerbil.transfer.nif.Document;
import org.aksw.gerbil.transfer.nif.Marking;
import org.aksw.gerbil.transfer.nif.data.DocumentImpl;
import org.aksw.gerbil.transfer.nif.data.NamedEntity;
import org.aksw.gerbil.transfer.nif.data.SpanImpl;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
@RunWith(Parameterized.class)
public class EntityRecognitionTest extends AbstractExperimentTaskTest {
@BeforeClass
public static void setMatchingsCounterDebugFlag() {
MatchingsCounterImpl.setPrintDebugMsg(true);
ConfidenceBasedFMeasureCalculator.setPrintDebugMsg(true);
ErrorCountingAnnotatorDecorator.setPrintDebugMsg(true);
}
private static final String TEXTS[] = new String[] { "Amy Winehouse is dead after a suspected drug overdose",
"Angelina, her father Jon, and her partner Brad never played together in the same movie." };
private static final Document GOLD_STD[] = new Document[] {
new DocumentImpl(TEXTS[0], "doc-0",
Arrays.asList((Marking) new NamedEntity(0, 13,
"http://www.aksw.org/gerbil/test-document/Amy_Winehouse"))),
new DocumentImpl(TEXTS[1], "doc-1",
Arrays.asList((Marking) new NamedEntity(21, 3, "http://www.aksw.org/gerbil/test-document/Jon"),
(Marking) new NamedEntity(0, 8, "http://www.aksw.org/gerbil/test-document/Angelina"),
(Marking) new NamedEntity(42, 4, "http://www.aksw.org/gerbil/test-document/Brad"))) };
@Parameters
public static Collection<Object[]> data() {
List<Object[]> testConfigs = new ArrayList<Object[]>();
// The recognizer found everything, but marked the word "Movie"
// additionally.
testConfigs.add(new Object[] {
new Document[] { new DocumentImpl(TEXTS[0], "doc-0", Arrays.asList((Marking) new SpanImpl(0, 13))),
new DocumentImpl(TEXTS[1], "doc-1",
Arrays.asList((Marking) new SpanImpl(0, 8), (Marking) new SpanImpl(21, 3),
(Marking) new SpanImpl(42, 4), (Marking) new SpanImpl(81, 5))) },
GOLD_STD, Matching.STRONG_ANNOTATION_MATCH,
new double[] { 1.75 / 2.0, 1.0, ((1.5 / 1.75) + 1.0) / 2.0, 0.8, 1.0, (1.6 / 1.8), 0 } });
testConfigs.add(new Object[] {
new Document[] { new DocumentImpl(TEXTS[0], "doc-0", Arrays.asList((Marking) new SpanImpl(0, 13))),
new DocumentImpl(TEXTS[1], "doc-1",
Arrays.asList((Marking) new SpanImpl(0, 8), (Marking) new SpanImpl(21, 3),
(Marking) new SpanImpl(42, 4), (Marking) new SpanImpl(81, 5))) },
GOLD_STD, Matching.WEAK_ANNOTATION_MATCH,
new double[] { 1.75 / 2.0, 1.0, ((1.5 / 1.75) + 1.0) / 2.0, 0.8, 1.0, (1.6 / 1.8), 0 } });
// The Recognizer couldn't find "Amy Winehouse" but "Winehouse". In the
// second sentence it coudn't identify Angelina.
testConfigs.add(new Object[] {
new Document[] { new DocumentImpl(TEXTS[0], "doc-0", Arrays.asList((Marking) new SpanImpl(4, 9))),
new DocumentImpl(TEXTS[1], "doc-1",
Arrays.asList((Marking) new SpanImpl(21, 3), (Marking) new SpanImpl(42, 4))) },
GOLD_STD, Matching.STRONG_ANNOTATION_MATCH,
new double[] { 0.5, 1.0 / 3.0, (4.0 / 5.0) / 2.0, 2.0 / 3.0, 0.5, (4.0 / 7.0), 0 } });
testConfigs.add(new Object[] {
new Document[] { new DocumentImpl(TEXTS[0], "doc-0", Arrays.asList((Marking) new SpanImpl(4, 9))),
new DocumentImpl(TEXTS[1], "doc-1",
Arrays.asList((Marking) new SpanImpl(21, 3), (Marking) new SpanImpl(42, 4))) },
GOLD_STD, Matching.WEAK_ANNOTATION_MATCH,
new double[] { 1.0, 5.0 / 6.0, (1.0 + (4.0 / 5.0)) / 2.0, 1.0, 0.75, (1.5 / 1.75), 0 } });
return testConfigs;
}
private Document annotatorResults[];
private Document goldStandards[];
private double expectedResults[];
private Matching matching;
public EntityRecognitionTest(Document[] annotatorResults, Document[] goldStandards, Matching matching,
double[] expectedResults) {
this.annotatorResults = annotatorResults;
this.goldStandards = goldStandards;
this.expectedResults = expectedResults;
this.matching = matching;
}
@Test
public void test() {
int experimentTaskId = 1;
SimpleLoggingResultStoringDAO4Debugging experimentDAO = new SimpleLoggingResultStoringDAO4Debugging();
ExperimentTaskConfiguration configuration = new ExperimentTaskConfiguration(
new TestAnnotatorConfiguration(Arrays.asList(annotatorResults), ExperimentType.ERec),
new TestDataset(Arrays.asList(goldStandards), ExperimentType.ERec), ExperimentType.ERec, matching);
runTest(experimentTaskId, experimentDAO, null, new EvaluatorFactory(), configuration,
new F1MeasureTestingObserver(this, experimentTaskId, experimentDAO, expectedResults));
}
}