/**
* This file is part of General Entity Annotator Benchmark.
*
* General Entity Annotator Benchmark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* General Entity Annotator Benchmark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with General Entity Annotator Benchmark. If not, see <http://www.gnu.org/licenses/>.
*/
package org.aksw.gerbil.execute;
import java.util.ArrayList;
import java.util.List;
import org.aksw.gerbil.annotator.A2KBAnnotator;
import org.aksw.gerbil.annotator.Annotator;
import org.aksw.gerbil.annotator.C2KBAnnotator;
import org.aksw.gerbil.annotator.D2KBAnnotator;
import org.aksw.gerbil.annotator.EntityRecognizer;
import org.aksw.gerbil.annotator.EntityTyper;
import org.aksw.gerbil.annotator.OKETask1Annotator;
import org.aksw.gerbil.annotator.OKETask2Annotator;
import org.aksw.gerbil.annotator.RT2KBAnnotator;
import org.aksw.gerbil.annotator.decorator.ErrorCountingAnnotatorDecorator;
import org.aksw.gerbil.annotator.decorator.SingleInstanceSecuringAnnotatorDecorator;
import org.aksw.gerbil.annotator.decorator.TimeMeasuringAnnotatorDecorator;
import org.aksw.gerbil.database.ExperimentDAO;
import org.aksw.gerbil.database.ResultNameToIdMapping;
import org.aksw.gerbil.dataset.Dataset;
import org.aksw.gerbil.dataset.DatasetConfiguration;
import org.aksw.gerbil.datatypes.ErrorTypes;
import org.aksw.gerbil.datatypes.ExperimentTaskConfiguration;
import org.aksw.gerbil.datatypes.ExperimentTaskResult;
import org.aksw.gerbil.datatypes.ExperimentTaskState;
import org.aksw.gerbil.evaluate.DoubleEvaluationResult;
import org.aksw.gerbil.evaluate.EvaluationResult;
import org.aksw.gerbil.evaluate.EvaluationResultContainer;
import org.aksw.gerbil.evaluate.Evaluator;
import org.aksw.gerbil.evaluate.EvaluatorFactory;
import org.aksw.gerbil.evaluate.IntEvaluationResult;
import org.aksw.gerbil.evaluate.SubTaskResult;
import org.aksw.gerbil.evaluate.impl.FMeasureCalculator;
import org.aksw.gerbil.exceptions.GerbilException;
import org.aksw.gerbil.semantic.sameas.SameAsRetriever;
import org.aksw.gerbil.semantic.sameas.SameAsRetrieverUtils;
import org.aksw.gerbil.semantic.sameas.impl.MultipleSameAsRetriever;
import org.aksw.gerbil.semantic.sameas.impl.model.DatasetBasedSameAsRetriever;
import org.aksw.gerbil.transfer.nif.Document;
import org.aksw.gerbil.transfer.nif.Marking;
import org.aksw.gerbil.transfer.nif.Meaning;
import org.aksw.gerbil.transfer.nif.MeaningSpan;
import org.aksw.gerbil.transfer.nif.Span;
import org.aksw.gerbil.transfer.nif.TypedSpan;
import org.aksw.gerbil.transfer.nif.data.TypedNamedEntity;
import org.aksw.simba.topicmodeling.concurrent.tasks.Task;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * This is a single experiment designed as {@link Task} to be able to run
 * several tasks in parallel.
 *
 * <p>
 * {@link #run()} loads the dataset and the annotator described by the
 * {@link ExperimentTaskConfiguration}, decorates the annotator with a
 * {@link TimeMeasuringAnnotatorDecorator}, an
 * {@link ErrorCountingAnnotatorDecorator} and a
 * {@link SingleInstanceSecuringAnnotatorDecorator}, sends every document of the
 * dataset to the annotator, evaluates the responses and hands the outcome (or
 * an error code) to the {@link ExperimentDAO}.
 * </p>
 *
 * @author Michael Röder (roeder@informatik.uni-leipzig.de)
 *
 */
public class ExperimentTask implements Task {

    private static final Logger LOGGER = LoggerFactory.getLogger(ExperimentTask.class);

    /** DAO that receives the result or the error state of this task. */
    private final ExperimentDAO experimentDAO;
    /** Annotator, dataset and experiment type of this task. */
    private final ExperimentTaskConfiguration configuration;
    /** Id under which the result of this task is stored. */
    private final int experimentTaskId;
    /** Factory adding the evaluators that match the configuration. */
    private final EvaluatorFactory evFactory;
    /** Progress of the running experiment; {@code null} until the experiment is about to start. */
    private ExperimentTaskState taskState = null;
    /** Optional writer for the raw annotator responses; may be {@code null}. */
    private AnnotatorOutputWriter annotatorOutputWriter = null;
    /** Retriever used to add sameAs URIs to the annotator responses; may be {@code null}. */
    private final SameAsRetriever globalRetriever;

    /**
     * Creates the task. Nothing is loaded or executed before {@link #run()} is
     * called.
     *
     * @param experimentTaskId
     *            id under which the result or error state is stored
     * @param experimentDAO
     *            DAO used for storing the result or error state
     * @param globalRetriever
     *            sameAs retriever applied to annotator responses, may be
     *            {@code null}
     * @param evFactory
     *            factory creating the evaluators for the configured experiment
     * @param configuration
     *            annotator, dataset and experiment type of this task
     */
    public ExperimentTask(int experimentTaskId, ExperimentDAO experimentDAO, SameAsRetriever globalRetriever,
            org.aksw.gerbil.evaluate.EvaluatorFactory evFactory, ExperimentTaskConfiguration configuration) {
        this.experimentDAO = experimentDAO;
        this.configuration = configuration;
        this.experimentTaskId = experimentTaskId;
        this.evFactory = evFactory;
        this.globalRetriever = globalRetriever;
    }

    /**
     * Executes the experiment and stores its outcome.
     *
     * <p>
     * On success the transformed result is stored via
     * {@link ExperimentDAO#setExperimentTaskResult}. If a
     * {@link GerbilException} occurs, its error code is stored as state of the
     * task. Any other exception is logged and
     * {@link ErrorTypes#UNEXPECTED_EXCEPTION} is stored so that the task does
     * not stay without a final state. The annotator and the dataset are closed
     * in every case.
     * </p>
     */
    @Override
    public void run() {
        LOGGER.info("Task started {}", configuration);
        Annotator annotator = null;
        Dataset dataset = null;
        try {
            // Create dataset
            dataset = configuration.datasetConfig.getDataset(configuration.type);
            if (dataset == null) {
                throw new GerbilException("dataset=\"" + configuration.datasetConfig.getName() + "\" experimentType=\""
                        + configuration.type.name() + "\".", ErrorTypes.DATASET_DOES_NOT_SUPPORT_EXPERIMENT);
            }

            // Create annotator
            annotator = (Annotator) configuration.annotatorConfig.getAnnotator(configuration.type);
            if (annotator == null) {
                throw new GerbilException("annotator=\"" + configuration.annotatorConfig.getName()
                        + "\" experimentType=\"" + configuration.type.name() + "\".",
                        ErrorTypes.ANNOTATOR_DOES_NOT_SUPPORT_EXPERIMENT);
            }

            // Add decorating evaluators. The time measurer and the error
            // counter are kept as separate references because they are used
            // as evaluators as well.
            TimeMeasuringAnnotatorDecorator timeMeasurer = TimeMeasuringAnnotatorDecorator.createDecorator(
                    configuration.type, annotator);
            ErrorCountingAnnotatorDecorator errorCounter = ErrorCountingAnnotatorDecorator.createDecorator(
                    configuration.type, timeMeasurer, dataset.size());
            Annotator decoratedAnnotator = SingleInstanceSecuringAnnotatorDecorator.createDecorator(
                    configuration.type, errorCounter);

            List<Evaluator<?>> evaluators = new ArrayList<Evaluator<?>>();
            evFactory.addEvaluators(evaluators, configuration, dataset);
            evaluators.add(timeMeasurer);
            evaluators.add(errorCounter);

            // Prepare dataset for the experiment
            // prepareDataset(dataset);

            taskState = new ExperimentTaskState(dataset.size());
            // perform experiment
            EvaluationResult result = runExperiment(dataset, decoratedAnnotator, evaluators, taskState);

            // create result object
            // FIXME Fix this workaround
            ExperimentTaskResult expResult = new ExperimentTaskResult(configuration, new double[6],
                    ExperimentDAO.TASK_FINISHED, 0);
            transformResults(result, expResult);

            // store result
            experimentDAO.setExperimentTaskResult(experimentTaskId, expResult);
            LOGGER.info("Task Finished {}", configuration);
        } catch (GerbilException e) {
            LOGGER.error("Got an error while running the task. Storing the error code in the db...", e);
            // store error
            experimentDAO.setExperimentState(experimentTaskId, e.getErrorType().getErrorCode());
        } catch (Exception e) {
            LOGGER.error("Error while trying to execute experiment.", e);
            // Without this, the task would never receive a final state.
            storeUnexpectedErrorState();
        } finally {
            IOUtils.closeQuietly(annotator);
            IOUtils.closeQuietly(dataset);
        }
    }

    /**
     * Stores {@link ErrorTypes#UNEXPECTED_EXCEPTION} as state of this task. A
     * failure of the DAO is only logged because the unexpected exception that
     * led here may have been caused by the DAO itself.
     */
    private void storeUnexpectedErrorState() {
        try {
            experimentDAO.setExperimentState(experimentTaskId, ErrorTypes.UNEXPECTED_EXCEPTION.getErrorCode());
        } catch (Exception e) {
            LOGGER.error("Couldn't store the error state of the task in the db.", e);
        }
    }

    /**
     * Prepares the given dataset for the experiment, i.e., performs a sameAs
     * retrieval if it is needed for the experiment type.
     *
     * <p>
     * A retriever based on the dataset is combined with the global retriever
     * if both are available. If neither is available, or the experiment type
     * is not one of the types listed in the switch, the dataset is left
     * untouched.
     * </p>
     *
     * @param dataset
     *            the dataset whose document markings should be extended
     * @deprecated This should be done by the {@link DatasetConfiguration} class
     *             that has loaded the dataset
     */
    @Deprecated
    protected void prepareDataset(Dataset dataset) {
        switch (configuration.type) {
        case A2KB:// falls through
        case C2KB:
        case D2KB:
        case Rc2KB:
        case Sa2KB:
        case Sc2KB:
        case OKE_Task1: // falls through
        case OKE_Task2:
        case ETyping: {
            SameAsRetriever datasetRetriever = DatasetBasedSameAsRetriever.create(dataset);
            SameAsRetriever retriever;
            if (datasetRetriever == null) {
                retriever = globalRetriever;
            } else if (globalRetriever == null) {
                retriever = datasetRetriever;
            } else {
                retriever = new MultipleSameAsRetriever(datasetRetriever, globalRetriever);
            }
            if (retriever != null) {
                for (Document document : dataset.getInstances()) {
                    SameAsRetrieverUtils.addSameURIsToMarkings(retriever, document.getMarkings());
                }
            }
            return;
        }
        case ERec:// falls through
        default:
            // nothing to do
            return;
        }
    }

    /**
     * Prepares the given annotator results for the evaluation, i.e., performs a
     * sameAs retrieval if it is needed for the experiment type.
     *
     * @param results
     *            the annotator responses, one list per document
     * @param annotatorSameAsRetriever
     *            the retriever used to add sameAs URIs; if it is {@code null}
     *            the results are left untouched
     */
    @SuppressWarnings("deprecation")
    protected void prepareAnnotatorResults(List<? extends List<? extends Meaning>> results,
            SameAsRetriever annotatorSameAsRetriever) {
        switch (configuration.type) {
        case A2KB:// falls through
        case C2KB:
        case D2KB:
        case Rc2KB:
        case Sa2KB:
        case Sc2KB:
        case OKE_Task1: // falls through
        case OKE_Task2:
        case ETyping: {
            if (annotatorSameAsRetriever != null) {
                for (List<? extends Meaning> result : results) {
                    SameAsRetrieverUtils.addSameURIsToMeanings(annotatorSameAsRetriever, result);
                }
            }
            return;
        }
        case ERec:// falls through
        default:
            // nothing to do
            return;
        }
    }

    /**
     * Copies the values of the given evaluation result into the given
     * experiment task result.
     *
     * <ul>
     * <li>A {@link SubTaskResult} becomes a new sub task of {@code expResult}
     * that is filled recursively.</li>
     * <li>Any other {@link EvaluationResultContainer} is flattened into
     * {@code expResult}.</li>
     * <li>A {@link DoubleEvaluationResult} carrying one of the six micro/macro
     * precision, recall or F1 names is written into
     * {@code expResult.results}; other names are stored as additional results
     * if {@link ResultNameToIdMapping} knows them.</li>
     * <li>An {@link IntEvaluationResult} is either the error count or an
     * additional result.</li>
     * </ul>
     * Results of any other kind are ignored.
     *
     * @param result
     *            the evaluation result that should be transformed
     * @param expResult
     *            the task result that receives the values
     */
    protected void transformResults(EvaluationResult result, ExperimentTaskResult expResult) {
        if (result instanceof SubTaskResult) {
            // The check for SubTaskResult has to come before the container
            // check since it is cast to a container as well.
            ExperimentTaskResult subTask = new ExperimentTaskResult(((SubTaskResult) result).getConfiguration(),
                    new double[6], ExperimentDAO.TASK_FINISHED, 0);
            for (EvaluationResult child : ((EvaluationResultContainer) result).getResults()) {
                transformResults(child, subTask);
            }
            expResult.addSubTask(subTask);
        } else if (result instanceof EvaluationResultContainer) {
            for (EvaluationResult child : ((EvaluationResultContainer) result).getResults()) {
                transformResults(child, expResult);
            }
        } else if (result instanceof DoubleEvaluationResult) {
            storeDoubleResult(result.getName(), ((DoubleEvaluationResult) result).getValueAsDouble(), expResult);
        } else if (result instanceof IntEvaluationResult) {
            if (result.getName().equals(ErrorCountingAnnotatorDecorator.ERROR_COUNT_RESULT_NAME)) {
                expResult.errorCount = ((IntEvaluationResult) result).getValueAsInt();
                return;
            }
            int id = lookupAdditionalResultId(result.getName());
            if (id != ResultNameToIdMapping.UKNOWN_RESULT_TYPE) {
                expResult.addAdditionalResult(id, ((IntEvaluationResult) result).getValueAsInt());
            }
        }
    }

    /**
     * Writes a double value either into the fixed result array of the given
     * task result or, if its name is none of the six standard measures, into
     * the additional results.
     */
    private void storeDoubleResult(String name, double value, ExperimentTaskResult expResult) {
        switch (name) {
        case FMeasureCalculator.MACRO_F1_SCORE_NAME:
            expResult.results[ExperimentTaskResult.MACRO_F1_MEASURE_INDEX] = value;
            return;
        case FMeasureCalculator.MACRO_PRECISION_NAME:
            expResult.results[ExperimentTaskResult.MACRO_PRECISION_INDEX] = value;
            return;
        case FMeasureCalculator.MACRO_RECALL_NAME:
            expResult.results[ExperimentTaskResult.MACRO_RECALL_INDEX] = value;
            return;
        case FMeasureCalculator.MICRO_F1_SCORE_NAME:
            expResult.results[ExperimentTaskResult.MICRO_F1_MEASURE_INDEX] = value;
            return;
        case FMeasureCalculator.MICRO_PRECISION_NAME:
            expResult.results[ExperimentTaskResult.MICRO_PRECISION_INDEX] = value;
            return;
        case FMeasureCalculator.MICRO_RECALL_NAME:
            expResult.results[ExperimentTaskResult.MICRO_RECALL_INDEX] = value;
            return;
        default: {
            int id = lookupAdditionalResultId(name);
            if (id != ResultNameToIdMapping.UKNOWN_RESULT_TYPE) {
                expResult.addAdditionalResult(id, value);
            }
            return;
        }
        }
    }

    /**
     * Returns the id of the additional result with the given name. If the name
     * is unknown, an error is logged and
     * {@link ResultNameToIdMapping#UKNOWN_RESULT_TYPE} is returned.
     */
    private int lookupAdditionalResultId(String resultName) {
        int id = ResultNameToIdMapping.getInstance().getResultId(resultName);
        if (id == ResultNameToIdMapping.UKNOWN_RESULT_TYPE) {
            LOGGER.error("Got an unknown additional result \"{}\". Discarding it.", resultName);
        }
        return id;
    }

    /**
     * Sends every document of the dataset to the annotator and evaluates the
     * responses against the markings of the documents.
     *
     * <p>
     * The experiment type of the configuration decides to which annotator
     * interface the given annotator is cast, how the documents are reduced
     * before they are sent and which marking class is used as gold standard.
     * If an {@link AnnotatorOutputWriter} has been set, the raw responses are
     * handed to it before the evaluation. For D2KB, A2KB, Sa2KB, C2KB,
     * OKE_Task1 and OKE_Task2 the responses are additionally passed to
     * {@link #prepareAnnotatorResults(List, SameAsRetriever)}.
     * </p>
     *
     * @param dataset
     *            the dataset offering the documents
     * @param annotator
     *            the (decorated) annotator, has to implement the interface
     *            matching the experiment type
     * @param evaluators
     *            the evaluators applied to the responses
     * @param state
     *            the state whose step count is increased after every document;
     *            if it is {@code null} the state of this task is used
     * @return the container with the results of all evaluators
     * @throws GerbilException
     *             if the experiment type is not supported (Sc2KB, Rc2KB and
     *             unknown types) or if any exception occurs during the
     *             experiment; exceptions that are not a {@link GerbilException}
     *             are wrapped with {@link ErrorTypes#UNEXPECTED_EXCEPTION}
     */
    @SuppressWarnings({ "deprecation" })
    protected EvaluationResult runExperiment(Dataset dataset, Annotator annotator,
            List<Evaluator<? extends Marking>> evaluators, ExperimentTaskState state) throws GerbilException {
        // Honour the given state; fall back to the field for callers that pass null.
        final ExperimentTaskState progress = (state != null) ? state : taskState;
        try {
            switch (configuration.type) {
            case D2KB: {
                List<List<MeaningSpan>> results = new ArrayList<List<MeaningSpan>>(dataset.size());
                List<List<MeaningSpan>> goldStandard = new ArrayList<List<MeaningSpan>>(dataset.size());
                D2KBAnnotator linker = (D2KBAnnotator) annotator;
                for (Document document : dataset.getInstances()) {
                    // reduce the document to a text and a list of Spans
                    results.add(linker.performD2KBTask(DocumentInformationReducer.reduceToTextAndSpans(document)));
                    goldStandard.add(document.getMarkings(MeaningSpan.class));
                    progress.increaseExperimentStepCount();
                }
                if (annotatorOutputWriter != null) {
                    annotatorOutputWriter.storeAnnotatorOutput(configuration, results, dataset.getInstances());
                }
                prepareAnnotatorResults(results, globalRetriever);
                return evaluate(evaluators, results, goldStandard);
            }
            case Sa2KB: // falls through
            case A2KB: {
                List<List<MeaningSpan>> results = new ArrayList<List<MeaningSpan>>(dataset.size());
                List<List<MeaningSpan>> goldStandard = new ArrayList<List<MeaningSpan>>(dataset.size());
                A2KBAnnotator extractor = (A2KBAnnotator) annotator;
                for (Document document : dataset.getInstances()) {
                    // reduce the document to a single text
                    results.add(extractor.performA2KBTask(DocumentInformationReducer.reduceToPlainText(document)));
                    goldStandard.add(document.getMarkings(MeaningSpan.class));
                    progress.increaseExperimentStepCount();
                }
                if (annotatorOutputWriter != null) {
                    annotatorOutputWriter.storeAnnotatorOutput(configuration, results, dataset.getInstances());
                }
                prepareAnnotatorResults(results, globalRetriever);
                return evaluate(evaluators, results, goldStandard);
            }
            case C2KB: {
                List<List<Meaning>> results = new ArrayList<List<Meaning>>(dataset.size());
                List<List<Meaning>> goldStandard = new ArrayList<List<Meaning>>(dataset.size());
                C2KBAnnotator c2KBAnnotator = (C2KBAnnotator) annotator;
                for (Document document : dataset.getInstances()) {
                    // reduce the document to a single text
                    results.add(c2KBAnnotator.performC2KB(DocumentInformationReducer.reduceToPlainText(document)));
                    goldStandard.add(document.getMarkings(Meaning.class));
                    progress.increaseExperimentStepCount();
                }
                if (annotatorOutputWriter != null) {
                    annotatorOutputWriter.storeAnnotatorOutput(configuration, results, dataset.getInstances());
                }
                prepareAnnotatorResults(results, globalRetriever);
                return evaluate(evaluators, results, goldStandard);
            }
            case Sc2KB: // falls through
            case Rc2KB: {
                throw new GerbilException(ErrorTypes.UNEXPECTED_EXCEPTION);
            }
            case ERec: {
                List<List<Span>> results = new ArrayList<List<Span>>(dataset.size());
                List<List<Span>> goldStandard = new ArrayList<List<Span>>(dataset.size());
                EntityRecognizer recognizer = (EntityRecognizer) annotator;
                for (Document document : dataset.getInstances()) {
                    // reduce the document to a single text
                    results.add(recognizer.performRecognition(DocumentInformationReducer.reduceToPlainText(document)));
                    goldStandard.add(document.getMarkings(Span.class));
                    progress.increaseExperimentStepCount();
                }
                if (annotatorOutputWriter != null) {
                    annotatorOutputWriter.storeAnnotatorOutput(configuration, results, dataset.getInstances());
                }
                return evaluate(evaluators, results, goldStandard);
            }
            case ETyping: {
                List<List<TypedSpan>> results = new ArrayList<List<TypedSpan>>(dataset.size());
                List<List<TypedSpan>> goldStandard = new ArrayList<List<TypedSpan>>(dataset.size());
                EntityTyper typer = (EntityTyper) annotator;
                for (Document document : dataset.getInstances()) {
                    // reduce the document to a text and a list of Spans
                    results.add(typer.performTyping(DocumentInformationReducer.reduceToTextAndSpans(document)));
                    goldStandard.add(document.getMarkings(TypedSpan.class));
                    progress.increaseExperimentStepCount();
                }
                if (annotatorOutputWriter != null) {
                    annotatorOutputWriter.storeAnnotatorOutput(configuration, results, dataset.getInstances());
                }
                return evaluate(evaluators, results, goldStandard);
            }
            case RT2KB: {
                List<List<TypedSpan>> results = new ArrayList<List<TypedSpan>>(dataset.size());
                List<List<TypedSpan>> goldStandard = new ArrayList<List<TypedSpan>>(dataset.size());
                RT2KBAnnotator extractor = (RT2KBAnnotator) annotator;
                for (Document document : dataset.getInstances()) {
                    // reduce the document to a single text
                    results.add(extractor.performRT2KBTask(DocumentInformationReducer.reduceToPlainText(document)));
                    goldStandard.add(document.getMarkings(TypedSpan.class));
                    progress.increaseExperimentStepCount();
                }
                if (annotatorOutputWriter != null) {
                    annotatorOutputWriter.storeAnnotatorOutput(configuration, results, dataset.getInstances());
                }
                return evaluate(evaluators, results, goldStandard);
            }
            case OKE_Task1: {
                List<List<TypedNamedEntity>> results = new ArrayList<List<TypedNamedEntity>>(dataset.size());
                List<List<TypedNamedEntity>> goldStandard = new ArrayList<List<TypedNamedEntity>>(dataset.size());
                OKETask1Annotator okeTask1Annotator = (OKETask1Annotator) annotator;
                for (Document document : dataset.getInstances()) {
                    // reduce the document to a text and a list of Spans
                    results.add(okeTask1Annotator.performTask1(DocumentInformationReducer
                            .reduceToTextAndSpans(document)));
                    goldStandard.add(document.getMarkings(TypedNamedEntity.class));
                    progress.increaseExperimentStepCount();
                }
                if (annotatorOutputWriter != null) {
                    annotatorOutputWriter.storeAnnotatorOutput(configuration, results, dataset.getInstances());
                }
                prepareAnnotatorResults(results, globalRetriever);
                return evaluate(evaluators, results, goldStandard);
            }
            case OKE_Task2: {
                List<List<TypedNamedEntity>> results = new ArrayList<List<TypedNamedEntity>>(dataset.size());
                List<List<TypedNamedEntity>> goldStandard = new ArrayList<List<TypedNamedEntity>>(dataset.size());
                OKETask2Annotator okeTask2Annotator = (OKETask2Annotator) annotator;
                for (Document document : dataset.getInstances()) {
                    // reduce the document to a text and a list of entities
                    results.add(okeTask2Annotator.performTask2(DocumentInformationReducer
                            .reduceToTextAndEntities(document)));
                    goldStandard.add(document.getMarkings(TypedNamedEntity.class));
                    progress.increaseExperimentStepCount();
                }
                if (annotatorOutputWriter != null) {
                    annotatorOutputWriter.storeAnnotatorOutput(configuration, results, dataset.getInstances());
                }
                prepareAnnotatorResults(results, globalRetriever);
                return evaluate(evaluators, results, goldStandard);
            }
            default:
                throw new GerbilException("This experiment type isn't implemented yet. Sorry for this.",
                        ErrorTypes.UNEXPECTED_EXCEPTION);
            }
        } catch (GerbilException e) {
            // already carries an error type, pass it on untouched
            throw e;
        } catch (Exception e) {
            throw new GerbilException(e, ErrorTypes.UNEXPECTED_EXCEPTION);
        }
    }

    /**
     * Applies all given evaluators to the annotator responses and collects
     * their results in a single container.
     *
     * @param evaluators
     *            the evaluators that should be applied; each one is cast
     *            unchecked to an evaluator of the marking type {@code T}
     * @param annotatorResults
     *            the annotator responses, one list per document
     * @param goldStandard
     *            the expected markings, one list per document
     * @return the container holding the results of all evaluators
     */
    @SuppressWarnings("unchecked")
    protected <T extends Marking> EvaluationResult evaluate(List<Evaluator<? extends Marking>> evaluators,
            List<List<T>> annotatorResults, List<List<T>> goldStandard) {
        EvaluationResultContainer evalResults = new EvaluationResultContainer();
        for (Evaluator<? extends Marking> e : evaluators) {
            ((Evaluator<T>) e).evaluate(annotatorResults, goldStandard, evalResults);
        }
        return evalResults;
    }

    /**
     * Returns the string representation of the configuration as id of this
     * task.
     */
    @Override
    public String getId() {
        return configuration.toString();
    }

    /**
     * Returns the progress of the experiment as percentage text or
     * {@code null} if the experiment has not been started yet.
     */
    @Override
    public String getProgress() {
        // Read the field only once so that the null check and the usage see
        // the same object.
        ExperimentTaskState state = taskState;
        if (state == null) {
            return null;
        }
        return (state.getExperimentTaskProcess() * 100.0) + "% of dataset";
    }

    /**
     * Sets the writer that receives the raw annotator responses during the
     * experiment. Without a writer (the default) the responses are not
     * stored.
     *
     * @param annotatorOutputWriter
     *            the writer or {@code null} to disable the storing
     */
    public void setAnnotatorOutputWriter(AnnotatorOutputWriter annotatorOutputWriter) {
        this.annotatorOutputWriter = annotatorOutputWriter;
    }
}