/**
* This file is part of General Entity Annotator Benchmark.
*
* General Entity Annotator Benchmark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* General Entity Annotator Benchmark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with General Entity Annotator Benchmark. If not, see <http://www.gnu.org/licenses/>.
*/
package org.aksw.gerbil.web;
import it.unimi.dsi.fastutil.doubles.DoubleArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.aksw.gerbil.annotator.AnnotatorConfiguration;
import org.aksw.gerbil.database.ExperimentDAO;
import org.aksw.gerbil.dataset.DatasetConfiguration;
import org.aksw.gerbil.datatypes.ExperimentTaskResult;
import org.aksw.gerbil.datatypes.ExperimentType;
import org.aksw.gerbil.matching.Matching;
import org.aksw.gerbil.utils.DatasetMetaData;
import org.aksw.gerbil.utils.DatasetMetaDataMapping;
import org.aksw.gerbil.utils.PearsonsSampleCorrelationCoefficient;
import org.aksw.gerbil.web.config.AdapterList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
@Controller
public class ExperimentOverviewController {
private static final Logger LOGGER = LoggerFactory.getLogger(ExperimentOverviewController.class);
private static final double NOT_AVAILABLE_SENTINAL = -2;
private static final int MIN_NUMBER_OF_VALUES_FOR_CORR_CALC = 5;
private static final String CORRELATION_TABLE_COLUMN_HEADINGS[] = { "number of documents", "avg. document length",
"number of entities", "entities per document", "entities per token", "amount of persons",
"amount of organizations", "amount of locations", "amount of others"/*
* ,
* "corr. based on # datasets"
*/};
@Autowired
@Qualifier("experimentDAO")
private ExperimentDAO dao;
@Autowired
@Qualifier("annotators")
private AdapterList<AnnotatorConfiguration> annotators;
@Autowired
@Qualifier("datasets")
private AdapterList<DatasetConfiguration> datasets;
@RequestMapping("/experimentoverview")
public @ResponseBody String experimentoverview(@RequestParam(value = "experimentType") String experimentType,
@RequestParam(value = "matching") String matchingString) {
LOGGER.debug("Got request on /experimentoverview(experimentType={}, matching={}", experimentType,
matchingString);
Matching matching = MainController.getMatching(matchingString);
ExperimentType eType = ExperimentType.valueOf(experimentType);
String annotatorNames[] = loadAnnotators(eType);
String datasetNames[] = loadDatasets(eType);
double results[][] = loadLatestResults(eType, matching, annotatorNames, datasetNames);
double correlations[][] = calculateCorrelations(results, datasetNames);
return generateJson(results, correlations, annotatorNames, datasetNames);
}
private double[][] loadLatestResults(ExperimentType experimentType, Matching matching, String[] annotatorNames,
String[] datasetNames) {
Map<String, Integer> annotator2Index = new HashMap<String, Integer>();
for (int i = 0; i < annotatorNames.length; ++i) {
annotator2Index.put(annotatorNames[i], i);
}
Map<String, Integer> dataset2Index = new HashMap<String, Integer>();
for (int i = 0; i < datasetNames.length; ++i) {
dataset2Index.put(datasetNames[i], i);
}
List<ExperimentTaskResult> expResults = dao.getLatestResultsOfExperiments(experimentType.name(),
matching.name(), annotatorNames, datasetNames);
double results[][] = new double[annotatorNames.length][datasetNames.length];
for (int i = 0; i < results.length; ++i) {
Arrays.fill(results[i], NOT_AVAILABLE_SENTINAL);
}
int row, col;
for (ExperimentTaskResult result : expResults) {
if (annotator2Index.containsKey(result.annotator) && dataset2Index.containsKey(result.dataset)) {
row = annotator2Index.get(result.annotator);
col = dataset2Index.get(result.dataset);
if (result.state == ExperimentDAO.TASK_FINISHED) {
results[row][col] = result.getMicroF1Measure();
} else {
results[row][col] = result.state;
}
}
}
return results;
}
private String[] loadAnnotators(ExperimentType eType) {
Set<String> annotatorNames = annotators.getAdapterNamesForExperiment(eType);
String annotatorNameArray[] = annotatorNames.toArray(new String[annotatorNames.size()]);
Arrays.sort(annotatorNameArray);
return annotatorNameArray;
}
private String[] loadDatasets(ExperimentType eType) {
Set<String> datasetNames = datasets.getAdapterNamesForExperiment(eType);
String datasetNameArray[] = datasetNames.toArray(new String[datasetNames.size()]);
Arrays.sort(datasetNameArray);
return datasetNameArray;
}
private double[][] calculateCorrelations(double[][] results, String datasetNames[]) {
DatasetMetaDataMapping mapping = DatasetMetaDataMapping.getInstance();
DatasetMetaData metadata[] = new DatasetMetaData[datasetNames.length];
for (int i = 0; i < datasetNames.length; ++i) {
metadata[i] = mapping.getMetaData(datasetNames[i]);
}
double correlations[][] = new double[results.length][CORRELATION_TABLE_COLUMN_HEADINGS.length];
DoubleArrayList annotatorResults = new DoubleArrayList(datasetNames.length);
DoubleArrayList numberOfDocuments = new DoubleArrayList(datasetNames.length);
DoubleArrayList avgDocumentLength = new DoubleArrayList(datasetNames.length);
DoubleArrayList numberOfEntities = new DoubleArrayList(datasetNames.length);
DoubleArrayList entitiesPerDoc = new DoubleArrayList(datasetNames.length);
DoubleArrayList entitiesPerToken = new DoubleArrayList(datasetNames.length);
DoubleArrayList amountOfPersons = new DoubleArrayList(datasetNames.length);
DoubleArrayList amountOfOrganizations = new DoubleArrayList(datasetNames.length);
DoubleArrayList amountOfLocations = new DoubleArrayList(datasetNames.length);
DoubleArrayList amountOfOthers = new DoubleArrayList(datasetNames.length);
double annotatorResultsAsArray[];
int elementCount;
for (int i = 0; i < correlations.length; ++i) {
Arrays.fill(correlations[i], NOT_AVAILABLE_SENTINAL);
// load the values for this annotator
annotatorResults.clear();
numberOfDocuments.clear();
avgDocumentLength.clear();
numberOfEntities.clear();
entitiesPerDoc.clear();
entitiesPerToken.clear();
amountOfPersons.clear();
amountOfOrganizations.clear();
amountOfLocations.clear();
amountOfOthers.clear();
for (int j = 0; j < results[i].length; ++j) {
if ((metadata[j] != null) && (results[i][j] >= 0)) {
annotatorResults.add(results[i][j]);
numberOfDocuments.add(metadata[j].numberOfDocuments);
avgDocumentLength.add(metadata[j].avgDocumentLength);
numberOfEntities.add(metadata[j].numberOfEntities);
entitiesPerDoc.add(metadata[j].entitiesPerDoc);
entitiesPerToken.add(metadata[j].entitiesPerToken);
amountOfPersons.add(metadata[j].amountOfPersons);
amountOfOrganizations.add(metadata[j].amountOfOrganizations);
amountOfLocations.add(metadata[j].amountOfLocations);
amountOfOthers.add(metadata[j].amountOfOthers);
}
}
// If we have enough datasets with metadata and results of the
// current annotator for these datasets
elementCount = annotatorResults.size();
if (elementCount > MIN_NUMBER_OF_VALUES_FOR_CORR_CALC) {
annotatorResultsAsArray = annotatorResults.toArray(new double[elementCount]);
correlations[i][0] = PearsonsSampleCorrelationCoefficient.calculateRankCorrelation(
annotatorResultsAsArray, numberOfDocuments.toArray(new double[elementCount]));
correlations[i][1] = PearsonsSampleCorrelationCoefficient.calculateRankCorrelation(
annotatorResultsAsArray, avgDocumentLength.toArray(new double[elementCount]));
correlations[i][2] = PearsonsSampleCorrelationCoefficient.calculateRankCorrelation(
annotatorResultsAsArray, numberOfEntities.toArray(new double[elementCount]));
correlations[i][3] = PearsonsSampleCorrelationCoefficient.calculateRankCorrelation(
annotatorResultsAsArray, entitiesPerDoc.toArray(new double[elementCount]));
correlations[i][4] = PearsonsSampleCorrelationCoefficient.calculateRankCorrelation(
annotatorResultsAsArray, entitiesPerToken.toArray(new double[elementCount]));
correlations[i][5] = PearsonsSampleCorrelationCoefficient.calculateRankCorrelation(
annotatorResultsAsArray, amountOfPersons.toArray(new double[elementCount]));
correlations[i][6] = PearsonsSampleCorrelationCoefficient.calculateRankCorrelation(
annotatorResultsAsArray, amountOfOrganizations.toArray(new double[elementCount]));
correlations[i][7] = PearsonsSampleCorrelationCoefficient.calculateRankCorrelation(
annotatorResultsAsArray, amountOfLocations.toArray(new double[elementCount]));
correlations[i][8] = PearsonsSampleCorrelationCoefficient.calculateRankCorrelation(
annotatorResultsAsArray, amountOfOthers.toArray(new double[elementCount]));
// correlations[i][9] = annotatorResultsAsArray.length;
}
}
return correlations;
}
private String generateJson(double[][] results, double[][] correlations, String annotatorNames[],
String datasetNames[]) {
StringBuilder jsonBuilder = new StringBuilder();
// jsonBuilder.append("results=");
jsonBuilder.append('[');
jsonBuilder.append(generateJSonTableString(results, datasetNames, annotatorNames, "Micro F1-measure"));
jsonBuilder.append(',');
jsonBuilder.append(generateJSonTableString(correlations, CORRELATION_TABLE_COLUMN_HEADINGS, annotatorNames,
"Correlations"));
jsonBuilder.append(']');
return jsonBuilder.toString();
}
private String generateJSonTableString(double values[][], String columnHeadings[], String lineHeadings[],
String tableName) {
StringBuilder dataBuilder = new StringBuilder();
dataBuilder.append("[[\"");
dataBuilder.append(tableName);
for (int i = 0; i < columnHeadings.length; ++i) {
dataBuilder.append("\",\"");
dataBuilder.append(columnHeadings[i]);
}
for (int i = 0; i < lineHeadings.length; ++i) {
dataBuilder.append("\"],\n[\"");
dataBuilder.append(lineHeadings[i]);
for (int j = 0; j < columnHeadings.length; ++j) {
dataBuilder.append("\",\"");
// if this is a real result
if (values[i][j] > NOT_AVAILABLE_SENTINAL) {
dataBuilder.append(String.format(Locale.US, "%.3f", values[i][j]));
} else {
// if this value is simply missing
if (values[i][j] == NOT_AVAILABLE_SENTINAL) {
dataBuilder.append("n.a.");
} else {
// this is an error value
dataBuilder.append("error (");
dataBuilder.append((int) values[i][j]);
dataBuilder.append(')');
}
}
}
}
dataBuilder.append("\"]]");
return dataBuilder.toString();
}
}