/**
* This file is part of General Entity Annotator Benchmark.
*
* General Entity Annotator Benchmark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* General Entity Annotator Benchmark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with General Entity Annotator Benchmark. If not, see <http://www.gnu.org/licenses/>.
*/
package org.aksw.gerbil.web.config;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.aksw.gerbil.config.GerbilConfiguration;
import org.aksw.gerbil.dataset.Dataset;
import org.aksw.gerbil.dataset.DatasetConfiguration;
import org.aksw.gerbil.dataset.SingletonDatasetConfigImpl;
import org.aksw.gerbil.dataset.check.EntityCheckerManager;
import org.aksw.gerbil.dataset.datahub.DatahubNIFConfig;
import org.aksw.gerbil.dataset.datahub.DatahubNIFLoader;
import org.aksw.gerbil.datatypes.ExperimentType;
import org.aksw.gerbil.semantic.sameas.SameAsRetriever;
import org.aksw.gerbil.web.config.check.Checker;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Spring configuration that builds the list of available
 * {@link DatasetConfiguration}s.
 *
 * <p>
 * Dataset definitions are read from the {@link GerbilConfiguration} using keys
 * of the form {@code <prefix>.<datasetKey>.<suffix>}, where the prefix is
 * {@link #ANNOTATOR_CONFIGURATION_PREFIX}. In addition, the datasets reported
 * by a {@link DatahubNIFLoader} are appended to the list.
 * </p>
 *
 * <p>
 * Note: the public constants carry an {@code ANNOTATOR_} prefix although they
 * describe dataset properties. The names are kept unchanged so that existing
 * references keep compiling.
 * </p>
 */
@Configuration
public class DatasetsConfig {

    private static final Logger LOGGER = LoggerFactory.getLogger(DatasetsConfig.class);

    /** Common prefix of all dataset definition keys. */
    public static final String ANNOTATOR_CONFIGURATION_PREFIX = "org.aksw.gerbil.datasets.definition";
    /** Suffix of the optional boolean key stating whether the dataset is cacheable. */
    public static final String ANNOTATOR_CACHE_FLAG_SUFFIX = "cacheable";
    /** Suffix of the mandatory key containing the dataset class name. */
    public static final String ANNOTATOR_CLASS_SUFFIX = "class";
    /** Suffix of the optional key containing the (String) constructor arguments. */
    public static final String ANNOTATOR_CONSTRUCTOR_ARGS_SUFFIX = "constructorArgs";
    /** Suffix of the mandatory key containing the {@link ExperimentType} name. */
    public static final String ANNOTATOR_EXPERIMENT_TYPE_SUFFIX = "experimentType";
    /** Suffix of the mandatory key containing the dataset name. */
    public static final String ANNOTATOR_NAME_SUFFIX = "name";
    /** Suffix of the optional key containing the class name of a {@link Checker}. */
    public static final String ANNOTATOR_CHECK_CLASS_SUFFIX = "check.class";
    /** Suffix of the optional key containing the arguments handed to the {@link Checker}. */
    public static final String ANNOTATOR_CHECK_ARGS_SUFFIX = "check.args";

    /**
     * Creates the list of dataset configurations from the properties and from
     * the Datahub loader. A dataset whose configuration can not be loaded is
     * logged and skipped; it does not prevent the other datasets from being
     * loaded.
     *
     * @param entityCheckerManager
     *            passed on to every created dataset configuration
     * @param globalRetriever
     *            passed on to every created dataset configuration
     * @return the list of all dataset configurations that could be created
     */
    @Bean
    public static AdapterList<DatasetConfiguration> datasets(EntityCheckerManager entityCheckerManager,
            SameAsRetriever globalRetriever) {
        List<DatasetConfiguration> datasetConfigurations = new ArrayList<DatasetConfiguration>();
        for (String datasetKey : getDatasetKeys()) {
            try {
                DatasetConfiguration configuration = getConfiguration(datasetKey, entityCheckerManager,
                        globalRetriever);
                if (configuration != null) {
                    datasetConfigurations.add(configuration);
                    LOGGER.info("Found dataset configuration {}", configuration);
                }
            } catch (Exception e) {
                // Hand the exception over as throwable so that its stack trace is not lost.
                LOGGER.error("Got an exception while trying to load configuration of \"" + datasetKey
                        + "\" dataset.", e);
            }
        }
        // load Datahub data
        DatahubNIFLoader datahub = new DatahubNIFLoader();
        for (Map.Entry<String, String> dataset : datahub.getDataSets().entrySet()) {
            datasetConfigurations.add(new DatahubNIFConfig(dataset.getKey(), dataset.getValue(), true,
                    entityCheckerManager, globalRetriever));
        }
        LOGGER.info("Found {} datasets.", datasetConfigurations.size());
        return new AdapterList<DatasetConfiguration>(datasetConfigurations);
    }

    /**
     * Collects the distinct dataset keys, i.e., the key segment directly
     * following the configuration prefix, of all keys that have at least one
     * further segment after the dataset key.
     *
     * @return the set of dataset keys found in the configuration
     */
    private static Set<String> getDatasetKeys() {
        Set<String> datasetKeys = new HashSet<String>();
        // length of the prefix including the separating '.'
        int prefixLength = ANNOTATOR_CONFIGURATION_PREFIX.length() + 1;
        @SuppressWarnings("rawtypes")
        Iterator iterator = GerbilConfiguration.getInstance().getKeys(ANNOTATOR_CONFIGURATION_PREFIX);
        while (iterator.hasNext()) {
            String fullKey = (String) iterator.next();
            // A key that is not longer than the prefix (e.g., the prefix itself)
            // can not contain a dataset key and would break the substring call.
            if (fullKey.length() <= prefixLength) {
                continue;
            }
            String remainder = fullKey.substring(prefixLength);
            int pos = remainder.indexOf('.');
            if (pos > 0) {
                datasetKeys.add(remainder.substring(0, pos));
            }
        }
        return datasetKeys;
    }

    /**
     * Builds the configuration of the dataset with the given key.
     *
     * @param datasetKey
     *            the key segment identifying the dataset in the properties
     * @param entityCheckerManager
     *            passed on to the created configuration
     * @param globalRetriever
     *            passed on to the created configuration
     * @return the dataset configuration or {@code null} if a mandatory
     *         property (name, class, experiment type) is missing or if the
     *         configured check failed or could not be executed
     * @throws ClassNotFoundException
     *             if the configured dataset class can not be loaded
     * @throws NoSuchMethodException
     *             if the dataset class has no constructor taking as many
     *             String arguments as have been configured
     * @throws SecurityException
     *             if the constructor lookup is denied
     */
    private static DatasetConfiguration getConfiguration(String datasetKey, EntityCheckerManager entityCheckerManager,
            SameAsRetriever globalRetriever) throws ClassNotFoundException, NoSuchMethodException, SecurityException {
        org.apache.commons.configuration.Configuration config = GerbilConfiguration.getInstance();
        StringBuilder keyBuilder = new StringBuilder();
        String key;

        key = buildKey(keyBuilder, datasetKey, ANNOTATOR_NAME_SUFFIX);
        if (!config.containsKey(key)) {
            LOGGER.error("Couldn't get a name for the \"" + datasetKey + "\" dataset.");
            return null;
        }
        String name = config.getString(key);

        key = buildKey(keyBuilder, datasetKey, ANNOTATOR_CLASS_SUFFIX);
        if (!config.containsKey(key)) {
            LOGGER.error("Couldn't get a class for the \"" + datasetKey + "\" dataset.");
            return null;
        }
        String classString = config.getString(key);
        @SuppressWarnings("unchecked")
        Class<? extends Dataset> datasetClass = (Class<? extends Dataset>) DatasetsConfig.class.getClassLoader()
                .loadClass(classString);

        key = buildKey(keyBuilder, datasetKey, ANNOTATOR_EXPERIMENT_TYPE_SUFFIX);
        if (!config.containsKey(key)) {
            LOGGER.error("Couldn't get an experiment type for the \"" + datasetKey + "\" dataset.");
            return null;
        }
        // valueOf throws an IllegalArgumentException for unknown type names
        ExperimentType type = ExperimentType.valueOf(config.getString(key));

        // datasets are cacheable unless the configuration states otherwise
        key = buildKey(keyBuilder, datasetKey, ANNOTATOR_CACHE_FLAG_SUFFIX);
        boolean cacheable = true;
        if (config.containsKey(key)) {
            cacheable = config.getBoolean(key);
        }

        // All constructor arguments are handed over as Strings.
        String[] constructorArgStrings = getStringArrayOrEmpty(config,
                buildKey(keyBuilder, datasetKey, ANNOTATOR_CONSTRUCTOR_ARGS_SUFFIX));
        Object[] constructorArgs = new Object[constructorArgStrings.length];
        Class<?>[] constructorArgClasses = new Class[constructorArgStrings.length];
        for (int i = 0; i < constructorArgs.length; ++i) {
            constructorArgs[i] = constructorArgStrings[i];
            constructorArgClasses[i] = String.class;
        }
        Constructor<? extends Dataset> constructor = datasetClass.getConstructor(constructorArgClasses);

        // If a checker class has been defined
        key = buildKey(keyBuilder, datasetKey, ANNOTATOR_CHECK_CLASS_SUFFIX);
        if (config.containsKey(key)) {
            String checkerClassName = config.getString(key);
            String[] checkerArgStrings = getStringArrayOrEmpty(config,
                    buildKey(keyBuilder, datasetKey, ANNOTATOR_CHECK_ARGS_SUFFIX));
            if (!passesCheck(name, checkerClassName, checkerArgStrings)) {
                return null;
            }
        }
        return new SingletonDatasetConfigImpl(name, cacheable, constructor, constructorArgs, type,
                entityCheckerManager, globalRetriever);
    }

    /**
     * Returns the String array stored under the given key or an empty array if
     * the configuration does not contain the key.
     */
    private static String[] getStringArrayOrEmpty(org.apache.commons.configuration.Configuration config,
            String key) {
        if (config.containsKey(key)) {
            return config.getStringArray(key);
        }
        return new String[0];
    }

    /**
     * Instantiates the given {@link Checker} class and runs it with the given
     * arguments.
     *
     * @param name
     *            the dataset name (only used for log messages)
     * @param checkerClassName
     *            the fully qualified name of the {@link Checker} implementation
     * @param checkerArgStrings
     *            the arguments handed to the checker
     * @return {@code true} if the check succeeded, {@code false} if it failed
     *         or if the checker could not be created or executed
     */
    private static boolean passesCheck(String name, String checkerClassName, String[] checkerArgStrings) {
        Object[] checkerArgs = new Object[checkerArgStrings.length];
        System.arraycopy(checkerArgStrings, 0, checkerArgs, 0, checkerArgStrings.length);
        try {
            @SuppressWarnings("unchecked")
            Class<? extends Checker> checkerClass = (Class<? extends Checker>) DatasetsConfig.class
                    .getClassLoader().loadClass(checkerClassName);
            Checker checker = checkerClass.getDeclaredConstructor().newInstance();
            if (!checker.check(checkerArgs)) {
                LOGGER.info("Check for dataset \"{}\" failed. It won't be available.", name);
                return false;
            }
            return true;
        } catch (Exception e) {
            // A check that can not be executed is handled like a failed check,
            // as announced by the log message.
            LOGGER.error("Error while trying to run check for dataset \"" + name + "\". Returning null.", e);
            return false;
        }
    }

    /**
     * Builds the key {@code <prefix>.<annotatorKey>.<suffix>} using the given
     * builder. The builder is emptied again before the method returns so that
     * it can be reused for the next key.
     *
     * @param keyBuilder
     *            the builder used to concatenate the key; expected to be empty
     * @param annotatorKey
     *            the key segment identifying the dataset
     * @param suffix
     *            the suffix identifying the property
     * @return the complete configuration key
     */
    protected static String buildKey(StringBuilder keyBuilder, String annotatorKey, String suffix) {
        keyBuilder.append(ANNOTATOR_CONFIGURATION_PREFIX);
        keyBuilder.append('.');
        keyBuilder.append(annotatorKey);
        keyBuilder.append('.');
        keyBuilder.append(suffix);
        String key = keyBuilder.toString();
        keyBuilder.setLength(0);
        return key;
    }
}