/**
* This file is part of General Entity Annotator Benchmark.
*
* General Entity Annotator Benchmark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* General Entity Annotator Benchmark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with General Entity Annotator Benchmark. If not, see <http://www.gnu.org/licenses/>.
*/
package org.aksw.gerbil.web.config;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.aksw.gerbil.config.GerbilConfiguration;
import org.aksw.gerbil.dataset.check.EntityCheckerManager;
import org.aksw.gerbil.dataset.check.impl.EntityCheckerManagerImpl;
import org.aksw.gerbil.dataset.check.impl.FileBasedCachingEntityCheckerManager;
import org.aksw.gerbil.dataset.check.impl.HttpBasedEntityChecker;
import org.aksw.gerbil.dataset.check.impl.InMemoryCachingEntityCheckerManager;
import org.aksw.gerbil.dataset.check.index.IndexBasedEntityChecker;
import org.aksw.gerbil.datatypes.ExperimentType;
import org.aksw.gerbil.evaluate.EvaluatorFactory;
import org.aksw.gerbil.exceptions.GerbilException;
import org.aksw.gerbil.execute.AnnotatorOutputWriter;
import org.aksw.gerbil.semantic.kb.SimpleWhiteListBasedUriKBClassifier;
import org.aksw.gerbil.semantic.kb.UriKBClassifier;
import org.aksw.gerbil.semantic.sameas.SameAsRetriever;
import org.aksw.gerbil.semantic.sameas.SingleUriSameAsRetriever;
import org.aksw.gerbil.semantic.sameas.impl.CrawlingSameAsRetrieverDecorator;
import org.aksw.gerbil.semantic.sameas.impl.DomainBasedSameAsRetrieverManager;
import org.aksw.gerbil.semantic.sameas.impl.ErrorFixingSameAsRetriever;
import org.aksw.gerbil.semantic.sameas.impl.UriEncodingHandlingSameAsRetriever;
import org.aksw.gerbil.semantic.sameas.impl.UriFilteringSameAsRetrieverDecorator;
import org.aksw.gerbil.semantic.sameas.impl.cache.FileBasedCachingSameAsRetriever;
import org.aksw.gerbil.semantic.sameas.impl.cache.InMemoryCachingSameAsRetriever;
import org.aksw.gerbil.semantic.sameas.impl.http.HTTPBasedSameAsRetriever;
import org.aksw.gerbil.semantic.sameas.impl.index.IndexBasedSameAsRetriever;
import org.aksw.gerbil.semantic.sameas.impl.wiki.WikiDbPediaBridgingSameAsRetriever;
import org.aksw.gerbil.semantic.sameas.impl.wiki.WikipediaApiBasedSingleUriSameAsRetriever;
import org.aksw.gerbil.semantic.subclass.ClassHierarchyLoader;
import org.aksw.gerbil.semantic.subclass.SimpleSubClassInferencer;
import org.aksw.gerbil.semantic.subclass.SubClassInferencer;
import org.aksw.gerbil.utils.ConsoleLogger;
import org.aksw.simba.topicmodeling.concurrent.overseers.pool.DefeatableOverseer;
import org.aksw.simba.topicmodeling.concurrent.overseers.pool.ExecutorBasedOverseer;
import org.aksw.simba.topicmodeling.concurrent.reporter.LogReporter;
import org.aksw.simba.topicmodeling.concurrent.reporter.Reporter;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.configuration.ConversionException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.PropertySource;
import org.springframework.context.support.PropertySourcesPlaceholderConfigurer;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
/**
* This is the root
* {@link org.springframework.context.annotation.Configuration Configuration}
* class that is processed by the Spring framework and performs the following
* configurations:
* <ul>
* <li>Loads the properties file "gerbil.properties"</li>
* <li>Starts a component scan inside the package
* <code>org.aksw.gerbil.web.config</code> searching for other
* {@link org.springframework.context.annotation.Configuration Configuration}s</li>
* <li>Replaces the streams used by <code>System.out</code> and
* <code>System.err</code> by two {@link ConsoleLogger} objects. (This is a very
* ugly workaround that should be fixed in the near future)</li>
* </ul>
*
* @author Michael Röder (roeder@informatik.uni-leipzig.de)
* @author Lars Wesemann
* @author Didier Cherix
*
*/
@SuppressWarnings("deprecation")
@org.springframework.context.annotation.Configuration
@ComponentScan(basePackages = "org.aksw.gerbil.web.config")
@PropertySource("gerbil.properties")
public class RootConfig {
private static final Logger LOGGER = LoggerFactory.getLogger(RootConfig.class);
private static final int DEFAULT_NUMBER_OF_WORKERS = 20;
private static final String NUMBER_OF_WORKERS_KEY = "org.aksw.gerbil.web.config.overseerWorkers";
private static final String SAME_AS_CACHE_FILE_KEY = "org.aksw.gerbil.semantic.sameas.CachingSameAsRetriever.cacheFile";
private static final String SAME_AS_IN_MEMORY_CACHE_SIZE_KEY = "org.aksw.gerbil.semantic.sameas.InMemoryCachingSameAsRetriever.cacheSize";
private static final String ANNOTATOR_OUTPUT_WRITER_USAGE_KEY = "org.aksw.gerbil.execute.AnnotatorOutputWriter.printAnnotatorResults";
private static final String ANNOTATOR_OUTPUT_WRITER_DIRECTORY_KEY = "org.aksw.gerbil.execute.AnnotatorOutputWriter.outputDirectory";
private static final String HTTP_SAME_AS_RETRIEVAL_DOMAIN_KEY = "org.aksw.gerbil.semantic.sameas.impl.http.HTTPBasedSameAsRetriever.domain";
private static final String ENTITY_CHECKING_MANAGER_USE_PERSISTENT_CACHE_KEY = "org.aksw.gerbil.dataset.check.EntityCheckerManagerImpl.usePersistentCache";
private static final String ENTITY_CHECKING_MANAGER_PERSISTENT_CACHE_FILE_NAME_KEY = "org.aksw.gerbil.dataset.check.FileBasedCachingEntityCheckerManager.cacheFile";
private static final String ENTITY_CHECKING_MANAGER_PERSISTENT_CACHE_DURATION_KEY = "org.aksw.gerbil.dataset.check.FileBasedCachingEntityCheckerManager.cacheDuration";
private static final String ENTITY_CHECKING_MANAGER_IN_MEM_CACHE_SIZE_KEY = "org.aksw.gerbil.dataset.check.InMemoryCachingEntityCheckerManager.cacheSize";
private static final String ENTITY_CHECKING_MANAGER_IN_MEM_CACHE_DURATION_KEY = "org.aksw.gerbil.dataset.check.InMemoryCachingEntityCheckerManager.cacheDuration";
private static final String HTTP_BASED_ENTITY_CHECKING_NAMESPACE_KEY = "org.aksw.gerbil.dataset.check.HttpBasedEntityChecker.namespace";
private static final String INDEX_BASED_ENTITY_CHECKING_CONFIG_KEY_START = "org.aksw.gerbil.dataset.check.IndexBasedEntityChecker";
private static final String WIKIPEDIA_BASED_SAME_AS_RETRIEVAL_DOMAIN_KEY = "org.aksw.gerbil.semantic.sameas.impl.wiki.WikipediaApiBasedSingleUriSameAsRetriever.domain";
private static final String SAME_AS_RETRIEVAL_DOMAIN_BLACKLIST_KEY = "org.aksw.gerbil.semantic.sameas.impl.UriFilteringSameAsRetrieverDecorator.domainBlacklist";
private static final String INDEXED_BASED_SAME_AS_RETRIEVER_FOLDER_KEY = "org.aksw.gerbil.semantic.sameas.impl.index.IndexBasedSameAsRetriever.folder";
private static final String INDEXED_BASED_SAME_AS_RETRIEVER_DOMAIN_KEY = "org.aksw.gerbil.semantic.sameas.impl.index.IndexBasedSameAsRetriever.domain";
private static final String AVAILABLE_EXPERIMENT_TYPES_KEY = "org.aksw.gerbil.web.MainController.availableExperimentTypes";
private static final String DEFAULT_WELL_KNOWN_KBS_PARAMETER_KEY = "org.aksw.gerbil.evaluate.DefaultWellKnownKB";
static @Bean public PropertySourcesPlaceholderConfigurer myPropertySourcesPlaceholderConfigurer() {
PropertySourcesPlaceholderConfigurer p = new PropertySourcesPlaceholderConfigurer();
Resource[] resourceLocations = new Resource[] { new ClassPathResource("gerbil.properties"), };
p.setLocations(resourceLocations);
return p;
}
public static @Bean DefeatableOverseer createOverseer() {
int numberOfWorkers = DEFAULT_NUMBER_OF_WORKERS;
if (GerbilConfiguration.getInstance().containsKey(NUMBER_OF_WORKERS_KEY)) {
try {
numberOfWorkers = GerbilConfiguration.getInstance().getInt(NUMBER_OF_WORKERS_KEY);
} catch (Exception e) {
LOGGER.warn("Couldn't load number of workers from config. Using the default number.", e);
}
} else {
LOGGER.warn("Couldn't load number of workers from config. Using the default number.");
}
DefeatableOverseer overseer = new ExecutorBasedOverseer(numberOfWorkers);
@SuppressWarnings("unused")
Reporter reporter = new LogReporter(overseer);
return overseer;
}
public static @Bean SubClassInferencer createSubClassInferencer() {
Model classModel = ModelFactory.createDefaultModel();
String hierarchyFiles[] = GerbilConfiguration.getInstance()
.getStringArray("org.aksw.gerbil.semantic.subclass.SubClassInferencer.classHierarchyFiles");
ClassHierarchyLoader loader = new ClassHierarchyLoader();
for (int i = 0; i < hierarchyFiles.length; i += 3) {
try {
loader.loadClassHierarchy(new File(hierarchyFiles[i]), hierarchyFiles[i + 1], hierarchyFiles[i + 2],
classModel);
} catch (IOException e) {
LOGGER.error("Got an exception while trying to load the class hierarchy from the file \""
+ hierarchyFiles[i] + "\" encoded with \"" + hierarchyFiles[i + 1] + "\" using the base URI \""
+ hierarchyFiles[i + 2] + "\".", e);
}
}
return new SimpleSubClassInferencer(classModel);
}
public static @Bean SameAsRetriever createSameAsRetriever() {
DomainBasedSameAsRetrieverManager retrieverManager = new DomainBasedSameAsRetrieverManager();
retrieverManager.addStaticRetriever(new ErrorFixingSameAsRetriever());
retrieverManager.addStaticRetriever(new UriEncodingHandlingSameAsRetriever());
// HTTP based same as retrieval
HTTPBasedSameAsRetriever httpRetriever = null;
if (GerbilConfiguration.getInstance().containsKey(HTTP_SAME_AS_RETRIEVAL_DOMAIN_KEY)) {
httpRetriever = new HTTPBasedSameAsRetriever();
for (String domain : GerbilConfiguration.getInstance().getStringArray(HTTP_SAME_AS_RETRIEVAL_DOMAIN_KEY)) {
retrieverManager.addDomainSpecificRetriever(domain, httpRetriever);
}
}
// If there is an index based same as retriever available
if (GerbilConfiguration.getInstance().containsKey(INDEXED_BASED_SAME_AS_RETRIEVER_FOLDER_KEY)) {
SameAsRetriever retriever;
try {
retriever = new IndexBasedSameAsRetriever(
GerbilConfiguration.getInstance().getString(INDEXED_BASED_SAME_AS_RETRIEVER_FOLDER_KEY));
} catch (GerbilException e) {
LOGGER.error("Could not load Index Retriever. using HTTPBasedSameAs Retriever instead");
if (httpRetriever == null) {
retriever = new HTTPBasedSameAsRetriever();
} else {
retriever = httpRetriever;
}
}
for (String domain : GerbilConfiguration.getInstance()
.getStringArray(INDEXED_BASED_SAME_AS_RETRIEVER_DOMAIN_KEY)) {
retrieverManager.addDomainSpecificRetriever(domain, retriever);
}
}
// Wikipedia API based same as retrieval
if (GerbilConfiguration.getInstance().containsKey(WIKIPEDIA_BASED_SAME_AS_RETRIEVAL_DOMAIN_KEY)) {
SingleUriSameAsRetriever singleRetriever = new WikipediaApiBasedSingleUriSameAsRetriever();
for (String domain : GerbilConfiguration.getInstance()
.getStringArray(WIKIPEDIA_BASED_SAME_AS_RETRIEVAL_DOMAIN_KEY)) {
retrieverManager.addDomainSpecificRetriever(domain, singleRetriever);
}
}
// Wikipedia to DBpedia URI translation
(new WikiDbPediaBridgingSameAsRetriever()).addToManager(retrieverManager);
// The manager is ready
SameAsRetriever sameAsRetriever = retrieverManager;
// same as retrieval domain blacklist
if (GerbilConfiguration.getInstance().containsKey(SAME_AS_RETRIEVAL_DOMAIN_BLACKLIST_KEY)) {
sameAsRetriever = new UriFilteringSameAsRetrieverDecorator(sameAsRetriever,
GerbilConfiguration.getInstance().getStringArray(SAME_AS_RETRIEVAL_DOMAIN_BLACKLIST_KEY));
}
// same as crawling
sameAsRetriever = new CrawlingSameAsRetrieverDecorator(sameAsRetriever);
SameAsRetriever decoratedRetriever = null;
if (GerbilConfiguration.getInstance().containsKey(SAME_AS_CACHE_FILE_KEY)) {
decoratedRetriever = FileBasedCachingSameAsRetriever.create(sameAsRetriever, false,
new File(GerbilConfiguration.getInstance().getString(SAME_AS_CACHE_FILE_KEY)));
}
if (decoratedRetriever == null) {
LOGGER.warn("Couldn't create file based cache for sameAs retrieving. Trying to create in Memory cache.");
if (GerbilConfiguration.getInstance().containsKey(SAME_AS_IN_MEMORY_CACHE_SIZE_KEY)) {
try {
int cacheSize = GerbilConfiguration.getInstance().getInt(SAME_AS_IN_MEMORY_CACHE_SIZE_KEY);
decoratedRetriever = new InMemoryCachingSameAsRetriever(sameAsRetriever, cacheSize);
} catch (ConversionException e) {
LOGGER.warn(
"Exception while trying to load parameter \"" + SAME_AS_IN_MEMORY_CACHE_SIZE_KEY + "\".",
e);
}
}
if (decoratedRetriever == null) {
LOGGER.info("Using default cache size for sameAs link in memory cache.");
sameAsRetriever = new InMemoryCachingSameAsRetriever(sameAsRetriever);
} else {
sameAsRetriever = decoratedRetriever;
decoratedRetriever = null;
}
} else {
sameAsRetriever = decoratedRetriever;
decoratedRetriever = null;
}
return sameAsRetriever;
}
public static @Bean EvaluatorFactory createEvaluatorFactory(SubClassInferencer inferencer) {
return new EvaluatorFactory(inferencer);
}
public static AnnotatorOutputWriter getAnnotatorOutputWriter() {
if (GerbilConfiguration.getInstance().containsKey(ANNOTATOR_OUTPUT_WRITER_USAGE_KEY)
&& GerbilConfiguration.getInstance().getBoolean(ANNOTATOR_OUTPUT_WRITER_USAGE_KEY)
&& GerbilConfiguration.getInstance().containsKey(ANNOTATOR_OUTPUT_WRITER_DIRECTORY_KEY)) {
return new AnnotatorOutputWriter(
GerbilConfiguration.getInstance().getString(ANNOTATOR_OUTPUT_WRITER_DIRECTORY_KEY));
} else {
return null;
}
}
@SuppressWarnings("unchecked")
public static @Bean EntityCheckerManager getEntityCheckerManager() {
EntityCheckerManager manager = null;
Configuration config = GerbilConfiguration.getInstance();
if (config.containsKey(ENTITY_CHECKING_MANAGER_USE_PERSISTENT_CACHE_KEY)
&& config.getBoolean(ENTITY_CHECKING_MANAGER_USE_PERSISTENT_CACHE_KEY)
&& config.containsKey(ENTITY_CHECKING_MANAGER_PERSISTENT_CACHE_DURATION_KEY)) {
LOGGER.info("Using file based cache for entity checking.");
try {
long duration = config.getLong(ENTITY_CHECKING_MANAGER_PERSISTENT_CACHE_DURATION_KEY);
String cacheFile = config.getString(ENTITY_CHECKING_MANAGER_PERSISTENT_CACHE_FILE_NAME_KEY);
manager = FileBasedCachingEntityCheckerManager.create(duration, new File(cacheFile));
} catch (ConversionException e) {
LOGGER.error("Exception while parsing parameter.", e);
}
}
if ((manager == null) && config.containsKey(ENTITY_CHECKING_MANAGER_IN_MEM_CACHE_SIZE_KEY)
&& config.containsKey(ENTITY_CHECKING_MANAGER_IN_MEM_CACHE_DURATION_KEY)) {
LOGGER.info("Using in-memory based cache for entity checking.");
try {
int cacheSize = config.getInt(ENTITY_CHECKING_MANAGER_IN_MEM_CACHE_SIZE_KEY);
long duration = config.getLong(ENTITY_CHECKING_MANAGER_IN_MEM_CACHE_DURATION_KEY);
manager = new InMemoryCachingEntityCheckerManager(cacheSize, duration);
} catch (Exception e) {
LOGGER.error("Exception while parsing parameter. Creating default EntityCheckerManagerImpl.", e);
manager = new EntityCheckerManagerImpl();
}
}
if (manager == null) {
manager = new EntityCheckerManagerImpl();
}
List<String> namespaces = config.getList(HTTP_BASED_ENTITY_CHECKING_NAMESPACE_KEY);
if (!namespaces.isEmpty()) {
for (String namespace : namespaces) {
manager.registerEntityChecker(namespace.toString(), new HttpBasedEntityChecker(namespace.toString()));
}
}
@SuppressWarnings("rawtypes")
Iterator keyIterator = config.getKeys(INDEX_BASED_ENTITY_CHECKING_CONFIG_KEY_START);
while (keyIterator.hasNext()) {
String key = keyIterator.next().toString();
namespaces = config.getList(key);
if (!namespaces.isEmpty()) {
// the first "namespace" is the directory of the index
IndexBasedEntityChecker indexBasedChecker = IndexBasedEntityChecker.create(namespaces.get(0));
if (indexBasedChecker != null) {
boolean first = true;
for (String namespace : namespaces) {
if (first) {
first = false;
} else {
manager.registerEntityChecker(namespace.toString(), indexBasedChecker);
}
}
} else {
LOGGER.error(
"Couldn't create index based entity checker for index \"{}\". Creating HTTP based checker.",
namespaces.get(0));
// use HTTP based checker
for (String namespace : namespaces) {
manager.registerEntityChecker(namespace.toString(),
new HttpBasedEntityChecker(namespace.toString()));
}
}
}
}
return manager;
}
public static ExperimentType[] getAvailableExperimentTypes() {
Configuration config = GerbilConfiguration.getInstance();
Set<ExperimentType> types = new HashSet<ExperimentType>();
if (config.containsKey(AVAILABLE_EXPERIMENT_TYPES_KEY)) {
String typeNames[] = config.getStringArray(AVAILABLE_EXPERIMENT_TYPES_KEY);
ExperimentType type = null;
for (int i = 0; i < typeNames.length; ++i) {
try {
type = ExperimentType.valueOf(typeNames[i]);
types.add(type);
} catch (IllegalArgumentException e) {
LOGGER.warn(
"Couldn't find the experiment type \"{}\" defined in the properties file. It will be ignored.",
typeNames[i]);
}
}
}
if (types.size() == 0) {
LOGGER.error(
"Couldn't load the list of available experiment types. This GERBIL instance won't work as expected. Please define a list of experiment types using the {} key in the configuration file.",
AVAILABLE_EXPERIMENT_TYPES_KEY);
return new ExperimentType[0];
} else {
ExperimentType typesArray[] = types.toArray(new ExperimentType[types.size()]);
Arrays.sort(typesArray);
return typesArray;
}
}
public static UriKBClassifier createDefaultUriKBClassifier() {
return new SimpleWhiteListBasedUriKBClassifier(loadDefaultKBs());
}
public static String[] loadDefaultKBs() {
String kbs[] = GerbilConfiguration.getInstance().getStringArray(DEFAULT_WELL_KNOWN_KBS_PARAMETER_KEY);
if (kbs == null) {
LOGGER.error("Couldn't load the list of well known KBs. This GERBIL instance might not work as expected!");
}
return kbs;
}
public static int getNoOfWorkers() {
int numberOfWorkers = DEFAULT_NUMBER_OF_WORKERS;
if (GerbilConfiguration.getInstance().containsKey(NUMBER_OF_WORKERS_KEY)) {
try {
numberOfWorkers = GerbilConfiguration.getInstance().getInt(NUMBER_OF_WORKERS_KEY);
} catch (Exception e) {
// LOGGER.warn("Couldn't load number of workers from config.
// Using the default number.", e);
}
} else {
// LOGGER.warn("Couldn't load number of workers from config. Using
// the default number.");
}
return numberOfWorkers;
}
}