/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.entityhub.indexing.core;
import java.io.File;
import java.io.IOException;
import java.util.List;
import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
import org.apache.stanbol.entityhub.indexing.core.config.IndexingConstants;
import org.apache.stanbol.entityhub.indexing.core.impl.IndexerImpl;
import org.apache.stanbol.entityhub.indexing.core.normaliser.ScoreNormaliser;
import org.apache.stanbol.entityhub.indexing.core.source.EntityIneratorToScoreProviderAdapter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Factory used to create {@link Indexer} instances
* @author Rupert Westenthaler
*
*/
public final class IndexerFactory {
private static final Logger log = LoggerFactory.getLogger(IndexerFactory.class);
/**
* singleton instance
*/
private static IndexerFactory instance = null;
/**
* Singleton constructor
*/
private IndexerFactory(){
//Singleton pattern
}
/**
* Getter for the singleton of this factory
* @return the singleton
*/
public static IndexerFactory getInstance(){
if(instance == null){
instance = new IndexerFactory();
}
return instance;
}
/**
* Creates an {@link Indexer} instance based on the configuration
* relative to the working directory.<p>
* The configuration is expected within "{workingdir}/indexing"
* @return The configured Indexer or an Exception when the configuration was
* not found or is not valid
*/
public Indexer create() {
return create(null);
}
/**
* Creates an {@link Indexer} instance based on the configuration. The
* configuration is expected within the "/indexing" directory of the parsed
* location.<p>
* In case a relative path is parsed the current working directory is used
* as context. That means that the configuration is expected within
* folder "{workingDir}/{parsedPath}/indexing". For absolute paths the
* configuration is expected at "{parsedPath}/indexing".
* @return The configured Indexer or an Exception when the configuration was
* not found or is not valid
*/
public Indexer create(String dir){
return create(dir, null);
}
/**
* Internally used for unit testing. Allows to parse an offset for loading
* the indexer configuration from the classpath. Currently a protected
* feature, but might be moved to the public API at a later point of time.
* (would allow to include multiple default configurations via the
* classpath).
* @param dir
* @param classpathOffset
* @return
*/
protected Indexer create(String dir,String classpathOffset){
Indexer indexer;
IndexingConfig config;
if(classpathOffset != null){
config= new IndexingConfig(dir,classpathOffset){};
} else {
config= new IndexingConfig(dir);
}
//get the mode based on the configured IndexingComponents
String name = config.getName();
EntityDataIterable dataIterable = config.getDataIterable();
EntityIterator idIterator = config.getEntityIdIterator();
EntityDataProvider dataProvider = config.getEntityDataProvider();
EntityScoreProvider scoreProvider = config.getEntityScoreProvider();
IndexingDestination destination = config.getIndexingDestination();
if(destination == null){
log.error("The indexing configuration does not provide an " +
"indexing destination. This needs to be configured by the key " +
"'{}' in the indexing.properties within the directory {}",
IndexingConstants.KEY_INDEXING_DESTINATION,config.getConfigFolder());
throw new IllegalArgumentException("No IndexingDestination present");
}
List<EntityProcessor> processors = config.getEntityProcessors();
if(processors == null){
log.error("The indexing configuration does not provide an " +
"entity processor. This needs to be configured by the key " +
"'{}' in the indexing.properties within the directory {}",
IndexingConstants.KEY_ENTITY_PROCESSOR,config.getConfigFolder());
}
List<EntityProcessor> postProcessors = config.getEntityPostProcessors();
log.info("Present Source Configuration:");
log.info(" - EntityDataIterable: {}",dataIterable);
log.info(" - EntityIterator: {}",idIterator);
log.info(" - EntityDataProvider: {}",dataProvider);
log.info(" - EntityScoreProvider: {}",scoreProvider);
log.info(" - EntityProcessors ({}):",processors.size());
if(postProcessors != null){
log.info(" - EntityPostProcessors ({}):",postProcessors.size());
}
int i=0;
for(EntityProcessor processor : processors){
i++;
log.info(" {}) {}",i,processor);
}
if(dataIterable != null && scoreProvider != null){
// iterate over data and lookup scores
indexer = new IndexerImpl(name, dataIterable, scoreProvider,
config.getNormaliser(),destination, processors,
config.getIndexedEntitiesIdsFile(),postProcessors);
} else if(idIterator != null && dataProvider != null){
// iterate over id and lookup data
indexer = new IndexerImpl(name, idIterator,dataProvider,
config.getNormaliser(),destination, processors,
config.getIndexedEntitiesIdsFile(),postProcessors);
} else if(dataIterable != null && idIterator != null){
// create an EntityIterator to EntityScoreProvider adapter
log.info(
"Create Adapter from the configured EntityIterator '{}' to the " +
"required EntityScoreProvider as needed together with the " +
"configured EntityDataIterable '{}'",
idIterator.getClass(), dataIterable.getClass());
indexer = new IndexerImpl(config.getName(), dataIterable,
new EntityIneratorToScoreProviderAdapter(idIterator),
config.getNormaliser(),destination, processors,
config.getIndexedEntitiesIdsFile(),postProcessors);
} else {
log.error("Invalid Indexing Source configuration: ");
log.error(" - To iterate over the data and lookup scores one need to " +
"configure an EntityDataIterable and an EntityScoreProvider ");
log.error(" - To iterate over the Id and and lookup data one need to " +
"configure an EntityIterator and an EntityDataProvider");
throw new IllegalArgumentException("Invalid Indexing Source configuration");
}
return indexer;
}
public Indexer create(String name, EntityIterator idIterator, EntityDataProvider dataProvider,
ScoreNormaliser normaliser,
List<EntityProcessor> processors, IndexingDestination destination){
return new IndexerImpl(name, idIterator, dataProvider, normaliser,destination, processors,null,null);
}
public Indexer create(String name, EntityIterator idIterator, EntityDataProvider dataProvider,
ScoreNormaliser normaliser,
List<EntityProcessor> processors, List<EntityProcessor> postProcessors,
IndexingDestination destination){
File tmp;
try {
tmp = File.createTempFile("ind-ent-ids",".zip");
tmp.deleteOnExit();
} catch (IOException e) {
throw new IllegalStateException("Unable to create temporary file for storing the" +
"indexed Entity IDs",e);
}
return new IndexerImpl(name, idIterator, dataProvider, normaliser,destination, processors,
tmp,postProcessors);
}
public Indexer create(String name, EntityDataIterable dataIterable,EntityScoreProvider scoreProvider,
ScoreNormaliser normaliser,
List<EntityProcessor> processors, IndexingDestination destination){
return new IndexerImpl(name, dataIterable, scoreProvider, normaliser,destination, processors,null,null);
}
public Indexer create(String name, EntityDataIterable dataIterable,EntityScoreProvider scoreProvider,
ScoreNormaliser normaliser,
List<EntityProcessor> processors, List<EntityProcessor> postProcessors,
IndexingDestination destination){
File tmp;
try {
tmp = File.createTempFile("ind-ent-ids",".zip");
tmp.deleteOnExit();
} catch (IOException e) {
throw new IllegalStateException("Unable to create temporary file for storing the" +
"indexed Entity IDs",e);
}
return new IndexerImpl(name, dataIterable, scoreProvider, normaliser,destination, processors,
tmp,postProcessors);
}
}