/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.entityhub.indexing.destination.solryard;
import static org.apache.stanbol.entityhub.yard.solr.impl.SolrYardConfig.ALLOW_INITIALISATION_STATE;
import static org.apache.stanbol.entityhub.yard.solr.impl.SolrYardConfig.DOCUMENT_BOOST_FIELD;
import static org.apache.stanbol.entityhub.yard.solr.impl.SolrYardConfig.MULTI_YARD_INDEX_LAYOUT;
import static org.apache.stanbol.entityhub.yard.solr.impl.SolrYardConfig.SOLR_SERVER_LOCATION;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Dictionary;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Map.Entry;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.CoreDescriptor;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.dataimport.SolrWriter;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
import org.apache.stanbol.entityhub.core.mapping.FieldMappingUtils;
import org.apache.stanbol.entityhub.core.site.CacheUtils;
import org.apache.stanbol.entityhub.indexing.core.IndexingDestination;
import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
import org.apache.stanbol.entityhub.indexing.core.destination.OsgiConfigurationUtil;
import org.apache.stanbol.entityhub.indexing.destination.solryard.fst.CorpusCreationInfo;
import org.apache.stanbol.entityhub.indexing.destination.solryard.fst.CorpusCreationTask;
import org.apache.stanbol.entityhub.indexing.destination.solryard.fst.FstConfig;
import org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapper;
import org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapping;
import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
import org.apache.stanbol.entityhub.servicesapi.yard.Yard;
import org.apache.stanbol.entityhub.servicesapi.yard.YardException;
import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixService;
import org.apache.stanbol.commons.solr.IndexReference;
import org.apache.stanbol.commons.solr.SolrConstants;
import org.apache.stanbol.commons.solr.managed.ManagedIndexConstants;
import org.apache.stanbol.commons.solr.managed.ManagedSolrServer;
import org.apache.stanbol.commons.solr.managed.standalone.DefaultStandaloneManagedSolrServerWrapper;
import org.apache.stanbol.commons.solr.managed.standalone.StandaloneEmbeddedSolrServerProvider;
import org.apache.stanbol.commons.solr.managed.standalone.StandaloneManagedSolrServer;
import org.apache.stanbol.entityhub.yard.solr.impl.SolrYard;
import org.apache.stanbol.entityhub.yard.solr.impl.SolrYardConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class SolrYardIndexingDestination implements IndexingDestination {
private static final Logger log = LoggerFactory.getLogger(SolrYardIndexingDestination.class);
/**
* The relative path to the Solr 'write.lock' file (relative to the Solr
* instance dir), using the separators of the local file system. This file
* is excluded from the Solr index archive built by
* {@link #writeSolrIndexArchive(IndexingConfig)} (see
* <a href="https://issues.apache.org/jira/browse/STANBOL-1176">STANBOL-1176</a>
* for more details).
*/
private static final String SOLR_WRITE_LOCK = FilenameUtils.separatorsToSystem("data/index/write.lock");
/**
* Parameter used to refer to the name of the properties file containing the
* field names as keys and the {@link Float} boost factors as values. By
* default no boosts are used for indexing.
*/
public static final String PARAM_FIELD_BOOST_CONFIG = "boosts";
/**
* Parameter used to explicitly set the name of the created SolrYard
* configuration. The default value is the name of the dataset
* (as returned by {@link IndexingConfig#getName()}) with "Index" appended.
*/
public static final String PARAM_YARD_NAME = "name";
/**
* Parameter used to explicitly set the name of the created Solr index. The
* default is the name of the dataset to be indexed as returned
* by {@link IndexingConfig#getName()}.
*/
public static final String PARAM_SOLR_INDEX_NAME = "indexName";
/**
* Parameter used to set the name of the directory used as root for the
* Solr index. The value is relative to the
* {@link IndexingConfig#getDestinationFolder()}. The default value is
* {@link #DEFAULT_SOLR_INDEX_DIRECTORY}.
*/
public static final String PARAM_SOLR_INDEX_DIRECTORY = "solrDir";
/**
* Parameter used to specify the name of the directory relative to the
* {@link IndexingConfig#getConfigFolder()} that contains the
* Solr configuration used for indexing. By default the configuration is
* searched in a folder with the name provided by
* {@link #PARAM_SOLR_INDEX_NAME} (that defaults to
* {@link IndexingConfig#getName()}). <p>
* Note that when this parameter is missing the configuration is
* optional (meaning that if it is not found the
* default Solr configuration is used). When this parameter is present, then
* the configuration is required and an {@link IllegalArgumentException} is
* thrown if it is not found.<p>
* To use the default location while marking the configuration as required
* one can add this parameter without a value.
*/
public static final String PARAM_SOLR_CONFIG = "solrConf";
/**
* The default value for the directory holding the Solr index. Set to
* {@link ManagedSolrServer#DEFAULT_SOLR_DATA_DIR}.
*/
public static final String DEFAULT_SOLR_INDEX_DIRECTORY =
ManagedSolrServer.DEFAULT_SOLR_DATA_DIR;
/**
* The field used to boost documents while indexing. This is set to
* {@link RdfResourceEnum#entityRank}.<p>
* NOTE: within this class the simple name <code>DOCUMENT_BOOST_FIELD</code>
* refers to this constant. It shadows the statically imported
* <code>SolrYardConfig.DOCUMENT_BOOST_FIELD</code>, which therefore needs to
* be referenced by its qualified name.
*/
public static final String DOCUMENT_BOOST_FIELD = RdfResourceEnum.entityRank.getUri();
/**
* The extension of the distribution file (the ZIP archive with the Solr index)
*/
public static final String SOLR_INDEX_ARCHIVE_EXTENSION = ".solrindex.zip";
/**
* The extension of the Solr index reference file
*/
public static final String SOLR_INDEX_ARCHIVE_REF_EXTENSION = ".solrindex.ref";
/**
* The ID of the OSGI component used by the SolrYard implementation
*/
public static final String SOLR_YARD_COMPONENT_ID = "org.apache.stanbol.entityhub.yard.solr.impl.SolrYard";
/**
* The default value for the {@link ManagedIndexConstants#SYNCHRONIZED}
* property added to the Solr index reference file. The default is set to
* <code>true</code>. This allows users to update the data for the
* ReferencedSite by simply replacing the solrindex archive in the
* <code>/datafile</code> folder.<p>
* This property can be configured by using the main "indexing.properties"
* file.
*/
public static final boolean DEFAULT_SYNCHRONIZED_STATE = true;
/**
* Parameter used to configure the name of the file containing the FST
* configuration.<p>
* If not present no FST models will be created in the {@link #finalise()}
* phase.
*/
public static final String FST_CONF = "fstConf";
/**
* Parameter used to configure the number of threads used to concurrently
* build FST models
*/
public static final String FST_THREADS = "fstThreads";
/**
* The number of FST threads used if none is configured or if the configured
* value is not a positive number.
*/
private static final int DEFAULT_FST_THREADS = 4;
/**
* The location of the Solr index on the local file system or
* <code>null</code> in case an external (remote) SolrServer is used.
* Also NOTE that this can be a different value than returned by calling
* {@link SolrYardConfig#getSolrServerLocation()} on {@link #solrYardConfig}
*/
private File solrIndexLocation;
/**
* Directory holding the specialised Solr configuration or <code>null</code>
* if the default configuration should be used
*/
private File solrIndexConfig;
/**
* The configuration used to instantiate the {@link SolrYard} returned by
* {@link #getYard()}.
*/
private SolrYardConfig solrYardConfig;
/**
* The {@link SolrYard} created by {@link #initialise()} and returned by
* {@link #getYard()}. <code>null</code> until {@link #initialise()} was called.
*/
private SolrYard solrYard;
/**
* File used to write the ZIP archive containing the solr index.
*/
private File solrArchive;
/**
* File used to write the properties file that refers to {@link #solrArchive}.
* This is typically included in distributions of huge indexes and will
* request the user to download the archive with the actual data.
*/
private File solrArchiveRef;
/**
* This provides metadata about what fields and languages are indexed in the
* created SolrIndex.
*/
private Collection<FieldMapping> indexFieldConfiguration;
/**
* The indexing configuration as parsed by {@link #setConfiguration(Map)}.
* Only if this is not <code>null</code> the distribution files are created
* by {@link #finalise()}.
*/
private IndexingConfig indexingConfig;
/*
* Fields required for the FST model creation
*/
/**
* The SolrCore used by the {@link #solrYard}. Set by {@link #initialise()}.
*/
private SolrCore core;
/**
* The FST configurations. Parsed in {@link #setConfiguration(Map)} (or set
* via {@link #setFstConfig(Collection)}) and built during
* {@link #finalise()}. <code>null</code> if no FST configuration is set.
*/
private List<FstConfig> fstConfigs;
/**
* The number of threads used to build FST models.
* Set in {@link #setConfiguration(Map)} or via {@link #setFstThreads(int)}
*/
private int fstThreads = DEFAULT_FST_THREADS;
/**
* The namespace prefix service passed to the {@link SolrYard} created in
* {@link #initialise()}.
*/
private NamespacePrefixService namespacePrefixService;
/**
 * Default constructor. Instances created this way are not usable until
 * {@link #setConfiguration(Map)} has been called to parse the required
 * configuration.
 */
public SolrYardIndexingDestination() {
    // all state is initialised by setConfiguration(Map)
}
/**
 * Creates a SolrYard based indexing destination that uses the default Solr
 * configuration, no index field configuration and no field boosts.
 * @param yardName the name of the SolrYard
 * @param solrLocation the location of the SolrYard
 * @param namespacePrefixService the namespace prefix service
 */
public SolrYardIndexingDestination(String yardName,
                                   String solrLocation,
                                   NamespacePrefixService namespacePrefixService) {
    this(yardName, solrLocation, null, null, null, namespacePrefixService);
}
/**
 * Constructs a SolrYard based IndexingTarget based on the parsed parameters.
 * The managed Solr directory is resolved relative to the working directory
 * (system property <code>user.dir</code>).
 * @param yardName the name of the SolrYard
 * @param parsedSolrLocation the location of the SolrYard
 * @param solrConfig directory holding the Solr schema used for the indexing or
 * <code>null</code> to use the default
 * @param indexFieldConfig The field and languages indexed in this index
 * @param fieldBoostMap A map containing field names as key and boost factors
 * as values. Parse <code>null</code> to use no boosts.
 * @param namespacePrefixService the namespace prefix service passed to the
 * SolrYard created by {@link #initialise()}
 * @throws IllegalArgumentException if the yard name or the Solr location is
 * <code>null</code> or empty, or if the NamespacePrefixService is <code>null</code>
 */
public SolrYardIndexingDestination(final String yardName,
                                   final String parsedSolrLocation,
                                   final String solrConfig,
                                   Collection<FieldMapping> indexFieldConfig,
                                   Map<String,Float> fieldBoostMap,
                                   NamespacePrefixService namespacePrefixService){
    if(yardName == null || yardName.isEmpty()){
        throw new IllegalArgumentException("Tha name of the Yard MUST NOT be NULL nor empty!");
    }
    if(parsedSolrLocation == null || parsedSolrLocation.isEmpty()){
        throw new IllegalArgumentException("Tha parsed Solr location MUST NOT be NULL nor empty!");
    }
    if(namespacePrefixService == null){
        throw new IllegalArgumentException("The parsed NamespacePrefixService MUST NOT be NULL!");
    }
    //store the validated service: initialise() passes this field to the SolrYard
    this.namespacePrefixService = namespacePrefixService;
    this.indexFieldConfiguration = indexFieldConfig;
    this.solrYardConfig = createSolrYardConfig(yardName, parsedSolrLocation);
    //init the managed solr directory relative to the working directory
    File managedDirectory = new File(System.getProperty("user.dir"), DEFAULT_SOLR_INDEX_DIRECTORY);
    //init the solr directory and validate the parsed values
    File[] solrDirectories = initSolrDirectories(parsedSolrLocation, solrConfig,
        managedDirectory);
    this.solrIndexLocation = solrDirectories[0];
    this.solrIndexConfig = solrDirectories[1];
    this.solrArchive = solrDirectories[2];
    this.solrArchiveRef = solrDirectories[3];
    //set Boost related stuff
    solrYardConfig.setDocumentBoostFieldName(DOCUMENT_BOOST_FIELD);
    if(fieldBoostMap != null){
        solrYardConfig.setFieldBoosts(fieldBoostMap);
    }
}
/**
 * Processes the parsed solr index location (may be an URL, an absolute path
 * or a relative one) and the optional solr schema configuration (only valid
 * in case a relative path was parsed as location) and does all the
 * initialisation work (including setting the
 * {@link ManagedSolrServer#MANAGED_SOLR_DIR_PROPERTY} system property in
 * case of a relative path).
 * @param parsedSolrLocation the parsed location of the SolrServer (may be
 * an URL, an absolute path or a relative one)
 * @param solrConfig the path to the directory holding the configuration
 * for the Solr index used for the indexing or <code>null</code> to use the
 * default (only supported in case parsedSolrLocation is a relative path)
 * @param managedDirectory the directory used to manage the Solr index (only
 * needed in case parsedSolrLocation is a relative path)
 * @return An array with the length 4 where index <ul>
 * <li>"0" contains the File pointing to the directory holding the
 * index on the local file system
 * <li>"1" contains the File pointing to the directory containing the
 * configuration used to initialise the index.
 * <li>"2" contains the File used to create the compressed ZIP archive with
 * the indexed data
 * <li>"3" contains the File used to create the properties file used to link
 * to the Solr index archive.
 * </ul>
 * Entries will be <code>null</code> if the values are not applicable to
 * the current configuration.
 * @throws IllegalArgumentException if a Solr configuration is parsed for a
 * remote server or an absolute path, or if the parsed Solr configuration is
 * not a directory
 * @throws IllegalStateException if the location is a relative path and the
 * managed directory is <code>null</code>
 */
private File[] initSolrDirectories(final String parsedSolrLocation,
                                   final String solrConfig,
                                   File managedDirectory) {
    File solrIndexLocation;
    File solrConfigLocation;
    File solrIndexArchive;
    File solrIndexArchiveRef;
    //set the SolrLocation and init the managed Solr directory system property
    //in case the solrLocation is not a remote SolrServer
    if(parsedSolrLocation.startsWith("http")
            && parsedSolrLocation.indexOf("://") > 0){ //matches http[s]://{host}
        solrIndexLocation = null;
        if(solrConfig != null){
            //rather throw an error as indexing for some hours to an index
            //with the wrong schema!
            throw new IllegalArgumentException(String.format(
                "Parsing special Solr Configurations (directory=%s) is not " +
                "supported for remote SolrServer (url=%s)",
                solrConfig, parsedSolrLocation));
        }
        solrConfigLocation = null; //no configuration supported
        solrIndexArchive = null;
        solrIndexArchiveRef = null;
    } else { // local Directory
        File parsedSolrLocationFile = new File(parsedSolrLocation);
        if(parsedSolrLocationFile.isAbsolute()){ //if absolute
            //-> assume an already configured Solr index
            solrIndexLocation = parsedSolrLocationFile;
            if(solrConfig != null){
                throw new IllegalArgumentException(String.format(
                    "Parsing special Solr Configurations (directory=%s) is not " +
                    "supported for Embedded SolrServer configured via an absolute " +
                    "file path (path=%s)", solrConfig, parsedSolrLocation));
            }
            solrConfigLocation = null; //no solr conf supported
        } else { //relative path -> init the Solr directory
            //set the managed directory
            if(managedDirectory == null){
                throw new IllegalStateException("In case the Solr index location "+
                    "is a relative path the parsed managed directory MUST NOT be NULL!");
            }
            System.setProperty(ManagedSolrServer.MANAGED_SOLR_DIR_PROPERTY,
                managedDirectory.getAbsolutePath());
            //add the name of the core and save it to solrLocation
            //TODO: get the name of the default server somehow ...
            File serverLocation = new File(managedDirectory,"default");
            solrIndexLocation = new File(serverLocation,parsedSolrLocation);
            //check if there is a special Solr configuration
            if(solrConfig != null){
                solrConfigLocation = new File(solrConfig);
                if(!solrConfigLocation.isDirectory()){
                    throw new IllegalArgumentException("The parsed Solr Configuration "+
                        solrConfigLocation+" does not exist or is not an direcotry!");
                } //else the directory exists ...
                //lets assume it is a valid configuration
                //otherwise an exception will be thrown in initialise().
            } else {
                solrConfigLocation = null; //no configuration parsed
            }
        }
        //only the names of these files are used later on; the target folders
        //are determined when the files are actually written
        solrIndexArchive = new File(solrIndexLocation.getName()+SOLR_INDEX_ARCHIVE_EXTENSION);
        solrIndexArchiveRef = new File(solrIndexLocation.getName()+SOLR_INDEX_ARCHIVE_REF_EXTENSION);
    }
    return new File[]{solrIndexLocation,solrConfigLocation,
        solrIndexArchive,solrIndexArchiveRef};
}
/**
 * Reads the configuration of this indexing destination: the
 * {@link IndexingConfig} (required), the yard and index names, the Solr
 * configuration and index directories, the optional field boosts and the
 * optional FST configuration.
 * @param config the configuration. MUST contain the {@link IndexingConfig}
 * under the key {@link IndexingConfig#KEY_INDEXING_CONFIG}
 * @throws IllegalArgumentException if the {@link IndexingConfig} is missing,
 * a required Solr configuration or FST configuration file is not found, or
 * a field boost or the FST thread number can not be parsed
 */
@Override
public void setConfiguration(Map<String,Object> config) {
    Object indexingConfigValue = config.get(IndexingConfig.KEY_INDEXING_CONFIG);
    if(!(indexingConfigValue instanceof IndexingConfig)){
        //fail with a meaningful message instead of a NPE/ClassCastException
        throw new IllegalArgumentException("The parsed configuration MUST contain "
            + "an IndexingConfig for the key '" + IndexingConfig.KEY_INDEXING_CONFIG
            + "' (value: " + indexingConfigValue + ")!");
    }
    indexingConfig = (IndexingConfig)indexingConfigValue;
    namespacePrefixService = indexingConfig.getNamespacePrefixService();
    String yardName;
    //read the Yard name configuration
    Object value = config.get(PARAM_YARD_NAME);
    if(value == null || value.toString().isEmpty()){
        yardName = indexingConfig.getName()+"Index";
    } else {
        yardName = value.toString();
    }
    //read the Solr index name configuration
    String indexName;
    value = config.get(PARAM_SOLR_INDEX_NAME);
    if(value == null || value.toString().isEmpty()){
        indexName = indexingConfig.getName();
    } else {
        indexName = value.toString();
    }
    this.solrYardConfig = createSolrYardConfig(yardName, indexName);
    this.solrYardConfig.setName("SolrIndex for "+indexingConfig.getName());
    //set the Index Field Configuration
    this.indexFieldConfiguration = indexingConfig.getIndexFieldConfiguration();
    //set a description for the yard
    if(indexingConfig.getDescription() != null){
        //reuse the description
        solrYardConfig.setDescription(indexingConfig.getDescription());
    } else {
        solrYardConfig.setDescription("SolrYard based Index for "+indexingConfig.getName());
    }
    //get the directory holding the solr configuration
    String solrConfig;
    if(!config.containsKey(PARAM_SOLR_CONFIG)){ //not present
        // -> the configuration is optional: use it if present, else the default
        File configDir = indexingConfig.getConfigFile(indexName);
        if(!configDir.isDirectory()){
            log.info("use default Solr index configuration for index "+indexName);
            solrConfig = null;
        } else {
            solrConfig = configDir.getAbsolutePath();
        }
    } else { //require the config
        value = config.get(PARAM_SOLR_CONFIG);
        if(value == null || value.toString().isEmpty()){
            value = indexName; //use the indexName as default
        }
        File configDir = indexingConfig.getConfigFile(value.toString());
        if(!configDir.isDirectory()){
            throw new IllegalArgumentException("Required Solr Configuration "+
                value.toString()+" not found within the config directory "+
                indexingConfig.getConfigFolder().getAbsolutePath());
        } else {
            solrConfig = configDir.getAbsolutePath();
        }
    }
    //init the managed directory within the destination folder
    //read the Solr directory configuration
    String solrDir;
    value = config.get(PARAM_SOLR_INDEX_DIRECTORY);
    if(value == null || value.toString().isEmpty()){
        solrDir = DEFAULT_SOLR_INDEX_DIRECTORY;
    } else {
        solrDir = value.toString();
    }
    File managedDirectory = new File(indexingConfig.getDestinationFolder(),solrDir);
    File[] solrDirectories = initSolrDirectories(indexName, solrConfig,
        managedDirectory);
    this.solrIndexLocation = solrDirectories[0];
    this.solrIndexConfig = solrDirectories[1];
    this.solrArchive = solrDirectories[2];
    this.solrArchiveRef = solrDirectories[3];
    //init other configurations
    solrYardConfig.setDocumentBoostFieldName(DOCUMENT_BOOST_FIELD);
    value = config.get(PARAM_FIELD_BOOST_CONFIG);
    if(value != null && !value.toString().isEmpty()){
        Map<String,Float> fieldBoosts = new HashMap<String,Float>();
        //load this configuration as required if set to cause an Exception
        //if not found! -> an exception is the better option as creating an
        //index with missing Field Boosts!
        for(Entry<String,Object> entry : indexingConfig.getConfig(value.toString(),true).entrySet()){
            try {
                fieldBoosts.put(entry.getKey(), Float.valueOf(entry.getValue().toString()));
            } catch (Exception e) {
                //throw exception for any invalid entry!
                throw new IllegalArgumentException(String.format(
                    "Unable to parse Field Boost entry from field %s and boost %s",
                    entry.getKey(),entry.getValue()),e);
            }
        }
        solrYardConfig.setFieldBoosts(fieldBoosts);
    }
    //read the FST config
    value = config.get(FST_CONF);
    if(value != null && !StringUtils.isBlank(value.toString())){
        File fstConfigFile = indexingConfig.getConfigFile(value.toString());
        if(!fstConfigFile.isFile()){
            throw new IllegalArgumentException(String.format(
                "Unable to find configured FST configuration file %s",
                fstConfigFile));
        }
        Collection<String> lines;
        try {
            lines = FileUtils.readLines(fstConfigFile, "UTF-8");
        } catch (IOException e) {
            throw new IllegalArgumentException(String.format(
                "Unable to read FST configuration file %s",
                fstConfigFile),e);
        }
        setFstConfig(lines);
    }
    //read the number of FST threads (keeps the current value if not configured)
    value = config.get(FST_THREADS);
    if(value instanceof Number){
        fstThreads = ((Number)value).intValue();
    } else if(value != null){
        try {
            fstThreads = Integer.parseInt(value.toString());
        }catch (NumberFormatException e) {
            throw new IllegalArgumentException("Unable to parse the FST thread number from "
                +value.toString(), e);
        }
    }
    if(fstThreads <= 0){ //non-positive values fall back to the default
        fstThreads = DEFAULT_FST_THREADS;
    }
}
/**
 * Sets the FST configurations from lines that use the same format as the
 * FST configuration file. The parsed configurations define the FST models
 * created in the {@link #finalise()} phase of the indexing process and are
 * stored as an unmodifiable list.
 * @param lines the FST configurations (one per line)
 */
public void setFstConfig(Collection<String> lines) {
    List<FstConfig> parsedConfigs = parseFstConfig(lines);
    this.fstConfigs = Collections.unmodifiableList(parsedConfigs);
}
/**
 * Sets the size of the thread pool used to create FST models.
 * @param size the number of threads. Values lower than <code>1</code>
 * select the default number of threads.
 */
public void setFstThreads(int size){
    this.fstThreads = size >= 1 ? size : DEFAULT_FST_THREADS;
}
/**
 * Getter for the number of threads used to build FST models.
 * @return the number of threads
 */
public int getFstThreads() {
    return this.fstThreads;
}
/**
 * Getter for the configurations of the FST models that are created in the
 * {@link #finalise()} phase.
 * @return the FST configurations or <code>null</code> if none are set
 */
public List<FstConfig> getFstConfig(){
    return this.fstConfigs;
}
/**
 * Parses FST configurations from the parsed lines. Empty lines and lines
 * starting with '#' are ignored. All other lines need to follow the syntax
 * <code>index={field};store={field}</code> where the <code>store</code>
 * parameter is optional. Keys are matched case insensitively and whitespace
 * around keys and values is ignored.
 * @param lines the lines to parse
 * @return the parsed FST configurations (one per non-empty, non-comment line)
 * @throws IllegalArgumentException if a line does not define a (non-blank)
 * <code>index</code> field
 */
private List<FstConfig> parseFstConfig(Collection<String> lines) {
    List<FstConfig> parsedConfigs = new ArrayList<FstConfig>();
    for(String rawLine : lines){
        String line = rawLine.trim();
        if(line.isEmpty() || line.charAt(0) == '#'){
            continue; //ignore empty lines and comments
        }
        //[0] := index field, [1] := store field
        String[] fields = new String[] {null,null};
        //index into fields the next token is assigned to; -1 if the next
        //token is expected to be a key
        int target = -1;
        for(String token : line.split("=|;")){
            String part = token.trim(); //tolerate 'index = field ; store = field'
            if(target >= 0){
                if(!part.isEmpty()){ //a blank value is treated as not defined
                    fields[target] = part;
                }
                target = -1;
            } else if("index".equalsIgnoreCase(part)){
                target = 0;
            } else if("store".equalsIgnoreCase(part)){
                target = 1;
            }
        }
        if(fields[0] == null){
            throw new IllegalArgumentException("Invalid FST configuration "
                + "line: "+line +". Param 'index={field}' is required "
                + "(syntax: 'index={field};store={field}', 'store is optional'')!");
        }
        parsedConfigs.add(new FstConfig(fields[0], fields[1]));
    }
    return parsedConfigs;
}
/**
 * Builds the {@link SolrYardConfig} used for indexing. The configuration
 * uses the single yard index layout, disables immediate commits and sets
 * the commitWithin duration to one minute.
 * @param yardName the name of the yard
 * @param indexName the name of the index
 * @return the initialised configuration
 */
private SolrYardConfig createSolrYardConfig(String yardName, String indexName) {
    final int oneMinuteInMillis = 60 * 1000;
    SolrYardConfig config = new SolrYardConfig(yardName, indexName);
    //one index per yard
    config.setMultiYardIndexLayout(Boolean.FALSE);
    //lazy commits: documents are committed within the configured duration
    config.setImmediateCommit(Boolean.FALSE);
    config.setCommitWithinDuration(oneMinuteInMillis);
    return config;
}
/**
 * This destination always requires a call to {@link #initialise()}.
 * @return <code>true</code>
 */
@Override
public boolean needsInitialisation() {
    return true;
}
/**
 * Performs the actual initialisation of the {@link SolrYard}. The
 * constructors and {@link #setConfiguration(Map)} only validate the parsed
 * parameters and initialise the member variables.<p>
 * If a custom Solr configuration is present it is copied to the index
 * location and the SolrCore is initialised based on it. Otherwise the
 * default initialisation is used or - for an absolute path that is not
 * within the managed SolrServer - the existing index at that location is
 * registered.
 * @throws IllegalStateException if the Solr configuration can not be copied
 * or the SolrCore can not be initialised
 */
@Override
public void initialise() {
    log.info("initialise {}",getClass().getSimpleName());
    EmbeddedSolrServer server;
    IndexReference solrServerRef = IndexReference.parse(solrYardConfig.getSolrServerLocation());
    if(solrIndexConfig != null){ //can only be != null if also solrIndexLocation
        //copy the custom configuration
        try {
            log.info(" ... copy Solr Configuration form {} to {}",solrIndexConfig,solrIndexLocation);
            FileUtils.copyDirectory(solrIndexConfig, solrIndexLocation);
        } catch (IOException e) {
            throw new IllegalStateException(String.format(
                "Unable to copy the Solr index configuration from %s to %s!",
                solrIndexConfig,solrIndexLocation),e);
        }
        solrYardConfig.setAllowInitialisation(Boolean.FALSE);
        server = StandaloneEmbeddedSolrServerProvider.getInstance().getSolrServer(
            solrServerRef,solrServerRef.getIndex());
        if(server == null){
            //fail with a meaningful message instead of a NullPointerException
            throw new IllegalStateException("Unable to initialise SolrCore "
                + solrServerRef + " with the Solr configuration " + solrIndexConfig);
        }
        this.core = server.getCoreContainer().getCore(solrServerRef.getIndex());
    } else {
        //allow the default initialisation
        solrYardConfig.setAllowInitialisation(Boolean.TRUE);
        server = StandaloneEmbeddedSolrServerProvider.getInstance().getSolrServer(
            solrServerRef,solrYardConfig.getIndexConfigurationName());
        if(server != null){
            log.info(" ... initialised SolrCore with default configuration");
            this.core = server.getCoreContainer().getCore(solrServerRef.getIndex());
        } else if(solrServerRef.isPath() && new File(solrServerRef.getIndex()).isAbsolute()){
            //the parsed absolute path is not within the managed SolrServer
            //so we need to create some CoreContainer and init/register
            //the core at the parsed location
            StandaloneManagedSolrServer s;
            if(solrServerRef.getServer() == null){
                s = StandaloneManagedSolrServer.getManagedServer();
            } else {
                s = StandaloneManagedSolrServer.getManagedServer(solrServerRef.getServer());
            }
            CoreContainer cc = s.getCoreContainer();
            CoreDescriptor cd = new CoreDescriptor(cc, "dummy",
                solrServerRef.getIndex());
            this.core = cc.create(cd);
            cc.register(core, false);
            server = new EmbeddedSolrServer(cc, "dummy");
            log.info(" ... initialised existing SolrCore at {}",solrServerRef.getIndex());
        } else {
            throw new IllegalStateException("Unable to initialise SolrCore "+solrServerRef);
        }
    }
    log.info(" ... create SolrYard");
    this.solrYard = new SolrYard(server,solrYardConfig, namespacePrefixService);
}
/**
 * Getter for the {@link SolrYard} created by {@link #initialise()}.
 * @return the yard
 * @throws IllegalStateException if the yard is not yet initialised
 */
@Override
public Yard getYard() {
    if(solrYard != null){
        return solrYard;
    }
    throw new IllegalStateException("SolrYard not initialised. Call initialise first!");
}
/**
 * Finalises the indexing process: stores the index field configuration in
 * the yard, optimises the index, builds the configured FST models, closes
 * the yard and - if an {@link IndexingConfig} is present - writes the
 * distribution files (Solr index archive, archive reference, OSGI
 * configurations and the bundle).<p>
 * Failures while writing single distribution files are logged and do not
 * abort the creation of the other files.
 * @throws IllegalStateException if the directory for the FST models or the
 * distribution folder can not be created
 */
@SuppressWarnings("unchecked")
@Override
public void finalise() {
    //write the indexing configuration
    if(indexFieldConfiguration != null){
        FieldMapper mapper = FieldMappingUtils.createDefaultFieldMapper(indexFieldConfiguration);
        try {
            CacheUtils.storeBaseMappingsConfiguration(solrYard, mapper);
        } catch (YardException e) {
            log.error("Unable to store FieldMapperConfiguration to the Store!",e);
        }
    }
    log.info(" ... optimize SolrCore");
    try {
        solrYard.optimize();
    } catch (YardException e) {
        log.error("Unable to optimize SolrIndex after indexing! IndexArchive will not be optimized ...",e);
    }
    //build the FST models
    if(fstConfigs != null){
        buildFstModels();
    } //else no FST models to build
    //all Solr specific stuff is now ready
    log.info(" ... close SolrCore");
    solrYard.close();
    //if a indexing config is present we need to create the distribution files
    if(indexingConfig != null){
        //first check if the distribution folder needs to be created and is valid
        File distFolder = indexingConfig.getDistributionFolder();
        if(!distFolder.exists()){
            if(!distFolder.mkdirs()){
                throw new IllegalStateException("Unable to create distribution folder " +
                    distFolder.getAbsolutePath());
            }
        } else if(!distFolder.isDirectory()){
            throw new IllegalStateException("Distribution folder " + distFolder.getAbsolutePath()
                + " is not a Directory!");
        }
        //zip the index and copy it over to distribution
        log.info(" ... build Solr index archive");
        if(solrArchive != null){
            try {
                writeSolrIndexArchive(indexingConfig);
            }catch (IOException e) {
                log.error("Error while creating Solr Archive "+solrArchive.getAbsolutePath()+
                    "! The archive will not be created!",e);
                log.error("As a Workaround you can manually create the Solr Archive " +
                    "by creating a ZIP archive with the contents of the Folder " +
                    solrIndexLocation+"!");
            }
        }
        if(solrArchiveRef != null){
            try {
                writeSolrIndexReference(indexingConfig);
            } catch (IOException e) {
                log.error("Error while creating Solr Archive Reference "+
                    solrArchiveRef.getAbsolutePath()+
                    "! The file will not be created!",e);
            }
        }
        //finally create the Osgi Configuration
        try {
            OsgiConfigurationUtil.writeSiteConfiguration(indexingConfig);
        } catch (IOException e) {
            log.error("Unable to write OSGI configuration file for the referenced site",e);
        }
        try {
            OsgiConfigurationUtil.writeCacheConfiguration(indexingConfig);
        } catch (IOException e) {
            log.error("Unable to write OSGI configuration file for the Cache",e);
        }
        //create the SolrYard configuration
        try {
            writeSolrYardConfiguration(indexingConfig);
        } catch (IOException e) {
            log.error("Unable to write OSGI configuration file for the SolrYard",e);
        }
        //create the bundle
        OsgiConfigurationUtil.createBundle(indexingConfig);
    }
}
/**
 * Builds the FST models for all {@link #fstConfigs} within the "fst"
 * directory of the data directory of the {@link #core}. The models are
 * built concurrently by a thread pool with {@link #fstThreads} threads that
 * is shut down when this method returns. Failures of single models are
 * logged.
 * @throws IllegalStateException if the FST directory can not be created
 */
private void buildFstModels() {
    //(1) FST config initialisation
    log.info(" ... init FST configuration(s)");
    IndexSchema schema = core.getLatestSchema();
    File fstDir = new File(new File(core.getDataDir()),"fst");
    if(!fstDir.isDirectory()){
        try {
            FileUtils.forceMkdir(fstDir);
        } catch (IOException e) {
            throw new IllegalStateException("Unable to create Directory "
                + fstDir.getAbsolutePath() + " for storing the FST models "
                + "of SolrCore "+core.getName(), e);
        }
    }
    RefCounted<SolrIndexSearcher> searcherRef = core.getSearcher();
    try {
        for(FstConfig fstConfig : fstConfigs){
            fstConfig.setFstDirectory(fstDir); //set the FST directory
            log.info("> FST config {}", fstConfig);
            fstConfig.buildConfig(schema, searcherRef.get().getAtomicReader());
            for(CorpusCreationInfo corpus : fstConfig.getCorpusCreationInfos()){
                log.info(" - {}",corpus);
            }
        }
    } finally {
        searcherRef.decref(); //release the searcher
    }
    //(2) build the FST models
    List<Future<?>> fstCreationTasks = new ArrayList<Future<?>>();
    ExecutorService es = Executors.newFixedThreadPool(fstThreads);
    try {
        log.info(" ... build FST models ");
        for(FstConfig config : fstConfigs){
            for(final CorpusCreationInfo corpus : config.getCorpusCreationInfos()){
                fstCreationTasks.add(es.submit(new CorpusCreationTask(core, corpus)));
            }
        }
        //now wait for the completion of the tasks; tasks that fail are
        //kept in the list so that they can be reported
        Iterator<Future<?>> taskIt = fstCreationTasks.iterator();
        while(taskIt.hasNext()){
            Future<?> task = taskIt.next();
            try {
                task.get(); //wait until ready
                taskIt.remove();
            } catch (ExecutionException e) {
                log.error("Exception while building FST models for SolrCore "
                    + core.getName(),e);
            } catch (InterruptedException e) {
                log.error("Interupped while building FST models for SolrCore "
                    + core.getName(),e);
                Thread.currentThread().interrupt();
            }
        }
    } finally {
        //the worker threads of a fixed thread pool are kept alive until the
        //pool is shut down
        es.shutdown();
    }
    if(!fstCreationTasks.isEmpty()){
        log.warn("Unable to build {} FST models for SolrCore {}",
            fstCreationTasks.size(), core.getName());
    } else {
        log.info("All FST modles for SolrCore {} build successfully!",
            core.getName());
    }
}
/**
 * Writes the properties file that refers to the Solr index archive
 * (name of the archive, name and description of the index and the
 * {@link ManagedIndexConstants#SYNCHRONIZED} state) to the OSGI
 * configuration directory of the parsed indexing configuration.
 * @param indexingConfig the indexing configuration
 * @throws IOException on any error while writing the file
 */
private void writeSolrIndexReference(IndexingConfig indexingConfig) throws IOException {
    Properties properties = new Properties();
    properties.setProperty("Index-Archive", solrArchive.getName());
    properties.setProperty("Name", solrYardConfig.getName());
    if(solrYardConfig.getDescription() != null){
        properties.setProperty("Description", solrYardConfig.getDescription());
    }
    Object syncronizedConfig = indexingConfig.getProperty(ManagedIndexConstants.SYNCHRONIZED);
    if(syncronizedConfig != null){
        properties.setProperty(ManagedIndexConstants.SYNCHRONIZED,
            Boolean.toString(Boolean.parseBoolean(syncronizedConfig.toString())));
    } else {
        properties.setProperty(ManagedIndexConstants.SYNCHRONIZED,
            Boolean.toString(DEFAULT_SYNCHRONIZED_STATE));
    }
    File solrArchiveFile = new File(
        OsgiConfigurationUtil.getConfigDirectory(indexingConfig),solrArchiveRef.getName());
    FileOutputStream out = new FileOutputStream(solrArchiveFile);
    try {
        properties.store(out, null);
        out.close(); //report errors while closing the file to the caller
    } finally {
        //release the file handle also in case of an exception
        IOUtils.closeQuietly(out);
    }
}
/**
 * Writes the ZIP archive with all files of the Solr index to the
 * distribution folder of the parsed indexing configuration. Hidden files
 * and the Solr 'write.lock' file are excluded. The names of the entries are
 * relative to the parent directory of the index (so that they include the
 * name of the index) and use '/' as separator.
 * @param indexingConfig the indexing configuration
 * @throws IOException on any error while reading the index or writing the
 * archive
 */
private void writeSolrIndexArchive(IndexingConfig indexingConfig) throws IOException{
    //we need to get the length of the parent to calc the entry names for
    //the archive
    //Note that the Archive need to include the name of the index,
    //therefore we need use the parent dir as context
    int parentPathLength = solrIndexLocation.getParentFile().getAbsolutePath().length();
    if(solrIndexLocation.getAbsolutePath().charAt(parentPathLength-1) != File.separatorChar){
        parentPathLength++; //add the missing '/'
    }
    //Moved over to use java.util.zip because Apache commons compression
    //seams not support files > 2Gb
    File solrArchiveFile = new File(indexingConfig.getDistributionFolder(),solrArchive.getName());
    ZipOutputStream out = new ZipOutputStream(new FileOutputStream(solrArchiveFile));
    try {
        for(File file : FileUtils.listFiles(solrIndexLocation, null, true)){
            String name = file.getAbsolutePath().substring(parentPathLength);
            if(file.isHidden() || name.endsWith(SOLR_WRITE_LOCK)){
                log.info("exclude "+name);
                continue;
            }
            log.info("add "+name);
            //ZIP entry names use '/' as separator regardless of the platform
            out.putNextEntry(new ZipEntry(name.replace(File.separatorChar, '/')));
            if(!file.isDirectory()){
                FileInputStream fileIn = new FileInputStream(file);
                try {
                    IOUtils.copyLarge(fileIn,out);
                } finally {
                    IOUtils.closeQuietly(fileIn);
                }
            }
            out.closeEntry();
        }
        out.finish();
        out.close(); //report errors while closing the archive to the caller
    } finally {
        //release the file handle also in case of an exception
        IOUtils.closeQuietly(out);
    }
}
/**
 * Writes the OSGI configuration for the SolrYard. The generic yard
 * configuration created for the parsed indexing configuration is extended
 * by the SolrYard specific parameters (document boost field, Solr server
 * location, deactivated default initialisation and deactivated multi yard
 * index layout).
 * @param indexingConfig the indexing configuration
 * @throws IOException on any error while writing the configuration
 */
private void writeSolrYardConfiguration(IndexingConfig indexingConfig) throws IOException {
    Dictionary<String,Object> yardConfig = OsgiConfigurationUtil.createYardConfig(indexingConfig);
    //we need now add the solrYard specific parameters
    String fieldBoostName = solrYardConfig.getDocumentBoostFieldName();
    if(fieldBoostName != null){
        //NOTE: the qualified name is required here, because the simple name
        //DOCUMENT_BOOST_FIELD refers to the constant of this class (the
        //name of the boost field) and not to the configuration key
        yardConfig.put(SolrYardConfig.DOCUMENT_BOOST_FIELD, fieldBoostName);
    }
    //TODO: fieldBoosts are currently not supported by the SolrYard Config
    //solrYardConfig.getFieldBoosts();
    //The default values for the following parameters are OK
    //solrYardConfig.getMaxBooleanClauses();
    //solrYardConfig.getMaxQueryResultNumber();
    yardConfig.put(SOLR_SERVER_LOCATION, FilenameUtils.getName(solrYardConfig.getSolrServerLocation()));
    //the server type needs not to be set. It is automatically detected by
    //the value of the server location
    //solrYardConfig.getSolrServerType();
    //deactivate default initialisation!
    yardConfig.put(ALLOW_INITIALISATION_STATE, Boolean.FALSE);
    //for immediate commit use the default value (optionally one could also
    //force TRUE)
    //yardConfig.put(SolrYard.IMMEDIATE_COMMIT, Boolean.TRUE);
    //deactivate multi yard layout!
    yardConfig.put(MULTI_YARD_INDEX_LAYOUT, Boolean.FALSE);
    String solrYardConfigFileName = SOLR_YARD_COMPONENT_ID+'-'+indexingConfig.getName()+".config";
    OsgiConfigurationUtil.writeOsgiConfig(indexingConfig,solrYardConfigFileName, yardConfig);
}
/**
 * Nothing to do here: the SolrYard is already closed by {@link #finalise()}.
 */
@Override
public void close() {
    // intentionally empty
}
}