/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.stanbol.entityhub.yard.solr.impl;

import java.util.Collections;
import java.util.Dictionary;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.solr.client.solrj.SolrServer;
import org.apache.stanbol.entityhub.core.yard.AbstractYard.YardConfig;
import org.osgi.service.cm.ConfigurationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Used for the configuration of a SolrYard. Especially if the SolrYard is not running within an OSGI
 * context, an instance of this class must be configured and then passed to the constructor of
 * {@link SolrYard}.
 * <p>
 * When running within an OSGI context, the configuration is provided by the OSGI environment. In that case
 * this class is used as a wrapper for easy access to the configuration.
 *
 * @author Rupert Westenthaler
 *
 */
public final class SolrYardConfig extends YardConfig {

    private static final Logger log = LoggerFactory.getLogger(SolrYardConfig.class);

    /**
     * The key used to configure the URL for the SolrServer
     */
    public static final String SOLR_SERVER_LOCATION = "org.apache.stanbol.entityhub.yard.solr.solrUri";
    /**
     * The key used to configure if data of multiple Yards are stored within the same index (
     * <code>default=false</code>)
     */
    public static final String MULTI_YARD_INDEX_LAYOUT = "org.apache.stanbol.entityhub.yard.solr.multiYardIndexLayout";
    /**
     * The maximum boolean clauses as configured in the solrconfig.xml of the SolrServer. The default value
     * for this config in Solr 1.4 is 1024.
     * <p>
     * This value is important for generating queries that search for multiple documents, because it
     * determines the maximum number of OR combination for the searched document ids.
     */
    public static final String MAX_BOOLEAN_CLAUSES = "org.apache.stanbol.entityhub.yard.solr.maxBooleanClauses";
    /**
     * This property allows to define a field that is used to parse the boost for the parsed representation.
     * Typically this will be the pageRank of that entity within the referenced site (e.g.
     * {@link Math#log1p(double)} of the number of incoming links)
     */
    public static final String DOCUMENT_BOOST_FIELD = "org.apache.stanbol.entityhub.yard.solr.documentBoost";
    /**
     * Key used to configure {@link Entry Entry&lt;String,Float&gt;} for fields with the boost. If no Map is
     * configured or a field is not present in the Map, then 1.0f is used as Boost. If a Document boost is
     * present then the boost of a Field is documentBoost*fieldBoost.
     */
    public static final String FIELD_BOOST_MAPPINGS = "org.apache.stanbol.entityhub.yard.solr.fieldBoosts";
    /**
     * Key used to enable/disable the default configuration. If this is enabled,
     * the index will get initialised with the default configuration.<p>
     * Notes:<ul>
     * <li> Configuration is only supported for EmbeddedSolrServers that use a
     * relative path
     * <li> If this property is enabled the value of the
     * {@link #SOLR_INDEX_CONFIGURATION_NAME} will be ignored.
     * </ul>
     * Only applies in case an EmbeddedSolrServer is used.
     * @see SolrYardConfig#isAllowInitialisation()
     * @see SolrYardConfig#setAllowInitialisation(Boolean)
     */
    public static final String ALLOW_INITIALISATION_STATE = "org.apache.stanbol.entityhub.yard.solr.useDefaultConfig";
    /**
     * By default the use of a default configuration is disabled!
     */
    public static final boolean DEFAULT_ALLOW_INITIALISATION_STATE = false;
    /**
     * The name of the configuration used as default.
     */
    public static final String DEFAULT_SOLR_INDEX_CONFIGURATION_NAME = "default.solrindex.zip";
    /**
     * Allows to configure the name of the index used for the configuration of the Solr Core.
     * Only applies in case of using an EmbeddedSolrServer and
     * {@link #ALLOW_INITIALISATION_STATE} is disabled.
     * If this key is not configured, {@link #getIndexConfigurationName()} returns
     * {@link #DEFAULT_SOLR_INDEX_CONFIGURATION_NAME}.
     * @see SolrYardConfig#getIndexConfigurationName()
     * @see SolrYardConfig#setIndexConfigurationName(String)
     */
    public static final String SOLR_INDEX_CONFIGURATION_NAME = "org.apache.stanbol.entityhub.yard.solr.configName";
    /**
     * The default value for the maxBooleanClauses of SolrQueries. Set to {@value #DEFAULT_MAX_BOOLEAN_CLAUSES}
     * the default of Solr 1.4
     */
    protected static final int DEFAULT_MAX_BOOLEAN_CLAUSES = 1024;
    /**
     * Key used to enable/disable committing of update(..) and store(..) operations. Enabling this ensures
     * that indexed documents are immediately available for searches, but it will also decrease the
     * performance for updates.
     */
    public static final String IMMEDIATE_COMMIT = "org.apache.stanbol.entityhub.yard.solr.immediateCommit";
    /**
     * By default {@link #IMMEDIATE_COMMIT} is disabled (NOTE: was enabled, but changed with
     * <a href="https://issues.apache.org/jira/browse/STANBOL-1092">STANBOL-1092</a>)
     */
    public static final boolean DEFAULT_IMMEDIATE_COMMIT_STATE = false;
    /**
     * If {@link #IMMEDIATE_COMMIT} is deactivated, then this time is passed to update(..) and store(..)
     * operations as the maximum time (in ms) until a commit.
     */
    public static final String COMMIT_WITHIN_DURATION = "org.apache.stanbol.entityhub.yard.solr.commitWithinDuration";
    /**
     * The default value for the {@link #COMMIT_WITHIN_DURATION} parameter is 10 sec.
     */
    public static final int DEFAULT_COMMIT_WITHIN_DURATION = 1000 * 10;

    /**
     * Creates a new config with the minimal set of required properties
     *
     * @param id
     *            the ID of the Yard
     * @param solrServer
     *            the base URL of the {@link SolrServer}
     * @throws IllegalArgumentException
     *             if the passed values do not fulfil the requirements (wraps the
     *             {@link ConfigurationException} reported by the validation).
     */
    public SolrYardConfig(String id, String solrServer) throws IllegalArgumentException {
        super(id);
        setSolrServerLocation(solrServer);
        try {
            isValid();
        } catch (ConfigurationException e) {
            throw new IllegalArgumentException(e.getMessage(), e);
        }
    }

    /**
     * Initialise the Yard configuration based on a passed configuration. Usually used in the context of an
     * OSGI environment in the activate method.
     *
     * @param config
     *            the configuration usually passed within an OSGI activate method
     * @throws ConfigurationException
     *             if the configuration is incomplete or some values are not valid
     * @throws IllegalArgumentException
     *             if <code>null</code> is passed as configuration
     */
    protected SolrYardConfig(Dictionary<String,Object> config) throws IllegalArgumentException,
                                                               ConfigurationException {
        super(config);
    }

    /**
     * Setter for the location of the SolrServer. Might be a URL or a file.
     *
     * @param url
     *            the base URL of the SolrServer. Passing <code>null</code> removes the current value
     *            (a configuration without a location does not pass {@link #validateConfig()}).
     */
    public void setSolrServerLocation(String url) {
        setOrRemoveConfigValue(SOLR_SERVER_LOCATION, url);
    }

    /**
     * Getter for the Location of the SolrServer. In case of a remote server this will be the base URL of the
     * RESTful interface. In case of an embedded Server it is the directory containing the solr.xml or the
     * directory of the core in case of a multi-core setup.
     *
     * @return the URL or path to the SolrServer or <code>null</code> if not configured
     */
    public String getSolrServerLocation() throws IllegalStateException {
        return getStringConfigValue(SOLR_SERVER_LOCATION, null);
    }

    /**
     * Setter for the multi yard index layout state.
     * <p>
     * If data of multiple yards are stored in the same Solr Index, then the YardID
     * MUST be stored within all indexed documents. In addition to all queries a fq (filterQuery) must be
     * added that restricts results to the current yard.
     *
     * @param multiYardIndexLayoutState
     *            the state or <code>null</code> to remove the current configuration
     */
    public void setMultiYardIndexLayout(Boolean multiYardIndexLayoutState) {
        setOrRemoveConfigValue(MULTI_YARD_INDEX_LAYOUT, multiYardIndexLayoutState);
    }

    /**
     * Getter for the multi yard index layout state.
     * <p>
     * If data of multiple yards are stored in the same Solr Index, then the YardID MUST be stored within all
     * indexed documents. In addition to all queries a fq (filterQuery) must be added that restricts
     * results to the current yard.
     * <p>
     * The default value is <code>false</code>
     *
     * @return the multi yard index layout state
     */
    public boolean isMultiYardIndexLayout() {
        return getBooleanConfigValue(MULTI_YARD_INDEX_LAYOUT, false);
    }

    /**
     * Getter for the state if this SolrYard can be initialised by using the default configuration or if it is
     * required to use a provided configuration. The default is
     * {@link #DEFAULT_ALLOW_INITIALISATION_STATE} (<code>false</code>).
     * <p>
     * If this property is set to <code>false</code> then the SolrYard can only be initialised if the Index is
     * already available or the initial configuration is provided to the {@link SolrDirectoryManager}.
     *
     * @return the configured state or {@link #DEFAULT_ALLOW_INITIALISATION_STATE} if not configured
     */
    public boolean isAllowInitialisation() {
        return getBooleanConfigValue(ALLOW_INITIALISATION_STATE, DEFAULT_ALLOW_INITIALISATION_STATE);
    }

    /**
     * Setter for the state if this SolrYard can be initialised by using the default configuration or if it is
     * required to use a provided configuration.
     * <p>
     * If this property is set to <code>false</code> then the SolrYard can only be initialised if the Index is
     * already available or the initial configuration is provided to the {@link SolrDirectoryManager}.
     *
     * @param defaultInitialisationState
     *            the state or <code>null</code> to remove the current configuration, in which case
     *            {@link #isAllowInitialisation()} falls back to
     *            {@link #DEFAULT_ALLOW_INITIALISATION_STATE}.
     */
    public void setAllowInitialisation(Boolean defaultInitialisationState) {
        setOrRemoveConfigValue(ALLOW_INITIALISATION_STATE, defaultInitialisationState);
    }

    /**
     * Getter for the name of the configuration used to initialise the SolrServer.
     * <p>
     * In case this property is not set {@link #DEFAULT_SOLR_INDEX_CONFIGURATION_NAME}
     * is returned.
     *
     * @return the configured name of the configuration of the SolrIndex or
     *         {@link #DEFAULT_SOLR_INDEX_CONFIGURATION_NAME} if none is configured
     * @see #SOLR_INDEX_CONFIGURATION_NAME
     * @see #ALLOW_INITIALISATION_STATE
     */
    public String getIndexConfigurationName() {
        return getStringConfigValue(SOLR_INDEX_CONFIGURATION_NAME, DEFAULT_SOLR_INDEX_CONFIGURATION_NAME);
    }

    /**
     * Setter for the name of the configuration used to initialise this SolrYard. Passing <code>null</code>
     * or an empty String as name will remove this configuration.
     *
     * @param name
     *            the name of the configuration.
     */
    public void setIndexConfigurationName(String name) {
        boolean unset = name == null || name.isEmpty();
        setOrRemoveConfigValue(SOLR_INDEX_CONFIGURATION_NAME, unset ? null : name);
    }

    /**
     * Getter for the maximum number of boolean clauses allowed for queries
     *
     * @return the configured number or {@link #DEFAULT_MAX_BOOLEAN_CLAUSES} if the value is not configured,
     *         cannot be parsed as an Integer or is lower than <code>1</code>.
     */
    public int getMaxBooleanClauses() {
        Object value = config.get(MAX_BOOLEAN_CLAUSES);
        int clauses;
        if (value == null) {
            clauses = DEFAULT_MAX_BOOLEAN_CLAUSES;
        } else if (value instanceof Integer) {
            clauses = ((Integer) value).intValue();
        } else {
            try {
                clauses = Integer.parseInt(value.toString());
            } catch (NumberFormatException e) {
                log.warn("Unable to parse Integer property '" + MAX_BOOLEAN_CLAUSES
                         + "' from configured value '" + value + "'! Use default '"
                         + DEFAULT_MAX_BOOLEAN_CLAUSES + "' instead.", e);
                clauses = DEFAULT_MAX_BOOLEAN_CLAUSES;
            }
        }
        if (clauses < 1) {
            log.warn("Configured '{}={} is invalid (value MUST BE > 0). Use "
                     + "default {} instead.", new Object[]{
                             MAX_BOOLEAN_CLAUSES, clauses, DEFAULT_MAX_BOOLEAN_CLAUSES});
            clauses = DEFAULT_MAX_BOOLEAN_CLAUSES;
        }
        return clauses;
    }

    /**
     * Setter for the maximum number of boolean clauses allowed for queries.
     *
     * @param integer
     *            the maximum number. <code>null</code> or values <code>&lt;= 0</code> remove the current
     *            configuration.
     */
    public void setMaxBooleanClauses(Integer integer) {
        boolean unset = integer == null || integer.intValue() <= 0;
        setOrRemoveConfigValue(MAX_BOOLEAN_CLAUSES, unset ? null : integer);
    }

    /**
     * Setter for the name of the field configured under {@link #DOCUMENT_BOOST_FIELD}.
     *
     * @param fieldName
     *            the field name. <code>null</code> or an empty String remove the current configuration.
     */
    public void setDocumentBoostFieldName(String fieldName) {
        boolean unset = fieldName == null || fieldName.isEmpty();
        setOrRemoveConfigValue(DOCUMENT_BOOST_FIELD, unset ? null : fieldName);
    }

    /**
     * Getter for the name of the field configured under {@link #DOCUMENT_BOOST_FIELD}.
     *
     * @return the field name or <code>null</code> if not configured
     */
    public String getDocumentBoostFieldName() {
        return getStringConfigValue(DOCUMENT_BOOST_FIELD, null);
    }

    /**
     * Setter for the field boosts stored under {@link #FIELD_BOOST_MAPPINGS}. The passed Map is stored as
     * is (no copy is made).
     *
     * @param fieldBoosts
     *            the field name to boost mappings or <code>null</code> to remove the current configuration
     */
    public void setFieldBoosts(Map<String,Float> fieldBoosts) {
        setOrRemoveConfigValue(FIELD_BOOST_MAPPINGS, fieldBoosts);
    }

    /**
     * Getter for the {@link #IMMEDIATE_COMMIT} state.
     *
     * @return the configured state or {@link #DEFAULT_IMMEDIATE_COMMIT_STATE} if not configured
     */
    public boolean isImmediateCommit() {
        return getBooleanConfigValue(IMMEDIATE_COMMIT, DEFAULT_IMMEDIATE_COMMIT_STATE);
    }

    /**
     * Setter for the {@link #IMMEDIATE_COMMIT} state.
     *
     * @param state
     *            the state or <code>null</code> to remove the current configuration
     */
    public void setImmediateCommit(Boolean state) {
        setOrRemoveConfigValue(IMMEDIATE_COMMIT, state);
    }

    /**
     * Getter for the {@link #COMMIT_WITHIN_DURATION} in milliseconds.
     *
     * @return the configured duration or {@link #DEFAULT_COMMIT_WITHIN_DURATION} if the value is not
     *         configured, cannot be parsed as an Integer or is not greater than <code>0</code>.
     */
    public final int getCommitWithinDuration() {
        Object value = config.get(COMMIT_WITHIN_DURATION);
        int duration;
        if (value == null) {
            duration = DEFAULT_COMMIT_WITHIN_DURATION;
        } else if (value instanceof Integer) {
            duration = ((Integer) value).intValue();
        } else {
            try {
                duration = Integer.parseInt(value.toString());
            } catch (NumberFormatException e) {
                log.warn("Unable to parse Integer property '" + COMMIT_WITHIN_DURATION
                         + "' from configured value '" + value + "'! Use default "
                         + DEFAULT_COMMIT_WITHIN_DURATION + "ms instead.", e);
                duration = DEFAULT_COMMIT_WITHIN_DURATION;
            }
        }
        // Zero is rejected here exactly as in setCommitWithinDuration(..); the message states the
        // same bound the check enforces.
        if (duration <= 0) {
            log.warn("Configured '{}={}ms is invalid (value MUST BE > 0). Use "
                     + "default {}ms instead.", new Object[]{
                             COMMIT_WITHIN_DURATION, duration, DEFAULT_COMMIT_WITHIN_DURATION});
            duration = DEFAULT_COMMIT_WITHIN_DURATION;
        }
        return duration;
    }

    /**
     * Setter for the {@link #COMMIT_WITHIN_DURATION} in milliseconds.
     *
     * @param duration
     *            the duration. <code>null</code> or values <code>&lt;= 0</code> remove the current
     *            configuration.
     */
    public final void setCommitWithinDuration(Integer duration) {
        boolean unset = duration == null || duration.intValue() <= 0;
        setOrRemoveConfigValue(COMMIT_WITHIN_DURATION, unset ? null : duration);
    }

    /**
     * Getter for the field boosts stored under {@link #FIELD_BOOST_MAPPINGS}.
     *
     * @return <code>null</code> if nothing is configured, the configured value if it is a {@link Map}
     *         (returned via an unchecked cast, key and value types are not verified) or an empty Map
     *         if the configured value has any other type.
     */
    @SuppressWarnings("unchecked")
    public Map<String,Float> getFieldBoosts() {
        Object fieldBoosts = config.get(FIELD_BOOST_MAPPINGS);
        if (fieldBoosts == null) {
            return null;
        } else if (fieldBoosts instanceof Map<?,?>) {
            return (Map<String,Float>) fieldBoosts;
        } else {
            // TODO: add support for parsing from String[] and Collection<String>
            return Collections.emptyMap();
        }
    }

    /**
     * Checks that a value for {@link #SOLR_SERVER_LOCATION} is configured.
     *
     * @throws ConfigurationException
     *             if no Solr server location is configured
     */
    @Override
    protected void validateConfig() throws ConfigurationException {
        try {
            String solrServer = getSolrServerLocation();
            if (solrServer == null) {
                throw new ConfigurationException(SOLR_SERVER_LOCATION,
                        "The URL of the Solr server MUST NOT be NULL!");
            }
        } catch (IllegalStateException e) {
            // keep the caught exception itself as cause so its stack trace is not lost
            throw new ConfigurationException(SOLR_SERVER_LOCATION, e.getMessage(), e);
        }
    }

    /**
     * Reads a boolean value from the configuration. {@link Boolean} values are returned directly, any
     * other value is parsed from its String representation.
     */
    private boolean getBooleanConfigValue(String key, boolean defaultValue) {
        Object value = config.get(key);
        if (value == null) {
            return defaultValue;
        }
        if (value instanceof Boolean) {
            return ((Boolean) value).booleanValue();
        }
        return Boolean.parseBoolean(value.toString());
    }

    /**
     * Reads the String representation of a configured value or the default if the key is not configured.
     */
    private String getStringConfigValue(String key, String defaultValue) {
        Object value = config.get(key);
        return value == null ? defaultValue : value.toString();
    }

    /**
     * Stores the value under the key or removes the key if the value is <code>null</code>.
     */
    private void setOrRemoveConfigValue(String key, Object value) {
        if (value != null) {
            config.put(key, value);
        } else {
            config.remove(key);
        }
    }
}