/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.search.similarities; import org.apache.lucene.misc.SweetSpotSimilarity; import org.apache.lucene.search.similarities.ClassicSimilarity; // jdoc import org.apache.lucene.search.similarities.Similarity; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.SolrException; import static org.apache.solr.common.SolrException.ErrorCode.*; /** * <p>Factory for {@link SweetSpotSimilarity}.</p> * <p> * <code>SweetSpotSimilarity</code> is an extension of * {@link ClassicSimilarity} that provides additional tuning options for * specifying the "sweetspot" of optimal <code>tf</code> and * <code>lengthNorm</code> values in the source data. * </p> * <p> * In addition to the <code>discountOverlaps</code> init param supported by * {@link ClassicSimilarityFactory} The following sets of init params are * supported by this factory: * </p> * <ul> * <li>Length Norm Settings: <ul> * <li><code>lengthNormMin</code> (int)</li> * <li><code>lengthNormMax</code> (int)</li> * <li><code>lengthNormSteepness</code> (float)</li> * </ul></li> * <li>Baseline TF Settings: <ul> * <li><code>baselineTfBase</code> (float)</li> * <li><code>baselineTfMin</code> (float)</li> * </ul></li> * <li>Hyperbolic TF Settings: <ul> * <li><code>hyperbolicTfMin</code> (float)</li> * <li><code>hyperbolicTfMax</code> (float)</li> * <li><code>hyperbolicTfBase</code> (double)</li> * <li><code>hyperbolicTfOffset</code> (float)</li> * </ul></li> * </ul> * <p> * Note: * </p> * <ul> * <li>If any individual settings from one of the above mentioned sets * are specified, then all settings from that set must be specified. * </li> * <li>If Baseline TF settings are specified, then Hyperbolic TF settings * are not permitted, and vice versa. (The settings specified will * determine whether {@link SweetSpotSimilarity#baselineTf} or * {@link SweetSpotSimilarity#hyperbolicTf} will be used. * </li> * </ul> * <p> * Example usage... * </p> * <pre class="prettyprint"> * <!-- using baseline TF --> * <fieldType name="text_baseline" class="solr.TextField" * indexed="true" stored="false"> * <analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/> * <similarity class="solr.SweetSpotSimilarityFactory"> * <!-- TF --> * <float name="baselineTfMin">6.0</float> * <float name="baselineTfBase">1.5</float> * <!-- plateau norm --> * <int name="lengthNormMin">3</int> * <int name="lengthNormMax">5</int> * <float name="lengthNormSteepness">0.5</float> * </similarity> * </fieldType> * * <!-- using hyperbolic TF --> * <fieldType name="text_hyperbolic" class="solr.TextField" * indexed="true" stored="false" > * <analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/> * <similarity class="solr.SweetSpotSimilarityFactory"> * <float name="hyperbolicTfMin">3.3</float> * <float name="hyperbolicTfMax">7.7</float> * <double name="hyperbolicTfBase">2.718281828459045</double> <!-- e --> * <float name="hyperbolicTfOffset">5.0</float> * <!-- plateau norm, shallower slope --> * <int name="lengthNormMin">1</int> * <int name="lengthNormMax">5</int> * <float name="lengthNormSteepness">0.2</float> * </similarity> * </fieldType> * </pre> * @see SweetSpotSimilarity The javadocs for the individual methods in * <code>SweetSpotSimilarity</code> for SVG diagrams showing how the * each function behaves with various settings/inputs. */ public class SweetSpotSimilarityFactory extends ClassicSimilarityFactory { private SweetSpotSimilarity sim = null; @Override public void init(SolrParams params) { super.init(params); Integer ln_min = params.getInt("lengthNormMin"); Integer ln_max = params.getInt("lengthNormMax"); Float ln_steep = params.getFloat("lengthNormSteepness"); if (! allOrNoneNull(ln_min, ln_max, ln_steep)) { throw new SolrException(SERVER_ERROR, "Overriding default lengthNorm settings requires all to be specified: lengthNormMin, lengthNormMax, lengthNormSteepness"); } Float hyper_min = params.getFloat("hyperbolicTfMin"); Float hyper_max = params.getFloat("hyperbolicTfMax"); Double hyper_base = params.getDouble("hyperbolicTfBase"); Float hyper_offset = params.getFloat("hyperbolicTfOffset"); if (! allOrNoneNull(hyper_min, hyper_max, hyper_base, hyper_offset)) { throw new SolrException(SERVER_ERROR, "Overriding default hyperbolicTf settings requires all to be specified: hyperbolicTfMin, hyperbolicTfMax, hyperbolicTfBase, hyperbolicTfOffset"); } Float baseline_base = params.getFloat("baselineTfBase"); Float baseline_min = params.getFloat("baselineTfMin"); if (! allOrNoneNull(baseline_min, baseline_base)) { throw new SolrException(SERVER_ERROR, "Overriding default baselineTf settings requires all to be specified: baselineTfBase, baselineTfMin"); } // sanity check that they aren't trying to use two diff tf impls if ((null != hyper_min) && (null != baseline_min)) { throw new SolrException(SERVER_ERROR, "Can not mix hyperbolicTf settings with baselineTf settings"); } // pick Similarity impl based on whether hyper tf settings are set sim = (null != hyper_min) ? new HyperbolicSweetSpotSimilarity() : new SweetSpotSimilarity(); if (null != ln_min) { // overlaps already handled by super factory sim.setLengthNormFactors(ln_min, ln_max, ln_steep, this.discountOverlaps); } if (null != hyper_min) { sim.setHyperbolicTfFactors(hyper_min, hyper_max, hyper_base, hyper_offset); } if (null != baseline_min) { sim.setBaselineTfFactors(baseline_base, baseline_min); } } @Override public Similarity getSimilarity() { assert sim != null : "SweetSpotSimilarityFactory was not initialized"; return sim; } /** * Returns true if either: all of the specified arguments are null; * or none of the specified arguments are null */ private static boolean allOrNoneNull(Object... args) { int nulls = 0; int objs = 0; for (Object o : args) { objs++; if (null == o) nulls++; } return (0 == nulls || nulls == objs); } private static final class HyperbolicSweetSpotSimilarity extends SweetSpotSimilarity { @Override public float tf(float freq) { return hyperbolicTf(freq); } } }