/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.handler.component;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.WeakHashMap;

import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;

import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.QueryElevationParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.DOMUtil;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.Config;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.StrField;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SortSpec;
import org.apache.solr.util.RefCounted;
import org.apache.solr.util.VersionedFile;
import org.apache.solr.util.plugin.SolrCoreAware;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * A component to elevate some documents to the top of the result set.
 *
 * @version $Id: QueryElevationComponent.java 990577 2010-08-29 13:16:56Z gsingers $
 * @since solr 1.3
 */
public class QueryElevationComponent extends SearchComponent implements SolrCoreAware
{
  private static Logger log = LoggerFactory.getLogger( QueryElevationComponent.class );

  // Constants used in solrconfig.xml
  static final String FIELD_TYPE = "queryFieldType";
  static final String CONFIG_FILE = "config-file";
  static final String EXCLUDE = "exclude";
  // Runtime param -- should be in common?

  private SolrParams initArgs = null;
  private Analyzer analyzer = null;
  private String idField = null;
  boolean forceElevation = false;
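
  /*
   * Configuration sketch (illustrative only; the component name "elevator" and
   * the file name "elevate.xml" below are assumptions, not taken from this file).
   * The component is typically registered in solrconfig.xml with the two init
   * arguments defined above (FIELD_TYPE and CONFIG_FILE):
   *
   *   <searchComponent name="elevator" class="solr.QueryElevationComponent">
   *     <str name="queryFieldType">string</str>
   *     <str name="config-file">elevate.xml</str>
   *   </searchComponent>
   *
   * The referenced file is parsed by loadElevationMap() below, which expects
   * roughly this shape (doc ids are values of the schema's uniqueKey field):
   *
   *   <elevate>
   *     <query text="foo bar">
   *       <doc id="1" />
   *       <doc id="2" />
   *       <doc id="3" exclude="true" />
   *     </query>
   *   </elevate>
   */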

  // For each IndexReader, keep a query->elevation map
  // when the configuration is loaded from the data directory.
  // The key is null if loaded from the config directory, and
  // is never re-loaded.
  final Map<IndexReader, Map<String, ElevationObj>> elevationCache =
      new WeakHashMap<IndexReader, Map<String, ElevationObj>>();

  class ElevationObj {
    final String text;
    final String analyzed;
    final BooleanClause[] exclude;
    final BooleanQuery include;
    final Map<BytesRef, Integer> priority;

    // use singletons so hashCode/equals on Sort will just work
    final FieldComparatorSource comparatorSource;

    ElevationObj( String qstr, List<String> elevate, List<String> exclude ) throws IOException {
      this.text = qstr;
      this.analyzed = getAnalyzedQuery( this.text );

      this.include = new BooleanQuery();
      this.include.setBoost( 0 );
      this.priority = new HashMap<BytesRef, Integer>();
      int max = elevate.size() + 5;
      for( String id : elevate ) {
        TermQuery tq = new TermQuery( new Term( idField, id ) );
        include.add( tq, BooleanClause.Occur.SHOULD );
        this.priority.put( new BytesRef( id ), max-- );
      }

      if( exclude == null || exclude.isEmpty() ) {
        this.exclude = null;
      }
      else {
        this.exclude = new BooleanClause[ exclude.size() ];
        for( int i = 0; i < exclude.size(); i++ ) {
          TermQuery tq = new TermQuery( new Term( idField, exclude.get( i ) ) );
          this.exclude[i] = new BooleanClause( tq, BooleanClause.Occur.MUST_NOT );
        }
      }

      this.comparatorSource = new ElevationComparatorSource( priority );
    }
  }

  @Override
  public void init( NamedList args ) {
    this.initArgs = SolrParams.toSolrParams( args );
  }

  public void inform( SolrCore core ) {
    String a = initArgs.get( FIELD_TYPE );
    if( a != null ) {
      FieldType ft = core.getSchema().getFieldTypes().get( a );
      if( ft == null ) {
        throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
            "Unknown FieldType: '" + a + "' used in QueryElevationComponent" );
      }
      analyzer = ft.getQueryAnalyzer();
    }

    SchemaField sf = core.getSchema().getUniqueKeyField();
    if( sf == null || !( sf.getType() instanceof StrField ) ) {
      throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
          "QueryElevationComponent requires the schema to have a uniqueKeyField implemented using StrField" );
    }
    idField = StringHelper.intern( sf.getName() );

    forceElevation = initArgs.getBool( QueryElevationParams.FORCE_ELEVATION, forceElevation );
    try {
      synchronized( elevationCache ) {
        elevationCache.clear();
        String f = initArgs.get( CONFIG_FILE );
        if( f == null ) {
          throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
              "QueryElevationComponent must specify argument: '" + CONFIG_FILE
              + "' -- path to elevate.xml" );
        }

        File fC = new File( core.getResourceLoader().getConfigDir(), f );
        File fD = new File( core.getDataDir(), f );
        if( fC.exists() == fD.exists() ) {
          throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
              "QueryElevationComponent missing config file: '" + f + "'\n"
              + "either: " + fC.getAbsolutePath() + " or " + fD.getAbsolutePath()
              + " must exist, but not both." );
        }
        if( fC.exists() ) {
          log.info( "Loading QueryElevation from: " + fC.getAbsolutePath() );
          Config cfg = new Config( core.getResourceLoader(), f );
          elevationCache.put( null, loadElevationMap( cfg ) );
        }
        else {
          // preload the first data
          RefCounted<SolrIndexSearcher> searchHolder = null;
          try {
            searchHolder = core.getNewestSearcher( false );
            IndexReader reader = searchHolder.get().getReader();
            getElevationMap( reader, core );
          } finally {
            if( searchHolder != null ) searchHolder.decref();
          }
        }
      }
    }
    catch( Exception ex ) {
      throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
          "Error initializing QueryElevationComponent.", ex );
    }
  }

  Map<String, ElevationObj> getElevationMap( IndexReader reader, SolrCore core ) throws Exception {
    synchronized( elevationCache ) {
      Map<String, ElevationObj> map = elevationCache.get( null );
      if( map != null ) return map;

      map = elevationCache.get( reader );
      if( map == null ) {
        String f = initArgs.get( CONFIG_FILE );
        if( f == null ) {
          throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
              "QueryElevationComponent must specify argument: " + CONFIG_FILE );
        }
        log.info( "Loading QueryElevation from data dir: " + f );

        InputStream is = VersionedFile.getLatestFile( core.getDataDir(), f );
        Config cfg = new Config( core.getResourceLoader(), f, is, null );
        map = loadElevationMap( cfg );
        elevationCache.put( reader, map );
      }
      return map;
    }
  }

  private Map<String, ElevationObj> loadElevationMap( Config cfg ) throws IOException {
    XPath xpath = XPathFactory.newInstance().newXPath();
    Map<String, ElevationObj> map = new HashMap<String, ElevationObj>();
    NodeList nodes = (NodeList) cfg.evaluate( "elevate/query", XPathConstants.NODESET );
    for( int i = 0; i < nodes.getLength(); i++ ) {
      Node node = nodes.item( i );
      String qstr = DOMUtil.getAttr( node, "text", "missing query 'text'" );

      NodeList children = null;
      try {
        children = (NodeList) xpath.evaluate( "doc", node, XPathConstants.NODESET );
      }
      catch( XPathExpressionException e ) {
        throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
            "query requires '<doc .../>' child" );
      }

      ArrayList<String> include = new ArrayList<String>();
      ArrayList<String> exclude = new ArrayList<String>();
      for( int j = 0; j < children.getLength(); j++ ) {
        Node child = children.item( j );
        String id = DOMUtil.getAttr( child, "id", "missing 'id'" );
        String e = DOMUtil.getAttr( child, EXCLUDE, null );
        if( e != null ) {
          if( Boolean.valueOf( e ) ) {
            exclude.add( id );
            continue;
          }
        }
        include.add( id );
      }

      ElevationObj elev = new ElevationObj( qstr, include, exclude );
      if( map.containsKey( elev.analyzed ) ) {
        throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
            "Boosting query defined twice for query: '" + elev.text + "' ('" + elev.analyzed + "')" );
      }
      map.put( elev.analyzed, elev );
    }
    return map;
  }

  /**
   * Helpful for testing without loading config.xml
   *
   * @throws IOException
   */
  void setTopQueryResults( IndexReader reader, String query, String[] ids, String[] ex ) throws IOException {
    if( ids == null ) {
      ids = new String[0];
    }
    if( ex == null ) {
      ex = new String[0];
    }
    Map<String, ElevationObj> elev = elevationCache.get( reader );
    if( elev == null ) {
      elev = new HashMap<String, ElevationObj>();
      elevationCache.put( reader, elev );
    }
    ElevationObj obj = new ElevationObj( query, Arrays.asList( ids ), Arrays.asList( ex ) );
    elev.put( obj.analyzed, obj );
  }

  String getAnalyzedQuery( String query ) throws IOException {
    if( analyzer == null ) {
      return query;
    }
    StringBuilder norm = new StringBuilder();
    TokenStream tokens = analyzer.reusableTokenStream( "", new StringReader( query ) );
    tokens.reset();
    CharTermAttribute termAtt = tokens.addAttribute( CharTermAttribute.class );
    while( tokens.incrementToken() ) {
      norm.append( termAtt.buffer(), 0, termAtt.length() );
    }
    return norm.toString();
  }

  //---------------------------------------------------------------------------------
  // SearchComponent
  //---------------------------------------------------------------------------------

  @Override
  public void prepare( ResponseBuilder rb ) throws IOException {
    SolrQueryRequest req = rb.req;
    SolrParams params = req.getParams();
    // A runtime param can skip
    if( !params.getBool( QueryElevationParams.ENABLE, true ) ) {
      return;
    }

    boolean exclusive = params.getBool( QueryElevationParams.EXCLUSIVE, false );
    // A runtime parameter can alter the config value for forceElevation
    boolean force = params.getBool( QueryElevationParams.FORCE_ELEVATION, forceElevation );

    Query query = rb.getQuery();
    String qstr = rb.getQueryString();
    if( query == null || qstr == null ) {
      return;
    }

    qstr = getAnalyzedQuery( qstr );
    IndexReader reader = req.getSearcher().getReader();
    ElevationObj booster = null;
    try {
      booster = getElevationMap( reader, req.getCore() ).get( qstr );
    }
    catch( Exception ex ) {
      throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
          "Error loading elevation", ex );
    }

    if( booster != null ) {
      // Change the query to insert forced documents
      if( exclusive == true ) {
        // we only want these results
        rb.setQuery( booster.include );
      }
      else {
        BooleanQuery newq = new BooleanQuery( true );
        newq.add( query, BooleanClause.Occur.SHOULD );
        newq.add( booster.include, BooleanClause.Occur.SHOULD );
        if( booster.exclude != null ) {
          for( BooleanClause bq : booster.exclude ) {
            newq.add( bq );
          }
        }
        rb.setQuery( newq );
      }

      // if the sort is 'score desc' use a custom sorting method to
      // insert documents in their proper place
      SortSpec sortSpec = rb.getSortSpec();
      if( sortSpec.getSort() == null ) {
        sortSpec.setSort( new Sort( new SortField[] {
            new SortField( idField, booster.comparatorSource, false ),
            new SortField( null, SortField.SCORE, false )
        } ) );
      }
      else {
        // Check if the sort is based on score
        boolean modify = false;
        SortField[] current = sortSpec.getSort().getSort();
        ArrayList<SortField> sorts = new ArrayList<SortField>( current.length + 1 );
        // Perhaps force it to always sort by score
        if( force && current[0].getType() != SortField.SCORE ) {
          sorts.add( new SortField( idField, booster.comparatorSource, false ) );
          modify = true;
        }
        for( SortField sf : current ) {
          if( sf.getType() == SortField.SCORE ) {
            sorts.add( new SortField( idField, booster.comparatorSource, sf.getReverse() ) );
            modify = true;
          }
          sorts.add( sf );
        }
        if( modify ) {
          sortSpec.setSort( new Sort( sorts.toArray( new SortField[ sorts.size() ] ) ) );
        }
      }
    }

    // Add debugging information
    if( rb.isDebug() ) {
      List<String> match = null;
      if( booster != null ) {
        // Extract the elevated terms into a list
        match = new ArrayList<String>( booster.priority.size() );
        for( Object o : booster.include.clauses() ) {
          TermQuery tq = (TermQuery) ( (BooleanClause) o ).getQuery();
          match.add( tq.getTerm().text() );
        }
      }

      SimpleOrderedMap<Object> dbg = new SimpleOrderedMap<Object>();
      dbg.add( "q", qstr );
      dbg.add( "match", match );
      if( rb.isDebugQuery() ) {
        rb.addDebugInfo( "queryBoosting", dbg );
      }
    }
  }

  @Override
  public void process( ResponseBuilder rb ) throws IOException {
    // Do nothing -- the real work is modifying the input query
  }

  //---------------------------------------------------------------------------------
  // SolrInfoMBean
  //---------------------------------------------------------------------------------

  @Override
  public String getDescription() {
    return "Query Boosting -- boost particular documents for a given query";
  }

  @Override
  public String getVersion() {
    return "$Revision: 990577 $";
  }

  @Override
  public String getSourceId() {
    return "$Id: QueryElevationComponent.java 990577 2010-08-29 13:16:56Z gsingers $";
  }

  @Override
  public String getSource() {
    return "$URL: https://svn.apache.org/repos/asf/lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/QueryElevationComponent.java $";
  }

  @Override
  public URL[] getDocs() {
    try {
      return new URL[] {
          new URL( "http://wiki.apache.org/solr/QueryElevationComponent" )
      };
    }
    catch( MalformedURLException e ) {
      throw new RuntimeException( e );
    }
  }
}

class ElevationComparatorSource extends FieldComparatorSource {

  private final Map<BytesRef, Integer> priority;

  public ElevationComparatorSource( final Map<BytesRef, Integer> boosts ) {
    this.priority = boosts;
  }

  public FieldComparator newComparator( final String fieldname, final int numHits, int sortPos, boolean reversed ) throws IOException {
    return new FieldComparator() {
      FieldCache.DocTermsIndex idIndex;
      private final int[] values = new int[ numHits ];
      int bottomVal;
      private final BytesRef tempBR = new BytesRef();

      public int compare( int slot1, int slot2 ) {
        return values[slot2] - values[slot1]; // values will be small enough that there is no overflow concern
      }

      public void setBottom( int slot ) {
        bottomVal = values[slot];
      }

      private int docVal( int doc ) throws IOException {
        BytesRef id = idIndex.getTerm( doc, tempBR );
        Integer prio = priority.get( id );
        return prio == null ? 0 : prio.intValue();
      }

      public int compareBottom( int doc ) throws IOException {
        return docVal( doc ) - bottomVal;
      }

      public void copy( int slot, int doc ) throws IOException {
        values[slot] = docVal( doc );
      }

      public void setNextReader( IndexReader reader, int docBase ) throws IOException {
        idIndex = FieldCache.DEFAULT.getTermsIndex( reader, fieldname );
      }

      public Comparable value( int slot ) {
        return values[slot];
      }
    };
  }
}