package com.lucidworks.solr.query; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.handler.component.QueryComponent; import org.apache.solr.handler.component.ResponseBuilder; import org.apache.lucene.util.BytesRef; import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.search.FieldCache; import org.apache.solr.search.SolrIndexSearcher; import java.util.HashSet; import java.util.List; import java.util.StringTokenizer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; public class CategoryExtractionComponent extends QueryComponent { private static final Logger Log = LoggerFactory.getLogger( CategoryExtractionComponent.class ); private String categoryField; @Override public void init( NamedList initArgs ) { Log.info( "init ..." ); // the Solr-Lucene field that will be used to create the filter query String catField = (String)initArgs.get( "field" ); if ( catField != null ) { Log.info( "setting category field: " + catField ); this.categoryField = catField; } } @Override public void prepare( ResponseBuilder rb ) throws IOException { SolrQueryRequest req = rb.req; SolrIndexSearcher searcher = req.getSearcher(); SortedDocValues fieldValues = FieldCache.DEFAULT.getTermsIndex( searcher.getAtomicReader( ), categoryField ); SolrParams params = req.getParams( ); ModifiableSolrParams modParams = new ModifiableSolrParams( params ); String qStr = params.get( CommonParams.Q ); // tokenize the query string, if any part of it matches, remove the token from the list and // add a filter query with <categoryField>:value StringTokenizer strtok = new StringTokenizer(qStr, " .,:;\"'" ); StringBuilder strbldr= new StringBuilder( ); while (strtok.hasMoreTokens( ) ) { String tok = strtok.nextToken( ).toLowerCase( ); Log.info( "got token: " + tok ); BytesRef key = new BytesRef( tok.getBytes() ); if (fieldValues.lookupTerm( key ) >= 0) { String fq = new String( categoryField + ":" + tok ); Log.info( "adding fq " + fq ); modParams.add( "fq", fq ); } else { strbldr.append( tok ); if (strbldr.length() > 0) { strbldr.append( " " ); } } } String modQ = strbldr.toString( ); // should we add the category fields here: need to test this first... // if the query is now empty, make sure it hits on everything if (modQ.trim().length() == 0) { modQ = "*:*"; } Log.info( "final q string is: '" + modQ + "'" ); modParams.set( "q", modQ ); req.setParams( modParams ); } @Override public void process(ResponseBuilder rb) throws IOException { // do nothing - needed so we don't execute the query here. } }