/**
* Copyright (C) 2014 Cohesive Integrations, LLC (info@cohesiveintegrations.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.di2e.ecdr.libs.result.relevance;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import net.di2e.ecdr.commons.constants.SearchConstants;
import net.di2e.ecdr.commons.filter.AbstractFilterDelegate.SupportedGeosOptions;
import net.di2e.ecdr.commons.filter.StrictFilterDelegate;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.time.StopWatch;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.opengis.filter.sort.SortBy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ddf.catalog.data.Result;
import ddf.catalog.data.impl.ResultImpl;
import ddf.catalog.filter.FilterAdapter;
import ddf.catalog.operation.Query;
import ddf.catalog.source.UnsupportedQueryException;
/**
 * Normalizes the relevance of a result set by looking at the contextual (keyword) criteria of the original query and
 * then recalculating relevance locally: the results are written to a throw-away in-memory Lucene index, the keyword
 * phrase is run against that index, and the Lucene scores replace the scores on the results.
 */
public class RelevanceNormalizer {

    public static final String RELEVANCE_TIMER = "RELEVANCE TIMER:";

    private static final Logger LOGGER = LoggerFactory.getLogger( RelevanceNormalizer.class );

    private static final String METADATA_FIELD = "metadata";
    private static final String ID_FIELD = "id";

    /** Value of the fuzzy filter parameter that switches fuzzy matching on. */
    private static final String FUZZY_ENABLED = "1";

    private static final String SKIP_MESSAGE = "Query is not sorted based on relevance with contextual criteria. Skipping relevance normalization.";

    private final FilterAdapter filterAdapter;

    /**
     * @param filterAdapter
     *            adapter used to extract the filter parameters (keyword phrase, fuzzy flag) from a query
     */
    public RelevanceNormalizer( FilterAdapter filterAdapter ) {
        this.filterAdapter = filterAdapter;
    }

    /**
     * Normalize the relevance score for the results in the query response based on the contextual query criteria.
     * Normalization only happens when the query is sorted by relevance (or has no usable sort) and contains a
     * non-blank keyword phrase; in every other case the list that was passed in is returned as-is.
     *
     * @param results
     *            results to re-score; a null or empty list is returned unchanged
     * @param originalQuery
     *            query that produced the results
     * @return a new list of re-scored results (matches first, in Lucene's ranking order, followed by the results that
     *         did not match with a score of 0 in their original order), or the original list when normalization is
     *         skipped or fails
     */
    public List<Result> normalize( List<Result> results, Query originalQuery ) {
        // Nothing to re-score. This also avoids asking Lucene for the top 0 hits, which its collector rejects with
        // an IllegalArgumentException and which would otherwise surface as a misleading warning below.
        if ( results == null || results.isEmpty() ) {
            return results;
        }
        // We want to do relevance sort if no sort order was specified or if Relevance sort was specified
        if ( !isRelevanceSort( originalQuery.getSortBy() ) ) {
            LOGGER.debug( SKIP_MESSAGE );
            return results;
        }
        Map<String, String> filterParameters = getFilterParameters( originalQuery );
        if ( !canNormalizeQuery( filterParameters ) ) {
            LOGGER.debug( SKIP_MESSAGE );
            return results;
        }

        LOGGER.debug( "Query contained search phrase and will be sorted by relevance, performing re-indexing to normalize relevance." );
        StopWatch stopWatch = new StopWatch();
        stopWatch.start();
        // try-with-resources guarantees the analyzer and the memory-stored index are released on every path
        try ( Analyzer analyzer = new StandardAnalyzer(); Directory directory = new RAMDirectory() ) {
            Map<String, Result> docMap = indexResults( results, directory, analyzer );
            LOGGER.debug( "{} Document indexing finished in {} seconds.", RELEVANCE_TIMER, (double) stopWatch.getTime() / 1000.0 );

            QueryParser parser = new QueryParser( METADATA_FIELD, analyzer );
            org.apache.lucene.search.Query query = getQuery( parser, filterParameters );
            return rescore( directory, query, docMap );
        } catch ( ParseException | IOException | RuntimeException e ) {
            // Normalization is best-effort: any failure falls back to the untouched results
            LOGGER.warn( "Received an exception while trying to perform re-indexing, sending original queryResponse on.", e );
            return results;
        } finally {
            stopWatch.stop();
            LOGGER.debug( "{} Total relevance process took {} seconds.", RELEVANCE_TIMER, (double) stopWatch.getTime() / 1000.0 );
        }
    }

    /**
     * Decides whether the given sort means "sort by relevance": either no usable sort property is present or the
     * property is {@link Result#RELEVANCE}.
     */
    private static boolean isRelevanceSort( SortBy sortBy ) {
        if ( sortBy == null || sortBy.getPropertyName() == null ) {
            return true;
        }
        String propertyName = sortBy.getPropertyName().getPropertyName();
        return propertyName == null || Result.RELEVANCE.equals( propertyName );
    }

    /**
     * Writes one document per result into the directory, keyed by a generated UUID.
     *
     * @return map from the generated document id to the result it was built from, in the order of {@code results}
     */
    private Map<String, Result> indexResults( List<Result> results, Directory directory, Analyzer analyzer ) throws IOException {
        // Insertion-ordered so that results which do not match the keyword query keep their incoming order
        Map<String, Result> docMap = new LinkedHashMap<>();
        IndexWriterConfig config = new IndexWriterConfig( Version.LATEST, analyzer );
        // Closing the writer commits the documents; a failure here must propagate instead of being swallowed,
        // and the writer must be closed even when adding a document fails
        try ( IndexWriter iWriter = new IndexWriter( directory, config ) ) {
            for ( Result curResult : results ) {
                // Field rejects a null value; presumably the parser can yield null when there is no usable
                // metadata (verify against TextParser), so index such a result with empty text rather than
                // aborting normalization for the whole list
                String text = StringUtils.defaultString( TextParser.parseTextFrom( curResult.getMetacard().getMetadata() ) );
                String uuid = UUID.randomUUID().toString();
                Document doc = new Document();
                doc.add( new Field( METADATA_FIELD, text, TextField.TYPE_STORED ) );
                doc.add( new Field( ID_FIELD, uuid, TextField.TYPE_STORED ) );
                iWriter.addDocument( doc );
                docMap.put( uuid, curResult );
            }
        }
        return docMap;
    }

    /**
     * Runs the query against the index and builds the re-scored result list. Entries are removed from
     * {@code docMap} as they are matched; whatever is left afterwards is appended with a score of 0.
     */
    private List<Result> rescore( Directory directory, org.apache.lucene.search.Query query, Map<String, Result> docMap ) throws IOException {
        List<Result> updatedResults = new ArrayList<>( docMap.size() );
        try ( DirectoryReader iReader = DirectoryReader.open( directory ) ) {
            IndexSearcher iSearcher = new IndexSearcher( iReader );
            ScoreDoc[] hits = iSearcher.search( query, null, docMap.size() ).scoreDocs;
            LOGGER.debug( "Got back {} results", hits.length );
            // loop through the indexed search results and update the scores in the original query results
            for ( ScoreDoc curHit : hits ) {
                String uuid = iSearcher.doc( curHit.doc ).get( ID_FIELD );
                Result result = docMap.remove( uuid );
                updatedResults.add( updateResult( result, curHit.score ) );
                LOGGER.debug( "Relevance for result {} was changed FROM {} TO {}", result.getMetacard().getId(), result.getRelevanceScore(), curHit.score );
            }
        }
        // results that did not match the keyword query are kept, with 0 relevance score
        for ( Result unmatched : docMap.values() ) {
            updatedResults.add( updateResult( unmatched, 0 ) );
        }
        return updatedResults;
    }

    /**
     * Checks to see if this query can be normalized.
     *
     * @param filterParameters
     *            parameters from original ddf query
     * @return true if this query can be normalized (it has a non-blank search phrase), false if not
     */
    protected boolean canNormalizeQuery( Map<String, String> filterParameters ) {
        return StringUtils.isNotBlank( getSearchPhrase( filterParameters ) );
    }

    /**
     * Builds the Lucene query for the search phrase. When the fuzzy parameter is "1", term queries are replaced by
     * fuzzy queries on the same term.
     *
     * @param parser
     *            parser used to parse the search phrase
     * @param filterParameters
     *            parameters from original ddf query
     * @return the parsed (and possibly fuzzified) query
     * @throws ParseException
     *             if the search phrase cannot be parsed
     */
    protected org.apache.lucene.search.Query getQuery( QueryParser parser, Map<String, String> filterParameters ) throws ParseException {
        org.apache.lucene.search.Query query = parser.parse( getSearchPhrase( filterParameters ) );
        if ( !StringUtils.equals( filterParameters.get( SearchConstants.FUZZY_PARAMETER ), FUZZY_ENABLED ) ) {
            return query;
        }
        if ( query instanceof TermQuery ) {
            // A single-keyword phrase parses to a bare TermQuery rather than a BooleanQuery
            return new FuzzyQuery( ( (TermQuery) query ).getTerm() );
        }
        if ( query instanceof BooleanQuery ) {
            // multi-keyword searches: swap each top-level term clause for its fuzzy equivalent
            for ( BooleanClause clause : ( (BooleanQuery) query ).getClauses() ) {
                if ( clause.getQuery() instanceof TermQuery ) {
                    TermQuery oldQuery = (TermQuery) clause.getQuery();
                    clause.setQuery( new FuzzyQuery( oldQuery.getTerm() ) );
                }
            }
        } else {
            LOGGER.debug( "Query was too complex for adding fuzzy. Expected BooleanQuery but ended up being of type {}", query.getClass().getName() );
        }
        return query;
    }

    /**
     * Pull out the string-based search phrase from a query.
     *
     * @param filterParameters
     *            filter parameters from the original query
     * @return Search phrase or null if no search phrase was found.
     */
    protected String getSearchPhrase( Map<String, String> filterParameters ) {
        return filterParameters.get( SearchConstants.KEYWORD_PARAMETER );
    }

    /**
     * Extracts the filter parameters from the query by adapting it with a {@link StrictFilterDelegate}.
     *
     * @param originalQuery
     *            query to extract the parameters from
     * @return the extracted parameters; empty when the adapter reports the query as unsupported
     */
    protected Map<String, String> getFilterParameters( Query originalQuery ) {
        Map<String, String> map = new HashMap<>();
        try {
            map.putAll( filterAdapter.adapt( originalQuery,
                    new StrictFilterDelegate( false, SupportedGeosOptions.ALL, Collections.<String, String>emptyMap(), Collections.<String, String>emptyMap() ) ) );
        } catch ( UnsupportedQueryException uqe ) {
            LOGGER.debug( "Query did not contain any contextual criteria (search phrases), cannot perform re-relevance on this query.", uqe );
        }
        return map;
    }

    /**
     * Creates a new result with an updated score. The metacard and distance are carried over from the original.
     *
     * @param origResult
     *            Original result that contains an older score.
     * @param newScore
     *            New score to update the result with.
     * @return Result with updated score.
     */
    protected Result updateResult( Result origResult, float newScore ) {
        ResultImpl result = new ResultImpl( origResult.getMetacard() );
        result.setRelevanceScore( (double) newScore );
        result.setDistanceInMeters( origResult.getDistanceInMeters() );
        return result;
    }
}