/** * Copyright (C) 2014 Cohesive Integrations, LLC (info@cohesiveintegrations.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package net.di2e.ecdr.libs.result.relevance; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.UUID; import net.di2e.ecdr.commons.constants.SearchConstants; import net.di2e.ecdr.commons.filter.AbstractFilterDelegate.SupportedGeosOptions; import net.di2e.ecdr.commons.filter.StrictFilterDelegate; import org.apache.commons.io.IOUtils; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.time.StopWatch; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Version; import org.opengis.filter.sort.SortBy; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import ddf.catalog.data.Result; import ddf.catalog.data.impl.ResultImpl; import ddf.catalog.filter.FilterAdapter; import ddf.catalog.operation.Query; import ddf.catalog.source.UnsupportedQueryException; /** * Normalizes the Relevance of a result set by looking at the contextual criteria, then doing a local calculation of * relevance based on the localized result set */ public class RelevanceNormalizer { public static final String RELEVANCE_TIMER = "RELEVANCE TIMER:"; private static final Logger LOGGER = LoggerFactory.getLogger( RelevanceNormalizer.class ); private static final String METADATA_FIELD = "metadata"; private static final String ID_FIELD = "id"; private FilterAdapter filterAdapter; public RelevanceNormalizer( FilterAdapter filterAdapter ) { this.filterAdapter = filterAdapter; } /** * Normalize the relevance score for the results in the query response based on the contextual query criteria * * @param results * @param originalQuery * @return */ public List<Result> normalize( List<Result> results, Query originalQuery ) { SortBy sortBy = originalQuery.getSortBy(); // We want to do relevance sort if no sort order was specfied or if Relevance sort was specified if ( sortBy == null || sortBy.getPropertyName() == null || sortBy.getPropertyName().getPropertyName() == null || Result.RELEVANCE.equals( sortBy.getPropertyName().getPropertyName() ) ) { Map<String, String> filterParameters = getFilterParameters( originalQuery ); if ( canNormalizeQuery( filterParameters ) ) { LOGGER.debug( "Query contained search phrase and will be sorted by relevance, performing re-indexing to normalize relevance." ); Directory directory = null; DirectoryReader iReader = null; Map<String, Result> docMap = new HashMap<>(); List<Result> updatedResults = new ArrayList<>(); StopWatch stopWatch = new StopWatch(); stopWatch.start(); try { Analyzer analyzer = new StandardAnalyzer(); // create memory-stored index directory = new RAMDirectory(); IndexWriterConfig config = new IndexWriterConfig( Version.LATEST, analyzer ); IndexWriter iWriter = new IndexWriter( directory, config ); // loop through all of the results and add them to the index for ( Result curResult : results ) { Document doc = new Document(); String text = TextParser.parseTextFrom( curResult.getMetacard().getMetadata() ); String uuid = UUID.randomUUID().toString(); doc.add( new Field( METADATA_FIELD, text, TextField.TYPE_STORED ) ); doc.add( new Field( ID_FIELD, uuid, TextField.TYPE_STORED ) ); iWriter.addDocument( doc ); docMap.put( uuid, curResult ); } IOUtils.closeQuietly( iWriter ); LOGGER.debug( "{} Document indexing finished in {} seconds.", RELEVANCE_TIMER, (double) stopWatch.getTime() / 1000.0 ); // Now search the index: iReader = DirectoryReader.open( directory ); IndexSearcher iSearcher = new IndexSearcher( iReader ); // Parse a simple query that searches for "text": QueryParser parser = new QueryParser( METADATA_FIELD, analyzer ); org.apache.lucene.search.Query query = getQuery( parser, filterParameters ); ScoreDoc[] hits = iSearcher.search( query, null, docMap.size() ).scoreDocs; LOGGER.debug( "Got back {} results", hits.length ); // loop through the indexed search results and update the scores in the original query results for ( ScoreDoc curHit : hits ) { Document doc = iSearcher.doc( curHit.doc ); String uuid = doc.getField( ID_FIELD ).stringValue(); Result result = docMap.get( uuid ); docMap.remove( uuid ); updatedResults.add( updateResult( result, curHit.score ) ); LOGGER.debug( "Relevance for result {} was changed FROM {} TO {}", result.getMetacard().getId(), result.getRelevanceScore(), curHit.score ); } // check if there are any results left that did not match the keyword query for ( Map.Entry<String, Result> curEntry : docMap.entrySet() ) { // add result in with 0 relevance score updatedResults.add( updateResult( curEntry.getValue(), 0 ) ); } // create new query response return updatedResults; } catch ( ParseException | IOException | RuntimeException e ) { LOGGER.warn( "Received an exception while trying to perform re-indexing, sending original queryResponse on.", e ); return results; } finally { IOUtils.closeQuietly( iReader ); IOUtils.closeQuietly( directory ); stopWatch.stop(); LOGGER.debug( "{} Total relevance process took {} seconds.", RELEVANCE_TIMER, (double) stopWatch.getTime() / 1000.0 ); } } else { LOGGER.debug( "Query is not sorted based on relevance with contextual criteria. Skipping relevance normalization." ); } } else { LOGGER.debug( "Query is not sorted based on relevance with contextual criteria. Skipping relevance normalization." ); } return results; } /** * Checks to see if this query can be normalized. * * @param filterParameters * parameters from original ddf query * @return true if this query can be normalzed, false if not */ protected boolean canNormalizeQuery( Map<String, String> filterParameters ) { return StringUtils.isNotBlank( getSearchPhrase( filterParameters ) ); } protected org.apache.lucene.search.Query getQuery( QueryParser parser, Map<String, String> filterParameters ) throws ParseException { String searchPhrase = getSearchPhrase( filterParameters ); org.apache.lucene.search.Query query = parser.parse( searchPhrase ); if ( filterParameters.containsKey( SearchConstants.FUZZY_PARAMETER ) && StringUtils.equals( filterParameters.get( SearchConstants.FUZZY_PARAMETER ), "1" ) ) { // should get a boolean query for keyword-based searches if ( query instanceof BooleanQuery ) { BooleanQuery booleanQuery = (BooleanQuery) query; for ( BooleanClause clause : booleanQuery.getClauses() ) { if ( clause.getQuery() instanceof TermQuery ) { TermQuery oldQuery = (TermQuery) clause.getQuery(); FuzzyQuery newQuery = new FuzzyQuery( oldQuery.getTerm() ); clause.setQuery( newQuery ); } } } else { LOGGER.debug( "Query was too complex for adding fuzzy. Expected BooleanQuery but ended up being of type {}", query.getClass().getName() ); } } return query; } /** * Pull out the string-based search phrase from a query. * * @param filterParameters * filterparameters from the original query * @return Search phrase or null if no search phrase was found. */ protected String getSearchPhrase( Map<String, String> filterParameters ) { String searchPhrase = null; if ( filterParameters.containsKey( SearchConstants.KEYWORD_PARAMETER ) ) { searchPhrase = filterParameters.get( SearchConstants.KEYWORD_PARAMETER ); } return searchPhrase; } protected Map<String, String> getFilterParameters( Query originalQuery ) { HashMap<String, String> map = new HashMap<>(); try { map.putAll( filterAdapter.adapt( originalQuery, new StrictFilterDelegate( false, SupportedGeosOptions.ALL, Collections.<String, String>emptyMap(), Collections.<String, String>emptyMap() ) ) ); } catch ( UnsupportedQueryException uqe ) { LOGGER.debug( "Query did not contain any contextual criteria (search phrases), cannot perform re-relevance on this query." ); } return map; } /** * Creates a new result with an updated score. * * @param origResult * Original result that contains an older score. * @param newScore * New score to update the result with. * @return Result with updated score. */ protected Result updateResult( Result origResult, float newScore ) { ResultImpl result = new ResultImpl( origResult.getMetacard() ); result.setRelevanceScore( (double) newScore ); result.setDistanceInMeters( origResult.getDistanceInMeters() ); return result; } }