// DefaultSearchEngine.java example
//
// Explorer
// maven-indexer-master
package org.apache.maven.index;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0    
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import javax.inject.Named;
import javax.inject.Singleton;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import org.apache.lucene.document.Document;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.maven.index.context.IndexUtils;
import org.apache.maven.index.context.IndexingContext;
import org.apache.maven.index.context.NexusIndexMultiReader;
import org.apache.maven.index.context.NexusIndexMultiSearcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A default search engine implementation
 * 
 * @author Eugene Kuleshov
 * @author Tamas Cservenak
 */
@Singleton
@Named
public class DefaultSearchEngine
    implements SearchEngine
{

    private final Logger logger = LoggerFactory.getLogger( getClass() );

    protected Logger getLogger()
    {
        return logger;
    }

    @Deprecated
    public Set<ArtifactInfo> searchFlat( Comparator<ArtifactInfo> artifactInfoComparator,
                                         IndexingContext indexingContext, Query query )
        throws IOException
    {
        return searchFlatPaged( new FlatSearchRequest( query, artifactInfoComparator, indexingContext ),
            Arrays.asList( indexingContext ), true ).getResults();
    }

    @Deprecated
    public Set<ArtifactInfo> searchFlat( Comparator<ArtifactInfo> artifactInfoComparator,
                                         Collection<IndexingContext> indexingContexts, Query query )
        throws IOException
    {
        return searchFlatPaged( new FlatSearchRequest( query, artifactInfoComparator ), indexingContexts ).getResults();
    }

    public FlatSearchResponse searchFlatPaged( FlatSearchRequest request, Collection<IndexingContext> indexingContexts )
        throws IOException
    {
        return searchFlatPaged( request, indexingContexts, false );
    }

    public FlatSearchResponse forceSearchFlatPaged( FlatSearchRequest request,
                                                    Collection<IndexingContext> indexingContexts )
        throws IOException
    {
        return searchFlatPaged( request, indexingContexts, true );
    }

    protected FlatSearchResponse searchFlatPaged( FlatSearchRequest request,
                                                  Collection<IndexingContext> indexingContexts, boolean ignoreContext )
        throws IOException
    {
        List<IndexingContext> contexts = getParticipatingContexts( indexingContexts, ignoreContext );

        final TreeSet<ArtifactInfo> result = new TreeSet<ArtifactInfo>( request.getArtifactInfoComparator() );
        return new FlatSearchResponse( request.getQuery(), searchFlat( request, result, contexts, request.getQuery() ),
            result );
    }

    // ==

    public GroupedSearchResponse searchGrouped( GroupedSearchRequest request,
                                                Collection<IndexingContext> indexingContexts )
        throws IOException
    {
        return searchGrouped( request, indexingContexts, false );
    }

    public GroupedSearchResponse forceSearchGrouped( GroupedSearchRequest request,
                                                     Collection<IndexingContext> indexingContexts )
        throws IOException
    {
        return searchGrouped( request, indexingContexts, true );
    }

    protected GroupedSearchResponse searchGrouped( GroupedSearchRequest request,
                                                   Collection<IndexingContext> indexingContexts, boolean ignoreContext )
        throws IOException
    {
        List<IndexingContext> contexts = getParticipatingContexts( indexingContexts, ignoreContext );

        final TreeMap<String, ArtifactInfoGroup> result =
            new TreeMap<String, ArtifactInfoGroup>( request.getGroupKeyComparator() );

        return new GroupedSearchResponse( request.getQuery(), searchGrouped( request, result, request.getGrouping(),
            contexts, request.getQuery() ), result );
    }

    // ===

    protected int searchFlat( FlatSearchRequest req, Collection<ArtifactInfo> result,
                              List<IndexingContext> participatingContexts, Query query )
        throws IOException
    {
        int hitCount = 0;
        for ( IndexingContext context : participatingContexts )
        {
            final IndexSearcher indexSearcher = context.acquireIndexSearcher();
            try
            {
                final TopScoreDocCollector collector = doSearchWithCeiling( req, indexSearcher, query );

                if ( collector.getTotalHits() == 0 )
                {
                    // context has no hits, just continue to next one
                    continue;
                }

                ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;

                // uhm btw hitCount contains dups

                hitCount += collector.getTotalHits();

                int start = 0; // from == FlatSearchRequest.UNDEFINED ? 0 : from;

                // we have to pack the results as long: a) we have found aiCount ones b) we depleted hits
                for ( int i = start; i < scoreDocs.length; i++ )
                {
                    Document doc = indexSearcher.doc( scoreDocs[i].doc );

                    ArtifactInfo artifactInfo = IndexUtils.constructArtifactInfo( doc, context );

                    if ( artifactInfo != null )
                    {
                        artifactInfo.setRepository( context.getRepositoryId() );
                        artifactInfo.setContext( context.getId() );

                        if ( req.getArtifactInfoFilter() != null )
                        {
                            if ( !req.getArtifactInfoFilter().accepts( context, artifactInfo ) )
                            {
                                continue;
                            }
                        }
                        if ( req.getArtifactInfoPostprocessor() != null )
                        {
                            req.getArtifactInfoPostprocessor().postprocess( context, artifactInfo );
                        }

                        result.add( artifactInfo );
                    }
                }
            }
            finally
            {
                context.releaseIndexSearcher( indexSearcher );
            }
        }

        return hitCount;
    }

    protected int searchGrouped( GroupedSearchRequest req, Map<String, ArtifactInfoGroup> result, Grouping grouping,
                                 List<IndexingContext> participatingContexts, Query query )
        throws IOException
    {
        int hitCount = 0;

        for ( IndexingContext context : participatingContexts )
        {
            final IndexSearcher indexSearcher = context.acquireIndexSearcher();
            try
            {
                final TopScoreDocCollector collector = doSearchWithCeiling( req, indexSearcher, query );

                if ( collector.getTotalHits() > 0 )
                {
                    ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;

                    hitCount += collector.getTotalHits();

                    for ( int i = 0; i < scoreDocs.length; i++ )
                    {
                        Document doc = indexSearcher.doc( scoreDocs[i].doc );

                        ArtifactInfo artifactInfo = IndexUtils.constructArtifactInfo( doc, context );

                        if ( artifactInfo != null )
                        {
                            artifactInfo.setRepository( context.getRepositoryId() );
                            artifactInfo.setContext( context.getId() );

                            if ( req.getArtifactInfoFilter() != null )
                            {
                                if ( !req.getArtifactInfoFilter().accepts( context, artifactInfo ) )
                                {
                                    continue;
                                }
                            }
                            if ( req.getArtifactInfoPostprocessor() != null )
                            {
                                req.getArtifactInfoPostprocessor().postprocess( context, artifactInfo );
                            }

                            if ( !grouping.addArtifactInfo( result, artifactInfo ) )
                            {
                                // fix the hitCount accordingly
                                hitCount--;
                            }
                        }
                    }
                }
            }
            finally
            {
                context.releaseIndexSearcher( indexSearcher );
            }
        }

        return hitCount;
    }

    // == NG Search

    public IteratorSearchResponse searchIteratorPaged( IteratorSearchRequest request,
                                                       Collection<IndexingContext> indexingContexts )
        throws IOException
    {
        return searchIteratorPaged( request, indexingContexts, false );
    }

    public IteratorSearchResponse forceSearchIteratorPaged( IteratorSearchRequest request,
                                                            Collection<IndexingContext> indexingContexts )
        throws IOException
    {
        return searchIteratorPaged( request, indexingContexts, true );
    }

    private IteratorSearchResponse searchIteratorPaged( IteratorSearchRequest request,
                                                        Collection<IndexingContext> indexingContexts,
                                                        boolean ignoreContext )
        throws IOException
    {
        List<IndexingContext> contexts = getParticipatingContexts( indexingContexts, ignoreContext );

        NexusIndexMultiReader multiReader = getMergedIndexReader( indexingContexts, ignoreContext );

        NexusIndexMultiSearcher indexSearcher = new NexusIndexMultiSearcher( multiReader );

        try
        {
            TopScoreDocCollector hits = doSearchWithCeiling( request, indexSearcher, request.getQuery() );

            return new IteratorSearchResponse( request.getQuery(), hits.getTotalHits(),
                                               new DefaultIteratorResultSet( request, indexSearcher, contexts,
                                                                             hits.topDocs() ) );
        }
        catch ( IOException e )
        {
            try
            {
                indexSearcher.release();
            }
            catch ( Exception secondary )
            {
                // do not mask original exception
            }
            throw e;
        }
        catch ( RuntimeException e )
        {
            try
            {
                indexSearcher.release();
            }
            catch ( Exception secondary )
            {
                // do not mask original exception
            }
            throw e;
        }
    }

    // ==

    protected TopScoreDocCollector doSearchWithCeiling( final AbstractSearchRequest request,
                                                        final IndexSearcher indexSearcher, final Query query )
        throws IOException
    {
        int topHitCount = getTopDocsCollectorHitNum( request, AbstractSearchRequest.UNDEFINED );

        if ( AbstractSearchRequest.UNDEFINED != topHitCount )
        {
            // count is set, simply just execute it as-is
            final TopScoreDocCollector hits = TopScoreDocCollector.create( topHitCount );

            indexSearcher.search( query, hits );

            return hits;
        }
        else
        {
            // set something reasonable as 1k
            topHitCount = 1000;

            // perform search
            TopScoreDocCollector hits = TopScoreDocCollector.create( topHitCount );
            indexSearcher.search( query, hits );

            // check total hits against, does it fit?
            if ( topHitCount < hits.getTotalHits() )
            {
                topHitCount = hits.getTotalHits();

                if ( getLogger().isDebugEnabled() )
                {
                    // warn the user and leave trace just before OOM might happen
                    // the hits.getTotalHits() might be HUUGE
                    getLogger().debug(
                        "Executing unbounded search, and fitting topHitCounts to "
                            + topHitCount
                            + ", an OOMEx might follow. To avoid OOM use narrower queries or limit your expectancy with request.setCount() method where appropriate. See MINDEXER-14 for details." );
                }

                // redo all, but this time with correct numbers
                hits = TopScoreDocCollector.create( topHitCount );
                indexSearcher.search( query, hits );
            }

            return hits;
        }
    }

    /**
     * Returns the list of participating contexts. Does not locks them, just builds a list of them.
     */
    protected List<IndexingContext> getParticipatingContexts( final Collection<IndexingContext> indexingContexts,
                                                              final boolean ignoreContext )
    {
        // to not change the API all away, but we need stable ordering here
        // filter for those 1st, that take part in here
        final ArrayList<IndexingContext> contexts = new ArrayList<IndexingContext>( indexingContexts.size() );

        for ( IndexingContext ctx : indexingContexts )
        {
            if ( ignoreContext || ctx.isSearchable() )
            {
                contexts.add( ctx );
            }
        }

        return contexts;
    }

    /**
     * Locks down participating contexts, and returns a "merged" reader of them. In case of error, unlocks as part of
     * cleanup and re-throws exception. Without error, it is the duty of caller to unlock contexts!
     * 
     * @param indexingContexts
     * @param ignoreContext
     * @return
     * @throws IOException
     */
    protected NexusIndexMultiReader getMergedIndexReader( final Collection<IndexingContext> indexingContexts,
                                                          final boolean ignoreContext )
        throws IOException
    {
        final List<IndexingContext> contexts = getParticipatingContexts( indexingContexts, ignoreContext );
        return new NexusIndexMultiReader( contexts );
    }

    protected int getTopDocsCollectorHitNum( final AbstractSearchRequest request, final int ceiling )
    {
        if ( request instanceof AbstractSearchPageableRequest )
        {
            final AbstractSearchPageableRequest prequest = (AbstractSearchPageableRequest) request;

            if ( AbstractSearchRequest.UNDEFINED != prequest.getCount() )
            {
                // easy, user knows and tells us how many results he want
                return prequest.getCount() + prequest.getStart();
            }
        }
        else
        {
            if ( AbstractSearchRequest.UNDEFINED != request.getCount() )
            {
                // easy, user knows and tells us how many results he want
                return request.getCount();
            }
        }

        return ceiling;
    }
}