package org.apache.maven.index;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.maven.index.context.IndexUtils;
import org.apache.maven.index.context.IndexingContext;
import org.apache.maven.index.context.NexusIndexMultiSearcher;
import org.apache.maven.index.creator.JarFileContentsIndexCreator;

/**
 * Default implementation of IteratorResultSet. TODO: there is too much logic here, refactor this!
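 * <p>
 * The result set keeps the underlying {@link NexusIndexMultiSearcher} acquired until the iterator is exhausted or
 * {@link #close()} is invoked, so callers should always close it (see {@link #finalize()} for the lock-leak
 * warning). A minimal usage sketch, using only the methods this class itself provides:
 *
 * <pre>
 * // irs is a DefaultIteratorResultSet obtained from a search
 * try
 * {
 *     while ( irs.hasNext() )
 *     {
 *         ArtifactInfo ai = irs.next();
 *         // process ai
 *     }
 * }
 * finally
 * {
 *     irs.close(); // releases the searcher lock
 * }
 * </pre>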
 *
 * @author cstamas
 */
public class DefaultIteratorResultSet
    implements IteratorResultSet
{
    private final IteratorSearchRequest searchRequest;

    private final NexusIndexMultiSearcher indexSearcher;

    private final List<IndexingContext> contexts;

    private final int[] starts;

    private final ArtifactInfoFilter filter;

    private final ArtifactInfoPostprocessor postprocessor;

    private final List<MatchHighlightRequest> matchHighlightRequests;

    private final TopDocs hits;

    private final int from;

    private final int count;

    private final int maxRecPointer;

    private int pointer;

    private int processedArtifactInfoCount;

    private ArtifactInfo ai;

    protected DefaultIteratorResultSet( final IteratorSearchRequest request,
                                        final NexusIndexMultiSearcher indexSearcher,
                                        final List<IndexingContext> contexts, final TopDocs hits )
        throws IOException
    {
        this.searchRequest = request;

        this.indexSearcher = indexSearcher;

        this.contexts = contexts;

        {
            int maxDoc = 0;
            this.starts = new int[contexts.size() + 1]; // build starts array

            // this is good to do as we have NexusIndexMultiSearcher passed in constructor, so it is already open,
            // hence #acquire() already invoked on underlying NexusIndexMultiReader
            final List<IndexSearcher> acquiredSearchers =
                indexSearcher.getNexusIndexMultiReader().getAcquiredSearchers();
            for ( int i = 0; i < contexts.size(); i++ )
            {
                starts[i] = maxDoc;
                maxDoc += acquiredSearchers.get( i ).getIndexReader().maxDoc(); // compute maxDocs
            }
            starts[contexts.size()] = maxDoc;
        }

        this.filter = request.getArtifactInfoFilter();

        this.postprocessor = request.getArtifactInfoPostprocessor();

        // pre-rewrite the highlight queries against this searcher's reader and keep the rewritten
        // instances, since highlighting is done against the rewritten form
        List<MatchHighlightRequest> matchHighlightRequests = new ArrayList<MatchHighlightRequest>();
        for ( MatchHighlightRequest hr : request.getMatchHighlightRequests() )
        {
            Query rewrittenQuery = hr.getQuery().rewrite( indexSearcher.getIndexReader() );
            matchHighlightRequests.add( new MatchHighlightRequest( hr.getField(), rewrittenQuery,
                                                                   hr.getHighlightMode() ) );
        }
        this.matchHighlightRequests = matchHighlightRequests;

        this.hits = hits;

        this.from = request.getStart();

        this.count = ( request.getCount() == AbstractSearchRequest.UNDEFINED ? hits.scoreDocs.length
                        : Math.min( request.getCount(), hits.scoreDocs.length ) );
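        // the iterator walks the half-open window [from, from + count) of hits.scoreDocs;
        // maxRecPointer below marks the end of that window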
        this.pointer = from;

        this.processedArtifactInfoCount = 0;

        this.maxRecPointer = from + count;

        ai = createNextAi();

        if ( ai == null )
        {
            cleanUp();
        }
    }

    public boolean hasNext()
    {
        return ai != null;
    }

    public ArtifactInfo next()
    {
        ArtifactInfo result = ai;

        try
        {
            ai = createNextAi();
        }
        catch ( IOException e )
        {
            ai = null;

            throw new IllegalStateException( "Cannot fetch next ArtifactInfo!", e );
        }
        finally
        {
            if ( ai == null )
            {
                cleanUp();
            }
        }

        return result;
    }

    public void remove()
    {
        throw new UnsupportedOperationException( "Method not supported on " + getClass().getName() );
    }

    public Iterator<ArtifactInfo> iterator()
    {
        return this;
    }

    public void close()
    {
        cleanUp();
    }

    public int getTotalProcessedArtifactInfoCount()
    {
        return processedArtifactInfoCount;
    }

    @Override
    public void finalize()
        throws Throwable
    {
        super.finalize();

        if ( !cleanedUp )
        {
            System.err.println( "#WARNING: Lock leaking from " + getClass().getName() + " for query "
                + searchRequest.getQuery().toString() );

            cleanUp();
        }
    }

    // ==

    protected ArtifactInfo createNextAi()
        throws IOException
    {
        ArtifactInfo result = null;

        // we should stop if:
        // a) we found what we want
        // b) the pointer advanced over more documents than the user requested
        // c) the pointer advanced over more documents than hits has
        while ( ( result == null ) && ( pointer < maxRecPointer ) && ( pointer < hits.scoreDocs.length ) )
        {
            Document doc = indexSearcher.doc( hits.scoreDocs[pointer].doc );

            IndexingContext context = getIndexingContextForPointer( doc, hits.scoreDocs[pointer].doc );

            result = IndexUtils.constructArtifactInfo( doc, context );

            if ( result != null )
            {
                // WARNING: NOT FOR PRODUCTION SYSTEMS, THIS IS VERY COSTLY OPERATION
                // For debugging only!!!
                if ( searchRequest.isLuceneExplain() )
                {
                    result.getAttributes().put( Explanation.class.getName(),
                        indexSearcher.explain( searchRequest.getQuery(), hits.scoreDocs[pointer].doc ).toString() );
                }

                result.setLuceneScore( hits.scoreDocs[pointer].score );

                result.setRepository( context.getRepositoryId() );

                result.setContext( context.getId() );

                if ( filter != null )
                {
                    if ( !filter.accepts( context, result ) )
                    {
                        result = null;
                    }
                }

                if ( result != null && postprocessor != null )
                {
                    postprocessor.postprocess( context, result );
                }

                if ( result != null && matchHighlightRequests.size() > 0 )
                {
                    calculateHighlights( context, doc, result );
                }
            }

            pointer++;
            processedArtifactInfoCount++;
        }

        return result;
    }

    private volatile boolean cleanedUp = false;

    protected synchronized void cleanUp()
    {
        if ( cleanedUp )
        {
            return;
        }

        try
        {
            indexSearcher.release();
        }
        catch ( IOException e )
        {
            throw new IllegalStateException( e );
        }

        this.cleanedUp = true;
    }
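    // == match highlighting
    // the flow below: calculateHighlights() picks a stored field per request via selectStoredIndexerField(),
    // then highlightField() analyzes the stored text and getBestFragments() extracts the scored fragments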
    /**
     * Creates the MatchHighlights and adds them to the ArtifactInfo, if any could be calculated.
     *
     * @param context the indexing context the hit came from
     * @param d the Lucene document of the hit
     * @param ai the ArtifactInfo to attach the highlights to
     */
    protected void calculateHighlights( IndexingContext context, Document d, ArtifactInfo ai )
        throws IOException
    {
        IndexerField field = null;

        String text = null;

        List<String> highlightFragment = null;

        for ( MatchHighlightRequest hr : matchHighlightRequests )
        {
            field = selectStoredIndexerField( hr.getField() );

            if ( field != null )
            {
                text = ai.getFieldValue( field.getOntology() );

                if ( text != null )
                {
                    highlightFragment = highlightField( context, hr, field, text );

                    if ( highlightFragment != null && highlightFragment.size() > 0 )
                    {
                        MatchHighlight matchHighlight = new MatchHighlight( hr.getField(), highlightFragment );

                        ai.getMatchHighlights().add( matchHighlight );
                    }
                }
            }
        }
    }

    /**
     * Selects a STORED IndexerField assigned to the passed-in Field.
     *
     * @param field the field to select a stored IndexerField for
     * @return the stored IndexerField, or null if the field has none
     */
    protected IndexerField selectStoredIndexerField( Field field )
    {
        // hack here
        if ( MAVEN.CLASSNAMES.equals( field ) )
        {
            return JarFileContentsIndexCreator.FLD_CLASSNAMES;
        }
        else
        {
            return field.getIndexerFields().isEmpty() ? null : field.getIndexerFields().iterator().next();
        }
    }

    /**
     * Returns the fragments that contain the match, highlighted in the style the user requested.
     *
     * @param context the indexing context whose analyzer tokenizes the text
     * @param hr the highlight request carrying the query and highlight mode
     * @param field the stored IndexerField the text came from
     * @param text the stored field value to highlight
     * @return the highlighted fragments
     * @throws IOException
     */
    protected List<String> highlightField( IndexingContext context, MatchHighlightRequest hr, IndexerField field,
                                           String text )
        throws IOException
    {
        // exception with classnames
        if ( MAVEN.CLASSNAMES.equals( field.getOntology() ) )
        {
            text = text.replace( '/', '.' ).replaceAll( "^\\.", "" ).replaceAll( "\n\\.", "\n" );
        }

        Analyzer analyzer = context.getAnalyzer();
        TokenStream baseTokenStream = analyzer.tokenStream( field.getKey(), new StringReader( text ) );
        CachingTokenFilter tokenStream = new CachingTokenFilter( baseTokenStream );

        Formatter formatter = null;

        if ( MatchHighlightMode.HTML.equals( hr.getHighlightMode() ) )
        {
            formatter = new SimpleHTMLFormatter();
        }
        else
        {
            tokenStream.reset();
            tokenStream.end();
            tokenStream.close();

            throw new UnsupportedOperationException( "Highlight mode \"" + hr.getHighlightMode().toString()
                + "\" is not supported!" );
        }

        List<String> bestFragments = getBestFragments( hr.getQuery(), formatter, tokenStream, text, 3 );

        return bestFragments;
    }

    protected final List<String> getBestFragments( Query query, Formatter formatter, TokenStream tokenStream,
                                                   String text, int maxNumFragments )
        throws IOException
    {
        Highlighter highlighter = new Highlighter( formatter, new CleaningEncoder(), new QueryScorer( query ) );

        highlighter.setTextFragmenter( new OneLineFragmenter() );

        maxNumFragments = Math.max( 1, maxNumFragments ); // sanity check

        TextFragment[] frag;
        // Get text
        ArrayList<String> fragTexts = new ArrayList<String>( maxNumFragments );

        try
        {
            frag = highlighter.getBestTextFragments( tokenStream, text, false, maxNumFragments );

            for ( int i = 0; i < frag.length; i++ )
            {
                if ( ( frag[i] != null ) && ( frag[i].getScore() > 0 ) )
                {
                    fragTexts.add( frag[i].toString() );
                }
            }
        }
        catch ( InvalidTokenOffsetsException e )
        {
            // ignored: the field simply yields no highlight fragments
        }

        return fragTexts;
    }
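    // maps a global document number back to the IndexingContext it came from, using essentially the same binary
    // search over the starts[] array that Lucene's ReaderUtil.subIndex() uses to route a doc id to its sub-reader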
    protected IndexingContext getIndexingContextForPointer( Document doc, int docPtr )
    {
        return contexts.get( readerIndex( docPtr, this.starts, this.contexts.size() ) );
    }

    private static int readerIndex( int n, int[] starts, int numSubReaders )
    {
        // find reader for doc n:
        int lo = 0; // search starts array
        int hi = numSubReaders - 1; // for first element less

        while ( hi >= lo )
        {
            int mid = ( lo + hi ) >>> 1;
            int midValue = starts[mid];
            if ( n < midValue )
            {
                hi = mid - 1;
            }
            else if ( n > midValue )
            {
                lo = mid + 1;
            }
            else
            {
                // found a match
                while ( mid + 1 < numSubReaders && starts[mid + 1] == midValue )
                {
                    mid++; // scan to last match
                }
                return mid;
            }
        }
        return hi;
    }
}