/*
* ModeShape (http://www.modeshape.org)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.modeshape.jcr.index.lucene;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.BitSet;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import javax.jcr.query.qom.Constraint;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CollectionTerminatedException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LRUQueryCache;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryCache;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.util.Bits;
import org.modeshape.common.annotation.Immutable;
import org.modeshape.common.annotation.ThreadSafe;
import org.modeshape.common.logging.Logger;
import org.modeshape.common.util.NamedThreadFactory;
import org.modeshape.jcr.cache.NodeKey;
import org.modeshape.jcr.index.lucene.query.LuceneQueryFactory;
import org.modeshape.jcr.spi.index.IndexConstraints;
import org.modeshape.jcr.spi.index.provider.Filter;
/**
* Class which handles the actual Lucene searching for the {@link LuceneIndexProvider}
*
* @author Horia Chiorean (hchiorea@redhat.com)
* @since 4.5
*/
@Immutable
@ThreadSafe
public class Searcher {
// the implicit score that will be used when no explicit scoring is requested
protected static final float DEFAULT_SCORE = 1.0f;
private static final Logger LOGGER = Logger.getLogger(Searcher.class);
private static final int MAX_QUERIES_TO_CACHE = 200;
private static final long MAX_RAM_BYTES_TO_USE = 50 * 1024L * 1024L;
private static final Set<String> ID_FIELD_SET = Collections.singleton(FieldUtil.ID);
private final SearcherManager searchManager;
private final ScheduledExecutorService searchManagerRefreshService;
private final ScheduledFuture<?> searchManagerRefreshResult;
private final QueryCache queryCache;
protected Searcher( LuceneConfig config, IndexWriter writer, String name ) {
this.searchManager = config.searchManager(writer);
this.queryCache = new LRUQueryCache(MAX_QUERIES_TO_CACHE, MAX_RAM_BYTES_TO_USE);
this.searchManagerRefreshService = Executors.newScheduledThreadPool(1, new NamedThreadFactory(
name + "-lucene-search-manager-refresher"));
this.searchManagerRefreshResult = this.searchManagerRefreshService.scheduleWithFixedDelay(this::refreshSearchManager,
0,
config.refreshTimeSeconds(),
TimeUnit.SECONDS);
}
/**
 * Stops the background refresh task and closes the search manager. A failure to close the search
 * manager is logged as a warning and not propagated.
 */
protected void close() {
    // stop refreshing first: cancel the pending run (without interrupting a running one), then stop the pool
    searchManagerRefreshResult.cancel(false);
    searchManagerRefreshService.shutdown();
    try {
        searchManager.close();
    } catch (IOException closeFailure) {
        LOGGER.warn(closeFailure, LuceneIndexProviderI18n.warnErrorWhileClosingSearcher);
    }
}
/**
 * Builds the Lucene query for the given index constraints and wraps it in a lazily evaluated result set.
 * The query itself is not executed here; it runs when the returned results are first iterated.
 *
 * @param indexConstraints the constraints whose ANDed criteria are converted into a query
 * @param queryFactory the factory which converts constraints into Lucene queries
 * @param cardinalityEstimate the number of results the returned object reports and batches against
 * @return the results wrapper
 */
protected Filter.Results filter( IndexConstraints indexConstraints,
                                 LuceneQueryFactory queryFactory,
                                 long cardinalityEstimate ) {
    final Query constraintsQuery = createQueryFromConstraints(indexConstraints.getConstraints(), queryFactory);
    final boolean scored = queryFactory.scoreDocuments();
    return new LuceneResults(constraintsQuery, scored, cardinalityEstimate);
}
/**
 * Counts the documents which match all of the given constraints, after refreshing the reader.
 *
 * @param andedConstraints the constraints which are ANDed together
 * @param queryFactory the factory which converts constraints into Lucene queries
 * @return the number of matching documents
 * @throws IOException declared for callers; I/O failures raised while searching are rethrown by
 *         {@link #search(Searchable, boolean)} as {@link LuceneIndexException}
 */
protected long estimateCardinality( final List<Constraint> andedConstraints, final LuceneQueryFactory queryFactory ) throws IOException {
    final Searchable<Long> matchCounter = searcher -> {
        final int matches = searcher.count(createQueryFromConstraints(andedConstraints, queryFactory));
        return (long)matches;
    };
    return search(matchCounter, true);
}
/**
 * Loads the first document matching the given ID, after refreshing the reader. This is a potentially
 * costly operation.
 *
 * @param id the ID to look up
 * @return the first matching document, or {@code null} if nothing was collected
 * @throws IOException declared for callers; I/O failures raised while searching are rethrown by
 *         {@link #search(Searchable, boolean)} as {@link LuceneIndexException}
 */
protected Document loadDocumentById( final String id ) throws IOException {
    final Searchable<Document> lookup = searcher -> {
        final DocumentByIdCollector firstMatch = new DocumentByIdCollector();
        searcher.search(FieldUtil.idQuery(id), firstMatch);
        return firstMatch.document();
    };
    return search(lookup, true);
}
/**
 * Converts a collection of ANDed constraints into a single Lucene query.
 *
 * @param andedConstraints the constraints to convert
 * @param queryFactory the factory which converts an individual constraint into a query
 * @return a match-all query for no constraints, the constraint's own query for a single constraint,
 *         otherwise a boolean query in which every constraint is a MUST clause
 */
private Query createQueryFromConstraints( Collection<Constraint> andedConstraints, LuceneQueryFactory queryFactory ) {
    switch (andedConstraints.size()) {
        case 0:
            // no constraints, but this index was still asked to filter: return everything
            return new MatchAllDocsQuery();
        case 1:
            return queryFactory.createQuery(andedConstraints.iterator().next());
        default:
            final BooleanQuery.Builder conjunction = new BooleanQuery.Builder();
            conjunction.setDisableCoord(true);
            for (Constraint each : andedConstraints) {
                conjunction.add(queryFactory.createQuery(each), BooleanClause.Occur.MUST);
            }
            return conjunction.build();
    }
}
/**
 * Asks the search manager to refresh, blocking if needed. An interrupted refresh restores the thread's
 * interrupt flag; any other I/O failure is logged as a warning and not propagated.
 */
protected void refreshSearchManager() {
    try {
        searchManager.maybeRefreshBlocking();
    } catch (IOException refreshFailure) {
        if (refreshFailure instanceof InterruptedIOException) {
            // interrupted while refreshing: preserve the interrupt status and do not log
            Thread.currentThread().interrupt();
            return;
        }
        // NOTE(review): this reuses the "error while closing searcher" message for a refresh failure - confirm
        // whether a dedicated i18n message exists
        LOGGER.warn(refreshFailure, LuceneIndexProviderI18n.warnErrorWhileClosingSearcher);
    }
}
/**
 * Runs the given search operation against a searcher acquired from the search manager, releasing the
 * searcher afterwards.
 *
 * @param searchable the operation to run
 * @param refreshReader {@code true} if the search manager should be refreshed before acquiring a searcher
 * @param <T> the type of the result produced by the operation
 * @return whatever the operation returns
 * @throws LuceneIndexException if acquiring the searcher or running the operation fails with an {@link IOException}
 */
protected <T> T search( Searchable<T> searchable, boolean refreshReader ) {
    if (refreshReader) {
        refreshSearchManager();
    }
    IndexSearcher acquired = null;
    try {
        acquired = searchManager.acquire();
        acquired.setQueryCache(queryCache);
        return searchable.search(acquired);
    } catch (IOException e) {
        throw new LuceneIndexException(e);
    } finally {
        releaseQuietly(acquired);
    }
}

/**
 * Hands a searcher back to the search manager; a failure to do so is only logged at debug level.
 *
 * @param searcher the searcher to release; may be {@code null} if none was acquired
 */
private void releaseQuietly( IndexSearcher searcher ) {
    if (searcher == null) {
        return;
    }
    try {
        searchManager.release(searcher);
    } catch (IOException e) {
        LOGGER.debug(e, "Cannot release Lucene searcher");
    }
}
/**
 * A {@link Filter.Results} implementation which runs its Lucene query lazily, the first time the keys or
 * scores of a batch are iterated, and then hands out the collected results batch by batch.
 * <p>
 * The key and score iterators are shared by all the batches of one results instance, so each batch
 * continues where the previous one stopped. Instances keep mutable state and perform no synchronization.
 * </p>
 */
private class LuceneResults implements Filter.Results {
    private final boolean scoreDocuments;
    // the total number of results this instance reports; the batch boundaries are computed against it
    private final long size;
    private Query query;
    private Iterator<NodeKey> keysIterator;
    private Iterator<Float> scoresIterator;
    private int currentBatch;

    /**
     * @param query the query to run when results are first requested
     * @param scoreDocuments {@code true} if the real Lucene scores should be collected
     * @param size the total number of results to report and to batch against
     */
    protected LuceneResults( Query query, boolean scoreDocuments, long size ) {
        this.scoreDocuments = scoreDocuments;
        this.query = query;
        this.currentBatch = 0;
        this.size = size;
    }

    /**
     * Returns the next batch of at most {@code batchSize} results.
     *
     * @param batchSize the maximum number of results in the batch
     * @return the batch; its keys and scores are read from the shared iterators when iterated
     */
    @Override
    public Filter.ResultBatch getNextBatch( final int batchSize ) {
        // use long arithmetic: the positions can exceed the int range for large batch sizes, and clamp the
        // start to the total size so that a batch requested past the end is empty rather than negative
        final long startPosition = Math.min(size, (long)currentBatch++ * batchSize);
        final long endPosition = Math.min(size, startPosition + batchSize);
        final boolean hasNextBatch = endPosition != size;
        final int batchLength = (int)(endPosition - startPosition);

        return new Filter.ResultBatch() {
            private int keysCount = 0;
            private int scoresCount = 0;

            @Override
            public Iterable<NodeKey> keys() {
                return () -> new Iterator<NodeKey>() {
                    @Override
                    public boolean hasNext() {
                        if (keysCount >= batchLength) {
                            return false;
                        }
                        if (keysIterator == null) {
                            runQuery();
                        }
                        return keysIterator.hasNext();
                    }

                    @Override
                    public NodeKey next() {
                        if (keysCount >= batchLength) {
                            throw new NoSuchElementException();
                        }
                        if (keysIterator == null) {
                            runQuery();
                        }
                        // count the element only once it was really handed out, so a failed call cannot
                        // push the counter past the batch limit
                        NodeKey key = keysIterator.next();
                        keysCount++;
                        return key;
                    }
                };
            }

            @Override
            public Iterable<Float> scores() {
                return () -> new Iterator<Float>() {
                    @Override
                    public boolean hasNext() {
                        if (scoresCount >= batchLength) {
                            return false;
                        }
                        if (scoresIterator == null) {
                            runQuery();
                        }
                        return scoresIterator.hasNext();
                    }

                    @Override
                    public Float next() {
                        if (scoresCount >= batchLength) {
                            throw new NoSuchElementException();
                        }
                        if (scoresIterator == null) {
                            runQuery();
                        }
                        Float score = scoresIterator.next();
                        scoresCount++;
                        return score;
                    }
                };
            }

            @Override
            public boolean hasNext() {
                return hasNextBatch;
            }

            @Override
            public int size() {
                return batchLength;
            }

            // runs the query once and exposes its results through the shared iterators
            private void runQuery() {
                if (keysIterator == null && scoresIterator == null) {
                    Map<NodeKey, Float> results = search(searcher -> getSearchResults(searcher), true);
                    keysIterator = results.keySet().iterator();
                    scoresIterator = results.values().iterator();
                }
            }
        };
    }

    /**
     * Executes the query and loads the node key of every matching document.
     *
     * @param searcher the searcher to run the query against
     * @return the matching node keys mapped to their scores, in ascending document ID order
     * @throws IOException if searching or loading a document fails; the caller in this class runs this
     *         method through {@code search}, which rethrows it as a {@link LuceneIndexException}
     */
    private Map<NodeKey, Float> getSearchResults( IndexSearcher searcher ) throws IOException {
        IdsCollector collector = new IdsCollector(scoreDocuments, searcher.getIndexReader().maxDoc());
        searcher.search(query, collector);
        BitSet docIds = collector.documents();
        Map<NodeKey, Float> results = new LinkedHashMap<>();
        for (int i = docIds.nextSetBit(0); i >= 0; i = docIds.nextSetBit(i + 1)) {
            // this is a valid document which we have to load; only its ID field is needed
            Document document = searcher.doc(i, ID_FIELD_SET);
            String id = document.getBinaryValue(FieldUtil.ID).utf8ToString();
            Float score = collector.scoreFor(i);
            results.put(new NodeKey(id), score);
        }
        return results;
    }

    /**
     * Drops the references to the query and to any collected results.
     */
    @Override
    public void close() {
        keysIterator = null;
        scoresIterator = null;
        query = null;
    }

    @Override
    public String toString() {
        // the query is null once close() was called, so don't dereference it
        final StringBuilder sb = new StringBuilder(String.valueOf(query));
        sb.append("=").append("[").append(size).append(" keys]");
        return sb.toString();
    }
}
/**
 * Collector which records the index-wide ID of every live matching document and, optionally, its score.
 */
private static class IdsCollector extends SimpleCollector {
    // indexed by index-wide document ID; null when scores are not collected
    private final float[] scoresByDocId;
    private BitSet matchedDocIds;
    private Scorer activeScorer;
    private int segmentDocBase;
    private Bits segmentLiveDocs;

    /**
     * @param scoring {@code true} if the score of each matching document should be recorded
     * @param maxDoc the size used for the score array and the initial size of the hits bit set
     */
    protected IdsCollector( boolean scoring, int maxDoc ) {
        if (scoring) {
            this.scoresByDocId = new float[maxDoc];
        } else {
            this.scoresByDocId = null;
        }
        this.matchedDocIds = new BitSet(maxDoc);
    }

    @Override
    protected void doSetNextReader( LeafReaderContext context ) throws IOException {
        // remember what is needed to translate the per-reader doc numbers passed to collect()
        this.segmentDocBase = context.docBase;
        this.segmentLiveDocs = context.reader().getLiveDocs();
    }

    @Override
    public void setScorer( Scorer scorer ) throws IOException {
        if (!isScoring()) {
            return;
        }
        this.activeScorer = scorer;
    }

    @Override
    public void collect( int doc ) throws IOException {
        // a null live-docs set is treated as "every document is live"
        final boolean live = segmentLiveDocs == null || segmentLiveDocs.get(doc);
        if (live) {
            final int indexWideId = segmentDocBase + doc;
            if (isScoring()) {
                scoresByDocId[indexWideId] = activeScorer.score();
            }
            matchedDocIds.set(indexWideId);
        }
        // otherwise 'doc' has been deleted, so ignore it
    }

    @Override
    public boolean needsScores() {
        return isScoring();
    }

    /**
     * @return the bit set in which the bit of every collected document ID is set
     */
    protected BitSet documents() {
        return matchedDocIds;
    }

    /**
     * @param docId an index-wide document ID
     * @return the recorded score of the document, or the default score when scores are not collected
     */
    protected Float scoreFor( int docId ) {
        if (isScoring()) {
            return scoresByDocId[docId];
        }
        return DEFAULT_SCORE;
    }

    private boolean isScoring() {
        return scoresByDocId != null;
    }
}
/**
 * An operation which is run against an {@link IndexSearcher} and produces a result. Within this file,
 * operations are executed by the enclosing class' {@code search} method, which supplies the searcher and
 * rethrows any {@link IOException} as a {@code LuceneIndexException}.
 *
 * @param <T> the type of the result
 */
@FunctionalInterface
protected interface Searchable<T> {
/**
 * Performs the operation.
 *
 * @param searcher the searcher to use
 * @return the result of the operation
 * @throws IOException if the operation fails while accessing the index
 */
T search(IndexSearcher searcher) throws IOException;
}
/**
 * Collector which loads the first document it is given and terminates the collection on anything after it.
 */
private static class DocumentByIdCollector extends SimpleCollector {
    private LeafReader activeReader;
    private Document firstMatch;

    @Override
    public void collect( int doc ) throws IOException {
        if (firstMatch != null) {
            // a document was already loaded, nothing else is of interest
            throw new CollectionTerminatedException();
        }
        firstMatch = activeReader.document(doc);
    }

    @Override
    protected void doSetNextReader( LeafReaderContext context ) throws IOException {
        final boolean alreadyFound = firstMatch != null;
        if (alreadyFound) {
            // we already found our document, so terminate
            throw new CollectionTerminatedException();
        }
        activeReader = context.reader();
    }

    @Override
    public boolean needsScores() {
        return false;
    }

    /**
     * @return the loaded document, or {@code null} if none was collected
     */
    protected Document document() {
        return firstMatch;
    }
}
}