/**
 * Copyright 2008 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package net.sf.katta.lib.lucene;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

import net.sf.katta.node.IContentServer;
import net.sf.katta.util.ClassUtil;
import net.sf.katta.util.NodeConfiguration;
import net.sf.katta.util.WritableType;

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.MapFieldSelector;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TimeLimitingCollector;
import org.apache.lucene.search.TimeLimitingCollector.TimeExceededException;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.PriorityQueue;

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;

/**
 * The back end server which searches a set of Lucene indices. Each shard is a
 * Lucene index directory.
 * <p>
 * Normal usage is to first call getDocFreqs() to get the global term
 * frequencies, then pass that back in to search(). This way you get uniform
 * scoring across all the nodes / instances of LuceneServer.
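 * <p>
 * A minimal client-side sketch of that flow (hypothetical; it assumes a proxy
 * named <code>server</code> implementing ILuceneServer, a field named
 * <code>content</code>, and already-deployed shard names):
 *
 * <pre>
 * String[] shards = { "shardA", "shardB" };
 * QueryWritable query = new QueryWritable(new TermQuery(new Term("content", "katta")));
 * // 1. collect the global document frequencies for uniform scoring
 * DocumentFrequencyWritable freqs = server.getDocFreqs(query, shards);
 * // 2. search with the merged frequencies, a 10s timeout and at most 20 hits
 * HitsMapWritable hits = server.search(query, freqs, shards, 10000, 20);
 * </pre>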
 */
public class LuceneServer implements IContentServer, ILuceneServer {

  private final static Logger LOG = Logger.getLogger(LuceneServer.class);

  public final static String CONF_KEY_SEARCHER_FACTORY_CLASS = "lucene.searcher.factory-class";
  public final static String CONF_KEY_COLLECTOR_TIMOUT_PERCENTAGE = "lucene.collector.timeout-percentage";
  public final static String CONF_KEY_COLLECTOR_TRACK_DOC_SCORES_ON_FIELD_SORT = "lucene.collector.track-doc-scores-on-field-sort";
  public final static String CONF_KEY_SEARCHER_THREADPOOL_CORESIZE = "lucene.searcher.threadpool.core-size";
  public final static String CONF_KEY_SEARCHER_THREADPOOL_MAXSIZE = "lucene.searcher.threadpool.max-size";
  public final static String CONF_KEY_FILTER_CACHE_ENABLED = "lucene.filter.cache.enabled";

  private static final int INDEX_HANDLE_CLOSE_SLEEP_TIME = 500;

  protected final Map<String, SearcherHandle> _searcherHandlesByShard = new ConcurrentHashMap<String, SearcherHandle>();
  protected Cache<Filter, CachingWrapperFilter> _filterCache;
  protected ExecutorService _threadPool;

  private TimeLimitingCollector.TimerThread _searchTimerThread;
  private Counter _searchTimerCounter;

  protected String _nodeName;
  private float _timeoutPercentage = 0.75f;
  private boolean _trackDocScoresOnSort;
  private ISeacherFactory _seacherFactory;

  public LuceneServer() {
    // default way of initializing an IContentServer
  }

  /**
   * Constructor for testing purposes; {@link #init(String, NodeConfiguration)}
   * need not be called.
   *
   * @param name
   * @param seacherFactory
   * @param timeoutPercentage
   */
  public LuceneServer(String name, ISeacherFactory seacherFactory, float timeoutPercentage) {
    Properties properties = new Properties();
    init(name, new NodeConfiguration(properties));
    _seacherFactory = seacherFactory;
    _timeoutPercentage = timeoutPercentage;
  }

  @Override
  public long getProtocolVersion(final String protocol, final long clientVersion) throws IOException {
    return 0L;
  }

  @Override
  public void init(String nodeName, NodeConfiguration nodeConfiguration) {
    _nodeName = nodeName;
    _seacherFactory = (ISeacherFactory) ClassUtil.newInstance(nodeConfiguration.getClass(
            CONF_KEY_SEARCHER_FACTORY_CLASS, DefaultSearcherFactory.class));
    _timeoutPercentage = nodeConfiguration.getFloat(CONF_KEY_COLLECTOR_TIMOUT_PERCENTAGE, _timeoutPercentage);
    _trackDocScoresOnSort = nodeConfiguration.getBoolean(CONF_KEY_COLLECTOR_TRACK_DOC_SCORES_ON_FIELD_SORT, false);
    if (_timeoutPercentage < 0 || _timeoutPercentage > 1) {
      throw new IllegalArgumentException("illegal value '" + _timeoutPercentage + "' for "
              + CONF_KEY_COLLECTOR_TIMOUT_PERCENTAGE + ". Only values between 0 and 1 are allowed.");
    }
    int coreSize = nodeConfiguration.getInt(CONF_KEY_SEARCHER_THREADPOOL_CORESIZE, 25);
    int maxSize = nodeConfiguration.getInt(CONF_KEY_SEARCHER_THREADPOOL_MAXSIZE, 100);
    boolean filterCacheEnabled = nodeConfiguration.getBoolean(CONF_KEY_FILTER_CACHE_ENABLED, true);
    _threadPool = new ThreadPoolExecutor(coreSize, maxSize, 100L, TimeUnit.MINUTES,
            new LinkedBlockingQueue<Runnable>());
    if (filterCacheEnabled) {
      _filterCache = CacheBuilder.newBuilder().expireAfterAccess(10, TimeUnit.MINUTES).maximumSize(1000).build();
    }
    _searchTimerCounter = Counter.newCounter(true);
    _searchTimerThread = new TimeLimitingCollector.TimerThread(_searchTimerCounter);
    _searchTimerThread.start();
  }
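
  // Example node configuration sketch (hypothetical values, matching the
  // defaults used in init() above); the keys are the string values of the
  // CONF_KEY_* constants:
  //
  //   lucene.searcher.factory-class=net.sf.katta.lib.lucene.DefaultSearcherFactory
  //   lucene.collector.timeout-percentage=0.75
  //   lucene.collector.track-doc-scores-on-field-sort=false
  //   lucene.searcher.threadpool.core-size=25
  //   lucene.searcher.threadpool.max-size=100
  //   lucene.filter.cache.enabled=true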

  public String getNodeName() {
    return _nodeName;
  }

  public float getTimeoutPercentage() {
    return _timeoutPercentage;
  }

  public long getCollectorTiemout(long clientTimeout) {
    return (long) (_timeoutPercentage * clientTimeout);
  }

  /**
   * Adds a shard index searcher for the given name to the list of shards this
   * server searches in.
   *
   * @param shardName
   * @param shardDir
   * @throws IOException
   */
  @Override
  public void addShard(final String shardName, final File shardDir) throws IOException {
    LOG.info("LuceneServer " + _nodeName + " got shard " + shardName);
    try {
      IndexSearcher indexSearcher = _seacherFactory.createSearcher(shardName, shardDir);
      _searcherHandlesByShard.put(shardName, new SearcherHandle(indexSearcher));
    } catch (CorruptIndexException e) {
      LOG.error("Error building index for shard " + shardName, e);
      throw e;
    }
  }

  /**
   * Removes the searcher with the given shardName from the list of searchers.
   */
  @Override
  public void removeShard(final String shardName) {
    LOG.info("LuceneServer " + _nodeName + " removing shard " + shardName);
    SearcherHandle handle = _searcherHandlesByShard.remove(shardName);
    if (handle == null) {
      return; // nothing to do.
    }
    try {
      handle.closeSearcher();
    } catch (Exception e) {
      LOG.error("LuceneServer " + _nodeName + " error removing shard " + shardName, e);
    }
  }

  @Override
  public Collection<String> getShards() {
    return Collections.unmodifiableCollection(_searcherHandlesByShard.keySet());
  }

  /**
   * Returns the number of documents a shard has.
   *
   * @param shardName
   * @return the number of documents in the shard.
   */
  protected int shardSize(String shardName) {
    final SearcherHandle handle = getSearcherHandleByShard(shardName);
    IndexSearcher searcher = handle.getSearcher();
    try {
      if (searcher != null) {
        int size = searcher.getIndexReader().numDocs();
        if (LOG.isDebugEnabled()) {
          LOG.debug("Shard '" + shardName + "' has " + size + " docs.");
        }
        return size;
      }
      throw new IllegalArgumentException("Shard '" + shardName + "' unknown");
    } finally {
      handle.finishSearcher();
    }
  }

  /**
   * Returns data about a shard. Currently the only standard key is
   * SHARD_SIZE_KEY. This value will be reported by the listIndexes command.
   * The units depend on the type of server. It is OK to return an empty map or
   * null.
   *
   * @param shardName
   *          The name of the shard to measure. This was the name provided in
   *          addShard().
   * @return a map of key/value pairs which describe the shard.
   * @throws Exception
   */
  @Override
  public Map<String, String> getShardMetaData(String shardName) throws Exception {
    Map<String, String> metaData = new HashMap<String, String>();
    metaData.put(SHARD_SIZE_KEY, Integer.toString(shardSize(shardName)));
    return metaData;
  }

  /**
   * Close all Lucene indices. No further calls will be made after this one.
   */
  @Override
  public void shutdown() throws IOException {
    for (final SearcherHandle handle : _searcherHandlesByShard.values()) {
      handle.closeSearcher();
    }
    _searcherHandlesByShard.clear();
    _searchTimerThread.stopTimer();
  }

  /**
   * Returns the <code>SearcherHandle</code> of the given shardName.
   *
   * @param shardName
   *          the name of the shard
   * @return the <code>SearcherHandle</code> of the given shardName
   */
  protected SearcherHandle getSearcherHandleByShard(String shardName) {
    SearcherHandle handle = _searcherHandlesByShard.get(shardName);
    if (handle == null) {
      throw new IllegalStateException("no index-server for shard '" + shardName + "' found - probably undeployed");
    }
    return handle;
  }

  @Override
  public HitsMapWritable search(QueryWritable query, DocumentFrequencyWritable freqs, String[] shardNames,
          long timeout) throws IOException {
    return search(query, freqs, shardNames, timeout, Integer.MAX_VALUE);
  }

  @Override
  public HitsMapWritable search(final QueryWritable query, final DocumentFrequencyWritable freqs,
          final String[] shards, final long timeout, final int count) throws IOException {
    return search(query, freqs, shards, timeout, count, null, null);
  }

  @Override
  public HitsMapWritable search(QueryWritable query, DocumentFrequencyWritable freqs, String[] shards,
          final long timeout, int count, SortWritable sortWritable) throws IOException {
    return search(query, freqs, shards, timeout, count, sortWritable, null);
  }

  @Override
  public HitsMapWritable search(QueryWritable query, DocumentFrequencyWritable freqs, String[] shards,
          final long timeout, int count, FilterWritable filterWritable) throws IOException {
    return search(query, freqs, shards, timeout, count, null, filterWritable);
  }

  @Override
  public HitsMapWritable search(QueryWritable query, DocumentFrequencyWritable freqs, String[] shards,
          final long timeout, int count, SortWritable sortWritable, FilterWritable filterWritable) throws IOException {
    if (LOG.isDebugEnabled()) {
      LOG.debug("You are searching with the query: '" + query.getQuery() + "'");
    }

    Query luceneQuery = query.getQuery();
    if (LOG.isDebugEnabled()) {
      LOG.debug("Lucene query: " + luceneQuery.toString());
    }

    long completeSearchTime = 0;
    final HitsMapWritable result = new net.sf.katta.lib.lucene.HitsMapWritable(getNodeName());
    long start = 0;
    if (LOG.isDebugEnabled()) {
      start = System.currentTimeMillis();
    }
    Sort sort = null;
    if (sortWritable != null) {
      sort = sortWritable.getSort();
    }
    Filter filter = null;
    if (filterWritable != null) {
      filter = filterWritable.getFilter();
    }
    if (_filterCache != null && filter != null) {
      CachingWrapperFilter cachedFilter = _filterCache.getIfPresent(filter);
      if (cachedFilter == null) {
        cachedFilter = new CachingWrapperFilter(filter);
        _filterCache.put(filter, cachedFilter);
      }
      filter = cachedFilter;
    }
    search(luceneQuery, freqs, shards, result, count, sort, timeout, filter);
    if (LOG.isDebugEnabled()) {
      final long end = System.currentTimeMillis();
      LOG.debug("Search took " + (end - start) / 1000.0 + "sec.");
      completeSearchTime += (end - start);
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Complete search took " + completeSearchTime / 1000.0 + "sec.");
      final DataOutputBuffer buffer = new DataOutputBuffer();
      result.write(buffer);
      LOG.debug("Result size to transfer: " + buffer.getLength());
    }
    return result;
  }

  @Override
  public DocumentFrequencyWritable getDocFreqs(final QueryWritable input, final String[] shards) throws IOException {
    Query luceneQuery = input.getQuery();
    final Query rewrittenQuery = rewrite(luceneQuery, shards);
    final DocumentFrequencyWritable docFreqs = new DocumentFrequencyWritable();
    final HashSet<Term> termSet = new HashSet<Term>();
    rewrittenQuery.extractTerms(termSet);
    for (final String shard : shards) {
      final Iterator<Term> termIterator = termSet.iterator();
      SearcherHandle handle = getSearcherHandleByShard(shard);
      IndexSearcher searcher = handle.getSearcher();
      if (searcher != null) {
        try {
          while (termIterator.hasNext()) {
            final Term term = termIterator.next();
            final int docFreq = searcher.docFreq(term);
            docFreqs.put(term.field(), term.text(), docFreq);
          }
        } finally {
          handle.finishSearcher();
        }
      }
      docFreqs.addNumDocs(shardSize(shard));
    }
    return docFreqs;
  }

  @Override
  public MapWritable getDetails(final String[] shards, final int docId) throws IOException {
    return getDetails(shards, docId, null);
  }

  @Override
  public MapWritable getDetails(final String[] shards, final int docId, final String[] fieldNames) throws IOException {
    final MapWritable result = new MapWritable();
    final Document doc = doc(shards[0], docId, fieldNames);
    final List<Fieldable> fields = doc.getFields();
    for (final Fieldable field : fields) {
      final String name = field.name();
      if (field.isBinary()) {
        final byte[] binaryValue = field.getBinaryValue();
        result.put(new Text(name), new BytesWritable(binaryValue));
      } else {
        final String stringValue = field.stringValue();
        result.put(new Text(name), new Text(stringValue));
      }
    }
    return result;
  }

  @Override
  public int getResultCount(final QueryWritable query, final String[] shards, long timeout) throws IOException {
    final DocumentFrequencyWritable docFreqs = getDocFreqs(query, shards);
    return search(query, docFreqs, shards, timeout, 1).getTotalHits();
  }

  @Override
  public int getResultCount(final QueryWritable query, FilterWritable filter, final String[] shards, long timeout)
          throws IOException {
    final DocumentFrequencyWritable docFreqs = getDocFreqs(query, shards);
    return search(query, docFreqs, shards, timeout, 1, null, filter).getTotalHits();
  }

  /**
   * Searches in the given shards and returns max hits for the given query.
   *
   * @param query
   * @param freqs
   * @param shards
   * @param result
   * @param max
   * @throws IOException
   */
  protected final void search(final Query query, final DocumentFrequencyWritable freqs, final String[] shards,
          final HitsMapWritable result, final int max, Sort sort, long timeout, Filter filter) throws IOException {
    timeout = getCollectorTiemout(timeout);
    final Query rewrittenQuery = rewrite(query, shards);
    final int numDocs = freqs.getNumDocsAsInteger();
    final Weight weight = rewrittenQuery.weight(new CachedDfSource(freqs.getAll(), numDocs, new DefaultSimilarity()));
    int totalHits = 0;
    final int shardsCount = shards.length;

    // Run the search in parallel on the shards with a thread pool.
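    // Scatter/gather: one SearchCall per shard is submitted to the pool, and
    // the ExecutorCompletionService hands results back in completion order;
    // the callIndex ties each result back to its shard.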
    CompletionService<SearchResult> csSearch = new ExecutorCompletionService<SearchResult>(_threadPool);
    for (int i = 0; i < shardsCount; i++) {
      SearchCall call = new SearchCall(shards[i], weight, max, sort, timeout, i, filter);
      csSearch.submit(call);
    }

    final ScoreDoc[][] scoreDocs = new ScoreDoc[shardsCount][];
    ScoreDoc scoreDocExample = null;
    for (int i = 0; i < shardsCount; i++) {
      try {
        final SearchResult searchResult = csSearch.take().get();
        final int callIndex = searchResult.getSearchCallIndex();
        totalHits += searchResult._totalHits;
        scoreDocs[callIndex] = searchResult._scoreDocs;
        if (scoreDocExample == null && scoreDocs[callIndex].length > 0) {
          scoreDocExample = scoreDocs[callIndex][0];
        }
      } catch (InterruptedException e) {
        throw new IOException("Multithread shard search interrupted:", e);
      } catch (ExecutionException e) {
        throw new IOException("Multithread shard search could not be executed:", e);
      }
    }

    result.addTotalHits(totalHits);

    final Iterable<Hit> finalHitList;
    // Limit the request to the number requested or the total number of
    // documents, whichever is smaller.
    int limit = Math.min(numDocs, max);
    if (sort == null || totalHits == 0) {
      final KattaHitQueue hq = new KattaHitQueue(limit);
      int pos = 0;
      BitSet done = new BitSet(shardsCount);
      while (done.cardinality() != shardsCount) {
        ScoreDoc scoreDoc = null;
        for (int i = 0; i < shardsCount; i++) {
          // only process this shard if it is not yet done.
          if (!done.get(i)) {
            final ScoreDoc[] docs = scoreDocs[i];
            if (pos < docs.length) {
              scoreDoc = docs[pos];
              final Hit hit = new Hit(shards[i], getNodeName(), scoreDoc.score, scoreDoc.doc);
              if (!hq.insert(hit)) {
                // no doc left that has a higher score than the lowest score in
                // the queue
                done.set(i, true);
              }
            } else {
              // no docs left in this shard
              done.set(i, true);
            }
          }
        }
        // we always wait until we got all hits from this position in all
        // shards.
        pos++;
        if (scoreDoc == null) {
          // we do not have any more data
          break;
        }
      }
      finalHitList = hq;
    } else {
      WritableType[] sortFieldsTypes = null;
      FieldDoc fieldDoc = (FieldDoc) scoreDocExample;
      sortFieldsTypes = WritableType.detectWritableTypes(fieldDoc.fields);
      result.setSortFieldTypes(sortFieldsTypes);
      finalHitList = mergeFieldSort(new FieldSortComparator(sort.getSort(), sortFieldsTypes), limit, scoreDocs,
              shards, getNodeName());
    }

    for (Hit hit : finalHitList) {
      if (hit != null) {
        result.addHit(hit);
      }
    }
  }

  /**
   * Merges the already sorted sub-lists to one big sorted list.
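   * <p>
   * Illustrative example (hypothetical sort-field values), merging two
   * per-shard lists that are each already sorted by the sort field:
   *
   * <pre>
   *   shard0: [a, d]    shard1: [b, c]
   *   merged (count=3): [a, b, c]
   * </pre>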
   */
  private final static List<Hit> mergeFieldSort(FieldSortComparator comparator, int count,
          ScoreDoc[][] sortedFieldDocs, String[] shards, String nodeName) {
    int[] arrayPositions = new int[sortedFieldDocs.length];
    final List<Hit> sortedResult = new ArrayList<Hit>(count);

    BitSet listDone = new BitSet(sortedFieldDocs.length);
    for (int subListIndex = 0; subListIndex < arrayPositions.length; subListIndex++) {
      if (sortedFieldDocs[subListIndex].length == 0) {
        listDone.set(subListIndex, true);
      }
    }
    do {
      int fieldDocArrayWithSmallestFieldDoc = -1;
      FieldDoc smallestFieldDoc = null;
      for (int subListIndex = 0; subListIndex < arrayPositions.length; subListIndex++) {
        if (!listDone.get(subListIndex)) {
          FieldDoc hit = (FieldDoc) sortedFieldDocs[subListIndex][arrayPositions[subListIndex]];
          if (smallestFieldDoc == null || comparator.compare(hit.fields, smallestFieldDoc.fields) < 0) {
            smallestFieldDoc = hit;
            fieldDocArrayWithSmallestFieldDoc = subListIndex;
          }
        }
      }
      ScoreDoc[] smallestElementList = sortedFieldDocs[fieldDocArrayWithSmallestFieldDoc];
      FieldDoc fieldDoc = (FieldDoc) smallestElementList[arrayPositions[fieldDocArrayWithSmallestFieldDoc]];
      arrayPositions[fieldDocArrayWithSmallestFieldDoc]++;
      final Hit hit = new Hit(shards[fieldDocArrayWithSmallestFieldDoc], nodeName, fieldDoc.score, fieldDoc.doc);
      hit.setSortFields(WritableType.convertComparable(comparator.getFieldTypes(), fieldDoc.fields));
      sortedResult.add(hit);
      if (arrayPositions[fieldDocArrayWithSmallestFieldDoc] >= smallestElementList.length) {
        listDone.set(fieldDocArrayWithSmallestFieldDoc, true);
      }
    } while (sortedResult.size() < count && listDone.cardinality() < arrayPositions.length);
    return sortedResult;
  }

  /**
   * Returns a specified Lucene document from a given shard, where all or only
   * the given fields are loaded from the index.
   *
   * @param shardName
   * @param docId
   * @param fieldNames
   * @return
   * @throws IOException
   */
  protected Document doc(final String shardName, final int docId, final String[] fieldNames) throws IOException {
    final SearcherHandle handle = getSearcherHandleByShard(shardName);
    IndexSearcher searcher = handle.getSearcher();
    try {
      if (searcher != null) {
        if (fieldNames == null) {
          return searcher.doc(docId);
        } else {
          return searcher.doc(docId, new MapFieldSelector(fieldNames));
        }
      }
      return null;
    } finally {
      handle.finishSearcher();
    }
  }

  /**
   * Rewrites a query for the given shards.
   *
   * @param original
   * @param shardNames
   * @return
   * @throws IOException
   */
  protected Query rewrite(final Query original, final String[] shardNames) throws IOException {
    final Query[] queries = new Query[shardNames.length];
    for (int i = 0; i < shardNames.length; i++) {
      final String shard = shardNames[i];
      final SearcherHandle handle = getSearcherHandleByShard(shard);
      IndexSearcher searcher = handle.getSearcher();
      try {
        if (searcher == null) {
          throw new IllegalStateException("no index-server for shard '" + shard + "' found - probably undeployed");
        } else {
          queries[i] = searcher.rewrite(original);
        }
      } finally {
        handle.finishSearcher();
      }
    }
    if (queries.length > 0 && queries[0] != null) {
      return queries[0].combine(queries);
    } else {
      LOG.error("No queries available for shards: " + Arrays.toString(shardNames));
    }
    return original;
  }

  /**
   * Implements a single thread of a search. Each shard has a separate
   * SearchCall and they are run more or less in parallel.
   */
  protected class SearchCall implements Callable<SearchResult> {

    protected final String _shardName;
    protected final Weight _weight;
    protected final int _limit;
    protected final Sort _sort;
    protected final long _timeout;
    protected final int _callIndex;
    protected final Filter _filter;

    public SearchCall(String shardName, Weight weight, int limit, Sort sort, long timeout, int callIndex,
            Filter filter) {
      _shardName = shardName;
      _weight = weight;
      _limit = limit;
      _sort = sort;
      _timeout = timeout;
      _callIndex = callIndex;
      _filter = filter;
    }

    @Override
    @SuppressWarnings({ "rawtypes" })
    public SearchResult call() throws Exception {
      SearcherHandle handle = getSearcherHandleByShard(_shardName);
      IndexSearcher searcher = handle.getSearcher();
      try {
        if (searcher == null) {
          LOG.warn(String.format("Search attempt for shard %s skipped because shard was closed; empty result returned",
                  _shardName));
          // return empty result...
          return new SearchResult(0, new ScoreDoc[0], _callIndex);
        }
        int nDocs = Math.min(_limit, searcher.maxDoc());
        // empty index (or result limit <= 0); return empty results (as the
        // collectors will fail if nDocs <= 0)
        if (nDocs <= 0) {
          return new SearchResult(0, new ScoreDoc[0], _callIndex);
        }
        TopDocsCollector resultCollector;
        if (_sort != null) {
          boolean fillFields = true; // see IndexSearcher#search(...)
          boolean fieldSortDoMaxScore = false;
          resultCollector = TopFieldCollector.create(_sort, nDocs, fillFields, _trackDocScoresOnSort,
                  fieldSortDoMaxScore, !_weight.scoresDocsOutOfOrder());
        } else {
          resultCollector = TopScoreDocCollector.create(nDocs, !_weight.scoresDocsOutOfOrder());
        }
        try {
          searcher.search(_weight, _filter, wrapInTimeoutCollector(resultCollector));
        } catch (TimeExceededException e) {
          LOG.warn("encountered exceeded timeout for query '" + _weight.getQuery() + "' on shard '" + _shardName
                  + "' with timeout set to '" + _timeout + "'");
        }
        TopDocs docs = resultCollector.topDocs();
        return new SearchResult(docs.totalHits, docs.scoreDocs, _callIndex);
      } finally {
        handle.finishSearcher();
      }
    }

    @SuppressWarnings({ "rawtypes" })
    private Collector wrapInTimeoutCollector(TopDocsCollector resultCollector) {
      if (_timeout <= 0) {
        return resultCollector;
      }
      TimeLimitingCollector timeoutCollector = new TimeLimitingCollector(resultCollector, _searchTimerCounter,
              _timeout);
      timeoutCollector.setBaseline();
      return timeoutCollector;
    }
  }

  protected static class SearchResult {

    protected final int _totalHits;
    protected final ScoreDoc[] _scoreDocs;
    protected int _searchCallIndex;

    public SearchResult(int totalHits, ScoreDoc[] scoreDocs, int searchCallIndex) {
      _totalHits = totalHits;
      _scoreDocs = scoreDocs;
      _searchCallIndex = searchCallIndex;
    }

    public int getTotalHits() {
      return _totalHits;
    }

    public ScoreDoc[] getScoreDocs() {
      return _scoreDocs;
    }

    public int getSearchCallIndex() {
      return _searchCallIndex;
    }
  }

  // Cached document frequency source from Apache Lucene MultiSearcher.
  /**
   * Document Frequency cache acting as a Dummy-Searcher. This class is not a
   * fully-fledged Searcher, but only supports the methods necessary to
   * initialize Weights.
   */
  protected static class CachedDfSource extends Searcher {

    private final Map<TermWritable, Integer> dfMap; // Map from Terms to
                                                    // corresponding doc freqs.
    private final int maxDoc; // Document count.
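
    // A Weight created against this source sees the cluster-wide document
    // frequencies (collected via getDocFreqs()) rather than a single shard's,
    // which is what keeps scoring uniform across nodes.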
    public CachedDfSource(final Map<TermWritable, Integer> dfMap, final int maxDoc, final Similarity similarity) {
      this.dfMap = dfMap;
      this.maxDoc = maxDoc;
      setSimilarity(similarity);
    }

    @Override
    public int docFreq(final Term term) {
      int df;
      try {
        df = dfMap.get(new TermWritable(term.field(), term.text()));
      } catch (final NullPointerException e) {
        throw new IllegalArgumentException("df for term " + term.text() + " not available in df-map:" + dfMap, e);
      }
      return df;
    }

    @Override
    public int[] docFreqs(final Term[] terms) {
      final int[] result = new int[terms.length];
      for (int i = 0; i < terms.length; i++) {
        result[i] = docFreq(terms[i]);
      }
      return result;
    }

    @Override
    public int maxDoc() {
      return maxDoc;
    }

    @Override
    public Query rewrite(final Query query) {
      // this is a bit of a hack. We know that a query which
      // creates a Weight based on this Dummy-Searcher is
      // always already rewritten (see preparedWeight()).
      // Therefore we just return the unmodified query here.
      return query;
    }

    @Override
    public void close() {
      throw new UnsupportedOperationException();
    }

    @Override
    public Document doc(final int i) {
      throw new UnsupportedOperationException();
    }

    @Override
    public Document doc(final int i, final FieldSelector fieldSelector) {
      throw new UnsupportedOperationException();
    }

    @Override
    public Explanation explain(final Weight weight, final int doc) {
      throw new UnsupportedOperationException();
    }

    @Override
    public void search(final Weight weight, final Filter filter, final Collector hitCollector) {
      throw new UnsupportedOperationException();
    }

    @Override
    public TopDocs search(final Weight weight, final Filter filter, final int n) {
      throw new UnsupportedOperationException();
    }

    @Override
    public TopFieldDocs search(final Weight weight, final Filter filter, final int n, final Sort sort) {
      throw new UnsupportedOperationException();
    }
  }

  protected static class KattaHitQueue extends PriorityQueue<Hit> implements Iterable<Hit> {

    private final int _maxSize;

    KattaHitQueue(final int maxSize) {
      _maxSize = maxSize;
      initialize(maxSize);
    }

    public boolean insert(Hit hit) {
      if (size() < _maxSize) {
        add(hit);
        return true;
      }
      if (lessThan(top(), hit)) {
        insertWithOverflow(hit);
        return true;
      }
      return false;
    }

    @Override
    protected final boolean lessThan(final Hit hitA, final Hit hitB) {
      return hitA.compareTo(hitB) > 0;
    }

    @Override
    public Iterator<Hit> iterator() {
      return new Iterator<Hit>() {
        @Override
        public boolean hasNext() {
          return KattaHitQueue.this.size() > 0;
        }

        @Override
        public Hit next() {
          return KattaHitQueue.this.pop();
        }

        @Override
        public void remove() {
          throw new UnsupportedOperationException("Can't remove using this iterator");
        }
      };
    }
  }

  /**
   * Holds an IndexSearcher and maintains the current number of threads using
   * it. For every call to getSearcher(), finishSearcher() must be called
   * exactly once; finally blocks are a good idea.
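   * <p>
   * A minimal usage sketch (hypothetical <code>handle</code> variable),
   * following the same pattern as shardSize() and doc() above:
   *
   * <pre>
   * IndexSearcher searcher = handle.getSearcher();
   * try {
   *   if (searcher != null) {
   *     // ... use the searcher ...
   *   }
   * } finally {
   *   handle.finishSearcher();
   * }
   * </pre>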
   */
  protected static class SearcherHandle {

    private volatile IndexSearcher _indexSearcher;
    private final Object _lock = new Object();
    private final AtomicInteger _refCount = new AtomicInteger(0);

    public SearcherHandle(IndexSearcher indexSearcher) {
      _indexSearcher = indexSearcher;
    }

    /**
     * Returns the IndexSearcher and increments the usage count.
     * finishSearcher() must be called once after each call to getSearcher().
     *
     * @return the searcher
     */
    public IndexSearcher getSearcher() {
      synchronized (_lock) {
        if (_refCount.get() < 0) {
          return null;
        }
        _refCount.incrementAndGet();
      }
      return _indexSearcher;
    }

    /**
     * Decrements the searcher usage count.
     */
    public void finishSearcher() {
      synchronized (_lock) {
        _refCount.decrementAndGet();
      }
    }

    /**
     * Spins until the searcher is no longer in use, then closes it.
     *
     * @throws IOException
     *           on IndexSearcher close failure
     */
    public void closeSearcher() throws IOException {
      while (true) {
        synchronized (_lock) {
          if (_refCount.get() == 0) {
            IndexSearcher indexSearcher = _indexSearcher;
            _indexSearcher = null;
            _refCount.set(-1);
            indexSearcher.close();
            return;
          }
        }
        try {
          Thread.sleep(INDEX_HANDLE_CLOSE_SLEEP_TIME);
        } catch (InterruptedException e) {
          // ignore and keep waiting until the searcher is no longer in use
        }
      }
    }
  }
}