package org.apache.lucene.search;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import java.util.WeakHashMap;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader; // javadocs
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.Bits;

/**
 * Wraps another {@link Filter}'s result and caches it.  The purpose is to allow
 * filters to simply filter, and then wrap with this class
 * to add caching.
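 *
 * <p>A minimal usage sketch (illustrative only: {@code searcher} and {@code query}
 * are assumed to exist, and the wrapped {@link QueryWrapperFilter} merely stands in
 * for any expensive-to-compute filter):
 * <pre class="prettyprint">
 *   Filter filter = new QueryWrapperFilter(new TermQuery(new Term("color", "red")));
 *   Filter cached = new CachingWrapperFilter(filter);  // caches per-segment DocIdSets
 *   TopDocs hits = searcher.search(query, cached, 10); // later searches reuse the cache
 * </pre>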
 */
public class CachingWrapperFilter extends Filter {
  // TODO: make this filter aware of ReaderContext. a cached filter could
  // specify the actual readers key or something similar to indicate on which
  // level of the readers hierarchy it should be cached.
  private final Filter filter;

  private final Map<Object,DocIdSet> cache = Collections.synchronizedMap(new WeakHashMap<Object,DocIdSet>());

  private final boolean recacheDeletes;

  /** Wraps another filter's result and caches it.
   * Deletions are not cached but are AND'd in on the fly; see
   * {@link #CachingWrapperFilter(Filter,boolean)} for an explanation.
   * This constructor is recommended for frequently changing indexes.
   * @param filter Filter to cache results of
   * @see #CachingWrapperFilter(Filter,boolean)
   */
  public CachingWrapperFilter(Filter filter) {
    this(filter, false);
  }

  /** Wraps another filter's result and caches it. If
   * {@code recacheDeletes} is {@code true}, then new deletes (for example
   * after {@link DirectoryReader#openIfChanged}) will cause the filter
   * {@link DocIdSet} to be recached.
   *
   * <p>If your index changes seldom, it is recommended to use {@code recacheDeletes=true},
   * as recaching only occurs when the index is reopened.
   * For near-real-time indexes, or indexes that are frequently reopened
   * (e.g., when {@link DirectoryReader#openIfChanged} is used), you should
   * pass {@code recacheDeletes=false}. In that case the filter results are cached
   * without deletions, and the deletions are AND'd in on the fly while scoring.
   * @param filter Filter to cache results of
   * @param recacheDeletes whether new deletions on the underlying index should trigger recaching
   */
  public CachingWrapperFilter(Filter filter, boolean recacheDeletes) {
    this.filter = filter;
    this.recacheDeletes = recacheDeletes;
  }

  /** Provide the DocIdSet to be cached, using the DocIdSet provided
   * by the wrapped Filter.
   * <p>This implementation returns the given {@link DocIdSet} if {@link DocIdSet#isCacheable}
   * returns <code>true</code>; otherwise it copies the {@link DocIdSetIterator} into
   * a {@link FixedBitSet}.
   */
  protected DocIdSet docIdSetToCache(DocIdSet docIdSet, AtomicReader reader) throws IOException {
    if (docIdSet == null) {
      // this is better than returning null, as the nonnull result can be cached
      return DocIdSet.EMPTY_DOCIDSET;
    } else if (docIdSet.isCacheable()) {
      return docIdSet;
    } else {
      final DocIdSetIterator it = docIdSet.iterator();
      // null is allowed to be returned by iterator(),
      // in this case we wrap with the empty set,
      // which is cacheable.
      if (it == null) {
        return DocIdSet.EMPTY_DOCIDSET;
      } else {
        final FixedBitSet bits = new FixedBitSet(reader.maxDoc());
        bits.or(it);
        return bits;
      }
    }
  }

  // for testing
  int hitCount, missCount;

  @Override
  public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
    final AtomicReader reader = context.reader();

    // Only cache if incoming acceptDocs is == live docs;
    // if Lucene passes in more interesting acceptDocs in
    // the future (@UweSays: it already does when you chain FilteredQuery) we don't want to over-cache:
    final Bits liveDocs = reader.getLiveDocs();
    final boolean doCacheAcceptDocs = (recacheDeletes && acceptDocs == liveDocs);

    final Object key;
    final Bits cacheAcceptDocs;
    if (doCacheAcceptDocs) {
      assert acceptDocs == liveDocs;
      key = reader.getCombinedCoreAndDeletesKey();
      cacheAcceptDocs = acceptDocs;
    } else {
      key = reader.getCoreCacheKey();
      cacheAcceptDocs = null;
    }

    DocIdSet docIdSet = cache.get(key);
    if (docIdSet != null) {
      hitCount++;
    } else {
      missCount++;
      docIdSet = docIdSetToCache(filter.getDocIdSet(context, cacheAcceptDocs), reader);
      cache.put(key, docIdSet);
    }

    if (doCacheAcceptDocs) {
      return docIdSet;
    } else {
      return BitsFilteredDocIdSet.wrap(docIdSet, acceptDocs);
    }
  }

  @Override
  public String toString() {
    return "CachingWrapperFilter(" + filter + ",recacheDeletes=" + recacheDeletes + ")";
  }

  @Override
  public boolean equals(Object o) {
    if (!(o instanceof CachingWrapperFilter)) return false;
    final CachingWrapperFilter other = (CachingWrapperFilter) o;
    return this.filter.equals(other.filter) && this.recacheDeletes == other.recacheDeletes;
  }

  @Override
  public int hashCode() {
    return (filter.hashCode() ^ 0x1117BF25) + (recacheDeletes ? 0 : 1);
  }
}
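// Behavior sketch (illustrative assumptions, not part of this class: a
// "searcher" and "query" are presumed to exist, and the reader is not reopened
// between the two searches). With the default constructor, the DocIdSet is
// cached under each segment's core cache key, so repeated searches over the
// same reader reuse the cached set; hitCount/missCount above track this.
//
//   Filter cached = new CachingWrapperFilter(new QueryWrapperFilter(query));
//   searcher.search(new MatchAllDocsQuery(), cached, 10); // first use: miss, builds and caches
//   searcher.search(new MatchAllDocsQuery(), cached, 10); // same reader: cache hit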