package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Serializable;
import java.util.Map;
import java.util.WeakHashMap;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.OpenBitSetDISI;

/**
 * Wraps another filter's result and caches it.  The purpose is to allow
 * filters to simply filter, and then wrap with this class
 * to add caching.
 *
 * <p><b>NOTE</b>: if you wrap this filter as a query (e.g.,
 * using {@link ConstantScoreQuery}), you'll likely want to enforce
 * deletions (using either {@link DeletesMode#RECACHE} or
 * {@link DeletesMode#DYNAMIC}).
 */
public class CachingWrapperFilter extends Filter {
  Filter filter;
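  // Illustrative usage (a minimal sketch; the field and term values are
  // hypothetical).  Wrapping the cached filter as a ConstantScoreQuery is
  // exactly the case the NOTE above warns about, so deletions are enforced
  // here with DeletesMode.DYNAMIC:
  //
  //   Filter raw = new QueryWrapperFilter(new TermQuery(new Term("color", "red")));
  //   Filter cached = new CachingWrapperFilter(raw, DeletesMode.DYNAMIC);
  //   Query query = new ConstantScoreQuery(cached);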
  /**
   * Expert: Specifies how new deletions against a reopened
   * reader should be handled.
   *
   * <p>The default is IGNORE, which means the cache entry
   * will be re-used for a given segment, even when that
   * segment has been reopened due to changes in deletions.
   * This is a big performance gain, especially with
   * near-real-time readers, since you don't hit a cache
   * miss on every reopened reader for prior segments.</p>
   *
   * <p>However, in some cases this can cause invalid query
   * results, allowing deleted documents to be returned.
   * This only happens if the main query does not rule out
   * deleted documents on its own, such as a top-level
   * ConstantScoreQuery.  To fix this, use RECACHE to
   * re-create the cached filter (at a higher per-reopen
   * cost, but with faster subsequent search performance), or
   * use DYNAMIC to dynamically intersect deleted docs (fast
   * reopen time but some hit to search performance).</p>
   */
  public static enum DeletesMode { IGNORE, RECACHE, DYNAMIC }

  protected final FilterCache<DocIdSet> cache;

  static abstract class FilterCache<T> implements Serializable {

    /**
     * A transient Filter cache (package private because of test)
     */
    // NOTE: not final so that we can dynamically re-init
    // after de-serialize
    transient Map<Object,T> cache;

    private final DeletesMode deletesMode;

    public FilterCache(DeletesMode deletesMode) {
      this.deletesMode = deletesMode;
    }

    public synchronized T get(IndexReader reader, Object coreKey, Object delCoreKey) throws IOException {
      T value;

      if (cache == null) {
        cache = new WeakHashMap<Object,T>();
      }

      if (deletesMode == DeletesMode.IGNORE) {
        // key on core
        value = cache.get(coreKey);
      } else if (deletesMode == DeletesMode.RECACHE) {
        // key on deletes, if any, else core
        value = cache.get(delCoreKey);
      } else {
        assert deletesMode == DeletesMode.DYNAMIC;

        // first try for exact match
        value = cache.get(delCoreKey);

        if (value == null) {
          // now for core match, but dynamically AND NOT
          // deletions
          value = cache.get(coreKey);
          if (value != null) {
            final Bits delDocs = MultiFields.getDeletedDocs(reader);
            if (delDocs != null) {
              value = mergeDeletes(delDocs, value);
            }
          }
        }
      }

      return value;
    }

    protected abstract T mergeDeletes(Bits delDocs, T value);

    public synchronized void put(Object coreKey, Object delCoreKey, T value) {
      if (deletesMode == DeletesMode.IGNORE) {
        cache.put(coreKey, value);
      } else if (deletesMode == DeletesMode.RECACHE) {
        cache.put(delCoreKey, value);
      } else {
        cache.put(coreKey, value);
        cache.put(delCoreKey, value);
      }
    }
  }

  /**
   * New deletes are ignored by default, which gives higher
   * cache hit rate on reopened readers.  Most of the time
   * this is safe, because the filter will be AND'd with a
   * Query that fully enforces deletions.  If instead you
   * need this filter to always enforce deletions, pass
   * either {@link DeletesMode#RECACHE} or {@link
   * DeletesMode#DYNAMIC}.
   * @param filter Filter to cache results of
   */
  public CachingWrapperFilter(Filter filter) {
    this(filter, DeletesMode.IGNORE);
  }

  /**
   * Expert: by default, the cached filter will be shared
   * across reopened segments that only had changes to their
   * deletions.
   *
   * @param filter Filter to cache results of
   * @param deletesMode See {@link DeletesMode}
   */
  public CachingWrapperFilter(Filter filter, DeletesMode deletesMode) {
    this.filter = filter;
    cache = new FilterCache<DocIdSet>(deletesMode) {
      @Override
      public DocIdSet mergeDeletes(final Bits delDocs, final DocIdSet docIdSet) {
        return new FilteredDocIdSet(docIdSet) {
          @Override
          protected boolean match(int docID) {
            return !delDocs.get(docID);
          }
        };
      }
    };
  }
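  // A sketch of how the three modes behave across a reopen that only
  // changed deletions (the reader names are hypothetical):
  //
  //   CachingWrapperFilter f = new CachingWrapperFilter(raw, DeletesMode.DYNAMIC);
  //   DocIdSet first = f.getDocIdSet(reader);    // miss: computes and caches
  //   // ... delete a document, reopen the reader ...
  //   DocIdSet second = f.getDocIdSet(reopened); // core-key hit: reuses the
  //                                              // cached bits, masking the new
  //                                              // deletions via mergeDeletes
  //
  // With IGNORE the second call returns the cached bits unchanged (deleted
  // docs may leak through); with RECACHE it is a miss that recomputes the
  // filter and caches it against the deletions key.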
  /** Provide the DocIdSet to be cached, using the DocIdSet provided
   *  by the wrapped Filter.
   *  <p>This implementation returns the given {@link DocIdSet}, if {@link DocIdSet#isCacheable}
   *  returns <code>true</code>, else it copies the {@link DocIdSetIterator} into
   *  an {@link OpenBitSetDISI}.
   */
  protected DocIdSet docIdSetToCache(DocIdSet docIdSet, IndexReader reader) throws IOException {
    if (docIdSet == null) {
      // this is better than returning null, as the non-null result can be cached
      return DocIdSet.EMPTY_DOCIDSET;
    } else if (docIdSet.isCacheable()) {
      return docIdSet;
    } else {
      final DocIdSetIterator it = docIdSet.iterator();
      // null is allowed to be returned by iterator(),
      // in this case we wrap with the empty set,
      // which is cacheable.
      return (it == null) ? DocIdSet.EMPTY_DOCIDSET : new OpenBitSetDISI(it, reader.maxDoc());
    }
  }

  // for testing
  int hitCount, missCount;

  @Override
  public DocIdSet getDocIdSet(IndexReader reader) throws IOException {

    final Object coreKey = reader.getCoreCacheKey();
    final Object delCoreKey = reader.hasDeletions() ? MultiFields.getDeletedDocs(reader) : coreKey;

    DocIdSet docIdSet = cache.get(reader, coreKey, delCoreKey);
    if (docIdSet != null) {
      hitCount++;
      return docIdSet;
    }

    missCount++;

    // cache miss
    docIdSet = docIdSetToCache(filter.getDocIdSet(reader), reader);

    if (docIdSet != null) {
      cache.put(coreKey, delCoreKey, docIdSet);
    }

    return docIdSet;
  }

  @Override
  public String toString() {
    return "CachingWrapperFilter(" + filter + ")";
  }

  @Override
  public boolean equals(Object o) {
    if (!(o instanceof CachingWrapperFilter)) return false;
    return this.filter.equals(((CachingWrapperFilter)o).filter);
  }

  @Override
  public int hashCode() {
    return filter.hashCode() ^ 0x1117BF25;
  }
}
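// Example end-to-end usage (a minimal sketch; the directory, field, and
// query are hypothetical).  The wrapped filter is evaluated once per
// segment on the first search and served from the cache afterwards:
//
//   IndexSearcher searcher = new IndexSearcher(IndexReader.open(dir));
//   Filter cached = new CachingWrapperFilter(
//       new QueryWrapperFilter(new TermQuery(new Term("color", "red"))));
//   TopDocs hits = searcher.search(new MatchAllDocsQuery(), cached, 10);
//   hits = searcher.search(new MatchAllDocsQuery(), cached, 10); // cache hit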