package org.apache.lucene.facet.search;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.cache.CategoryListCache;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Manage an LRU cache for {@link TotalFacetCounts} per index, taxonomy, and
 * facet indexing params.
 * <p>
 * The cache is a process-wide singleton (see {@link #getSingleton()}). Lookups of
 * already cached entries are lock-free; computing, loading, trimming and clearing
 * are serialized on the singleton's monitor.
 *
 * @lucene.experimental
 */
public final class TotalFacetCountsCache {

  /**
   * Default size of in memory cache for computed total facet counts.
   * Set to 2 for the case when an application reopened a reader and
   * the original one is still in use (Otherwise there will be
   * switching again and again between the two.)
   */
  public static final int DEFAULT_CACHE_SIZE = 2;

  private static final TotalFacetCountsCache singleton = new TotalFacetCountsCache();

  /**
   * Get the single instance of this cache
   */
  public static TotalFacetCountsCache getSingleton() {
    return singleton;
  }

  /**
   * In-memory cache of TFCs.
   * <ul>
   * <li>Its size is kept within limits through {@link #trimCache()}.
   * <li>An LRU eviction policy is applied, by maintaining active keys in {@link #lruKeys}.
   * <li>After each addition to the cache, trimCache is called, to remove entries least recently used.
   * </ul>
   * @see #markRecentlyUsed(TFCKey)
   */
  private final ConcurrentHashMap<TFCKey,TotalFacetCounts> cache = new ConcurrentHashMap<TFCKey,TotalFacetCounts>();

  /**
   * A queue of active keys for applying LRU policy on eviction from the {@link #cache}.
   * The head of the queue is the least recently used key.
   * @see #markRecentlyUsed(TFCKey)
   */
  private final ConcurrentLinkedQueue<TFCKey> lruKeys = new ConcurrentLinkedQueue<TFCKey>();

  /**
   * Maximal number of entries kept in {@link #cache}.
   * Volatile because it is written by the unsynchronized {@link #setCacheSize(int)}
   * and read both by the synchronized {@link #trimCache()} and by the
   * unsynchronized {@link #getCacheSize()}.
   */
  private volatile int maxCacheSize = DEFAULT_CACHE_SIZE;

  /** private constructor for singleton pattern */
  private TotalFacetCountsCache() {
  }

  /**
   * Get the total facet counts for a reader/taxonomy pair and facet indexing parameters.
   * If not in cache, computed here and added to the cache for later use.
   * @param indexReader the documents index
   * @param taxonomy the taxonomy index
   * @param facetIndexingParams facet indexing parameters
   * @param clCache category list cache for faster computation, can be null
   * @return the total facet counts.
   * @throws IOException if computing the counts fails
   */
  public TotalFacetCounts getTotalCounts(IndexReader indexReader, TaxonomyReader taxonomy,
      FacetIndexingParams facetIndexingParams, CategoryListCache clCache) throws IOException {
    // create the key
    TFCKey key = new TFCKey(indexReader, taxonomy, facetIndexingParams);
    // it is important that this call is not synchronized, so that available TFC
    // would not wait for one that needs to be computed.
    TotalFacetCounts tfc = cache.get(key);
    if (tfc != null) {
      markRecentlyUsed(key);
      return tfc;
    }
    return computeAndCache(key, clCache);
  }

  /**
   * Mark key as recently used, by moving it to the tail of {@link #lruKeys}.
   * <p>
   * <b>Implementation notes: Synchronization considerations and the interaction between lruKeys and cache:</b>
   * <ol>
   *  <li>A concurrent {@link LinkedHashMap} would have made this class much simpler.
   *      But unfortunately, Java does not provide one.
   *      Instead, we combine two concurrent objects:
   *  <ul>
   *   <li>{@link ConcurrentHashMap} for the cached TFCs.
   *   <li>{@link ConcurrentLinkedQueue} for active keys
   *  </ul>
   *  <li>Both {@link #lruKeys} and {@link #cache} are concurrently safe.
   *  <li>Checks for a cached item through getTotalCounts() are not synchronized.
   *      Therefore, the case that a needed TFC is in the cache is very fast:
   *      it does not wait for the computation of other TFCs.
   *  <li>computeAndCache() is synchronized, and, has a (double) check of the required
   *      TFC, to avoid computing the same TFC twice.
   *  <li>A race condition in this method (markRecentlyUsed) might result in two copies
   *      of the same 'key' in lruKeys, but this is handled by the loop in trimCache(),
   *      where an attempt to remove the same key twice is a no-op.
   * </ol>
   */
  private void markRecentlyUsed(TFCKey key) {
    lruKeys.remove(key);
    lruKeys.add(key);
  }

  /**
   * Evict least recently used entries until the cache holds at most
   * {@link #maxCacheSize} entries.
   */
  private synchronized void trimCache() {
    // loop until cache is of desired size.
    while (cache.size() > maxCacheSize) {
      TFCKey key = lruKeys.poll();
      if (key == null) { //defensive
        // it is defensive since lruKeys presumably covers the cache keys
        key = cache.keys().nextElement();
      }
      // remove this element. Note that an attempt to remove with the same key again is a no-op,
      // which gracefully handles the possible race in markRecentlyUsed().
      cache.remove(key);
    }
  }

  /**
   * compute TFC and cache it, after verifying it was not just added - for this
   * matter this method is synchronized, which is not too bad, because there is
   * lots of work done in the computations.
   */
  private synchronized TotalFacetCounts computeAndCache(TFCKey key, CategoryListCache clCache) throws IOException {
    TotalFacetCounts tfc = cache.get(key);
    if (tfc == null) {
      tfc = TotalFacetCounts.compute(key.indexReader, key.taxonomy, key.facetIndexingParams, clCache);
      // register the key before the entry, so that trimCache() finds it in lruKeys
      lruKeys.add(key);
      cache.put(key, tfc);
      trimCache();
    }
    return tfc;
  }

  /**
   * Load {@link TotalFacetCounts} matching input parameters from the provided inputFile
   * and add them into the cache for the provided indexReader, taxonomy, and facetIndexingParams.
   * If a {@link TotalFacetCounts} for these parameters already exists in the cache, it will be
   * replaced by the loaded one.
   * @param inputFile file from which to read the data
   * @param indexReader the documents index
   * @param taxonomy the taxonomy index
   * @param facetIndexingParams the facet indexing parameters
   * @throws IOException on error
   * @throws IllegalArgumentException if inputFile is not an existing, readable, regular file
   * @see #store(File, IndexReader, TaxonomyReader, FacetIndexingParams, CategoryListCache)
   */
  public synchronized void load(File inputFile, IndexReader indexReader, TaxonomyReader taxonomy,
      FacetIndexingParams facetIndexingParams) throws IOException {
    if (!inputFile.isFile() || !inputFile.exists() || !inputFile.canRead()) {
      throw new IllegalArgumentException("Exepecting an existing readable file: "+inputFile);
    }
    TFCKey key = new TFCKey(indexReader, taxonomy, facetIndexingParams);
    TotalFacetCounts tfc = TotalFacetCounts.loadFromFile(inputFile, taxonomy, facetIndexingParams);
    cache.put(key, tfc);
    trimCache();
    markRecentlyUsed(key);
  }

  /**
   * Store the {@link TotalFacetCounts} matching input parameters into the provided outputFile,
   * making them available for a later call to {@link #load(File, IndexReader, TaxonomyReader, FacetIndexingParams)}.
   * If these {@link TotalFacetCounts} are available in the cache, they are used. But if they are
   * not in the cache, this call will first compute them (which will also add them to the cache).
   * @param outputFile file to store in.
   * @param indexReader the documents index
   * @param taxonomy the taxonomy index
   * @param facetIndexingParams the facet indexing parameters
   * @param clCache category list cache for faster computation, can be null
   * @throws IOException on error
   * @throws IllegalArgumentException if outputFile exists but is not a writable regular file,
   *         or does not exist and its parent is not a writable directory
   * @see #load(File, IndexReader, TaxonomyReader, FacetIndexingParams)
   * @see #getTotalCounts(IndexReader, TaxonomyReader, FacetIndexingParams, CategoryListCache)
   */
  public void store(File outputFile, IndexReader indexReader, TaxonomyReader taxonomy,
      FacetIndexingParams facetIndexingParams, CategoryListCache clCache) throws IOException {
    final boolean writable;
    if (outputFile.exists()) {
      writable = outputFile.isFile() && outputFile.canWrite();
    } else {
      // Resolve the parent from the absolute path: getParentFile() on a bare relative
      // name (e.g. "counts.bin") returns null, which must not end in a NullPointerException.
      File parentFile = outputFile.getAbsoluteFile().getParentFile();
      writable = parentFile != null && parentFile.isDirectory() && parentFile.canWrite();
    }
    if (!writable) {
      throw new IllegalArgumentException("Exepecting a writable file: "+outputFile);
    }
    TotalFacetCounts tfc = getTotalCounts(indexReader, taxonomy, facetIndexingParams, clCache);
    TotalFacetCounts.storeToFile(outputFile, tfc);
  }

  /**
   * Cache key: identifies a TFC by the identity of its index reader and taxonomy reader,
   * the reader's number of deleted documents at key creation time, and the sequence of
   * category list params of the facet indexing params.
   */
  private static final class TFCKey {
    final IndexReader indexReader;
    final TaxonomyReader taxonomy;
    private final Iterable<CategoryListParams> clps;
    private final int hashCode;
    private final int nDels; // needed when a reader used for faceted search was just used for deletion.
    final FacetIndexingParams facetIndexingParams;

    public TFCKey(IndexReader indexReader, TaxonomyReader taxonomy,
        FacetIndexingParams facetIndexingParams) {
      this.indexReader = indexReader;
      this.taxonomy = taxonomy;
      this.facetIndexingParams = facetIndexingParams;
      this.clps = facetIndexingParams.getAllCategoryListParams();
      this.nDels = indexReader.numDeletedDocs();
      // computed once; depends only on fields that equals() also compares
      hashCode = indexReader.hashCode() ^ taxonomy.hashCode();
    }

    @Override
    public int hashCode() {
      return hashCode;
    }

    /**
     * Two keys are equal when they refer to the same reader and taxonomy instances,
     * saw the same number of deleted documents, and their category list params are
     * pairwise equal and of the same length.
     */
    @Override
    public boolean equals(Object other) {
      if (this == other) {
        return true;
      }
      // also rejects null, so neither a cast failure nor a null dereference can occur
      if (!(other instanceof TFCKey)) {
        return false;
      }
      TFCKey o = (TFCKey) other;
      if (indexReader != o.indexReader || taxonomy != o.taxonomy || nDels != o.nDels) {
        return false;
      }
      Iterator<CategoryListParams> it1 = clps.iterator();
      Iterator<CategoryListParams> it2 = o.clps.iterator();
      while (it1.hasNext() && it2.hasNext()) {
        if (!it1.next().equals(it2.next())) {
          return false;
        }
      }
      // equal only if both sequences were exhausted together
      return it1.hasNext() == it2.hasNext();
    }
  }

  /**
   * Clear the cache.
   */
  public synchronized void clear() {
    cache.clear();
    lruKeys.clear();
  }

  /**
   * @return the maximal cache size
   */
  public int getCacheSize() {
    return maxCacheSize;
  }

  /**
   * Set the number of TotalFacetCounts arrays that will remain in memory cache.
   * <p>
   * If new size is smaller than current size, the cache is appropriately trimmed.
   * <p>
   * Minimal size is 1, so passing zero or negative size would result in size of 1.
   * @param size new size to set
   */
  public void setCacheSize(int size) {
    if (size < 1) {
      size = 1;
    }
    int origSize = maxCacheSize;
    maxCacheSize = size;
    if (maxCacheSize < origSize) { // need to trim only if the cache was reduced
      trimCache();
    }
  }
}