/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.exoplatform.services.jcr.impl.core.query.lucene;

import org.apache.commons.collections.map.LRUMap;
import org.apache.commons.collections.map.LinkedMap;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collections;
import java.util.Iterator;
import java.util.Map;

/**
 * <code>TermDocsCache</code> implements a cache for frequently read
 * {@link TermDocs}.
 * <p>
 * Only terms of a single configured field are considered. Each requested term
 * text gets an access counter; once the counter reaches
 * {@link #CACHE_THRESHOLD} the matching document numbers are collected into a
 * {@link BitSet} and subsequent requests are served from that bit set.
 * Term texts that turn out to match no document are remembered separately so
 * that they can be answered with {@link EmptyTermDocs#INSTANCE}.
 */
public class TermDocsCache {

    /**
     * The logger instance for this class.
     */
    private static final Logger log = LoggerFactory.getLogger("exo.jcr.component.core.TermDocsCache");

    /**
     * The default cache size.
     */
    private static final int CACHE_SIZE = 10;

    /**
     * Minimum value of the access counter before the TermDocs of a term are
     * materialized and cached. Prevents caching of TermDocs that are read
     * only irregularly.
     */
    private static final int CACHE_THRESHOLD = 10;

    /**
     * The underlying index reader.
     */
    private final IndexReader reader;

    /**
     * Only TermDocs for the given <code>field</code> are cached.
     */
    private final String field;

    /**
     * Map of {@link Term#text()} that are unknown to the underlying index.
     * Bounded to 100 entries by the backing {@link LRUMap}.
     */
    @SuppressWarnings("unchecked")
    private final Map<String, String> unknownValues = Collections.synchronizedMap(new LRUMap(100));

    /**
     * The cache of the {@link #CACHE_SIZE} most frequently requested TermDocs.
     * Maps term text <code>String</code> to {@link CacheEntry}. All access
     * to this map happens while holding its monitor.
     */
    private final LinkedMap cache = new LinkedMap();

    /**
     * Creates a new cache for the given <code>reader</code> and
     * <code>field</code>.
     *
     * @param reader the index reader.
     * @param field  the field name of the terms to potentially cache.
     */
    public TermDocsCache(IndexReader reader, String field) {
        this.reader = reader;
        this.field = field;
    }

    /**
     * Returns the {@link TermDocs} for the given term.
     * <p>
     * Terms that are <code>null</code> or belong to another field than the
     * one this cache was created for are passed straight to the underlying
     * reader.
     *
     * @param t the term.
     * @return the term docs for the given term.
     * @throws IOException if an error occurs while reading from the index.
     */
    @SuppressWarnings("unchecked")
    public TermDocs termDocs(final Term t) throws IOException {
        // Compare field names by value: the configured field is not interned
        // here, so an identity comparison could silently disable the cache.
        // equals(null) is false, so a null configured field still results in
        // a plain pass-through.
        if (t == null || !t.field().equals(field)) {
            return reader.termDocs(t);
        }

        String text = t.text();
        if (unknownValues.get(text) != null) {
            log.debug("EmptyTermDocs({},{})", field, text);
            return EmptyTermDocs.INSTANCE;
        }

        // maintain cache
        CacheEntry entry;
        synchronized (cache) {
            entry = (CacheEntry) cache.get(text);
            if (entry == null) {
                // check space
                if (cache.size() >= CACHE_SIZE) {
                    pruneCache();
                }
                entry = new CacheEntry();
                cache.put(text, entry);
            } else {
                entry.numAccessed++;
            }
        }

        // this is a threshold to prevent caching of TermDocs
        // that are read only irregularly.
        int accessCount = entry.numAccessed;
        if (accessCount < CACHE_THRESHOLD) {
            if (log.isDebugEnabled()) {
                log.debug("#{} TermDocs({},{})", new Object[]{Integer.valueOf(accessCount), field, text});
            }
            return reader.termDocs(t);
        }

        if (entry.bits == null) {
            BitSet collected = collectBits(t);
            if (collected != null) {
                entry.bits = collected;
            }
        }

        BitSet bits = entry.bits;
        if (bits == null) {
            // none collected
            unknownValues.put(text, text);
            return EmptyTermDocs.INSTANCE;
        }

        if (log.isDebugEnabled()) {
            log.debug("CachedTermDocs({},{},{}/{})",
                new Object[]{field, text, Integer.valueOf(bits.cardinality()), Integer.valueOf(reader.maxDoc())});
        }
        return new CachedTermDocs(bits);
    }

    /**
     * Prunes roughly the less frequently accessed half of the cache and
     * dampens the access counters of the surviving entries by replacing them
     * with their square root. Must be called while holding the monitor of
     * {@link #cache}.
     */
    @SuppressWarnings("unchecked")
    private void pruneCache() {
        CacheEntry[] entries = (CacheEntry[]) cache.values().toArray(new CacheEntry[cache.size()]);
        Arrays.sort(entries);
        int threshold = entries[CACHE_SIZE / 2].numAccessed;
        for (Iterator<Map.Entry<String, CacheEntry>> it = cache.entrySet().iterator(); it.hasNext();) {
            Map.Entry<String, CacheEntry> e = it.next();
            CacheEntry ce = e.getValue();
            if (ce.numAccessed <= threshold) {
                // prune
                it.remove();
            } else {
                // adjust
                ce.numAccessed = (int) Math.sqrt(ce.numAccessed);
            }
        }
    }

    /**
     * Reads all document numbers for the given term from the underlying
     * reader into a bit set.
     *
     * @param t the term to read.
     * @return the bit set with one bit per matching document, or
     *         <code>null</code> if the reader returned no document at all.
     * @throws IOException if an error occurs while reading from the index.
     */
    private BitSet collectBits(Term t) throws IOException {
        BitSet bits = null;
        TermDocs tDocs = reader.termDocs(t);
        try {
            while (tDocs.next()) {
                if (bits == null) {
                    // allocate lazily so that unknown terms cost nothing
                    bits = new BitSet(reader.maxDoc());
                }
                bits.set(tDocs.doc());
            }
        } finally {
            tDocs.close();
        }
        return bits;
    }

    /**
     * Implements a {@link TermDocs} base on a {@link BitSet}.
     */
    private static final class CachedTermDocs implements TermDocs {

        /**
         * The cached docs for this term.
         */
        private final BitSet docs;

        /**
         * The current position into the {@link #docs}.
         */
        private int position = -1;

        /**
         * <code>true</code> if there are potentially more docs.
         */
        private boolean moreDocs = true;

        public CachedTermDocs(BitSet docs) {
            this.docs = docs;
        }

        /**
         * @throws UnsupportedOperationException always.
         */
        public void seek(Term term) throws IOException {
            throw new UnsupportedOperationException();
        }

        /**
         * @throws UnsupportedOperationException always.
         */
        public void seek(TermEnum termEnum) throws IOException {
            throw new UnsupportedOperationException();
        }

        /**
         * {@inheritDoc}
         */
        public int doc() {
            return position;
        }

        /**
         * {@inheritDoc}
         * <p>
         * The frequency is not cached; this implementation always returns 1.
         */
        public int freq() {
            return 1;
        }

        /**
         * {@inheritDoc}
         */
        public boolean next() throws IOException {
            if (moreDocs) {
                position = docs.nextSetBit(position + 1);
                moreDocs = position != -1;
            }
            return moreDocs;
        }

        /**
         * {@inheritDoc}
         */
        public int read(int[] docs, int[] freqs) throws IOException {
            int count;
            for (count = 0; count < docs.length && next(); count++) {
                docs[count] = doc();
                freqs[count] = 1;
            }
            return count;
        }

        /**
         * {@inheritDoc}
         * <p>
         * Always advances past the current document, even when
         * <code>target</code> is less than or equal to the current document
         * number (or negative).
         */
        public boolean skipTo(int target) throws IOException {
            if (moreDocs) {
                // Never move backwards and never pass a negative index to
                // BitSet.nextSetBit(), which would throw.
                position = docs.nextSetBit(Math.max(target, position + 1));
                moreDocs = position != -1;
            }
            return moreDocs;
        }

        /**
         * {@inheritDoc}
         * <p>
         * Nothing to release; this implementation does nothing.
         */
        public void close() throws IOException {
        }
    }

    /**
     * Bookkeeping for a single term text: how often it was requested and,
     * once collected, the documents it matches. Entries order by ascending
     * access count.
     */
    private static final class CacheEntry implements Comparable<CacheEntry> {

        /**
         * Number of times the term was requested; starts at 1 for the request
         * that created the entry.
         */
        private volatile int numAccessed = 1;

        /**
         * The matching documents, or <code>null</code> if not collected.
         */
        private volatile BitSet bits;

        public int compareTo(CacheEntry other) {
            return (numAccessed < other.numAccessed ? -1 : (numAccessed == other.numAccessed ? 0 : 1));
        }
    }
}