/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.cache.bloom.simple;
import org.apache.lucene.index.*;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.util.StringHelper;
import org.elasticsearch.common.lucene.util.UnicodeUtil;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.Unicode;
import org.elasticsearch.common.bloom.BloomFilter;
import org.elasticsearch.common.bloom.BloomFilterFactory;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.SizeUnit;
import org.elasticsearch.common.unit.SizeValue;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import org.elasticsearch.index.AbstractIndexComponent;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.cache.bloom.BloomCache;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.threadpool.ThreadPool;
import java.nio.channels.ClosedChannelException;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicBoolean;
/**
 * A simple per-segment cache of {@link BloomFilter}s, keyed first by the reader's
 * core cache key and then by field name. Filters are built lazily — optionally
 * asynchronously on the {@code CACHE} thread pool — and are dropped when the
 * owning segment core is closed (via {@link SegmentReader.CoreClosedListener}).
 */
public class SimpleBloomCache extends AbstractIndexComponent implements BloomCache, SegmentReader.CoreClosedListener {

    private final ThreadPool threadPool;

    // Upper bound on the number of docs for which a filter will be built/reloaded.
    // NOTE(review): despite being parsed from a SizeValue, this is compared against
    // doc counts (not bytes) below — presumably intentional, but worth confirming.
    private final long maxSize;

    // reader core cache key -> (field name -> filter entry)
    private final ConcurrentMap<Object, ConcurrentMap<String, BloomFilterEntry>> cache;

    // Guards creation of the per-reader field map (double-checked in filter()).
    private final Object creationMutex = new Object();

    @Inject
    public SimpleBloomCache(Index index, @IndexSettings Settings indexSettings, ThreadPool threadPool) {
        super(index, indexSettings);
        this.threadPool = threadPool;
        this.maxSize = indexSettings.getAsSize("index.cache.bloom.max_size", new SizeValue(500, SizeUnit.MEGA)).singles();
        this.cache = ConcurrentCollections.newConcurrentMap();
    }

    @Override
    public void close() throws ElasticSearchException {
        clear();
    }

    @Override
    public void clear() {
        cache.clear();
    }

    @Override
    public void onClose(SegmentReader owner) {
        // segment core is going away; drop everything cached for it
        clear(owner);
    }

    @Override
    public void clear(IndexReader reader) {
        ConcurrentMap<String, BloomFilterEntry> map = cache.remove(reader.getCoreCacheKey());
        // help soft/weak handling GC
        if (map != null) {
            map.clear();
        }
    }

    /**
     * Total size, in bytes, of all cached bloom filters across all readers and fields.
     * The overhead of the maps themselves is not accounted for.
     */
    @Override
    public long sizeInBytes() {
        // the overhead of the map is not really relevant...
        long sizeInBytes = 0;
        for (ConcurrentMap<String, BloomFilterEntry> map : cache.values()) {
            for (BloomFilterEntry filter : map.values()) {
                sizeInBytes += filter.filter.sizeInBytes();
            }
        }
        return sizeInBytes;
    }

    /**
     * Total size, in bytes, of the cached bloom filters for a single field,
     * summed across all readers.
     */
    @Override
    public long sizeInBytes(String fieldName) {
        long sizeInBytes = 0;
        for (ConcurrentMap<String, BloomFilterEntry> map : cache.values()) {
            BloomFilterEntry filter = map.get(fieldName);
            if (filter != null) {
                sizeInBytes += filter.filter.sizeInBytes();
            }
        }
        return sizeInBytes;
    }

    /**
     * Returns the bloom filter for {@code fieldName} on the given reader, loading
     * it if needed. When {@code asyncLoad} is true the load is scheduled on the
     * CACHE thread pool and {@link BloomFilter#NONE} is returned until it completes;
     * otherwise the load happens inline on the calling thread.
     *
     * @param reader    the (segment-level) reader to build the filter from
     * @param fieldName the field whose terms populate the filter
     * @param asyncLoad whether to load the filter asynchronously
     * @return the cached filter, {@link BloomFilter#NONE} while loading, or
     *         {@link BloomFilter#EMPTY} for a reader with no live docs
     */
    @Override
    public BloomFilter filter(IndexReader reader, String fieldName, boolean asyncLoad) {
        int currentNumDocs = reader.numDocs();
        if (currentNumDocs == 0) {
            return BloomFilter.EMPTY;
        }
        ConcurrentMap<String, BloomFilterEntry> fieldCache = cache.get(reader.getCoreCacheKey());
        if (fieldCache == null) {
            // double-checked creation of the per-reader map under a single mutex
            synchronized (creationMutex) {
                fieldCache = cache.get(reader.getCoreCacheKey());
                if (fieldCache == null) {
                    if (reader instanceof SegmentReader) {
                        // get notified when the segment core closes so we can evict
                        ((SegmentReader) reader).addCoreClosedListener(this);
                    }
                    fieldCache = ConcurrentCollections.newConcurrentMap();
                    cache.put(reader.getCoreCacheKey(), fieldCache);
                }
            }
        }
        BloomFilterEntry filter = fieldCache.get(fieldName);
        if (filter == null) {
            synchronized (fieldCache) {
                filter = fieldCache.get(fieldName);
                if (filter == null) {
                    // install a NONE placeholder first so concurrent callers don't
                    // schedule duplicate loads
                    filter = new BloomFilterEntry(currentNumDocs, BloomFilter.NONE);
                    fieldCache.put(fieldName, filter);
                    // now, do the async load of it...
                    if (currentNumDocs < maxSize) {
                        filter.loading.set(true);
                        BloomFilterLoader loader = new BloomFilterLoader(reader, fieldName);
                        if (asyncLoad) {
                            threadPool.executor(ThreadPool.Names.CACHE).execute(loader);
                        } else {
                            loader.run();
                            filter = fieldCache.get(fieldName);
                        }
                    }
                }
            }
        }
        // if we have too many deletes, we need to reload the bloom filter so it will be more effective.
        // BUGFIX: cast to double before dividing — with integer division the ratio was 0
        // whenever any doc had been deleted (currentNumDocs < numDocs), so the filter was
        // reloaded on every delete instead of only once >40% of the docs were deleted.
        if (filter.numDocs > 1000 && filter.numDocs < maxSize && ((double) currentNumDocs / filter.numDocs) < 0.6) {
            if (filter.loading.compareAndSet(false, true)) {
                // do the async loading
                BloomFilterLoader loader = new BloomFilterLoader(reader, fieldName);
                if (asyncLoad) {
                    threadPool.executor(ThreadPool.Names.CACHE).execute(loader);
                } else {
                    loader.run();
                    filter = fieldCache.get(fieldName);
                }
            }
        }
        return filter.filter;
    }

    /**
     * Builds a bloom filter over all terms of a single field by walking the
     * reader's term enum, then publishes it into the cache (only if an entry for
     * the field is still present, i.e. the reader has not been evicted meanwhile).
     */
    class BloomFilterLoader implements Runnable {

        private final IndexReader reader;
        private final String field;

        BloomFilterLoader(IndexReader reader, String field) {
            this.reader = reader;
            // interned so the identity comparison against Term#field() below is valid
            this.field = StringHelper.intern(field);
        }

        @SuppressWarnings({"StringEquality"})
        @Override
        public void run() {
            TermDocs termDocs = null;
            TermEnum termEnum = null;
            try {
                UnicodeUtil.UTF8Result utf8Result = new UnicodeUtil.UTF8Result();
                BloomFilter filter = BloomFilterFactory.getFilter(reader.numDocs(), 15);
                termDocs = reader.termDocs();
                termEnum = reader.terms(new Term(field));
                do {
                    Term term = termEnum.term();
                    // identity compare is safe: both strings are interned (Lucene interns
                    // Term fields, and the constructor interned ours)
                    if (term == null || term.field() != field) break;
                    // LUCENE MONITOR: 4.0, move to use bytes!
                    Unicode.fromStringAsUtf8(term.text(), utf8Result);
                    termDocs.seek(termEnum);
                    while (termDocs.next()) {
                        // when traversing, make sure to ignore deleted docs, so the key->docId will be correct
                        if (!reader.isDeleted(termDocs.doc())) {
                            filter.add(utf8Result.result, 0, utf8Result.length);
                        }
                    }
                } while (termEnum.next());
                ConcurrentMap<String, BloomFilterEntry> fieldCache = cache.get(reader.getCoreCacheKey());
                if (fieldCache != null) {
                    // only publish if the field entry still exists (reader not evicted)
                    if (fieldCache.containsKey(field)) {
                        BloomFilterEntry filterEntry = new BloomFilterEntry(reader.numDocs(), filter);
                        filterEntry.loading.set(false);
                        fieldCache.put(field, filterEntry);
                    }
                }
            } catch (AlreadyClosedException e) {
                // ignore, we are getting closed
            } catch (ClosedChannelException e) {
                // ignore, we are getting closed
            } catch (Exception e) {
                // ignore failures that result from a closed reader...
                if (reader.getRefCount() > 0) {
                    logger.warn("failed to load bloom filter for [{}]", e, field);
                }
            } finally {
                try {
                    if (termDocs != null) {
                        termDocs.close();
                    }
                } catch (Exception e) {
                    // ignore
                }
                try {
                    if (termEnum != null) {
                        termEnum.close();
                    }
                } catch (Exception e) {
                    // ignore
                }
            }
        }
    }

    /**
     * Immutable cache entry: the filter plus the doc count it was built against
     * (used to decide when deletes have made it stale). {@code loading} prevents
     * concurrent reloads of the same entry.
     */
    static class BloomFilterEntry {
        final int numDocs;
        final BloomFilter filter;
        final AtomicBoolean loading = new AtomicBoolean();

        public BloomFilterEntry(int numDocs, BloomFilter filter) {
            this.numDocs = numDocs;
            this.filter = filter;
        }
    }
}