package eu.fbk.knowledgestore.datastore; import java.io.IOException; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; import javax.annotation.Nullable; import com.google.common.base.MoreObjects; import com.google.common.base.Preconditions; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import org.openrdf.model.URI; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.fbk.knowledgestore.data.Record; import eu.fbk.knowledgestore.data.Stream; import eu.fbk.knowledgestore.data.XPath; import eu.fbk.knowledgestore.vocabulary.KS; // TODO: global cache should store byte[] rather than Record object trees, so that a larger cache // size can be used; this need to move serialization logic (no more Avro-based, please!) in // ks-core /** * A {@code DataStore} wrapper providing a transactional and a global cache for looked up and * modified records. * <p> * This wrapper aims at improving the performances of record lookups ( * {@link DataTransaction#lookup(URI, Set, Set) lookup} calls) and modifications ( * {@link DataTransaction#store(URI, Record) store} and {@link DataTransaction#delete(URI, URI) * delete} calls) through a two-level caching mechanism. * </p> * <p> * A global cache holds records previously looked up by transactions, up to a configurable * {@code maxSize} number of records for each record type. This cache provides optimal * performances in a read-only load. 
However, in the presence of read-write transactions a record may * have multiple versions (one committed and others locally modified in active transactions), * therefore it may not be possible for all the transactions to look up / place data in the global * cache (this is enforced via a revision number mechanism). * </p> * <p> * To overcome the limits of the global cache with read-write transactions, each transaction is * also given a local cache that stores records looked up and modified locally by the transaction; * this cache is synchronized with the global cache upon a successful commit (either by copying * modified records or invalidating them). Note that synchronization is possible only if the * complete set of records modified by the transaction is known. When this is impossible because * the transaction modified more than {@code maxChanges} records of a type, i.e., too many for * their IDs to be tracked in memory, then a full invalidation of the global cache for that type is * mandatory in order to avoid dirty reads (this degrades performance!). The local cache is not just * used for lookups but also to implement a write-back mechanism, where up to * {@code maxBufferedChanges} records modified by the transaction are kept locally and flushed to * the underlying data store only when strictly necessary. More precisely, changes are flushed at * commit time and every time operations * {@link DataTransaction#retrieve(URI, XPath, Set) retrieve}, * {@link DataTransaction#count(URI, XPath) count} and * {@link DataTransaction#match(Map, Map, Map) match} are called. * </p> * <p> * Some statistics about the number of cache hits (local and global caches), fetches, changes and * flushes are logged at close time.
* </p> */ public class CachingDataStore extends ForwardingDataStore { private static final Logger LOGGER = LoggerFactory.getLogger(CachingDataStore.class); private static final int DEFAULT_MAX_SIZE = 1024; private static final int DEFAULT_MAX_CHANGES = 1024; private static final int DEFAULT_MAX_BUFFERED_CHANGES = 1024; private static final Record NULL = Record.create(); private final DataStore delegate; private final int maxChanges; private final int maxBufferedChanges; private final ReadWriteLock globalLock; private final Map<URI, Cache<URI, Record>> globalCaches; private long globalRevision; // Counters for statistics private final AtomicLong globalHitCount; private final AtomicLong localHitCount; private final AtomicLong fetchCount; private final AtomicLong changeCount; private final AtomicLong flushCount; /** * Creates a new instance for the wrapped {@code DataStore} specified. * * @param delegate * the wrapped {@code DataStore} * @param maxSize * the maximum size of global per-record-type caches ( number of records); if null * defaults to 1024 * @param maxChanges * the max number (per-type) of records that a transaction can change before * modification tracking is aborted forcing the invalidation of global caches upon * commit; if null defaults to 1024 * @param maxBufferedChanges * the max number (per-type) of records changed by a transactions that are buffered * locally, before being flushed to the underlying {@code DataTransaction}; if null * defaults to 1024 */ public CachingDataStore(final DataStore delegate, @Nullable final Integer maxSize, @Nullable final Integer maxChanges, @Nullable final Integer maxBufferedChanges) { final int actualMaxSize = MoreObjects.firstNonNull(maxSize, DEFAULT_MAX_SIZE); final int actualMaxChanges = MoreObjects.firstNonNull(maxChanges, DEFAULT_MAX_CHANGES); final int actualMaxBufferedChanges = MoreObjects.firstNonNull(maxBufferedChanges, DEFAULT_MAX_BUFFERED_CHANGES); Preconditions.checkArgument(actualMaxSize > 0); 
Preconditions.checkArgument(actualMaxChanges > 0); this.delegate = Preconditions.checkNotNull(delegate); this.maxChanges = actualMaxChanges; this.maxBufferedChanges = actualMaxBufferedChanges; this.globalLock = new ReentrantReadWriteLock(true); this.globalCaches = Maps.newHashMap(); this.globalRevision = 0L; this.globalHitCount = new AtomicLong(0); this.localHitCount = new AtomicLong(0); this.fetchCount = new AtomicLong(0); this.changeCount = new AtomicLong(0); this.flushCount = new AtomicLong(0); for (final URI type : DataStore.SUPPORTED_TYPES) { // Original setting (may cause OutOfMemory if maximimum value is inappropriate // this.globalCaches.put(type, CacheBuilder.newBuilder().maximumSize(actualMaxSize) // .<URI, Record>build()); this.globalCaches.put(type, CacheBuilder.newBuilder().softValues().maximumSize(actualMaxSize) .<URI, Record>build()); } CachingDataStore.LOGGER.info("{} configured", this.getClass().getSimpleName()); } @Override protected DataStore delegate() { return this.delegate; } @Override public DataTransaction begin(final boolean readOnly) throws IOException, IllegalStateException { // Need to acquire an exclusive lock to prevent commits in the meanwhile CachingDataStore.this.globalLock.readLock().lock(); try { final long revision = CachingDataStore.this.globalRevision; final DataTransaction tx = delegate().begin(readOnly); return new CachingDataTransaction(tx, readOnly, revision); } finally { CachingDataStore.this.globalLock.readLock().unlock(); } } @Override public void close() { try { LOGGER.info("{} - {} local cache hits, {} global cache hits, {} fetches, " + "{} changes, {} flushes", this.getClass().getSimpleName(), this.localHitCount, this.globalHitCount, this.fetchCount, this.changeCount, this.flushCount); } finally { super.close(); } } private class CachingDataTransaction extends ForwardingDataTransaction { private final DataTransaction delegate; @Nullable private final Set<URI> dirty; // contains types for which a flush has been done 
@Nullable private final Map<URI, Map<URI, Record>> changes; // null if read-only @Nullable private final Map<URI, Set<URI>> invalidated; // null if read-only private final Map<URI, Cache<URI, Record>> localCaches; private final long localRevision; CachingDataTransaction(final DataTransaction delegate, final boolean readOnly, final long revision) { this.delegate = Preconditions.checkNotNull(delegate); if (readOnly) { this.dirty = null; this.changes = null; this.invalidated = null; } else { this.dirty = Sets.newHashSet(); this.changes = Maps.newHashMap(); this.invalidated = Maps.newHashMap(); for (final URI type : DataStore.SUPPORTED_TYPES) { this.changes.put(type, Maps.<URI, Record>newHashMap()); this.invalidated.put(type, Sets.<URI>newHashSet()); } } this.localRevision = revision; this.localCaches = Maps.newHashMap(); for (final URI type : DataStore.SUPPORTED_TYPES) { this.localCaches.put(type, CacheBuilder.newBuilder().softValues() .<URI, Record>build()); } } @Override protected DataTransaction delegate() { return this.delegate; } @Override public Stream<Record> lookup(final URI type, final Set<? extends URI> ids, final Set<? extends URI> properties) throws IOException, IllegalArgumentException, IllegalStateException { final long globalRevision = CachingDataStore.this.globalRevision; final Cache<URI, Record> globalCache = CachingDataStore.this.globalCaches.get(type); final Cache<URI, Record> localCache = this.localCaches.get(type); final List<Record> result = Lists.newArrayList(); // Determine if there is a chance to use the global cache. 
// The global cache cannot be used if we lost track of what we changed in current // transaction, or if it got polluted with changes from other concurrent transaction // (based on revision number) final boolean mightUseGlobalCache = this.localRevision == globalRevision && (this.dirty == null || !this.dirty.contains(type)); // Lookup in local cache final Set<URI> missingIDs = Sets.newHashSet(); for (final URI id : ids) { final Record record = localCache.getIfPresent(id); if (record == null) { missingIDs.add(id); } else if (record != CachingDataStore.NULL) { CachingDataStore.this.localHitCount.incrementAndGet(); result.add(Record.create(record, true)); // clone to preserve cached one } } // Lookup in global cache, if possible. Need to check revision number holding a shared // lock on the global cache if (mightUseGlobalCache) { CachingDataStore.this.globalLock.readLock().lock(); try { if (this.localRevision == globalRevision) { for (final Iterator<URI> i = missingIDs.iterator(); i.hasNext();) { final URI id = i.next(); final Record record = globalCache.getIfPresent(id); if (record != null) { CachingDataStore.this.globalHitCount.incrementAndGet(); localCache.put(id, record); // propagate to local cache result.add(Record.create(record, true)); // clone record i.remove(); // ID no more missing } } } } finally { CachingDataStore.this.globalLock.readLock().unlock(); } } // Fetch missing records (possibly NOP) final List<Record> fetched = missingIDs.isEmpty() ? 
ImmutableList.<Record>of() : // delegate().lookup(type, missingIDs, null).toList(); CachingDataStore.this.fetchCount.addAndGet(missingIDs.size()); // Add fetched records to result (cloning them) and to local cache; update missing IDs for (final Record record : fetched) { result.add(Record.create(record, true)); localCache.put(record.getID(), record); missingIDs.remove(record.getID()); } // Non-existing records are also tracked in local cache for efficiency reasons for (final URI id : missingIDs) { localCache.put(id, CachingDataStore.NULL); } // If possible, fetched data is also put in the global cache. To access it, need to // acquire a shared lock and check again the revision number. if (mightUseGlobalCache) { CachingDataStore.this.globalLock.readLock().lock(); try { if (this.localRevision == CachingDataStore.this.globalRevision) { for (final Record record : fetched) { globalCache.put(record.getID(), record); } } } finally { CachingDataStore.this.globalLock.readLock().unlock(); } } // All the data is here. Perform projection, if required if (properties != null && !properties.isEmpty()) { for (final Record record : result) { final URI[] projection = properties.toArray(new URI[properties.size()]); record.retain(projection); } } // Return a stream over the requested records return Stream.create(result); } @Override public Stream<Record> retrieve(final URI type, final XPath condition, final Set<? 
extends URI> properties) throws IOException, IllegalArgumentException, IllegalStateException { if (this.changes != null) { flushChanges(type); } return delegate().retrieve(type, condition, properties); } @Override public long count(final URI type, final XPath condition) throws IOException, IllegalArgumentException, IllegalStateException { if (this.changes != null) { flushChanges(type); } return delegate().count(type, condition); } @Override public Stream<Record> match(final Map<URI, XPath> conditions, final Map<URI, Set<URI>> ids, final Map<URI, Set<URI>> properties) throws IOException, IllegalStateException { if (this.changes != null) { flushChanges(KS.RESOURCE); flushChanges(KS.MENTION); flushChanges(KS.ENTITY); flushChanges(KS.AXIOM); } return delegate().match(conditions, ids, properties); } @Override public void store(final URI type, final Record record) throws IOException, IllegalStateException { Preconditions.checkState(this.changes != null, "Read-only DataTransaction"); registerChange(type, record.getID(), record); } @Override public void delete(final URI type, final URI id) throws IOException, IllegalStateException { Preconditions.checkState(this.changes != null, "Read-only DataTransaction"); registerChange(type, id, CachingDataStore.NULL); } @Override public void end(final boolean commit) throws IOException, IllegalStateException { // Simply delegate if read-only or on rollback if (this.changes == null || !commit) { this.delegate.end(commit); return; } // On read/write commit, start by flushing pending changes for (final URI type : DataStore.SUPPORTED_TYPES) { flushChanges(type); } // Then perform the commit and synchronize the global cache by holding an exclusive // lock, so to properly handle revision numbers. Pre-existing transactions will be // forced to stop using the global cache. 
CachingDataStore.this.globalLock.writeLock().lock(); try { delegate().end(true); ++CachingDataStore.this.globalRevision; for (final URI type : DataStore.SUPPORTED_TYPES) { synchronizeCaches(// this.invalidated.get(type), // this.localCaches.get(type), // CachingDataStore.this.globalCaches.get(type)); } } finally { CachingDataStore.this.globalLock.writeLock().unlock(); } } private void synchronizeCaches(@Nullable final Set<URI> invalidatedIDs, final Cache<URI, Record> localCache, final Cache<URI, Record> globalCache) { if (invalidatedIDs == null) { globalCache.invalidateAll(); return; } globalCache.invalidateAll(invalidatedIDs); for (final Map.Entry<URI, Record> entry : localCache.asMap().entrySet()) { final URI id = entry.getKey(); final Record record = entry.getValue(); if (record != CachingDataStore.NULL) { globalCache.put(id, record); } } } private void registerChange(final URI type, final URI id, final Record record) throws IOException { assert this.changes != null && this.invalidated != null; // need read/write tx CachingDataStore.this.changeCount.incrementAndGet(); this.localCaches.get(type).put(id, record); final Map<URI, Record> changeMap = this.changes.get(type); changeMap.put(id, record); if (changeMap.size() > CachingDataStore.this.maxBufferedChanges) { flushChanges(type); } final Set<URI> invalidatedIDs = this.invalidated.get(type); if (invalidatedIDs != null) { invalidatedIDs.add(id); if (invalidatedIDs.size() > CachingDataStore.this.maxChanges) { this.invalidated.put(type, null); } } } private void flushChanges(final URI type) throws IOException { assert this.changes != null && this.invalidated != null; // need read/write tx final Map<URI, Record> map = this.changes.get(type); if (map.isEmpty()) { return; } this.dirty.add(type); CachingDataStore.this.flushCount.addAndGet(map.size()); for (final Map.Entry<URI, Record> entry : map.entrySet()) { final URI id = entry.getKey(); final Record record = entry.getValue(); if (record == CachingDataStore.NULL) { 
delegate().delete(type, id); } else { delegate().store(type, record); } } map.clear(); } } }