package com.bigdata.rdf.sparql.ast.cache;

import java.util.LinkedHashSet;
import java.util.Set;

import org.apache.log4j.Logger;

import com.bigdata.rdf.changesets.ChangeAction;
import com.bigdata.rdf.changesets.IChangeLog;
import com.bigdata.rdf.changesets.IChangeRecord;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.sail.BigdataSail.BigdataSailConnection;
import com.bigdata.rdf.sparql.ast.eval.CustomServiceFactoryBase;
import com.bigdata.rdf.sparql.ast.service.BigdataNativeServiceOptions;
import com.bigdata.rdf.sparql.ast.service.IServiceOptions;
import com.bigdata.rdf.sparql.ast.service.ServiceCall;
import com.bigdata.rdf.sparql.ast.service.ServiceCallCreateParams;
import com.bigdata.rdf.spo.ISPO;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.rdf.store.BD;

/**
 * This service tracks KB updates via an {@link IChangeLog} and is responsible
 * for DESCRIBE cache invalidation for resources for which an update has been
 * observed.
 * 
 * @see BD#DESCRIBE
 * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/584">
 *      DESCRIBE cache </a>
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 */
public class DescribeServiceFactory extends CustomServiceFactoryBase {

    static private transient final Logger log = Logger
            .getLogger(DescribeServiceFactory.class);

    private final IServiceOptions serviceOptions;

    public DescribeServiceFactory() {

        this.serviceOptions = new BigdataNativeServiceOptions();

    }

    @Override
    public IServiceOptions getServiceOptions() {

        return serviceOptions;

    }

    /**
     * TODO Implement: The {@link DescribeServiceFactory} COULD be integrated
     * into query processing using a rewrite of a DESCRIBE or a star-join into
     * an invocation of this service.
     */
    @Override
    public ServiceCall<?> create(final ServiceCallCreateParams params) {

        throw new UnsupportedOperationException();

    }

    /**
     * Register an {@link IChangeLog} listener that will manage the maintenance
     * of the describe cache.
     */
    @Override
    public void startConnection(final BigdataSailConnection conn) {

        /*
         * TODO This really should not be using getCacheConnection() but rather
         * getExistingCacheConnection(). I need to figure out the pattern that
         * brings the cache connection into existence and who is responsible
         * for invoking it. The problem is that there are multiple entry
         * points, including AST evaluation, the DescribeServlet, and the test
         * suite. AST2BOpContext does this, but it is not always created before
         * we need the cache connection.
         */
        final ICacheConnection cacheConn = CacheConnectionFactory
                .getCacheConnection(conn.getBigdataSail().getQueryEngine());

        if (cacheConn == null) {

            // Cache is not enabled.
            return;

        }

        final AbstractTripleStore tripleStore = conn.getTripleStore();

        final IDescribeCache describeCache = cacheConn.getDescribeCache(
                tripleStore.getNamespace(), tripleStore.getTimestamp());

        if (describeCache == null) {

            // DESCRIBE cache is not enabled.
            return;

        }

        conn.addChangeLog(new DescribeCacheChangeLogListener(describeCache));

    }
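    /*
     * Registration sketch (illustration only, not part of the original
     * class): as with other custom services in this package, an instance of
     * this factory would typically be registered against a service URI via
     * the ServiceRegistry, e.g.:
     *
     *   ServiceRegistry.getInstance().add(BD.DESCRIBE,
     *           new DescribeServiceFactory());
     *
     * Using BD#DESCRIBE as the service URI is an assumption based on the
     * @see reference in the class javadoc. Note that create(...) still
     * throws UnsupportedOperationException, so only the IChangeLog hook in
     * startConnection() is functional today.
     */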
    /**
     * Handles cache maintenance/invalidation.
     * <p>
     * There are two very different strategies for cache maintenance:
     * <dl>
     * <dt>Invalidation only</dt>
     * <dd>Changes for resources in the subject or object position cause the
     * resource to be invalidated in the cache.</dd>
     * <dt>Maintenance</dt>
     * <dd>The cache is a fully populated and maintained index. All updates
     * are propagated into the cache so it remains consistent with the
     * statement indices.</dd>
     * </dl>
     * In addition to these strategies, we could treat the cache as a partial
     * representation of the linked data available on the open web for the
     * resources and track metadata about the age of the resource description
     * for each linked data authority.
     * <p>
     * Another twist on invalidation would be to bound the cache capacity.
     * That would require us to also maintain metadata to support an eviction
     * policy. The easiest way to do that is to manage the raw journal entries
     * on the store and a LIRS/LRU eviction policy on the {@link IV}s,
     * together with the address of the raw record (Blob/Stream).
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
     *         Thompson</a>
     */
    static private class DescribeCacheChangeLogListener implements IChangeLog {

        /** The vector size for updates. */
        private static final int threshold = 10000;

        /** An updatable view of the cache. */
        private final IDescribeCache cache;

        /** The set of IVs to be invalidated (lazily instantiated). */
        private Set<IV<?, ?>> ivs;

        /** The size of that set (tracked). */
        private int size = 0;

        DescribeCacheChangeLogListener(final IDescribeCache cache) {

            if (cache == null)
                throw new IllegalArgumentException();

            this.cache = cache;

        }

        /**
         * Vectors updates against the DESCRIBE cache.
         */
        @Override
        public void changeEvent(final IChangeRecord record) {

            if (record.getChangeAction() == ChangeAction.UPDATED) {

                /*
                 * This state change does not matter for cache maintenance
                 * unless we also plan to note the {Axiom, Inference, Explicit}
                 * state on the statements in the cache.
                 */
                return;

            }

            if (ivs == null) {

                // Lazy instantiation.
                ivs = new LinkedHashSet<IV<?, ?>>();
                size = 0;

            }

            final ISPO spo = record.getStatement();

            if (log.isTraceEnabled())
                log.trace("Invalidation notice: spo=" + spo);

            if (ivs.add(spo.s()))
                size++;

            if (ivs.add(spo.o()))
                size++;

            if (size > threshold) {

                flush();

            }

        }

        @Override
        public void transactionBegin() {
        }

        @Override
        public void transactionPrepare() {

            flush();

        }

        @Override
        public void transactionCommited(final long commitTime) {
        }

        @Override
        public void transactionAborted() {

            reset();

        }

        /**
         * See {@link IChangeLog#close()}.
         */
        @Override
        public void close() {

            reset();

        }

        /**
         * Incremental flush (vectored cache invalidation notices).
         */
        private void flush() {

            if (ivs != null) {

                cache.invalidate(ivs);

                reset();

            }

        }

        /** Reset the buffer. */
        private void reset() {

            ivs = null;

            size = 0;

        }

    } // class DescribeCacheChangeLogListener

} // class DescribeServiceFactory
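/*
 * Lifecycle sketch (illustration only, not part of the original file):
 * assuming a mutation on a connection with this listener attached, the
 * invalidation path above plays out roughly as follows:
 *
 *   conn.addChangeLog(listener);        // wired up in startConnection()
 *
 *   listener.changeEvent(record);       // INSERTED/REMOVED: buffers the
 *                                       // statement's s() and o() IVs;
 *                                       // UPDATED is ignored.
 *   ...                                 // flush() fires early once more
 *                                       // than 10000 IVs are buffered.
 *   listener.transactionPrepare();      // flush(): cache.invalidate(ivs),
 *                                       // then reset().
 *   listener.transactionCommited(t);    // no-op: invalidation already
 *                                       // happened at prepare.
 *
 * On rollback, transactionAborted() calls reset() instead, discarding the
 * buffered IVs without invalidating anything in the cache.
 */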