/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
package com.bigdata.rdf.sparql.ast.ssets;

import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.UUID;

import org.apache.log4j.Logger;

import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.NamedSolutionSetRefUtility;
import com.bigdata.bop.solutions.SolutionSetStream;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.journal.IBTreeManager;
import com.bigdata.journal.IJournal;
import com.bigdata.journal.ITx;
import com.bigdata.journal.TimestampUtility;
import com.bigdata.rdf.sparql.ast.ISolutionSetStats;
import com.bigdata.rdf.spo.ISPO;
import com.bigdata.relation.AbstractResource;
import com.bigdata.stream.Stream.StreamIndexMetadata;
import com.bigdata.striterator.CloseableIteratorWrapper;

import cutthecrap.utils.striterators.ICloseableIterator;

/**
 * A manager for named SOLUTION SETS scoped by some namespace and timestamp.
 *
 * @see <a
 *      href="https://sourceforge.net/apps/mediawiki/bigdata/index.php?title=SPARQL_Update">
 *      SPARQL Update </a>
 *
 * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/531"> SPARQL
 *      UPDATE for NAMED SOLUTION SETS </a>
 *
 * @see <a href="http://aksw.org/Projects/QueryCache"> Adaptive SPARQL Query
 *      Cache </a>
 *
 * @see <a
 *      href="http://www.informatik.uni-leipzig.de/~auer/publication/caching.pdf">
 *      Improving the Performance of Semantic Web Applications with SPARQL
 *      Query Caching </a>
 *
 * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/524"> SPARQL
 *      Query Cache </a>
 *
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 */
// Add support for declared and maintained views. Views would be
// declared using SPARQL queries. The view would listen for updates to
// statement patterns and invalidate/maintain the SPARQL result sets when a
// triple in a statement pattern in use by the query for that solution set
// has been added or removed.
// <p>
// Maintained views should be used transparently in queries where they can
// be incorporated by subsumption. General match of solution sets should be
// based on the hash code of the SPARQL query or the deep hash code of a
// normalized and optimized AST. Detailed match must be on either the query
// text or the AST (deep equals). AST based caching allows sub-select
// caching or even caching of sub-groups.
// <p>
// When BINDINGS are present, then the query solutions are not the same as
// when they are not present. This makes the cache somewhat more difficult
// to integrate since the same query is not always the same (e.g.,
// include the hash of the exogenous solutions in the query hash code and
// we will get less reuse). Therefore, either the view must be computed
// without reference to a set of exogenous solutions or the exogenous
// solutions must be incorporated into the declaration of the view and
// considered when making decisions about subsumption.
// <p>
// Benchmark impact of cache on BSBM explore+update. The cache should be
// integrated into the query planner so we can cache solution sets for
// sub-groups and sub-selects.
public class SolutionSetManager implements ISolutionSetManager {

    private static final Logger log = Logger
            .getLogger(SolutionSetManager.class);

    /**
     * The backing store.
     */
    private final IBTreeManager store;

    /**
     * Return the backing store.
     */
    private IBTreeManager getStore() {

        return store;

    }

//    /**
//     * MVCC VIEWS: Convert over to Name2Addr and layered resolution with
//     * appropriate concurrency control at each layer.
//     *
//     * <pre>
//     * (cache|query|database)[.queryUUID].namespace[.joinVars]
//     * </pre>
//     *
//     * or something like that.
//     * <p>
//     * There are several problems here.
//     * <p>
//     * 1. We need a common semantics for visibility for the named solution sets
//     * and the query and update operations. This cache can not provide that
//     * without being somehow integrated with the MVCC architecture.
//     * <p>
//     * 2. We need to expire (at least some) cache objects. That expiration
//     * should have a default but should also be configurable for each cache
//     * object. The visibility issue also exists for expiration (we can not
//     * expire a result set while it is being used).
//     * <p>
//     * 3. If we allow updates against named solution sets, then the visibility
//     * of those updates must again be consistent with the MVCC architecture for
//     * the query and update operations.
//     *
//     * Appropriate synchronization, including for updates, and on the
//     * Stream class (unisolated index protection).
//     *
//     * This probably needs to buffer {@link Name2Addr} changes in a manner
//     * similar to {@link AbstractTask} in order to handle tx isolation
//     * correctly. However, this is not even hooked into the commit protocol so
//     * it is difficult to see how the view can evolve. I am unsure how this will
//     * play out for SPARQL UPDATE request sequences for either the UNISOLATED
//     * case or the case where the changes are isolated by a read/write tx.
//     *
//     * The SolutionSetStream is not transaction aware at all. This makes
//     * it difficult to use named solution sets in SPARQL UPDATE in conjunction
//     * with full read-write transactions. There is no sense of a "FusedView" and
//     * the Stream is not being accessed through an AbstractTask running on the
//     * concurrency manager.
//     */
//    @Deprecated // We need to use Name2Addr and Name2Addr prefix scans
//    private final ConcurrentHashMap<String/* name */, SolutionSetStream> cacheMap;
//    private final ConcurrentWeakValueCacheWithTimeout<String/* name */,
//            IMemoryManager /* allocationContext */> cacheMap;

    /**
     * The namespace used to qualify the names of the solution sets.
     */
    private final String namespace;

    /**
     * The timestamp of the view. It selects how a solution set is resolved
     * against the backing store and whether mutation is permitted.
     */
    private final long timestamp;

    @Override
    public String toString() {

        return super.toString() + "{namespace=" + namespace + ",timestamp="
                + TimestampUtility.toString(timestamp) + "}";

    }

    /**
     * Create a manager for the named solution sets in some namespace as of
     * some timestamp.
     *
     * @param store
     *            The backing store.
     * @param namespace
     *            The namespace.
     * @param timestamp
     *            The timestamp of the view.
     *
     * @throws IllegalArgumentException
     *             if the <i>store</i> or the <i>namespace</i> is
     *             <code>null</code>.
     */
    public SolutionSetManager(final IBTreeManager store,
            final String namespace, final long timestamp) {

        if (store == null) {
            throw new IllegalArgumentException();
        }

        if (namespace == null) {
            throw new IllegalArgumentException();
        }

        this.store = store;

        this.namespace = namespace;

        this.timestamp = timestamp;

//        /*
//         * TODO The expire should be per cached object, not global. We would
//         * need a different cache map class for that.
//         */
//        final long timeoutNanos = TimeUnit.SECONDS.toNanos(20);
//        this.cacheMap = new ConcurrentWeakValueCacheWithTimeout<String,
//                IMemoryManager>(
//                0/* queueCapacity */, timeoutNanos);
//        this.cacheMap = new ConcurrentHashMap<String, SolutionSetStream>();

    }

    /**
     * {@inheritDoc}
     * <p>
     * This implementation is a NOP.
     */
    @Override
    public void init() {

        // NOP

    }

    /**
     * {@inheritDoc}
     * <p>
     * Note: Explicit close is not safe. We want to destroy the cached solution
     * sets if the AbstractTripleStore is destroyed. The hook is currently in
     * {@link AbstractResource#destroy()}.
     */
    @Override
    public void close() {

//        cacheMap.clear();

    }

    /**
     * Return the fully qualified name of the named solution set.
     *
     * @param localName
     *            The local (aka application) name.
     *
     * @return The fully qualified name.
     *
     *         TODO Support different index orders. We can do a prefix scan on
     *         Name2Addr to find the existing index orders (but this is not
     *         currently efficient due to a bug in Name2Addr).
     *         <p>
     *         When reading out the {@link ISolutionSetStats} or the solutions
     *         themselves, all we need is the first such index that we find for
     *         a given namespace and localName.
     *         <p>
     *         When writing, we need to write on ALL such indices.
     *         <p>
     *         Either way, the scan should give us what we need.
     *         <p>
     *         For CREATE, the desired index order(s) should be declared in the
     *         create metadata.
     */
    private String getFQN(final String localName) {

        return NamedSolutionSetRefUtility.getFQN(namespace, localName,
                IVariable.EMPTY/* joinVars */);

    }

    /**
     * Reject the operation if this manager is a read-only view.
     *
     * @throws UnsupportedOperationException
     *             if the timestamp of this view is read-only.
     */
    private void assertNotReadOnly() {

        if (TimestampUtility.isReadOnly(timestamp)) {

            throw new UnsupportedOperationException("Read Only");

        }

    }

    /**
     * {@inheritDoc}
     * <p>
     * Scans the backing store for the index names having the prefix for this
     * namespace and drops each of them.
     *
     * @throws UnsupportedOperationException
     *             if this is a read-only view.
     */
    @Override
    public void clearAllSolutions() {

        assertNotReadOnly();

        final String prefix = NamedSolutionSetRefUtility.getPrefix(namespace)
                .toString();

        if (log.isInfoEnabled()) {
            log.info("Scanning: prefix=" + prefix);
        }

        final Iterator<String> itr = getStore().indexNameScan(prefix,
                timestamp);

        while (itr.hasNext()) {

            final String name = itr.next();

            getStore().dropIndex(name);

            if (log.isInfoEnabled()) {
                log.info("Dropping: " + name);
            }

        }

//        final Iterator<Map.Entry<String, SolutionSetStream>> itr = cacheMap
//                .entrySet().iterator();
//
//        while (itr.hasNext()) {
//
//            final Map.Entry<String, SolutionSetStream> e = itr.next();
//
//            final String solutionSet = e.getKey();
//
//            final SolutionSetStream sset = e.getValue();
//
//            if (log.isInfoEnabled())
//                log.info("solutionSet: " + solutionSet);
//
//            sset.clear();
//
//            itr.remove();
//
//        }

    }

    /**
     * Return the named solution set.
     *
     * @param fqn
     *            The fully qualified name.
     *
     * @return The named solution set -or- <code>null</code> if it was not
     *         found.
     */
    private SolutionSetStream getSolutionSet(final String fqn) {

        if (timestamp == ITx.READ_COMMITTED) {

            // Resolve against the last commit point on the store.
            return (SolutionSetStream) getStore().getIndexLocal(fqn,
                    getStore().getLastCommitTime());

        } else if (timestamp == ITx.UNISOLATED) {

            return (SolutionSetStream) getStore().getUnisolatedIndex(fqn);

        } else if (TimestampUtility.isReadWriteTx(timestamp)) {

            final long ts;

            if (getStore() instanceof IJournal) {

                /*
                 * Optimized code path uses the readsOnCommitTime to improve
                 * caching.
                 */
                ts = ((IJournal) getStore()).getLocalTransactionManager()
                        .getTx(timestamp).getReadsOnCommitTime();

            } else {

                /*
                 * Note: This code path is used by the TemporaryStore and
                 * possibly ResourceManager (for a data service). The [store]
                 * reference here is always the local index manager. Thus it can
                 * not be an IBigdataFederation.
                 *
                 * TODO Use the readsOnCommitTime. Test coverage for
                 * TemporaryStore and DataService (but there is an open question
                 * about how to handle hash partitioned solution sets on a
                 * federation).
                 *
                 * See https://sourceforge.net/apps/trac/bigdata/ticket/266
                 * (Refactor native long tx id to thin object.)
                 */
                ts = timestamp;

            }

            return (SolutionSetStream) getStore().getIndexLocal(fqn, ts);

        } else {

            return (SolutionSetStream) getStore().getIndexLocal(fqn, timestamp);

        }

        // Note: Forces all access to be unisolated.
//        return (SolutionSetStream) getStore().getUnisolatedIndex(fqn);

    }

    /**
     * {@inheritDoc}
     *
     * @throws IllegalArgumentException
     *             if the <i>solutionSet</i> is <code>null</code>.
     */
    @Override
    public boolean existsSolutions(final String solutionSet) {

        if (solutionSet == null) {
            throw new IllegalArgumentException();
        }

        final SolutionSetStream sset = getSolutionSet(getFQN(solutionSet));

        return sset != null;

    }

    /**
     * {@inheritDoc}
     * <p>
     * Drops the index backing the named solution set if it exists in this
     * view.
     *
     * @return <code>true</code> iff the named solution set existed and was
     *         dropped.
     *
     * @throws UnsupportedOperationException
     *             if this is a read-only view (same guard as
     *             {@link #clearAllSolutions()}).
     * @throws IllegalArgumentException
     *             if the <i>solutionSet</i> is <code>null</code>.
     */
    @Override
    public boolean clearSolutions(final String solutionSet) {

        /*
         * Dropping an index mutates the backing store, so a read-only view
         * must reject the request rather than drop the live index.
         */
        assertNotReadOnly();

        if (log.isInfoEnabled()) {
            log.info("solutionSet: " + solutionSet);
        }

        if (existsSolutions(solutionSet)) {

            final String fqn = getFQN(solutionSet);

            getStore().dropIndex(fqn);

            return true;

        }

//        final SolutionSetStream sset = cacheMap.remove(fqn);
//
//        if (sset != null) {
//            sset.clear();
//
//            return true;
//
//        }

        return false;

    }

    /**
     * {@inheritDoc}
     * <p>
     * The named solution set is created using the
     * {@link #getDefaultMetadata() default metadata} if it does not exist in
     * this view.
     *
     * @throws IllegalArgumentException
     *             if the <i>src</i> is <code>null</code>.
     */
    @Override
    public void putSolutions(final String solutionSet,
            final ICloseableIterator<IBindingSet[]> src) {

        if (src == null) {
            throw new IllegalArgumentException();
        }

        final String fqn = getFQN(solutionSet);

        SolutionSetStream sset = getSolutionSet(fqn);

        if (sset == null) {

            // Implicit declaration of the named solution set.
            sset = _create(fqn, getDefaultMetadata());

        }

        // write out the solutions.
        writeSolutions(sset, src);

    }

    /**
     * Create iff it does not exist.
     *
     * @param fqn
     *            The fully qualified name of the solution set.
     * @param params
     *            The configuration parameters.
     *
     * @return A solution set with NOTHING written on it.
     *
     * @throws RuntimeException
     *             if a solution set is already found under that name.
     *
     *             TODO ISPO[] params is ignored (you can not configure for a
     *             BTree or HTree index for the solutions with a specified set
     *             of join variables for the index).
     */
    private SolutionSetStream _create(final String fqn, final ISPO[] params) {

        SolutionSetStream sset = getSolutionSet(fqn);

        if (sset != null) {
            throw new RuntimeException("Exists: " + fqn);
        }

        if (log.isInfoEnabled()) {
            log.info("Create: fqn=" + fqn + ", params="
                    + Arrays.toString(params));
        }

        final StreamIndexMetadata md = new StreamIndexMetadata(fqn,
                UUID.randomUUID());

        /*
         * TODO GIST : We should not have to do this here. See
         * Checkpoint.create() and SolutionSetStream.create() for why this is
         * necessary.
         *
         * See https://sourceforge.net/apps/trac/bigdata/ticket/585 (GIST)
         */
        md.setStreamClassName(SolutionSetStream.class.getName());

        sset = (SolutionSetStream) getStore().register(fqn, md);

//        sset = SolutionSetStream.create(getRWStrategy(), md);

//        sset = new SolutionSetMetadata(getStore(),
//                params == null ? getDefaultMetadata() : params, false/* readOnly */);

//        cacheMap.put(fqn, sset);

        return sset;

    }

    /**
     * {@inheritDoc}
     * <p>
     * The solution set is created and an empty solution set is then written
     * onto it.
     *
     * @throws RuntimeException
     *             if the named solution set already exists.
     */
    @Override
    public void createSolutions(final String solutionSet, final ISPO[] params) {

        final String fqn = getFQN(solutionSet);

        final SolutionSetStream sset = _create(fqn, params);

        /*
         * Write an empty solution set.
         */
        final List<IBindingSet[]> emptySolutionSet = new LinkedList<IBindingSet[]>();

        final ICloseableIterator<IBindingSet[]> src = new CloseableIteratorWrapper<IBindingSet[]>(
                emptySolutionSet.iterator());

        // write the solutions.
        writeSolutions(sset, src);

    }

    /**
     * Write the solutions onto the solution set.
     *
     * @param sset
     *            The solution set.
     * @param src
     *            The solutions to be written.
     */
    private void writeSolutions(final SolutionSetStream sset,
            final ICloseableIterator<IBindingSet[]> src) {

        sset.put(src);

    }

    /**
     * {@inheritDoc}
     *
     * @return The statistics -or- <code>null</code> if the named solution set
     *         was not found.
     *
     * @throws RuntimeException
     *             if the solution set exists but reports no statistics.
     */
    @Override
    public ISolutionSetStats getSolutionSetStats(final String solutionSet) {

        final String fqn = getFQN(solutionSet);

        final SolutionSetStream sset = getSolutionSet(fqn);

        if (sset != null) {

            final ISolutionSetStats stats = sset.getStats();

            if (stats == null) {
                throw new RuntimeException("No statistics? solutionSet="
                        + solutionSet);
            }

            return stats;

        }

        return null;

    }

    /**
     * {@inheritDoc}
     *
     * @throws IllegalStateException
     *             if the named solution set was not found.
     */
    @Override
    public ICloseableIterator<IBindingSet[]> getSolutions(
            final String solutionSet) {

        final String fqn = getFQN(solutionSet);

        final SolutionSetStream sset = getSolutionSet(fqn);

        if (sset == null) {
            throw new IllegalStateException("Not found: " + solutionSet);
        }

        // Return iterator over the decoded solutions.
        return sset.get();

    }

    /**
     * Return the default metadata used when a named solution set is declared
     * implicitly rather than explicitly.
     *
     * @return The metadata describing that solution set.
     *
     *         TODO This is ignored and needs to be reconciled with
     *         {@link IndexMetadata}. However, we do want to provide this
     *         metadata in a CREATE schema as triples.
     */
    protected ISPO[] getDefaultMetadata() {

        return new ISPO[] {};

    }

}