package com.bigdata.rdf.sparql.ast.cache;
import java.util.Arrays;
import java.util.Set;
import org.apache.log4j.Logger;
import org.openrdf.model.Graph;
import org.openrdf.query.GraphQueryResult;
import com.bigdata.btree.keys.IKeyBuilder;
import com.bigdata.htree.HTree;
import com.bigdata.io.SerializerUtil;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.model.BigdataURI;
import com.bigdata.rdf.store.AbstractTripleStore;
/**
* A maintained DESCRIBE cache implementation for some
* {@link AbstractTripleStore}.
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
*
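* <p>
* A minimal usage sketch (hedged: the {@code htree}, {@code iv}, and
* {@code graph} handles below are hypothetical; in practice the cache is
* wired up through the cache connection layer rather than constructed
* directly):
* <pre>{@code
* final IDescribeCache cache = new DescribeCache(htree);
* cache.insert(iv, graph);          // cache the resource description
* final Graph g = cache.lookup(iv); // null if not cached
* cache.invalidate(Collections.singleton(iv));
* }</pre>
*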
* FIXME MVCC VIEWS: The same integration issue also needs to be
* addressed for the {@link CacheConnectionImpl} for named solution sets.
*
* TODO Support hash partitioned and remote DESCRIBE cache instances.
* These will need access to a service that resolves
* {@link BigdataURI}s to {@link IV}s efficiently. That service can be
* exposed using the NSS or using jini/River.
*/
public class DescribeCache implements IDescribeCache {
private static final transient Logger log = Logger
.getLogger(DescribeCache.class);
/**
* The cache. The keys are {@link IV}s. The values are the {@link Graph}s
* describing those {@link IV}s.
*/
private final HTree map;
public DescribeCache(final HTree map) {
if (map == null)
throw new IllegalArgumentException();
this.map = map;
}
public void close() {
this.map.close();
}
public void destroy() {
this.map.removeAll();
}
/**
* Return a thread-local {@link IKeyBuilder} instance used to encode the
* {@link IV} keys for the cache.
*/
private IKeyBuilder getKeyBuilder() {
return map.getIndexMetadata().getKeyBuilder();
}
/**
* Return the sort key for an {@link IV}.
*
* @param keyBuilder
* The key builder.
* @param iv
* The {@link IV}.
*
* @return The sort key.
*/
private byte[] iv2key(final IKeyBuilder keyBuilder, final IV<?, ?> iv) {
if (keyBuilder == null)
throw new IllegalArgumentException();
if (iv == null)
throw new IllegalArgumentException();
keyBuilder.reset();
return iv.encode(keyBuilder).getKey();
}
/**
* {@inheritDoc}
*
* TODO Compute the sketch and use an efficient representation for the
* describe graph. The insert should be vectored, scalable, and page
* oriented (blob stream API). The only scalable way to compute and store
* the sketch is to stream onto a buffer backed by a temporary file, computing
* the sketch as we go and then replay the stream into a compact
* representation for the resource description. However, note that the API
* currently presumes that the {@link Graph} is transmitted as a unit. A
* {@link GraphQueryResult} provides an iterator oriented view of a graph
* more suitable to the transmission of large graphs and streaming graphs
* over a network.
* <p>
* The sketch can be used to compress the resource description. For example,
* it includes a frequency count of the predicates that can be used to
* assign Huffman codes. It would also be useful to be able to efficiently
* skip forward in the stream to the offset where specific edges are stored.
* Perhaps we could organize the edges using SPO (attributes and forward
* links) and POS (reverse links) projections.
*
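* A minimal sketch of the Huffman assignment mentioned above (hedged:
* {@code Node}, {@code huffman}, and {@code assignCodes} are illustrative
* helpers, not part of this class; assumes a non-empty frequency map):
* <pre>{@code
* static final class Node implements Comparable<Node> {
*     final String pred; final long weight; final Node left, right;
*     Node(String pred, long weight, Node left, Node right) {
*         this.pred = pred; this.weight = weight;
*         this.left = left; this.right = right;
*     }
*     public int compareTo(Node o) { return Long.compare(weight, o.weight); }
* }
* // Walk the tree, accumulating "0" for left edges and "1" for right edges.
* static void assignCodes(Node n, String prefix, Map<String, String> out) {
*     if (n.left == null) { // leaf: emit the accumulated code
*         out.put(n.pred, prefix.isEmpty() ? "0" : prefix);
*         return;
*     }
*     assignCodes(n.left, prefix + "0", out);
*     assignCodes(n.right, prefix + "1", out);
* }
* // Merge the two lightest subtrees until one tree remains: frequent
* // predicates wind up near the root and receive the shortest codes.
* static Map<String, String> huffman(Map<String, Long> freq) {
*     PriorityQueue<Node> pq = new PriorityQueue<Node>();
*     for (Map.Entry<String, Long> e : freq.entrySet())
*         pq.add(new Node(e.getKey(), e.getValue(), null, null));
*     while (pq.size() > 1) {
*         Node a = pq.poll(), b = pq.poll();
*         pq.add(new Node(null, a.weight + b.weight, a, b));
*     }
*     Map<String, String> codes = new HashMap<String, String>();
*     assignCodes(pq.poll(), "", codes);
*     return codes;
* }
* }</pre>
*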
* TODO If we explicitly manage the raw records then we need to change how the
* metadata is declared. We would have a fixed length value (the addr on the
* backing store - either 4 or 8 bytes). We would also have to manage the
* storage explicitly, including explicitly deleting the backing raw record
* for each cache entry when that cache entry is invalidated.
*/
@Override
public void insert(final IV<?, ?> iv, final Graph g) {
final byte[] key = iv2key(getKeyBuilder(), iv);
final byte[] val = SerializerUtil.serialize(g);
synchronized (map) {
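// Remove/insert gives replace semantics: the HTree can store more than
// one tuple for a key, so a bare insert would not overwrite a stale
// description (note lookupFirst() on the read path).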
map.remove(key);
map.insert(key, val);
}
}
@Override
public Graph lookup(final IV<?, ?> iv) {
final byte[] key = iv2key(getKeyBuilder(), iv);
final byte[] val = map.lookupFirst(key);
if (val == null)
return null;
return (Graph) SerializerUtil.deserialize(val);
}
/**
* {@inheritDoc}
*
* TODO Invalidation should probably be for lastCommitTime+1 (that is, anything
* after the most current lastCommitTime). However, there is still a race
* condition when something is invalidated while there is a concurrent
* request to describe that thing. This can probably be solved by passing
* along the timestamp that the DESCRIBE query is reading on to the
* {@link DescribeCacheUpdater} and from there to the DESCRIBE cache. We
* should always prefer the description of a resource associated with the
* most current view of the database.
*/
@Override
public void invalidate(final Set<IV<?, ?>> ivs) {
if (ivs == null)
throw new IllegalArgumentException();
final int size = ivs.size();
if (size == 0)
return;
final IV<?, ?>[] a = ivs.toArray(new IV[size]);
// Sort 1st for better locality of updates.
Arrays.sort(a);
final IKeyBuilder keyBuilder = getKeyBuilder();
synchronized (map) {
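// Mutation is serialized with insert(), which synchronizes on the same map.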
for (IV<?, ?> iv : a) {
final byte[] key = iv2key(keyBuilder, iv);
map.remove(key);
}
if (log.isTraceEnabled())
log.trace("Invalidated cache entries: n=" + size);
}
}
}