package com.bigdata.rdf.sparql.ast.cache;
import info.aduna.iteration.CloseableIteration;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import org.apache.log4j.Logger;
import org.openrdf.model.Graph;
import org.openrdf.model.Resource;
import org.openrdf.model.Value;
import org.openrdf.model.impl.GraphImpl;
import org.openrdf.query.QueryEvaluationException;
import org.openrdf.rio.RDFWriter;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.model.BigdataStatement;
import com.bigdata.rdf.model.BigdataValue;
import com.bigdata.rdf.sparql.ast.ProjectionNode;
import com.bigdata.rdf.sparql.ast.eval.ASTConstructIterator;
import com.bigdata.rdf.store.TempTripleStore;
/**
* Collects statements written onto the {@link RDFWriter} interface and
* adds/replaces the DESCRIBE of the {@link Resource} specified to the
* constructor.
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
* Thompson</a>
*/
public class DescribeCacheUpdater implements
        CloseableIteration<BigdataStatement, QueryEvaluationException> {

    private static final Logger log = Logger
            .getLogger(DescribeCacheUpdater.class);

    /**
     * The cache to be updated.
     */
    private final IDescribeCache cache;

    /**
     * The {@link BigdataValue}s that become bound for the projection of the
     * original DESCRIBE query. We will collect all statements having a
     * described resource as either a subject or an object.
     * <p>
     * Note: This set is populated as the solutions are observed before they are
     * fed into the {@link ASTConstructIterator}. It MUST be a thread-safe
     * {@link Set} in order to ensure the visibility of the updates to this
     * class. It should also support high concurrency.
     */
    private final Set<BigdataValue> describedResources;

    /**
     * The source iterator visiting the statements that are the description
     * of the projected resources.
     */
    private final CloseableIteration<BigdataStatement, QueryEvaluationException> src;

    /**
     * The statements to be inserted into the cache, keyed by the described
     * resource they belong to. Entries are created lazily by
     * {@link #record(BigdataValue, BigdataStatement)} and released by
     * {@link #close()}.
     *
     * TODO This is not scalable to very large numbers of described
     * resources nor to resources with very large numbers of statements in
     * their descriptions. Try {@link TempTripleStore} with ONE (1) access
     * path on SPO. However, we want to have the {@link BigdataStatement}
     * with its {@link IV}s and its {@link Value}s, so the
     * {@link TempTripleStore} will not work. Something more custom?
     */
    private final Map<BigdataValue, Graph> graphs = new HashMap<BigdataValue, Graph>();

    /**
     * <code>true</code> until {@link #close()} has been invoked, either
     * explicitly by the caller or by {@link #hasNext()} once the source is
     * exhausted. No synchronization is performed on this flag by this class.
     */
    private boolean open = true;

    /**
     *
     * @param cache
     *            The cache to be updated.
     * @param describedResources
     *            The {@link BigdataValue}s that become bound for the projection
     *            of the original DESCRIBE query. We will collect all statements
     *            having a described resource as either a subject or an object.
     *            This MUST be a thread-safe (and concurrency favorable) set in
     *            order to ensure the visibility of the updates.
     * @param src
     *            The source iterator, visiting the statements that are the
     *            description of the resource(s) identified in the
     *            {@link ProjectionNode}.
     *
     * @throws IllegalArgumentException
     *             if any argument is <code>null</code>.
     */
    public DescribeCacheUpdater(
            final IDescribeCache cache,
            final Set<BigdataValue> describedResources,
            final CloseableIteration<BigdataStatement, QueryEvaluationException> src) {

        if (cache == null)
            throw new IllegalArgumentException("cache is null");

        if (describedResources == null)
            throw new IllegalArgumentException("describedResources is null");

        if (src == null)
            throw new IllegalArgumentException("src is null");

        this.cache = cache;

        this.describedResources = describedResources;

        this.src = src;

    }

    /**
     * Reports whether another statement is available from the source. The
     * first time the source reports exhaustion, the collected descriptions
     * are written to the cache and this iterator is closed.
     *
     * @return <code>true</code> iff the source has another statement;
     *         always <code>false</code> once this iterator has been closed.
     */
    @Override
    public boolean hasNext() throws QueryEvaluationException {

        if (!open) {

            /*
             * Already closed (explicitly, or because the source was
             * exhausted). Do not consult the closed source iterator.
             */
            return false;

        }

        if (src.hasNext())
            return true;

        try {

            /*
             * Update the DESCRIBE cache IFF the iterator is exhausted by
             * normal means (versus a thrown exception from the source
             * iterator, which propagates out of src.hasNext() above before
             * we get here).
             */
            updateCache();

        } finally {

            // Close the iterator regardless.
            close();

        }

        return false;

    }

    /**
     * Returns the next statement from the source, first recording it against
     * each described resource that appears as its subject or object.
     * <p>
     * TODO In order to support CBD, we will also have to recognize
     * statements that describe blank nodes that are part of the description
     * of a described resource as belonging to that described resource. This
     * is necessary in order to capture the transitive closure of the
     * resource description specified by CBD. The code in this method only
     * recognizes statements that directly have a described resource as a
     * subject or object. We probably need a reverse map that will allow us
     * to navigate from a BigdataValue (or perhaps just a BigdataBNode) to
     * all described resources for which that value was observed. That map
     * might only need to contain the blank nodes since the description can
     * never expand beyond a statement having a blank node in the subject
     * (or object) position and a non-blank node in the object (or subject)
     * position.
     *
     * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/578">
     *      Concise Bounded Description </a>
     */
    @Override
    public BigdataStatement next() throws QueryEvaluationException {

        // A statement produced by the CONSTRUCT iterator.
        final BigdataStatement stmt = src.next();

        // Is the subject one of the described resources?
        final BigdataValue s = stmt.getSubject();

        if (describedResources.contains(s)) {

            record(s, stmt);

        }

        /*
         * Is the object one of the described resources?
         *
         * When the object is the same resource as the subject, the
         * statement was already recorded for that resource above. Recording
         * it again would add the same statement twice to the same Graph,
         * and a Graph (a Collection of statements) is not required to
         * eliminate duplicates.
         */
        final BigdataValue o = stmt.getObject();

        if (describedResources.contains(o) && !o.equals(s)) {

            record(o, stmt);

        }

        return stmt;

    }

    /**
     * Associate the statement with the resource. It is part of the
     * description of that resource.
     *
     * @param describedResource
     *            A resource that is being described.
     * @param stmt
     *            A statement having that resource as either the subject or
     *            object.
     */
    private void record(final BigdataValue describedResource,
            final BigdataStatement stmt) {

        Graph g = graphs.get(describedResource);

        if (g == null) {

            // First statement observed for this resource.
            graphs.put(describedResource, g = new GraphImpl());

        }

        g.add(stmt);

        if (log.isDebugEnabled())
            log.debug("DESCRIBE: describedResource=" + describedResource
                    + ", statement=" + stmt);

    }

    /**
     * Insert the collected graph of each described resource into the cache
     * under the {@link IV} of that resource.
     *
     * @throws AssertionError
     *             if a described resource does not have its {@link IV} set.
     */
    private void updateCache() {

        for (Map.Entry<BigdataValue, Graph> e : graphs.entrySet()) {

            final BigdataValue describedResource = e.getKey();

            final IV<?, ?> iv = describedResource.getIV();

            if (iv == null)
                throw new AssertionError("IV not set: " + describedResource);

            final Graph graph = e.getValue();

            cache.insert(iv, graph);

            if (log.isInfoEnabled())
                log.info("DESCRIBE UPDATE: describedResource="
                        + describedResource + ", graph=" + graph);

        }

    }

    /**
     * Close the source iterator and release the collected statements. Only
     * the first invocation has any effect. Closing before the source is
     * exhausted does NOT update the cache.
     */
    @Override
    public void close() throws QueryEvaluationException {

        if (open) {

            /*
             * Clear the flag first so that a failure while closing the
             * source can not cause a later hasNext() to update the cache
             * and close the source a second time.
             */
            open = false;

            try {

                src.close();

            } finally {

                // Drop the references to the per-resource graphs.
                graphs.clear();

            }

        }

    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException
     *             always.
     */
    @Override
    public void remove() throws QueryEvaluationException {

        throw new UnsupportedOperationException();

    }

}