package com.bigdata.rdf.store;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import org.apache.log4j.Logger;
import org.openrdf.model.Value;
import com.bigdata.bop.Constant;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.engine.SolutionsLog;
import com.bigdata.bop.rdf.join.ChunkedMaterializationOp;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.impl.BlobIV;
import com.bigdata.rdf.internal.impl.TermId;
import com.bigdata.rdf.internal.impl.bnode.SidIV;
import com.bigdata.rdf.lexicon.LexiconRelation;
import com.bigdata.rdf.model.BigdataValue;
import com.bigdata.rdf.spo.ISPO;
import com.bigdata.relation.accesspath.BlockingBuffer;
import com.bigdata.striterator.AbstractChunkedResolverator;
import com.bigdata.striterator.IChunkedOrderedIterator;
/**
* Efficiently resolve term identifiers in Bigdata {@link IBindingSet}s to RDF
* {@link BigdataValue}s.
*
* @see ChunkedMaterializationOp
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
* @version $Id: BigdataSolutionResolverator.java 3448 2010-08-18 20:55:58Z thompsonbry $
*/
public class BigdataBindingSetResolverator
        extends
        AbstractChunkedResolverator<IBindingSet, IBindingSet, AbstractTripleStore> {

    private final static Logger log = Logger
            .getLogger(BigdataBindingSetResolverator.class);

    /** The query {@link UUID} (used when logging on the {@link SolutionsLog}). */
    private final UUID queryId;

    /**
     * The variable(s) to materialize. When <code>null</code>, all bound
     * variables are materialized. When a zero-length array, NOTHING is
     * materialized and the output solutions are empty.
     */
    @SuppressWarnings("rawtypes")
    private final IVariable[] required;

    /** The chunk size for the batched materialization of {@link TermId}s. */
    private final int termsChunkSize;

    /** The chunk size for the batched materialization of {@link BlobIV}s. */
    private final int blobsChunkSize;

    /**
     *
     * @param db
     *            Used to resolve term identifiers to {@link Value} objects.
     * @param src
     *            The source iterator (will be closed when this iterator is
     *            closed).
     * @param queryId
     *            The query {@link UUID} (for logging on the
     *            {@link SolutionsLog}).
     * @param required
     *            The variables to be resolved (optional). When
     *            <code>null</code>, all variables will be resolved.
     * @param chunkOfChunksCapacity
     *            The capacity of the internal {@link BlockingBuffer} in
     *            chunks.
     * @param chunkCapacity
     *            The target capacity of each chunk.
     * @param chunkTimeout
     *            The timeout (in milliseconds) used when building chunks.
     * @param termsChunkSize
     *            The chunk size for materialization of {@link TermId}s.
     * @param blobsChunkSize
     *            The chunk size for materialization of {@link BlobIV}s.
     *
     *            FIXME must accept reverse bnodes map (from term identifier to
     *            blank nodes) for resolution of blank nodes within a Sesame
     *            connection context.
     */
    public BigdataBindingSetResolverator(final AbstractTripleStore db,
            final IChunkedOrderedIterator<IBindingSet> src,
            final UUID queryId,
            @SuppressWarnings("rawtypes") final IVariable[] required,
            final int chunkOfChunksCapacity, final int chunkCapacity,
            final long chunkTimeout, final int termsChunkSize,
            final int blobsChunkSize) {

        super(db, src, new BlockingBuffer<IBindingSet[]>(chunkOfChunksCapacity,
                chunkCapacity, chunkTimeout, TimeUnit.MILLISECONDS));

        this.queryId = queryId;
        this.required = required;
        this.termsChunkSize = termsChunkSize;
        this.blobsChunkSize = blobsChunkSize;

    }

    /**
     * Strengthens the return type.
     */
    @Override
    public BigdataBindingSetResolverator start(final ExecutorService service) {

        return (BigdataBindingSetResolverator) super.start(service);

    }

    /**
     * Resolve a chunk of {@link IBindingSet}s into a chunk of
     * {@link IBindingSet}s in which term identifiers have been resolved to
     * {@link BigdataValue}s.
     */
    @Override
    protected IBindingSet[] resolveChunk(final IBindingSet[] chunk) {

        return resolveChunk(queryId, state.getLexiconRelation(), chunk,
                required, termsChunkSize, blobsChunkSize);

    }

    /**
     * Batch resolution of a chunk of solutions.
     *
     * @param queryId
     *            The query {@link UUID} (for logging on the
     *            {@link SolutionsLog}).
     * @param lex
     *            The {@link LexiconRelation}.
     * @param chunk
     *            The {@link IBindingSet}[] chunk whose variables will be
     *            materialized.
     * @param required
     *            The variable(s) to be materialized. When <code>null</code>,
     *            everything will be materialized. If a zero length array is
     *            given, then NOTHING will be materialized and the output
     *            solutions will be empty.
     * @param termsChunkSize
     *            The chunk size for materialization of {@link TermId}s.
     * @param blobsChunkSize
     *            The chunk size for materialization of {@link BlobIV}s.
     *
     * @return A new chunk of solutions in which those variables have been
     *         materialized.
     */
    /*
     * Note: I've made this static to support chunked resolution outside of the
     * producer/consumer pattern, but there never seems to be a use case for it.
     * Each time it turns out that the BigdataValueReplacer is the right thing
     * to use.
     */
    static private IBindingSet[] resolveChunk(final UUID queryId,
            final LexiconRelation lex, final IBindingSet[] chunk,
            final IVariable<?>[] required, final int termsChunkSize,
            final int blobsChunkSize) {

        final long begin = System.currentTimeMillis();

        if (log.isDebugEnabled())
            log.debug("Fetched chunk: size=" + chunk.length + ", chunk="
                    + Arrays.toString(chunk));

        /*
         * Create a collection of the distinct term identifiers used in this
         * chunk.
         *
         * The initial capacity of the hash set is estimated from the #of
         * variables to materialize per solution and the #of solutions.
         */
        final int initialCapacity = required == null ? chunk.length
                : ((required.length == 0) ? 1 : chunk.length * required.length);

        final Collection<IV<?, ?>> ids = new HashSet<IV<?, ?>>(initialCapacity);

        for (IBindingSet bindingSet : chunk) {

            assert bindingSet != null;

            if (required == null) {

                // Materialize everything which is bound in the solution.
                @SuppressWarnings("rawtypes")
                final Iterator<Map.Entry<IVariable, IConstant>> itr = bindingSet
                        .iterator();

                while (itr.hasNext()) {

                    @SuppressWarnings("rawtypes")
                    final Map.Entry<IVariable, IConstant> entry = itr.next();

                    final IV<?, ?> iv = (IV<?, ?>) entry.getValue().get();

                    if (iv == null) {

                        throw new RuntimeException("NULL? : var="
                                + entry.getKey() + ", " + bindingSet);

                    }

                    handleIV(iv, ids);

                }

            } else {

                // Materialize only the given variables (where bound).
                for (IVariable<?> v : required) {

                    final IConstant<?> c = bindingSet.get(v);

                    if (c == null) {

                        // Variable is not bound in this solution.
                        continue;

                    }

                    final IV<?, ?> iv = (IV<?, ?>) c.get();

                    if (iv == null) {

                        throw new RuntimeException("NULL? : var=" + v + ", "
                                + bindingSet);

                    }

                    handleIV(iv, ids);

                }

            }

        }

        if (log.isDebugEnabled())
            log.debug("Resolving " + ids.size() + " IVs, required="
                    + Arrays.toString(required));

        // Batch resolve the term identifiers to terms.
        final Map<IV<?, ?>, BigdataValue> terms = lex.getTerms(ids,
                termsChunkSize, blobsChunkSize);

        /*
         * Assemble a chunk of resolved solutions.
         */
        final IBindingSet[] chunk2 = new IBindingSet[chunk.length];

        int i = 0;

        for (IBindingSet e : chunk) {

            chunk2[i++] = getBindingSet(e, required, terms);

        }

        if (SolutionsLog.INFO) {

            SolutionsLog.log(queryId, null/* bop */, -1/* bopId */,
                    -1/* partitionId */, chunk2);

        }

        final long elapsed = System.currentTimeMillis() - begin;

        if (log.isDebugEnabled())
            log.debug("Resolved chunk: size=" + chunk2.length + ", chunk="
                    + Arrays.toString(chunk2));

        if (log.isInfoEnabled())
            log.info("Resolved chunk: size=" + chunk2.length + ", elapsed="
                    + elapsed);

        // Return the chunk of resolved solutions.
        return chunk2;

    }

    /**
     * Add the IV to the collection of terms to materialize, and also delegate
     * to {@link #handleSid(SidIV, Collection)} if it's a {@link SidIV} (its
     * inline statement components may need materialization as well).
     */
    static private void handleIV(final IV<?, ?> iv,
            final Collection<IV<?, ?>> ids) {

        if (iv instanceof SidIV) {

            handleSid((SidIV<?>) iv, ids);

        }

        ids.add(iv);

    }

    /**
     * Sids need to be handled specially because their individual {@link ISPO}
     * components might need materialization as well. Recurses through the
     * (s, p, o) positions and the context position when bound.
     */
    static private void handleSid(final SidIV<?> sid,
            final Collection<IV<?, ?>> ids) {

        final ISPO spo = sid.getInlineValue();

        handleIV(spo.s(), ids);

        handleIV(spo.p(), ids);

        handleIV(spo.o(), ids);

        if (spo.c() != null) {

            handleIV(spo.c(), ids);

        }

    }

    /**
     * Resolve the term identifiers in the {@link IBindingSet} using the map
     * populated when we fetched the current chunk and return the
     * {@link IBindingSet} for that solution in which term identifiers have been
     * resolved to their corresponding {@link BigdataValue}s.
     * <p>
     * Note: when <i>required</i> is <code>null</code> the <i>solution</i> is
     * updated in place (no copy is made); otherwise a copy restricted to the
     * required variables is updated and returned.
     *
     * @param solution
     *            A solution whose {@link IV}s will be resolved to the
     *            corresponding {@link BigdataValue}s.
     * @param required
     *            The variable(s) to be materialized (optional, may be
     *            <code>null</code>).
     * @param terms
     *            The map from {@link IV}s to materialized
     *            {@link BigdataValue}s for this chunk.
     *
     * @return The corresponding {@link IBindingSet} in which the term
     *         identifiers have been resolved to {@link BigdataValue}s.
     *
     * @throws IllegalArgumentException
     *             if either argument is <code>null</code>.
     * @throws RuntimeException
     *             if an {@link IV} without a cached {@link BigdataValue} could
     *             not be resolved against the <i>terms</i> map.
     */
    static private IBindingSet getBindingSet(final IBindingSet solution,
            final IVariable<?>[] required,
            final Map<IV<?, ?>, BigdataValue> terms) {

        if (solution == null)
            throw new IllegalArgumentException();

        if (terms == null)
            throw new IllegalArgumentException();

        final IBindingSet bindingSet;
        if (required == null) {
            // Resolve in place.
            bindingSet = solution;
        } else {
            // Restrict the solution to the required variables.
            bindingSet = solution.copy(required);
        }

        @SuppressWarnings("rawtypes")
        final Iterator<Map.Entry<IVariable, IConstant>> itr = bindingSet
                .iterator();

        while (itr.hasNext()) {

            @SuppressWarnings("rawtypes")
            final Map.Entry<IVariable, IConstant> entry = itr.next();

            final Object boundValue = entry.getValue().get();

            if (!(boundValue instanceof IV<?, ?>)) {

                // Not an IV: nothing to materialize for this binding.
                continue;

            }

            final IV<?, ?> iv = (IV<?, ?>) boundValue;

            if (iv.hasValue()) {

                // The value is already cached on the IV.
                continue;

            }

            final BigdataValue value = terms.get(iv);

            if (value == null) {

                throw new RuntimeException("Could not resolve: iv=" + iv);

            }

            /*
             * Replace the binding.
             *
             * FIXME This probably needs to strip out the BigdataSail#NULL_GRAPH
             * since that should not become bound.
             */
            bindingSet.set(entry.getKey(), new Constant<BigdataValue>(value));

        }

        return bindingSet;

    }

}