package com.bigdata.rdf.sparql.ast.eval;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.Executor;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import org.apache.log4j.Logger;
import com.bigdata.bop.Constant;
import com.bigdata.bop.IPredicate;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.ap.Predicate;
import com.bigdata.btree.BTree;
import com.bigdata.btree.IIndex;
import com.bigdata.counters.CAT;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.spo.ISPO;
import com.bigdata.rdf.spo.SPOAccessPath;
import com.bigdata.relation.accesspath.BlockingBuffer;
import com.bigdata.relation.accesspath.IAccessPath;
import com.bigdata.relation.rule.IAccessPathExpander;
import com.bigdata.service.IBigdataClient;
import com.bigdata.striterator.ChunkedWrappedIterator;
import com.bigdata.striterator.IChunkedOrderedIterator;
import com.bigdata.striterator.IKeyOrder;
import com.bigdata.util.concurrent.LatchedExecutor;
import cutthecrap.utils.striterators.ICloseableIterator;
import cutthecrap.utils.striterators.IFilter;
import cutthecrap.utils.striterators.Striterator;
/**
* Parallel subquery for a default graph access path. An expander pattern is
* used to ensure that the "DISTINCT SPO" constraint is applied across the
* subqueries rather than to each subquery individually.
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
* @version $Id$
*/
public class DGExpander implements IAccessPathExpander<ISPO> {
/** Logger for this class. */
protected static final Logger log = Logger.getLogger(DGExpander.class);
/** Serialization version identifier. */
private static final long serialVersionUID = 1L;
/**
* The parallelism limit handed to the {@link LatchedExecutor} on which the
* per-graph subqueries are run.
*/
final private int maxParallel;
/**
* The graphs to be visited. One subquery is scheduled for each element of
* this collection.
*/
@SuppressWarnings("rawtypes")
final private Collection<IV> graphs;
/**
* The estimated range count given to the constructor. The expanded access
* path reports this value as its range count.
*/
final private long estimatedRangeCount;
/**
 * Creates an expander which evaluates an access path once per graph in
 * <i>graphs</i>, using limited parallelism.
 *
 * @param maxParallel
 *            The maximum #of per-graph subqueries which may run
 *            concurrently (must be positive).
 * @param graphs
 *            The {@link IV}s of the graphs to be visited (must not be
 *            <code>null</code>). The reference is retained, the
 *            collection is not copied.
 * @param estimatedRangeCount
 *            The estimated range count for the subquery operation across
 *            those graphs.
 *
 * @throws IllegalArgumentException
 *             if <i>maxParallel</i> is not positive or if <i>graphs</i>
 *             is <code>null</code>.
 *
 * @todo A parallelism limitation is placed on the ClientIndexView through
 *       the
 *       {@link IBigdataClient.Options#CLIENT_MAX_PARALLEL_TASKS_PER_REQUEST}
 *       . We should be able to override that through annotations on a query
 *       plan.
 */
@SuppressWarnings("rawtypes")
public DGExpander(final int maxParallel, final Collection<IV> graphs,
        final long estimatedRangeCount) {
    // Fail fast: otherwise a bad argument only surfaces later, possibly on
    // the asynchronous task thread which iterates over the graphs.
    if (maxParallel <= 0) {
        throw new IllegalArgumentException("maxParallel=" + maxParallel);
    }
    if (graphs == null) {
        throw new IllegalArgumentException("graphs is null");
    }
    this.maxParallel = maxParallel;
    this.graphs = graphs;
    this.estimatedRangeCount = estimatedRangeCount;
}
/**
 * Always returns <code>false</code>.
 * <p>
 * NOTE(review): the meaning of this flag is defined by
 * {@link IAccessPathExpander}, which is not visible here - confirm there.
 */
public boolean backchain() {

    return false;

}
/**
 * Always returns <code>false</code>.
 * <p>
 * NOTE(review): the meaning of this flag is defined by
 * {@link IAccessPathExpander}, which is not visible here - confirm there.
 */
public boolean runFirst() {

    return false;

}
/**
 * Wraps the given access path with one which evaluates it once per graph.
 *
 * @param accessPath
 *            The original access path. It is cast to
 *            {@link SPOAccessPath}.
 *
 * @return The wrapping access path.
 *
 * @throws ClassCastException
 *             if <i>accessPath</i> is not an {@link SPOAccessPath}.
 */
public IAccessPath<ISPO> getAccessPath(final IAccessPath<ISPO> accessPath) {

    final SPOAccessPath spoAccessPath = (SPOAccessPath) accessPath;

    return new DefaultGraphParallelEvaluationAccessPath(spoAccessPath);

}
/**
 * Human readable representation reporting the parallelism limit, the #of
 * graphs, the estimated range count and the graphs themselves.
 */
public String toString() {

    final StringBuilder sb = new StringBuilder(super.toString());

    sb.append("{maxParallel=").append(maxParallel);
    sb.append(",ngraphs=").append(graphs.size());
    sb.append(", estimatedRangeCount=").append(estimatedRangeCount);
    sb.append(", graphs=").append(graphs);
    sb.append("}");

    return sb.toString();

}
/**
* Inner class evaluates the access path for each context using limited
* parallelism, discarding the context argument for each {@link ISPO}, and
* filtering out duplicate triples based on their (s,p,o) term identifiers.
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
* Thompson</a>
*/
private final class DefaultGraphParallelEvaluationAccessPath implements
IAccessPath<ISPO> {
/**
* The original access path. Most methods of this class delegate to it, and
* the per-graph access paths are obtained from its relation.
*/
private final SPOAccessPath sourceAccessPath;
/**
* The executor on which the per-graph subquery tasks are run. It is a
* {@link LatchedExecutor} created over the index manager's executor
* service using the expander's <code>maxParallel</code> value.
*/
final private Executor executor;
/**
 * Human readable representation which includes the wrapped access path.
 */
public String toString() {

    final String self = super.toString();

    final String wrapped = sourceAccessPath.toString();

    return self + "{baseAccessPath=" + wrapped + "}";

}
/**
 * Wraps the original access path and sets up the executor used to run the
 * per-graph subqueries.
 *
 * @param accessPath
 *            The original access path.
 */
public DefaultGraphParallelEvaluationAccessPath(
        final SPOAccessPath accessPath) {

    sourceAccessPath = accessPath;

    // Runs tasks on the index manager's executor service, created with the
    // expander's maxParallel value.
    executor = new LatchedExecutor(
            accessPath.getIndexManager().getExecutorService(),
            maxParallel);

}
/**
 * Delegates to the original access path.
 */
@Override
public IIndex getIndex() {

    return this.sourceAccessPath.getIndex();

}
/**
 * Delegates to the original access path.
 */
@Override
public IKeyOrder<ISPO> getKeyOrder() {

    return this.sourceAccessPath.getKeyOrder();

}
/**
 * Delegates to the original access path. The returned predicate is the one
 * for the original (unexpanded) access path.
 */
@Override
public IPredicate<ISPO> getPredicate() {

    return this.sourceAccessPath.getPredicate();

}
/**
 * Reports whether the expanded access path visits no elements. An iterator
 * is opened, tested once with <code>hasNext()</code> and then always
 * closed.
 *
 * @return <code>true</code> iff the iterator has nothing to visit.
 */
@Override
public boolean isEmpty() {

    final IChunkedOrderedIterator<ISPO> probe = iterator(0L/* offset */,
            1/* limit */, 1/* capacity */);

    try {

        final boolean foundOne = probe.hasNext();

        return !foundOne;

    } finally {

        // Always release the iterator.
        probe.close();

    }

}
// @Override
// public ITupleIterator<ISPO> rangeIterator() {
//
// return sourceAccessPath.rangeIterator();
//
// }
/**
 * Unsupported operation.
 * <p>
 * Note: this could be implemented by delegation, but it is not used from
 * the context of SPARQL, which lacks SELECT ... INSERT or SELECT ...
 * DELETE constructions, at least at this time.
 *
 * @throws UnsupportedOperationException
 *             always.
 */
@Override
public long removeAll() {

    throw new UnsupportedOperationException();

}
/**
 * Returns an iterator using zero for the offset, the limit and the
 * capacity.
 */
@Override
public IChunkedOrderedIterator<ISPO> iterator() {

    final long offset = 0L;

    final long limit = 0L;

    final int capacity = 0;

    return iterator(offset, limit, capacity);

}
// public IChunkedOrderedIterator<ISPO> iterator(final int limit,
// final int capacity) {
//
// return iterator(0L/* offset */, limit, capacity);
//
// }
/**
 * This is the common entry point for all iterator implementations. It
 * creates an {@link InnerIterator1} and wraps it as a chunked iterator.
 * <p>
 * NOTE(review): the arguments are handed to {@link InnerIterator1}, whose
 * constructor does not appear to apply them - confirm whether callers rely
 * on <i>offset</i> / <i>limit</i> being imposed.
 *
 * @param offset
 *            Passed through to {@link InnerIterator1}.
 * @param limit
 *            Passed through to {@link InnerIterator1}.
 * @param capacity
 *            Passed through to {@link InnerIterator1}.
 *
 * @todo Consider an alternative implementation using fully parallel
 *       evaluation of the access paths and a merge sort to combine chunks
 *       drawn from each access path, and then an iterator which skips over
 *       duplicates by considering the last returned (s,p,o). We need to:
 *       (a) allocate a buffer each time we draw from the current chunks
 *       based on the total size of the current chunks; and (b) we can only
 *       draw keys from the current chunks up to the min(nextKey) for each
 *       chunk. The min(nextKey) constraint is necessary to ensure that a
 *       merge sort will get rid of duplicates. Without that constraint it
 *       is possible that a later chunk from some access path will report
 *       an (s,p,o) that has already been visited. (The constraint allows
 *       us to use a closed world assumption to filter duplicates after the
 *       merge sort.)
 */
@Override
public IChunkedOrderedIterator<ISPO> iterator(final long offset,
        final long limit, final int capacity) {

    // Creating the inner iterator also starts the asynchronous evaluation.
    final ICloseableIterator<ISPO> inner = new InnerIterator1(offset,
            limit, capacity);

    final IChunkedOrderedIterator<ISPO> chunked = new ChunkedWrappedIterator<ISPO>(
            inner);

    return chunked;

}
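/**
* Iterator implementation based on limited parallelism over the
* iterators for the {@link IAccessPath} associated with each graph in
* the default graphs set. The per-graph iterators are drained onto a
* shared {@link BlockingBuffer} and this iterator reads from that buffer.
* <p>
* Note: this class does not itself filter out duplicate (s,p,o) tuples.
* Only the access path filter (if any) found on the source predicate is
* applied to the buffered elements. (An earlier version of this comment
* described a {@link BTree} based duplicate filter; no such filter is
* present in this class.)
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
* Thompson</a>
* @version $Id: DefaultGraphSolutionExpander.java 3678 2010-09-29
* 15:48:34Z thompsonbry $
*/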
private class InnerIterator1 implements ICloseableIterator<ISPO> {
// private final long offset;
//
// private final long limit;
//
// private final int capacity;
/**
* The #of per-graph access paths which visited at least one element.
* Reported by the INFO log message once the iterator is exhausted.
*/
private final CAT nAPsWithHits = new CAT();
/**
* The total #of elements visited across the per-graph access paths.
* Reported by the INFO log message once the iterator is exhausted.
*/
private final CAT nhits = new CAT();
/**
* The buffer onto which the per-graph tasks add the elements which they
* visit and from which {@link #src} reads.
*
* @todo buffer chunks of {@link ISPO}s for more efficiency (lock
* amortization) and better alignment with the chunked source
* iterators. (It used to be that the only issue was
* {@link #hasNext()} having to maintain a chunk of known
* distinct tuples to be visited, but I think that wrapping
* the DISTINCT filter around the DGExpander fixed that
* problem.)
*/
private final BlockingBuffer<ISPO> buffer;
/**
* The source iterator. This is the buffer's iterator, wrapped with the
* access path filter of the source predicate when one is defined.
*/
private final ICloseableIterator<ISPO> src;
/**
* Creates the buffer, starts the asynchronous task which fills it, and
* sets up the iterator ({@link #src}) from which the elements are read.
* <p>
* On any error the buffer is closed, the task's future (if it was
* created) is cancelled, and the error is rethrown wrapped in a
* {@link RuntimeException}.
*
* @param offset
*            Not used by this constructor (the field assignment is
*            commented out).
* @param limit
*            Not used by this constructor (the field assignment is
*            commented out).
* @param capacity
*            Not used by this constructor. The buffer is sized from the
*            chunk capacity of the source access path instead.
*/
public InnerIterator1(final long offset, final long limit,
final int capacity) {
// this.offset = offset;
//
// this.limit = limit;
//
// this.capacity = capacity;
this.buffer = new BlockingBuffer<ISPO>(sourceAccessPath
.getChunkCapacity());
// Remains null until the task has been wrapped; tested in the catch block.
FutureTask<Void> future = null;
try {
/*
* Note: We do NOT get() this Future. This task will run
* asynchronously.
*
* The Future is canceled IF (hopefully WHEN) the iterator
* is closed.
*
* NOTE(review): this comment used to state that a failing task
* uses buffer#abort(cause) to notify the buffer. The task visible
* in this class does not call abort(): it rethrows the error and
* closes the buffer in a finally clause. Presumably the error
* reaches the iterator through the Future which is set on the
* buffer below - confirm against BlockingBuffer.
*
* @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/707">
* BlockingBuffer.close() does not unblock threads </a>
*/
// Wrap task as FutureTask.
future = new FutureTask<Void>(newRunIteratorsTask(buffer));
// set the future on the BlockingBuffer.
buffer.setFuture(future);
// submit task for execution.
sourceAccessPath.getIndexManager().getExecutorService()
.submit(future);
/*
* The outer access path will impose the "DISTINCT SPO"
* constraint.
*/
// /*
// * Wrap the asynchronous iterator with one that imposes
// * a distinct (s,p,o) filter.
// */
// src = sourceAccessPath.getRelation()
// .distinctSPOIterator(buffer.iterator());
// Apply the access path filter of the source predicate, if any, to
// the elements read from the buffer.
final IFilter filter = sourceAccessPath.getPredicate()
.getAccessPathFilter();
if (filter != null) {
src = new ChunkedWrappedIterator<ISPO>(new Striterator(
buffer.iterator()).addFilter(filter));
} else {
src = buffer.iterator();
}
} catch (Throwable ex) {
// Best effort cleanup. A failure during cleanup is logged so that
// it does not mask the original error.
try {
buffer.close();
if (future != null) {
future.cancel(true/* mayInterruptIfRunning */);
}
} catch (Throwable t) {
log.error(t, t);
}
throw new RuntimeException(ex);
}
}
/**
 * Closes the source iterator.
 * <p>
 * NOTE(review): closing the buffer's iterator presumably cancels the
 * future set on the buffer, interrupting the running task if necessary -
 * confirm against BlockingBuffer.
 */
public void close() {

    this.src.close();

}
/**
 * Delegates to the source iterator. Whenever the source iterator reports
 * that it is exhausted, the hit counters are logged at INFO.
 *
 * @return <code>true</code> iff another element may be visited.
 */
public boolean hasNext() {

    if (src.hasNext()) {

        return true;

    }

    // Note: the label for nAPsWithHits used to be misspelled as
    // "apsWithHints".
    if (log.isInfoEnabled()) {

        log.info("#graphs=" + graphs.size() + ", nhits=" + nhits
                + ", apsWithHits=" + nAPsWithHits + ", pred="
                + getPredicate());

    }

    return false;

}
/**
 * Returns the next element from the source iterator.
 */
public ISPO next() {

    final ISPO element = src.next();

    return element;

}
/**
 * Unsupported operation.
 *
 * @throws UnsupportedOperationException
 *             always.
 */
public void remove() {

    throw new UnsupportedOperationException();

}
/**
 * Return the task which will submit the tasks draining the iterators for
 * each access path.
 *
 * @param buffer
 *            The buffer which the returned task closes when it is done.
 *
 * @return The task whose future is set on the buffer.
 */
private Callable<Void> newRunIteratorsTask(
        final BlockingBuffer<ISPO> buffer) {

    final Callable<Void> task = new RunIteratorsTask(buffer);

    return task;

}
/**
 * Inner {@link Callable} which queues up one {@link DrainIteratorTask} per
 * graph on the {@link Executor}, waits for all of them to complete and
 * finally closes the buffer.
 */
private final class RunIteratorsTask implements Callable<Void> {

    /** The buffer which is closed once nothing more can be written. */
    private final BlockingBuffer<ISPO> buffer;

    /**
     * @param buffer
     *            The buffer to be closed when this task is done.
     */
    public RunIteratorsTask(final BlockingBuffer<ISPO> buffer) {

        this.buffer = buffer;

    }

    /**
     * Schedules the per-graph tasks and waits for them. If anything goes
     * wrong, every task scheduled so far is cancelled and the error is
     * rethrown wrapped in a {@link RuntimeException}. The buffer is closed
     * in all cases.
     */
    public Void call() throws Exception {

        final List<Future<Void>> pending = new LinkedList<Future<Void>>();

        try {

            scheduleAll(pending);

            awaitAll(pending);

        } catch (Throwable cause) {

            for (Future<Void> each : pending) {

                each.cancel(true/* mayInterruptIfRunning */);

            }

            throw new RuntimeException(cause);

        } finally {

            // nothing more can be written onto the buffer.
            buffer.close();

        }

        return null;

    }

    /**
     * Creates one task per graph, records its future in <i>pending</i> and
     * then hands it to the executor.
     */
    private void scheduleAll(final List<Future<Void>> pending) {

        for (IV<?, ?> graph : graphs) {

            final FutureTask<Void> drainFuture = new FutureTask<Void>(
                    new DrainIteratorTask(graph));

            // Record the future before executing it so that it is
            // cancelled if execute() fails.
            pending.add(drainFuture);

            executor.execute(drainFuture);

        }

    }

    /**
     * Waits for each future in turn, propagating the first failure.
     */
    private void awaitAll(final List<Future<Void>> pending)
            throws Exception {

        for (Future<Void> each : pending) {

            each.get();

        }

    }

}
/**
 * Inner callable which runs the iterator for the access path of a single
 * graph, draining that iterator onto the blocking buffer.
 * <p>
 * Note: don't pass the top-level offset, limit, capacity into the
 * per-graph AP iterator or it will skip over offset results per graph! The
 * limit needs to be imposed on the data pulled from the blocking buffer,
 * not here.
 */
private final class DrainIteratorTask implements Callable<Void> {

    /** The graph (context) whose access path is drained by this task. */
    final IV<?, ?> termId;

    /**
     * @param termId
     *            The graph to be visited.
     *
     * @throws IllegalArgumentException
     *             if the argument is <code>null</code>.
     */
    public DrainIteratorTask(final IV<?, ?> termId) {

        if (termId == null)
            throw new IllegalArgumentException();

        this.termId = termId;

    }

    /**
     * Copies every element visited for the graph onto the buffer and then
     * updates the hit counters. The per-graph iterator is always closed.
     */
    public Void call() throws Exception {

        if (log.isDebugEnabled())
            log.debug("Running iterator: c=" + termId);

        final IChunkedOrderedIterator<ISPO> graphItr = openGraphIterator();

        try {

            long nvisited = 0;

            // @todo chunk at a time processing.
            for (; graphItr.hasNext(); nvisited++) {

                buffer.add(graphItr.next());

            }

            if (log.isDebugEnabled())
                log.debug("Ran iterator: c=" + termId + ", nvisited="
                        + nvisited);

            if (nvisited > 0) {

                nhits.add(nvisited);

                nAPsWithHits.increment();

            }

        } finally {

            graphItr.close();

        }

        return null;

    }

    /**
     * Builds the access path for the source predicate with the context
     * position bound to this task's graph and returns its iterator.
     */
    private IChunkedOrderedIterator<ISPO> openGraphIterator() {

        /*
         * Annotations cleared from the source predicate.
         *
         * expander: we are the expander.
         *
         * accessPathFilter: This wraps the DGExpander. It should not be
         * applied to each subquery.
         *
         * (Clearing the keyOrder annotation is disabled. The original
         * remark for it was: the right index can change as soon as we
         * bind [c].)
         */
        final String[] cleared = new String[] {
                IPredicate.Annotations.ACCESS_PATH_EXPANDER,
                IPredicate.Annotations.ACCESS_PATH_FILTER };

        final Predicate<ISPO> stripped = (Predicate<ISPO>) sourceAccessPath
                .getPredicate().clearAnnotations(cleared);

        // The variable in the context position.
        final IVariable<?> cvar = (IVariable<?>) stripped.get(3/* cvar */);

        // Bind the graph onto the context position variable.
        final Predicate<ISPO> bound = stripped.asBound(cvar,
                new Constant<IV<?, ?>>(termId));

        // Obtain the access path for the asBound predicate.
        final IAccessPath<ISPO> boundAP = sourceAccessPath.getRelation()
                .getAccessPath(bound);

        return boundAP.iterator();

    }

} // class DrainIteratorTask
} // class InnerIterator
/**
 * Return the estimated range count which was given to the expander.
 *
 * @param exactIsIgnored
 *            Ignored. The estimate is returned in either case.
 */
@Override
public long rangeCount(final boolean exactIsIgnored) {

    return DGExpander.this.estimatedRangeCount;

}
} // class DefaultGraphAccessPath
}