package com.bigdata.bop.solutions;
import java.util.Arrays;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.FutureTask;
import org.apache.log4j.Logger;
import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpContext;
import com.bigdata.bop.IBind;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IQueryAttributes;
import com.bigdata.bop.IValueExpression;
import com.bigdata.bop.IVariableOrConstant;
import com.bigdata.bop.engine.BOpStats;
import com.bigdata.bop.engine.IRunningQuery;
import com.bigdata.bop.solutions.SliceOp.Annotations;
import com.bigdata.rdf.error.SparqlTypeErrorException;
import com.bigdata.rdf.internal.IV;
import com.bigdata.relation.accesspath.IBlockingBuffer;
import cutthecrap.utils.striterators.ICloseableIterator;
/**
* An in-memory merge sort for binding sets. The operator is pipelined. Each
* time it runs, it evaluates the value expressions on which the ordering will
* be imposed, binding the results on the incoming solutions and buffers the
* as-bound solution for eventual sorting. The sort is applied only once the
* last chunk of source solutions has been observed.
* <p>
* Computing the value expressions first is not only an efficiency, but is also
* required in order to detect type errors. When a type error is detected for a
* value expression the corresponding input solution is kept but with no new
* bindings, see trac-765. Since the
* computed value expressions must become bound on the solutions to be sorted,
* the caller is responsible for wrapping any value expression more complex than
* a variable or a constant with an {@link IBind} onto an anonymous variable.
* All such variables will be dropped when the solutions are written out. Since
* this operator must be able to compare all {@link IV}s in all
* {@link IBindingSet}s, it depends on the materialization of non-inline
* {@link IV}s and the ability of the value comparator to handle comparisons
* between materialized non-inline {@link IV}s and inline {@link IV}s.
*
* TODO External memory ORDER BY operator.
* <p>
* SPARQL ORDER BY semantics are complex and evaluating a SPARQL ORDER BY is
* further complicated by the schema flexibility of the value to be sorted. The
* simplest path to a true external memory sort operator would be to buffer and
* manage paging for blocks of inline IVs without materialized RDF values and
* non-inline IVs with materialized RDF values.
* <p>
* An operator could also be written which buffers the solutions on the native
* heap but the as-bound values which will be used to order those solutions are
* either buffered on the JVM heap or materialized onto the JVM heap when the
* sort is executed. This could scale better than a pure JVM heap version, but
* only to the extent that the keys are smaller than the total solutions. The
* solutions would probably be written as serialized binding sets on the memory
* manager such that each solution has its own int32 address. That address can
* then be paired with the as-bound key to be sorted on the JVM heap.
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
* @version $Id: DistinctElementFilter.java 3466 2010-08-27 14:28:04Z
* thompsonbry $
*/
public class MemorySortOp extends SortOp {

    private static final Logger log = Logger.getLogger(MemorySortOp.class);

    private static final long serialVersionUID = 1L;

    /**
     * Constructor required for {@link com.bigdata.bop.BOpUtility#deepCopy(FilterNode)}.
     *
     * @param op
     *            The operator to copy.
     */
    public MemorySortOp(final MemorySortOp op) {
        super(op);
    }

    /**
     * Required shallow copy constructor. The operator configuration is
     * validated eagerly so that a misconfigured operator fails when it is
     * constructed rather than when it is evaluated.
     *
     * @param args
     *            The operator arguments.
     * @param annotations
     *            The operator annotations.
     *
     * @throws UnsupportedOperationException
     *             if the evaluation context is not <code>CONTROLLER</code>, if
     *             the last pass was not requested, or if solutions may be
     *             reordered.
     * @throws IllegalArgumentException
     *             if a sort expression is neither an
     *             {@link IVariableOrConstant} nor an {@link IBind}.
     */
    public MemorySortOp(final BOp[] args,
            final Map<String, Object> annotations) {

        super(args, annotations);

        // The buffered solutions live on the query attributes, so the
        // operator is only accepted when it runs on the query controller.
        switch (getEvaluationContext()) {
        case CONTROLLER:
            break;
        default:
            throw new UnsupportedOperationException(
                    Annotations.EVALUATION_CONTEXT + "="
                            + getEvaluationContext());
        }

        // The sort is only applied on the last invocation (see SortTask#call()).
        if (!isLastPassRequested()) {
            throw new UnsupportedOperationException(Annotations.LAST_PASS
                    + "=" + isLastPassRequested());
        }

        // ORDER_BY must preserve order.
        if (isReorderSolutions()) {
            throw new UnsupportedOperationException(
                    Annotations.REORDER_SOLUTIONS + "=" + isReorderSolutions());
        }

        // Required parameter. The result is discarded; presumably the
        // accessor throws when the annotation is missing - TODO confirm.
        getValueComparator();

        // Validate required parameter: anything more complex than a variable
        // or a constant must have been wrapped by the caller in an IBind.
        for (ISortOrder<?> s : getSortOrder()) {
            final IValueExpression<?> expr = s.getExpr();
            if (expr instanceof IVariableOrConstant<?>) {
                continue;
            }
            if (expr instanceof IBind<?>) {
                continue;
            }
            throw new IllegalArgumentException(
                    "Value expression not wrapped by bind: " + expr);
        }

    }

    /**
     * Wrap a new {@link SortTask} for this operator in a {@link FutureTask}.
     * The task is not started here.
     */
    @Override
    public FutureTask<Void> eval(final BOpContext<IBindingSet> context) {
        return new FutureTask<Void>(new SortTask(this, context));
    }

    /**
     * Task executing on the node. Each invocation buffers the solutions of
     * the chunks it is given; the invocation flagged as the last one also
     * sorts the buffered solutions, writes them on the sink and releases the
     * buffer.
     */
    static private class SortTask implements Callable<Void> {

        private final MemorySortOp op;

        private final BOpContext<IBindingSet> context;

        private final BOpStats stats;

        private final ISortOrder<?>[] sortOrder;

        /**
         * The {@link IQueryAttributes} for the {@link IRunningQuery} off which
         * we will hang the buffered solutions.
         */
        private final IQueryAttributes attrs;

        /**
         * The solutions. A reference to this object is stored on the
         * {@link IQueryAttributes} so that the buffer survives from one
         * invocation of the operator to the next. Cleared by
         * {@link #release()}.
         */
        private LinkedList<IBindingSet> solutions;

        /**
         * The name of the key under which the {@link #solutions} are stored in
         * the {@link IQueryAttributes} (the operator id rendered as a string).
         */
        private final String key;

        /**
         * Look up the solution buffer for this operator on the query
         * attributes, creating and registering it when this is the first
         * invocation.
         *
         * @throws AssertionError
         *             if another buffer was registered under the same key
         *             between the lookup and the registration.
         */
        @SuppressWarnings("unchecked")
        SortTask(final MemorySortOp op,
                final BOpContext<IBindingSet> context) {

            this.op = op;
            this.context = context;
            this.stats = context.getStats();
            this.sortOrder = op.getSortOrder();
            this.attrs = context.getQueryAttributes();
            this.key = Integer.toString(op.getId());

            LinkedList<IBindingSet> buffer = (LinkedList<IBindingSet>) attrs
                    .get(key);

            if (buffer == null) {
                buffer = new LinkedList<IBindingSet>();
                if (attrs.putIfAbsent(key, buffer) != null) {
                    throw new AssertionError();
                }
            }

            this.solutions = buffer;

        }

        /**
         * Discard the buffered solutions: remove them from the query
         * attributes and drop the local reference.
         */
        void release() {

            if (log.isInfoEnabled())
                log.info("Releasing state");

            attrs.remove(key);

            solutions = null;

        }

        /**
         * Buffer the source solutions and, on the last invocation, sort and
         * output everything which has been buffered.
         * <p>
         * Any {@link Throwable} is logged and rethrown wrapped in a
         * {@link RuntimeException}. The sink is always closed, and on the last
         * invocation the buffered state is always released.
         */
        @Override
        public Void call() throws Exception {

            final ICloseableIterator<IBindingSet[]> itr = context
                    .getSource();

            final IBlockingBuffer<IBindingSet[]> sink = context.getSink();

            final boolean lastInvocation = context.isLastInvocation();

            try {

                acceptSolutions(itr);

                if (lastInvocation) {
                    doOrderBy(sink);
                }

            } catch (Throwable t) {

                log.error(t, t);

                throw new RuntimeException(t);

            } finally {

                // Nested so the sink is closed even if releasing the state
                // fails.
                try {
                    if (lastInvocation) {
                        // Discard the operator's internal state.
                        release();
                    }
                } finally {
                    sink.close();
                }

            }

            // Done.
            return null;

        }

        /**
         * Evaluate the value expressions for each input solution and buffer
         * the solution for the eventual sort.
         * <p>
         * A solution is NOT dropped when a {@link SparqlTypeErrorException}
         * is thrown while evaluating its sort expressions: the error is
         * handed to {@link TypeErrorLog} and the solution is buffered anyway
         * (see trac 765).
         * <p>
         * NOTE(review): a single try block surrounds the loop over the sort
         * expressions, so the first type error for a solution also skips the
         * evaluation of the remaining sort expressions for that solution.
         * Confirm that this is the intended behavior for multi-key sorts.
         *
         * @param itr
         *            The source solutions. Always closed by this method.
         */
        private void acceptSolutions(
                final ICloseableIterator<IBindingSet[]> itr) {

            try {

                while (itr.hasNext()) {

                    final IBindingSet[] chunk = itr.next();

                    stats.chunksIn.increment();
                    stats.unitsIn.add(chunk.length);

                    for (IBindingSet bset : chunk) {

                        // Note: Declared outside of the try so the failing
                        // expression can be reported from the catch clause.
                        IValueExpression<?> expr = null;

                        try {

                            for (ISortOrder<?> s : sortOrder) {
                                /*
                                 * Evaluate. A BIND() will have side-effect on
                                 * [bset].
                                 */
                                expr = s.getExpr();
                                expr.get(bset);
                            }

                        } catch (SparqlTypeErrorException ex) {

                            // log type error, do not drop solution (see trac
                            // 765).
                            TypeErrorLog.handleTypeError(ex, expr, stats);

                        }

                        // add to the set of solutions to be sorted.
                        solutions.add(bset);

                    } // next source solution

                } // next chunk

                if (log.isInfoEnabled())
                    log.info("Buffered " + solutions.size()
                            + " solutions so far");

            } finally {

                itr.close();

            }

        } // acceptSolutions

        /**
         * Sort the buffered solutions based on the as-bound value
         * expressions, strip the variables introduced by {@link IBind} sort
         * expressions, and write the result on the sink as a single chunk.
         *
         * @param sink
         *            Where to write the results.
         */
        private void doOrderBy(final IBlockingBuffer<IBindingSet[]> sink) {

            if (log.isInfoEnabled())
                log.info("Sorting.");

            final IBindingSet[] all = solutions.toArray(new IBindingSet[0]);

            @SuppressWarnings({ "rawtypes", "unchecked" })
            final Comparator<IBindingSet> c = new BindingSetComparator(
                    sortOrder, op.getValueComparator());

            // sort.
            {

                final long begin = System.currentTimeMillis();

                Arrays.sort(all, c);

                final long elapsed = System.currentTimeMillis() - begin;

                if (log.isInfoEnabled())
                    log.info("Sorted " + all.length + " solutions in "
                            + elapsed + "ms.");

            }

            // Drop variables for computed value expressions.
            for (IBindingSet bset : all) {
                for (ISortOrder<?> s : sortOrder) {
                    final IValueExpression<?> expr = s.getExpr();
                    if (expr instanceof IBind<?>) {
                        bset.clear(((IBind<?>) expr).getVar());
                    }
                }
            }

            // write output and flush.
            sink.add(all);

            sink.flush();

        }

    } // SortTask

} // MemorySortOp