HTreeDistinctBindingSetsOp.java example

Explorer
blazegraph-master
- database-master
package com.bigdata.bop.solutions;

import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.FutureTask;

import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpContext;
import com.bigdata.bop.HTreeAnnotations;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IQueryAttributes;
import com.bigdata.bop.ISingleThreadedOp;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.NV;
import com.bigdata.bop.PipelineOp;
import com.bigdata.bop.controller.INamedSolutionSetRef;
import com.bigdata.bop.controller.NamedSetAnnotations;
import com.bigdata.bop.engine.BOpStats;
import com.bigdata.bop.join.HTreeHashJoinUtility;
import com.bigdata.bop.join.JoinTypeEnum;
import com.bigdata.htree.HTree;
import com.bigdata.rdf.internal.IV;
import com.bigdata.relation.accesspath.IBlockingBuffer;
import com.bigdata.relation.accesspath.UnsyncLocalOutputBuffer;

import cutthecrap.utils.striterators.ICloseableIterator;

/**
 * A pipelined DISTINCT operator based on the persistence capable {@link HTree}
 * suitable for very large solution sets. Only the variables which are used to
 * determine the DISTINCT solutions are projected from the operator. The
 * operator is specific to the RDF data model (it relies on encoded {@link IV}
 * s).
 * <p>
 * Note: This implementation is a single-threaded pipelined operator which
 * inspects each chunk of solutions as they arrive and those solutions which are
 * distinct for each chunk passed on.
 * <p>
 * Note: {@link PipelineOp.Annotations#MAX_MEMORY} is currently ignored by this
 * operator. This value could be used to trigger the switch to an external
 * memory DISTINCT (on a backing store) or to fail a query which attempts to put
 * too much data into the native heap. Right now, it will just keep adding data
 * on the native heap and eventually the machine will begin to swap.
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 */
public class HTreeDistinctBindingSetsOp extends PipelineOp implements
        ISingleThreadedOp {

//	private final static transient Logger log = Logger
//			.getLogger(DistinctBindingSetsWithHTreeOp.class);
	
    /**
     * 
     */
    private static final long serialVersionUID = 1L;

	public interface Annotations extends PipelineOp.Annotations,
			HTreeAnnotations, DistinctAnnotations, NamedSetAnnotations {

//        /**
//         * The name of {@link IQueryAttributes} attribute under which the
//         * {@link HTreeHashJoinState} for this operator is stored. The attribute
//         * name includes the query UUID. The query UUID must be extracted and
//         * used to lookup the {@link IRunningQuery} to which the solution set
//         * was attached.
//         * 
//         * @see NamedSolutionSetRef
//         */
//        final String NAMED_SET_REF = HTreeNamedSubqueryOp.Annotations.NAMED_SET_REF;

	}

    /**
     * Constructor required for {@link com.bigdata.bop.BOpUtility#deepCopy(FilterNode)}.
     */
    public HTreeDistinctBindingSetsOp(final HTreeDistinctBindingSetsOp op) {
        super(op);
    }

    /**
     * Required shallow copy constructor.
     */
    public HTreeDistinctBindingSetsOp(final BOp[] args,
            final Map<String, Object> annotations) {

		super(args, annotations);

		switch (getEvaluationContext()) {
        case CONTROLLER:
        case HASHED:
            break;
		default:
			throw new UnsupportedOperationException(
					Annotations.EVALUATION_CONTEXT + "="
							+ getEvaluationContext());
		}

		assertMaxParallelOne();

//		// shared state is used to share the hash table.
//		if (!isSharedState()) {
//			throw new UnsupportedOperationException(Annotations.SHARED_STATE
//					+ "=" + isSharedState());
//		}

        final INamedSolutionSetRef namedSetRef = (INamedSolutionSetRef) getRequiredProperty(Annotations.NAMED_SET_REF);

		final IVariable<?>[] vars = (IVariable[]) getProperty(Annotations.VARIABLES);

		if (vars == null || vars.length == 0)
			throw new IllegalArgumentException();

    }

    public HTreeDistinctBindingSetsOp(final BOp[] args, NV... annotations) {

        this(args, NV.asMap(annotations));
        
    }

    public FutureTask<Void> eval(final BOpContext<IBindingSet> context) {

        return new FutureTask<Void>(new DistinctTask(this, context));
        
    }

    /**
     * Task executing on the node.
     */
    static private class DistinctTask implements Callable<Void> {

        private final HTreeDistinctBindingSetsOp op;
        
        private final BOpContext<IBindingSet> context;

        private final HTreeHashJoinUtility state;
        
        DistinctTask(final HTreeDistinctBindingSetsOp op,
                final BOpContext<IBindingSet> context) {
            
            this.op = op;
            
            this.context = context;

            /** Metadata to identify the named solution set. */
            final INamedSolutionSetRef namedSetRef = (INamedSolutionSetRef) op
                    .getRequiredProperty(Annotations.NAMED_SET_REF);

            /*
             * First, see if the map already exists.
             * 
             * Note: Since the operator is not thread-safe, we do not need to
             * use a putIfAbsent pattern here.
             */

            /*
			 * Lookup the attributes for the query on which we will hang the
			 * solution set. See BLZG-1493 (if queryId is null, use the query
			 * attributes for this running query).
			 */
			final IQueryAttributes attrs = context.getQueryAttributes(namedSetRef.getQueryId());

            HTreeHashJoinUtility state = (HTreeHashJoinUtility) attrs
                    .get(namedSetRef);

            if (state == null) {

                state = new HTreeHashJoinUtility(
                        context.getMemoryManager(namedSetRef.getQueryId()), op,
                        JoinTypeEnum.Filter);

                if (attrs.putIfAbsent(namedSetRef, state) != null)
                    throw new AssertionError();

            }

            this.state = state;
            
        }
        @Override
        public Void call() throws Exception {

            final BOpStats stats = context.getStats();

            final ICloseableIterator<IBindingSet[]> itr = context
                    .getSource();

            final IBlockingBuffer<IBindingSet[]> sink = context.getSink();

            try {

                final UnsyncLocalOutputBuffer<IBindingSet> unsyncBuffer = new UnsyncLocalOutputBuffer<IBindingSet>(
                        op.getChunkCapacity(), sink);

                state.filterSolutions(itr, stats, unsyncBuffer);

                unsyncBuffer.flush();
                
                sink.flush();
                
                // done.
                return null;
                
            } finally {

                if (context.isLastInvocation()) {

                    state.release();

                }

                sink.close();

            }

        }

    } // class DistinctTask

}