/** Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved. Contact: SYSTAP, LLC DBA Blazegraph 2501 Calvert ST NW #106 Washington, DC 20008 licenses@blazegraph.com This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* * Created on Aug 25, 2010 */ package com.bigdata.bop.join; import java.math.BigInteger; import java.util.Map; import java.util.concurrent.Callable; import java.util.concurrent.FutureTask; import com.bigdata.bop.BOp; import com.bigdata.bop.BOpContext; import com.bigdata.bop.BOpUtility; import com.bigdata.bop.Constant; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstraint; import com.bigdata.bop.IPredicate; import com.bigdata.bop.IVariable; import com.bigdata.bop.NV; import com.bigdata.bop.PipelineOp; import com.bigdata.bop.bindingSet.ListBindingSet; import com.bigdata.bop.engine.BOpStats; import com.bigdata.rdf.internal.impl.literal.XSDIntegerIV; import com.bigdata.relation.IRelation; import com.bigdata.relation.accesspath.IAccessPath; import com.bigdata.relation.accesspath.IBlockingBuffer; import com.bigdata.relation.accesspath.UnsyncLocalOutputBuffer; /** * This operator reports the fast-range count for an as-bound {@link IPredicate} * . The cost of this operator is two key probes. Unlike a normal access path, * this operator does not bind variables to data in tuples in the underlying * index. Instead it binds a pre-identified variable to the aggregate (COUNT) of * the tuple range spanned by the {@link IPredicate}. * * @see <a href="http://trac.blazegraph.com/ticket/1037" > Rewrite SELECT * COUNT(...) (DISTINCT|REDUCED) {single-triple-pattern} as ESTCARD </a> * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> */ public class FastRangeCountOp<E> extends PipelineOp { /** * */ private static final long serialVersionUID = 1L; public interface Annotations extends AccessPathJoinAnnotations { /** * The name of the variable that will be bound to the fast-range count * of the access path associated with the predicate. */ String COUNT_VAR = FastRangeCountOp.class.getName() + ".countVar"; } /** * Deep copy constructor. * * @param op */ public FastRangeCountOp(final FastRangeCountOp<E> op) { super(op); } /** * Shallow copy constructor. * * @param args * @param annotations */ public FastRangeCountOp(final BOp[] args, final Map<String, Object> annotations) { super(args, annotations); // MUST be given. getRequiredProperty(Annotations.COUNT_VAR); getRequiredProperty(Annotations.PREDICATE); if(isOptional()) { /* * TODO OPTIONAL is not implemented for this operator. * * Note: For this operator, an OPTIONAL join would be *nearly* * identical to a normal join. This is because the OPTIONAL join * succeeds where the normal join would fail due to no matched * tuples. However, since this join is computing an aggregate * (COUNT), the count is simply zero for the OPTIONAL join case. * However, we would still produce a solution in the case where the * countVar was bound on input to this operator and the range-count * produced a different value for the countVar. */ throw new UnsupportedOperationException(); } } public FastRangeCountOp(final BOp[] args, final NV... annotations) { this(args, NV.asMap(annotations)); } /** * @see Annotations#COUNT_VAR */ protected IVariable<?> getCountVar() { return (IVariable<?>) getRequiredProperty(Annotations.COUNT_VAR); } /** * @see Annotations#SELECT */ protected IVariable<?>[] getSelect() { return getProperty(Annotations.SELECT, null/* defaultValue */); } /** * @see Annotations#CONSTRAINTS */ protected IConstraint[] constraints() { return getProperty(Annotations.CONSTRAINTS, null/* defaultValue */); } @SuppressWarnings("unchecked") public IPredicate<E> getPredicate() { return (IPredicate<E>) getRequiredProperty(Annotations.PREDICATE); } /** * Return the value of {@link IPredicate#isOptional()} for the * {@link IPredicate} associated with this join. * * @see IPredicate.Annotations#OPTIONAL */ private boolean isOptional() { // return getProperty(Annotations.OPTIONAL, Annotations.DEFAULT_OPTIONAL); return getPredicate().isOptional(); } @Override public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { return new FutureTask<Void>(new ChunkTask<E>(this, context)); } /** * Copy the source to the sink. */ static protected class ChunkTask<E> implements Callable<Void> { protected final FastRangeCountOp<E> op; protected final BOpContext<IBindingSet> context; /** * The variable that gets bound to the fast range count. */ protected final IVariable<?> countVar; /** * The source for the elements to be joined. */ protected final IPredicate<E> predicate; /** * The relation associated with the {@link #predicate} operand. */ protected final IRelation<E> relation; protected ChunkTask(final FastRangeCountOp<E> op, final BOpContext<IBindingSet> context) { this.op = op; this.context = context; this.countVar = op.getCountVar(); this.predicate = op.getPredicate(); this.relation = context.getRelation(predicate); } @Override public Void call() throws Exception { final BOpStats stats = context.getStats(); // Convert source solutions to array (assumes low cardinality). final IBindingSet[] leftSolutions = BOpUtility.toArray( context.getSource(), stats); // default sink final IBlockingBuffer<IBindingSet[]> sink = context.getSink(); /* * This is at most 1:1 operator. Each source solution produces one * output solution (the range-count for the as-bound predicate on * the corresponding access path). The only way in which we can get * less than a 1:1 join hit ratio is if the countVar is bound on * input and the actual range count does not unify with the incoming * binding for that variable. */ final UnsyncLocalOutputBuffer<IBindingSet> unsyncBuffer = new UnsyncLocalOutputBuffer<IBindingSet>( leftSolutions.length/* capacity */, sink); final IVariable<?>[] selectVars = op.getSelect(); final IConstraint[] constraints = op.constraints(); try { /* * TODO If there are multiple left solutions (from the pipeline) * then we could generate their fromKeys and order them to * improve cache locality. See PipelineJoin for an example of * how this is done. */ // For each source solution. for (IBindingSet bindingSet : leftSolutions) { // constrain the predicate to the given bindings. IPredicate<E> asBound = predicate.asBound(bindingSet); if (asBound == null) { /* * This can happen for a SIDS mode join if some of the * (s,p,o,[c]) and SID are bound on entry and they can not * be unified. For example, the s position might be * inconsistent with the Subject that can be decoded from * the SID binding. * * @see #815 (RDR query does too much work) */ continue; } // if (partitionId != -1) { // // /* // * Constrain the predicate to the desired index // * partition. // * // * Note: we do this for scale-out joins since the // * access path will be evaluated by a JoinTask // * dedicated to this index partition, which is part // * of how we give the JoinTask to gain access to the // * local index object for an index partition. // */ // // asBound = asBound.setPartitionId(partitionId); // // } final long rangeCount = determineRangeCount( asBound ); // New binding set. final IBindingSet right = new ListBindingSet(); /* * Bind the countVar. * * Note: per the spec, SPARQL expects an xsd:integer here. */ right.set(countVar, new Constant<XSDIntegerIV>( new XSDIntegerIV(BigInteger.valueOf(rangeCount)))); // See if the solutions join. final IBindingSet outSolution = BOpContext.bind(// bindingSet,// left right,// constraints,// selectVars// ); if (outSolution != null) { // Output the solution. unsyncBuffer.add(outSolution); } } // flush the unsync buffer. unsyncBuffer.flush(); // flush the sink. sink.flush(); // Done. return null; } finally { sink.close(); context.getSource().close(); } } protected long determineRangeCount( final IPredicate<E> pred ) { /** * The {@link IAccessPath} corresponding to the asBound * {@link IPredicate} for this join dimension. The asBound * {@link IPredicate} is {@link IAccessPath#getPredicate()}. * * Note: The exact range count using will be two key probes * unless the index supports delete markers or there is a * filter attached to the access path. * * Note: This will throw an exception if either of those * conditions is true (the index supports delete markers or * there is a filter attached to the access path). The * exception is thrown since those conditions change the * cost of this operator from O(2) (the cost of TWO key * probes) to the O(fast-range-count) (the cost of a * key-range scan). Thus, generating this operator when * those conditions are violated leads to incorrect * reasoning about the cost of the operator. */ final IAccessPath<E> accessPath = context.getAccessPath( relation, pred ); if (accessPath.getPredicate().getIndexLocalFilter() != null) { // index has local filter. requires scan. throw new AssertionError(); } if (accessPath.getPredicate().getAccessPathFilter() != null) { // access path filter exists. requires scan. throw new AssertionError(); } /* * Request an exact range count. * * Note: This will be 2 key probes since we have verified * that there are no filters imposed on the access path. */ return accessPath.rangeCount(true/* exact */); } } // class ChunkTask }