package com.bigdata.relation.rule.eval.pipeline;

import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import it.unimi.dsi.fastutil.ints.IntSet;

import java.io.Serializable;
import java.text.DateFormat;
import java.util.concurrent.atomic.AtomicLong;

import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IPredicate;
import com.bigdata.relation.accesspath.IAccessPath;
import com.bigdata.relation.accesspath.IBuffer;
import com.bigdata.relation.rule.IRule;
import com.bigdata.relation.rule.eval.IRuleState;
import com.bigdata.relation.rule.eval.RuleStats;
import com.bigdata.relation.rule.eval.pipeline.JoinTask.AccessPathTask;

/**
 * Statistics about processing for a single join dimension as reported by a
 * single {@link JoinTask}. Each {@link JoinTask} handles a single index
 * partition, so the {@link JoinStats} for those index partitions need to be
 * aggregated by the {@link JoinMasterTask}.
 *
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @version $Id$
 */
public class JoinStats implements Serializable {

    /**
     * Serialization version identifier.
     */
    private static final long serialVersionUID = 9028650921831777131L;

    /**
     * The timestamp associated with the start of execution for the join
     * dimension. This is not aggregated. The timestamp is assigned when the
     * {@link JoinStats} object is created. That corresponds either to the start
     * of the distributed {@link JoinMasterTask} execution (aggregated level) or
     * to the start of some specific {@link JoinTask} (detail level).
     */
    public final long startTime;

    /**
     * The index partition for which these statistics were collected or -1
     * if the statistics are aggregated across index partitions.
     */
    public final int partitionId;

    /**
     * The index in the evaluation order whose statistics are reported here.
     */
    public final int orderIndex;

    /**
     * The maximum observed fan in for this join dimension (maximum #of sources
     * observed writing on any join task for this join dimension). Since join
     * tasks may be closed and new join tasks re-opened for the same query, join
     * dimension and index partition, and since each join task for the same join
     * dimension could, in principle, have a different fan in based on the
     * actual binding sets propagated this is not necessarily the "actual" fan
     * in for the join dimension.
     */
    public int fanIn;

    /**
     * The maximum observed fan out for this join dimension (maximum #of sinks
     * on which any join task is writing for this join dimension). Since join
     * tasks may be closed and new join tasks re-opened for the same query, join
     * dimension and index partition, and since each join task for the same join
     * dimension could, in principle, have a different fan out based on the
     * actual binding sets propagated this is not necessarily the "actual" fan
     * out for the join dimension.
     */
    public int fanOut;

    /**
     * The #of index partitions for which join tasks were created for this join
     * dimension. This is computed by explicitly tracking the distinct index
     * partition identifiers reported for the join dimension. This is the "real"
     * fan out for the prior join dimension.
     */
    public int partitionCount;

    /**
     * Set used to track the distinct partition identifiers reported for this
     * join dimension. It is created lazily by {@link #add(JoinStats)} and,
     * being transient, is not serialized.
     */
    transient private IntSet partitionIds;

    /**
     * The #of binding set chunks read from all source {@link JoinTask}s.
     */
    public long bindingSetChunksIn;

    /** The #of binding sets read from all source {@link JoinTask}s. */
    public long bindingSetsIn;

    /**
     * The #of {@link IAccessPath}s read. This will differ from
     * {@link #bindingSetsIn} iff the same {@link IBindingSet} is read from
     * more than one source and the {@link JoinTask} is able to recognize
     * the duplication and collapse it by removing the duplicate(s).
     */
    public long accessPathCount;

    /**
     * The #of duplicate {@link IAccessPath}s that were eliminated by a
     * {@link JoinTask}. Duplicate {@link IAccessPath}s arise when the
     * source {@link JoinTask}(s) generate the bindings on the
     * {@link IPredicate} for a join dimension. Duplicates are detected by a
     * {@link JoinTask} when it generates a chunk of distinct
     * {@link AccessPathTask}s from a chunk of {@link IBindingSet}s read
     * from its source(s) {@link JoinTask}s.
     * <p>
     * Note: While the {@link IPredicate}s for those tasks may have the
     * same bindings, the source {@link IBindingSet}s typically (always?)
     * have variety not represented in the bound {@link IPredicate} and
     * therefore are combined under a single {@link AccessPathTask}. This
     * reduces redundant reads on an {@link IAccessPath} while producing
     * exactly the same output {@link IBindingSet}s that would have been
     * produced if we did not identify the duplicate {@link IAccessPath}s.
     */
    public long accessPathDups;

    /** #of chunks visited over all access paths. */
    public long chunkCount;

    /** #of elements visited over all chunks. */
    public long elementCount;

    /**
     * The #of {@link IBindingSet}s written onto the next join dimension
     * (aka the #of solutions written iff this is the last join dimension).
     * <p>
     * Note: An {@link IBindingSet} can be written onto more than one index
     * partition for the next join dimension, so one generated
     * {@link IBindingSet} MAY result in N GTE ONE "binding sets out". This
     * occurs when the {@link IAccessPath} required to read on the next
     * {@link IPredicate} in the evaluation order spans more than one index
     * partition.
     */
    public long bindingSetsOut;

    /**
     * The #of {@link IBindingSet} chunks written onto the next join
     * dimension (aka the #of solutions written iff this is the last join
     * dimension in the evaluation order).
     */
    public long bindingSetChunksOut;

    /**
     * The mutationCount is the #of solutions output by a {@link JoinTask}(s)
     * for the last join dimension of a mutation operation that were not
     * already present in the target relation. This value is always zero
     * (0L) for query.
     * <p>
     * Note: The mutationCount MUST be obtained from {@link IBuffer#flush()}
     * for the buffer on which the {@link JoinTask}(s) for the last join
     * dimension write their solutions. For mutation, this buffer is
     * obligated to report the #of elements whose state was changed in the
     * target relation. Failure to correctly obey this contract can result
     * in non-termination of fix point closure operations.
     *
     * @see RuleStats#mutationCount
     */
    public AtomicLong mutationCount = new AtomicLong();

    /**
     * Ctor variant used by the {@link JoinMasterTask} to aggregate
     * statistics across the index partitions for a given join dimension.
     *
     * @param orderIndex
     *            The index in the evaluation order.
     */
    public JoinStats(final int orderIndex) {

        this(-1, orderIndex);

    }

    /**
     * Ctor variant used by a {@link JoinTask} to self-report.
     *
     * @param partitionId
     *            The index partition identifier.
     * @param orderIndex
     *            The index in the evaluation order.
     */
    public JoinStats(final int partitionId, final int orderIndex) {

        this.startTime = System.currentTimeMillis();

        this.partitionId = partitionId;

        this.orderIndex = orderIndex;

        fanIn = fanOut = 0;

        // either zero or one depending on the ctor.
        partitionCount = partitionId == -1 ? 0 : 1;

        bindingSetChunksIn = bindingSetsIn = 0L;

        accessPathCount = accessPathDups = 0L;

        chunkCount = elementCount = bindingSetsOut = 0L;

        bindingSetChunksOut = 0L;

    }

    /**
     * Folds the statistics reported by another {@link JoinStats} for the same
     * join dimension into this object. {@link #fanIn} and {@link #fanOut} are
     * aggregated as maximums, {@link #partitionCount} is incremented once for
     * each distinct {@link #partitionId} seen by this method, and all other
     * counters are summed. {@link #startTime} is not aggregated.
     *
     * @param o
     *            The statistics to be added to this object.
     *
     * @throws IllegalArgumentException
     *             if <i>o</i> reports on a different {@link #orderIndex}.
     */
    synchronized void add(final JoinStats o) {

        if (this.orderIndex != o.orderIndex)
            throw new IllegalArgumentException();

        if (partitionIds == null) {

            /*
             * Track the distinct partition identifiers for which join tasks
             * were created for this join dimension. This gives us the real
             * fanOut of the distributed join. However, this is the fanOut
             * across the entire execution of the join. The maximum concurrent
             * fanOut is just [fanOut].
             */

            partitionIds = new IntOpenHashSet();

        }

        if (partitionIds.add(o.partitionId)) {

            // one more distinct partition identifier.
            partitionCount++;

        }

        if (o.fanIn > this.fanIn) {

            // maximum reported fanIn for this join dimension.
            this.fanIn = o.fanIn;

        }

        if (o.fanOut > this.fanOut) {

            /*
             * Maximum reported fanOut for this join dimension. This must be an
             * assignment (not +=) so that the field holds the largest value
             * reported rather than a running sum.
             */
            this.fanOut = o.fanOut;

        }

        this.bindingSetChunksIn += o.bindingSetChunksIn;

        this.bindingSetsIn += o.bindingSetsIn;

        this.accessPathCount += o.accessPathCount;

        this.accessPathDups += o.accessPathDups;

        this.chunkCount += o.chunkCount;

        this.elementCount += o.elementCount;

        this.bindingSetsOut += o.bindingSetsOut;

        this.bindingSetChunksOut += o.bindingSetChunksOut;

        this.mutationCount.addAndGet(o.mutationCount.get());

    }

    /**
     * Single line representation listing each counter as a
     * <code>name=value</code> pair.
     */
    @Override
    public String toString() {

        final StringBuilder sb = new StringBuilder("JoinStats");

        sb.append("{ orderIndex=" + orderIndex);

        sb.append(", partitionId=" + partitionId);

        sb.append(", fanIn=" + fanIn);

        sb.append(", fanOut=" + fanOut);

        sb.append(", partitionIdCount=" + partitionCount);

        sb.append(", bindingSetChunksIn=" + bindingSetChunksIn);

        sb.append(", bindingSetsIn=" + bindingSetsIn);

        sb.append(", accessPathCount=" + accessPathCount);

        sb.append(", accessPathDups=" + accessPathDups);

        sb.append(", chunkCount=" + chunkCount);

        sb.append(", elementCount=" + elementCount);

        sb.append(", bindingSetsOut=" + bindingSetsOut);

        sb.append(", bindingSetChunksOut=" + bindingSetChunksOut);

        sb.append(", mutationCount=" + mutationCount);

        sb.append("}");

        return sb.toString();

    }

    /**
     * The column separator for the table view. Occurrences of this sequence
     * inside of a formatted value are replaced by a single space so that they
     * do not introduce extra columns.
     */
    private static final String sep = ", ";

    /**
     * Formats the array of {@link JoinStats} into a CSV table view.
     *
     * @param rule
     *            The {@link IRule} whose {@link JoinStats} are being reported.
     * @param ruleState
     *            Contains details about evaluation order for the
     *            {@link IPredicate}s in the tail of the <i>rule</i>, the access
     *            paths that were used, etc.
     * @param a
     *            The {@link JoinStats}. The i-th element is paired with the
     *            i-th entry of the evaluation order reported by the
     *            <i>ruleState</i>.
     *
     * @return The table view.
     */
    public static StringBuilder toString(final IRule rule,
            final IRuleState ruleState, final JoinStats[] a) {

        /*
         * Note: This is the same format that is used for the performance
         * counters. This makes it easier to correlate what is going on in the
         * query execution log with the performance counter data.
         */
        final DateFormat dateFormat = DateFormat.getDateTimeInstance(
                DateFormat.MEDIUM/* date */, DateFormat.MEDIUM/* time */);

        final int[] order = ruleState.getPlan().getOrder();

        final StringBuilder sb = new StringBuilder();

        // Note: orderIndex is also known as the evalOrder.
        sb.append("startTime, rule, orderIndex, keyOrder, nvars, rangeCount, fanIn, fanOut, partitionCount, bindingSetChunksIn, bindingSetsIn, accessPathCount, accessPathDups, chunkCount, elementCount, bindingSetsOut, bindingSetChunksOut, mutationCount, tailIndex, tailPredicate");

        sb.append("\n");

        int i = 0;
        for (JoinStats s : a) {

            // the tail predicate evaluated at this position of the plan.
            final int tailIndex = order[i++];

            // quote the rule name and strip commas so it stays in one column.
            final String ruleNameStr = "\"" + rule.getName().replace(',', ' ') + "\"";

            sb.append(dateFormat.format(s.startTime).replace(sep, " ") + sep);
            sb.append(ruleNameStr + sep);
            sb.append(Integer.toString(s.orderIndex) + sep);
            // sb.append(Integer.toString(s.partitionId)+sep); // always -1 when aggregated.
            sb.append(ruleState.getKeyOrder()[tailIndex].toString().replace(sep, " ") + sep);
            sb.append(ruleState.getNVars()[tailIndex] + sep);
            sb.append(ruleState.getPlan().rangeCount(tailIndex) + sep);
            sb.append(Integer.toString(s.fanIn) + sep);
            sb.append(Integer.toString(s.fanOut) + sep);
            sb.append(Integer.toString(s.partitionCount) + sep);
            sb.append(Long.toString(s.bindingSetChunksIn) + sep);
            sb.append(Long.toString(s.bindingSetsIn) + sep);
            sb.append(Long.toString(s.accessPathCount) + sep);
            sb.append(Long.toString(s.accessPathDups) + sep);
            sb.append(Long.toString(s.chunkCount) + sep);
            sb.append(Long.toString(s.elementCount) + sep);
            sb.append(Long.toString(s.bindingSetsOut) + sep);
            sb.append(Long.toString(s.bindingSetChunksOut) + sep);
            sb.append(Long.toString(s.mutationCount.get()) + sep);
            sb.append(Integer.toString(tailIndex) + sep);
            sb.append(rule.getTail(tailIndex).toString().replace(sep, " ") + "\n");

        }

        return sb;

    }

}