/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.physical.impl.aggregate;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import javax.inject.Named;

import org.apache.drill.common.expression.ErrorCollector;
import org.apache.drill.common.expression.ErrorCollectorImpl;
import org.apache.drill.common.expression.ExpressionPosition;
import org.apache.drill.common.expression.FieldReference;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.exec.compile.sig.RuntimeOverridden;
import org.apache.drill.exec.exception.ClassTransformationException;
import org.apache.drill.exec.exception.SchemaChangeException;
import org.apache.drill.exec.expr.TypeHelper;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.exec.ops.MetricDef;
import org.apache.drill.exec.ops.OperatorStats;
import org.apache.drill.exec.physical.config.HashAggregate;
import org.apache.drill.exec.physical.impl.common.ChainedHashTable;
import org.apache.drill.exec.physical.impl.common.HashTable;
import org.apache.drill.exec.physical.impl.common.HashTableConfig;
import org.apache.drill.exec.physical.impl.common.HashTableStats;
import org.apache.drill.exec.physical.impl.common.IndexPointer;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.RecordBatch;
import org.apache.drill.exec.record.RecordBatch.IterOutcome;
import org.apache.drill.exec.record.TypedFieldId;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.vector.AllocationHelper;
import org.apache.drill.exec.vector.FixedWidthVector;
import org.apache.drill.exec.vector.ObjectVector;
import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.exec.vector.VariableWidthVector;

public abstract class HashAggTemplate implements HashAggregator {
  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(HashAggregator.class);

//  private static final long ALLOCATOR_INITIAL_RESERVATION = 1 * 1024 * 1024;
//  private static final long ALLOCATOR_MAX_RESERVATION = 20L * 1000 * 1000 * 1000;
  private static final int VARIABLE_WIDTH_VALUE_SIZE = 50;

  private static final boolean EXTRA_DEBUG_1 = false;
  private static final boolean EXTRA_DEBUG_2 = false;
//  private static final String TOO_BIG_ERROR =
//      "Couldn't add value to an empty batch. This likely means that a single value is too long for a varlen field.";
//  private boolean newSchema = false;
  private int underlyingIndex = 0;
  private int currentIndex = 0;
  private IterOutcome outcome;
//  private int outputCount = 0;
  private int numGroupedRecords = 0;
  private int outBatchIndex = 0;
  private int lastBatchOutputCount = 0;
  private RecordBatch incoming;
//  private BatchSchema schema;
  private HashAggBatch outgoing;
  private VectorContainer outContainer;
//  private FragmentContext context;
  private BufferAllocator allocator;

//  private HashAggregate hashAggrConfig;
  private HashTable htable;
  private ArrayList<BatchHolder> batchHolders;
  private IndexPointer htIdxHolder; // holder for the Hashtable's internal index returned by put()
  private IndexPointer outStartIdxHolder;
  private IndexPointer outNumRecordsHolder;
  private int numGroupByOutFields = 0; // Note: this should be <= number of group-by fields

  ErrorCollector collector = new ErrorCollectorImpl();

  private MaterializedField[] materializedValueFields;
  private boolean allFlushed = false;
  private boolean buildComplete = false;

  private OperatorStats stats = null;
  private HashTableStats htStats = new HashTableStats();

  public enum Metric implements MetricDef {
    NUM_BUCKETS,
    NUM_ENTRIES,
    NUM_RESIZING,
    RESIZING_TIME; // duplicate for hash agg

    @Override
    public int metricId() {
      return ordinal();
    }
  }

  public class BatchHolder {

    private VectorContainer aggrValuesContainer; // container for aggr values (workspace variables)
    private int maxOccupiedIdx = -1;
    private int batchOutputCount = 0;

    private int capacity = Integer.MAX_VALUE;
    private boolean allocatedNextBatch = false;

    @SuppressWarnings("resource")
    public BatchHolder() {
      aggrValuesContainer = new VectorContainer();
      boolean success = false;
      try {
        ValueVector vector;

        for (int i = 0; i < materializedValueFields.length; i++) {
          MaterializedField outputField = materializedValueFields[i];
          // Create a type-specific ValueVector for this value
          vector = TypeHelper.getNewVector(outputField, allocator);

          // Try to allocate space to store BATCH_SIZE records. A key stored at index i in the HashTable
          // has its workspace variables (such as count, sum, etc.) stored at index i in the HashAgg.
          // HashTable and HashAgg both have BatchHolders. Whenever a BatchHolder in HashAgg reaches its
          // capacity, a new BatchHolder is added to the HashTable. If HashAgg can't store BATCH_SIZE
          // records in a BatchHolder, it leaves empty slots in the current BatchHolder of the HashTable,
          // making the HashTable space-inefficient. So it is better to allocate space to fit as close as
          // possible to BATCH_SIZE records.
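          // For instance (a rough illustration; the exact constant values are assumptions based on
          // their names): with HashTable.BATCH_SIZE slots per batch and an assumed average of
          // HashTable.VARIABLE_WIDTH_VECTOR_SIZE bytes per value, the variable-width branch below
          // pre-allocates VARIABLE_WIDTH_VECTOR_SIZE * BATCH_SIZE bytes of data space plus BATCH_SIZE
          // offsets, so that all slots remain usable and the matching HashTable batch has no holes.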
          if (vector instanceof FixedWidthVector) {
            ((FixedWidthVector) vector).allocateNew(HashTable.BATCH_SIZE);
          } else if (vector instanceof VariableWidthVector) {
            ((VariableWidthVector) vector).allocateNew(HashTable.VARIABLE_WIDTH_VECTOR_SIZE * HashTable.BATCH_SIZE,
                HashTable.BATCH_SIZE);
          } else if (vector instanceof ObjectVector) {
            ((ObjectVector) vector).allocateNew(HashTable.BATCH_SIZE);
          } else {
            vector.allocateNew();
          }

          capacity = Math.min(capacity, vector.getValueCapacity());

          aggrValuesContainer.add(vector);
        }
        success = true;
      } finally {
        if (!success) {
          aggrValuesContainer.clear();
        }
      }
    }

    private boolean updateAggrValues(int incomingRowIdx, int idxWithinBatch) {
      updateAggrValuesInternal(incomingRowIdx, idxWithinBatch);
      maxOccupiedIdx = Math.max(maxOccupiedIdx, idxWithinBatch);
      return true;
    }

    private void setup() {
      setupInterior(incoming, outgoing, aggrValuesContainer);
    }

    private void outputValues(IndexPointer outStartIdxHolder, IndexPointer outNumRecordsHolder) {
      outStartIdxHolder.value = batchOutputCount;
      outNumRecordsHolder.value = 0;
      for (int i = batchOutputCount; i <= maxOccupiedIdx; i++) {
        outputRecordValues(i, batchOutputCount);
        if (EXTRA_DEBUG_2) {
          logger.debug("Outputting values to output index: {}", batchOutputCount);
        }
        batchOutputCount++;
        outNumRecordsHolder.value++;
      }
    }

    private void clear() {
      aggrValuesContainer.clear();
    }

    private int getNumGroups() {
      return maxOccupiedIdx + 1;
    }

    private int getNumPendingOutput() {
      return getNumGroups() - batchOutputCount;
    }

    // Code-generated methods (implemented in HashAggBatch)

    @RuntimeOverridden
    public void setupInterior(@Named("incoming") RecordBatch incoming, @Named("outgoing") RecordBatch outgoing,
        @Named("aggrValuesContainer") VectorContainer aggrValuesContainer) {
    }

    @RuntimeOverridden
    public void updateAggrValuesInternal(@Named("incomingRowIdx") int incomingRowIdx,
        @Named("htRowIdx") int htRowIdx) {
    }

    @RuntimeOverridden
    public void outputRecordValues(@Named("htRowIdx") int htRowIdx, @Named("outRowIdx") int outRowIdx) {
    }
  }

  @Override
  public void setup(HashAggregate hashAggrConfig, HashTableConfig htConfig, FragmentContext context,
      OperatorStats stats, BufferAllocator allocator, RecordBatch incoming, HashAggBatch outgoing,
      LogicalExpression[] valueExprs, List<TypedFieldId> valueFieldIds, TypedFieldId[] groupByOutFieldIds,
      VectorContainer outContainer) throws SchemaChangeException, ClassTransformationException, IOException {

    if (valueExprs == null || valueFieldIds == null) {
      throw new IllegalArgumentException("Invalid aggr value exprs or workspace variables.");
    }
    if (valueFieldIds.size() < valueExprs.length) {
      throw new IllegalArgumentException("Wrong number of workspace variables.");
    }

//    this.context = context;
    this.stats = stats;
    this.allocator = allocator;
    this.incoming = incoming;
//    this.schema = incoming.getSchema();
    this.outgoing = outgoing;
    this.outContainer = outContainer;

//    this.hashAggrConfig = hashAggrConfig;

    // Currently, hash aggregation is only applicable if there are group-by expressions.
    // For non-grouped (a.k.a. Plain) aggregations that don't involve DISTINCT, there is no
    // need to create a hash table. However, for plain aggregations with DISTINCT ..
    //      e.g. SELECT COUNT(DISTINCT a1) FROM t1 ;
    // we would need to build a hash table on the aggregation column a1.
    // TODO: This functionality will be added later.
    if (hashAggrConfig.getGroupByExprs().size() == 0) {
      throw new IllegalArgumentException("Currently, hash aggregation is only applicable if there are group-by "
          + "expressions.");
    }

    this.htIdxHolder = new IndexPointer();
    this.outStartIdxHolder = new IndexPointer();
    this.outNumRecordsHolder = new IndexPointer();

    materializedValueFields = new MaterializedField[valueFieldIds.size()];

    if (valueFieldIds.size() > 0) {
      int i = 0;
      FieldReference ref =
          new FieldReference("dummy", ExpressionPosition.UNKNOWN, valueFieldIds.get(0).getIntermediateType());
      for (TypedFieldId id : valueFieldIds) {
        materializedValueFields[i++] =
            MaterializedField.create(ref.getAsNamePart().getName(), id.getIntermediateType());
      }
    }

    ChainedHashTable ht =
        new ChainedHashTable(htConfig, context, allocator, incoming, null /* no incoming probe */, outgoing);
    this.htable = ht.createAndSetupHashTable(groupByOutFieldIds);

    numGroupByOutFields = groupByOutFieldIds.length;
    batchHolders = new ArrayList<BatchHolder>();
    // First BatchHolder is created when the first put request is received.

    doSetup(incoming);
  }

  @Override
  public AggOutcome doWork() {
    try {
      // Note: Keeping the outer and inner try blocks here to maintain some similarity with
      // StreamingAggregate, which does some things conditionally in the outer try block.
      // In the future HashAggregate may also need to perform some actions conditionally
      // in the outer try block.

      outside:
      while (true) {
        // loop through existing records, aggregating the values as necessary.
        if (EXTRA_DEBUG_1) {
          logger.debug("Starting outer loop of doWork()...");
        }
        for (; underlyingIndex < incoming.getRecordCount(); incIndex()) {
          if (EXTRA_DEBUG_2) {
            logger.debug("Doing loop with values underlying {}, current {}", underlyingIndex, currentIndex);
          }
          checkGroupAndAggrValues(currentIndex);
        }

        if (EXTRA_DEBUG_1) {
          logger.debug("Processed {} records", underlyingIndex);
        }

        try {
          while (true) {
            // Clean up the previous batch since we are done processing it.
            for (VectorWrapper<?> v : incoming) {
              v.getValueVector().clear();
            }
            IterOutcome out = outgoing.next(0, incoming);
            if (EXTRA_DEBUG_1) {
              logger.debug("Received IterOutcome of {}", out);
            }
            switch (out) {
              case OUT_OF_MEMORY:
              case NOT_YET:
                this.outcome = out;
                return AggOutcome.RETURN_OUTCOME;

              case OK_NEW_SCHEMA:
                if (EXTRA_DEBUG_1) {
                  logger.debug("Received new schema. Batch has {} records.", incoming.getRecordCount());
                }
//                newSchema = true;
                this.cleanup();
                // TODO: new schema case needs to be handled appropriately
                return AggOutcome.UPDATE_AGGREGATOR;

              case OK:
                resetIndex();
                if (incoming.getRecordCount() == 0) {
                  continue;
                } else {
                  checkGroupAndAggrValues(currentIndex);
                  incIndex();

                  if (EXTRA_DEBUG_1) {
                    logger.debug("Continuing outside loop");
                  }
                  continue outside;
                }

              case NONE:
//                outcome = out;

                buildComplete = true;

                updateStats(htable);

                // output the first batch; remaining batches will be output
                // in response to each next() call by a downstream operator
                outputCurrentBatch();

//                return setOkAndReturn();
                return AggOutcome.RETURN_OUTCOME;

              case STOP:
              default:
                outcome = out;
                return AggOutcome.CLEANUP_AND_RETURN;
            }
          }
        } finally {
          // placeholder...
        }
      }
    } finally {
    }
  }

  private void allocateOutgoing(int records) {
    // Skip the keys and only allocate for outputting the workspace values
    // (keys will be output through splitAndTransfer)
    Iterator<VectorWrapper<?>> outgoingIter = outContainer.iterator();
    for (int i = 0; i < numGroupByOutFields; i++) {
      outgoingIter.next();
    }
    while (outgoingIter.hasNext()) {
      @SuppressWarnings("resource")
      ValueVector vv = outgoingIter.next().getValueVector();
//      MajorType type = vv.getField().getType();

      /*
       * In build schema we use the allocation model that specifies exact record count
       * so we need to stick with that allocation model until DRILL-2211 is resolved. Using
       * 50 as the average bytes per value as is used in HashTable.
       */
      AllocationHelper.allocatePrecomputedChildCount(vv, records, VARIABLE_WIDTH_VALUE_SIZE, 0);
    }
  }

  @Override
  public IterOutcome getOutcome() {
    return outcome;
  }

  @Override
  public int getOutputCount() {
    // return outputCount;
    return lastBatchOutputCount;
  }

  @Override
  public void cleanup() {
    if (htable != null) {
      htable.clear();
      htable = null;
    }
    htIdxHolder = null;
    materializedValueFields = null;
    outStartIdxHolder = null;
    outNumRecordsHolder = null;

    if (batchHolders != null) {
      for (BatchHolder bh : batchHolders) {
        bh.clear();
      }
      batchHolders.clear();
      batchHolders = null;
    }
  }

//  private final AggOutcome setOkAndReturn() {
//    this.outcome = IterOutcome.OK;
//    for (VectorWrapper<?> v : outgoing) {
//      v.getValueVector().getMutator().setValueCount(outputCount);
//    }
//    return AggOutcome.RETURN_OUTCOME;
//  }

  private final void incIndex() {
    underlyingIndex++;
    if (underlyingIndex >= incoming.getRecordCount()) {
      currentIndex = Integer.MAX_VALUE;
      return;
    }
    currentIndex = getVectorIndex(underlyingIndex);
  }

  private final void resetIndex() {
    underlyingIndex = -1;
    incIndex();
  }

  private void addBatchHolder() {
    BatchHolder bh = newBatchHolder();
    batchHolders.add(bh);

    if (EXTRA_DEBUG_1) {
      logger.debug("HashAggregate: Added new batch; num batches = {}.", batchHolders.size());
    }

    bh.setup();
  }

  // Overridden in the generated class when created as plain Java code.
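  // A generated subclass would override this to return its own BatchHolder subclass, roughly
  // (hypothetical sketch; the actual override is emitted by Drill's code generator):
  //   @Override
  //   protected BatchHolder newBatchHolder() { return new BatchHolderGen(); }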
  protected BatchHolder newBatchHolder() {
    return new BatchHolder();
  }

  @Override
  public IterOutcome outputCurrentBatch() {
    if (outBatchIndex >= batchHolders.size()) {
      this.outcome = IterOutcome.NONE;
      return outcome;
    }

    // get the number of records in the batch holder that are pending output
    int numPendingOutput = batchHolders.get(outBatchIndex).getNumPendingOutput();

    if (numPendingOutput == 0) {
      this.outcome = IterOutcome.NONE;
      return outcome;
    }

    allocateOutgoing(numPendingOutput);

    batchHolders.get(outBatchIndex).outputValues(outStartIdxHolder, outNumRecordsHolder);
    int numOutputRecords = outNumRecordsHolder.value;

    if (EXTRA_DEBUG_1) {
      logger.debug("After output values: outStartIdx = {}, outNumRecords = {}", outStartIdxHolder.value,
          outNumRecordsHolder.value);
    }

    this.htable.outputKeys(outBatchIndex, this.outContainer, outStartIdxHolder.value, outNumRecordsHolder.value);

    // set the value count for outgoing batch value vectors
    for (VectorWrapper<?> v : outgoing) {
      v.getValueVector().getMutator().setValueCount(numOutputRecords);
    }

//    outputCount += numOutputRecords;

    this.outcome = IterOutcome.OK;

    logger.debug("HashAggregate: Output current batch index {} with {} records.", outBatchIndex, numOutputRecords);

    lastBatchOutputCount = numOutputRecords;
    outBatchIndex++;
    if (outBatchIndex == batchHolders.size()) {
      allFlushed = true;

      logger.debug("HashAggregate: All batches flushed.");

      // cleanup my internal state since there is nothing more to return
      this.cleanup();
    }

    return this.outcome;
  }

  @Override
  public boolean allFlushed() {
    return allFlushed;
  }

  @Override
  public boolean buildComplete() {
    return buildComplete;
  }

  public int numGroupedRecords() {
    return numGroupedRecords;
  }

  // Check if a group is present in the hash table; if not, insert it in the hash table.
  // The htIdxHolder contains the index of the group in the hash table container; this same
  // index is also used for the aggregation values maintained by the hash aggregate.
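  // For example (assuming HashTable.BATCH_SIZE = 1 << 16 and HashTable.BATCH_MASK = BATCH_SIZE - 1,
  // as the shift and mask below imply): an index of 0x00020005 returned by put() decodes to
  // batch 2 ((0x00020005 >>> 16) & BATCH_MASK) and slot 5 (0x00020005 & BATCH_MASK) within it.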
  private void checkGroupAndAggrValues(int incomingRowIdx) {
    if (incomingRowIdx < 0) {
      throw new IllegalArgumentException("Invalid incoming row index.");
    }

    /*
     * For debugging:
     * Object tmp = (incoming).getValueAccessorById(0, BigIntVector.class).getValueVector();
     * BigIntVector vv0 = null;
     * BigIntHolder holder = null;
     *
     * if (tmp != null) {
     *   vv0 = ((BigIntVector) tmp);
     *   holder = new BigIntHolder();
     *   holder.value = vv0.getAccessor().get(incomingRowIdx);
     * }
     */

    htable.put(incomingRowIdx, htIdxHolder, 1 /* retry count */);

    int currentIdx = htIdxHolder.value;

    // get the batch index and index within the batch
    if (currentIdx >= batchHolders.size() * HashTable.BATCH_SIZE) {
      addBatchHolder();
    }
    BatchHolder bh = batchHolders.get((currentIdx >>> 16) & HashTable.BATCH_MASK);
    int idxWithinBatch = currentIdx & HashTable.BATCH_MASK;

    // Check if we have almost filled up the workspace vectors and add a batch if necessary
    if ((idxWithinBatch == (bh.capacity - 1)) && (!bh.allocatedNextBatch)) {
      htable.addNewKeyBatch();
      addBatchHolder();
      bh.allocatedNextBatch = true;
    }

    if (bh.updateAggrValues(incomingRowIdx, idxWithinBatch)) {
      numGroupedRecords++;
    }
  }

  private void updateStats(HashTable htable) {
    htable.getStats(htStats);
    this.stats.setLongStat(Metric.NUM_BUCKETS, htStats.numBuckets);
    this.stats.setLongStat(Metric.NUM_ENTRIES, htStats.numEntries);
    this.stats.setLongStat(Metric.NUM_RESIZING, htStats.numResizing);
    this.stats.setLongStat(Metric.RESIZING_TIME, htStats.resizingTime);
  }

  // Code-generated methods (implemented in HashAggBatch)
  public abstract void doSetup(@Named("incoming") RecordBatch incoming);

  public abstract int getVectorIndex(@Named("recordIndex") int recordIndex);

  public abstract boolean resetValues();
}