/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec.vector;

import java.lang.management.ManagementFactory;
import java.lang.management.MemoryMXBean;
import java.lang.ref.SoftReference;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.KeyWrapper;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc;
import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.DataOutputBuffer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javolution.util.FastBitSet;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;

/**
 * Vectorized GROUP BY operator implementation. Consumes the vectorized input and
 * stores the aggregate operators' intermediate states. Emits row-mode output
 * (or vectorized output when isVectorOutput is set).
 */
public class VectorGroupByOperator extends Operator<GroupByDesc>
    implements VectorizationContextRegion {

  private static final Logger LOG = LoggerFactory.getLogger(
      VectorGroupByOperator.class.getName());

  private VectorGroupByDesc vectorDesc;

  /**
   * This is the vector of aggregators. They are stateless and only implement
   * the algorithm of how to compute the aggregation.
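   * (A SUM aggregator, for example, carries no running total of its own.)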
   * The state is kept in the aggregation buffers, and it is our responsibility
   * to match the proper state for each key.
   */
  private VectorAggregateExpression[] aggregators;

  /**
   * Key vector expressions.
   */
  private VectorExpression[] keyExpressions;
  private int outputKeyLength;

  private boolean isVectorOutput;

  // Create a new outgoing vectorization context because the column name map will change.
  private VectorizationContext vOutContext = null;

  // The above members are initialized by the constructor and must not be
  // transient.
  //---------------------------------------------------------------------------

  private transient VectorExpressionWriter[] keyOutputWriters;

  /**
   * The aggregation buffers to use for the current batch.
   */
  private transient VectorAggregationBufferBatch aggregationBatchInfo;

  /**
   * The current batch key wrappers.
   * The very same instance gets reused for all batches.
   */
  private transient VectorHashKeyWrapperBatch keyWrappersBatch;

  private transient Object[] forwardCache;

  private transient VectorizedRowBatch outputBatch;
  private transient VectorizedRowBatchCtx vrbCtx;

  private transient VectorAssignRow vectorAssignRow;

  /*
   * Grouping sets members.
   */
  private transient boolean groupingSetsPresent;

  // The field bits (i.e. which fields to include) or "id" for each grouping set.
  private transient int[] groupingSets;

  // The position in the column keys of the dummy grouping set id column.
  private transient int groupingSetsPosition;

  // The planner puts a constant field in for the dummy grouping set id. We will overwrite it
  // as we process the grouping sets.
  private transient ConstantVectorExpression groupingSetsDummyVectorExpression;

  // We translate the grouping set bit field into boolean arrays.
  private transient boolean[][] allGroupingSetsOverrideIsNulls;

  private transient int numEntriesHashTable;

  private transient long maxHashTblMemory;

  private transient long maxMemory;

  private float memoryThreshold;

  /**
   * Interface for processing mode: global, hash, unsorted streaming, or group batch.
   */
  private static interface IProcessingMode {
    public void initialize(Configuration hconf) throws HiveException;
    public void startGroup() throws HiveException;
    public void endGroup() throws HiveException;
    public void processBatch(VectorizedRowBatch batch) throws HiveException;
    public void close(boolean aborted) throws HiveException;
  }

  /**
   * Base class for all processing modes.
   */
  private abstract class ProcessingModeBase implements IProcessingMode {

    // Overridden and used in the sorted reduce group batch processing mode.
    @Override
    public void startGroup() throws HiveException {
      // Do nothing.
    }

    @Override
    public void endGroup() throws HiveException {
      // Do nothing.
    }

    protected abstract void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet,
        boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException;

    @Override
    public void processBatch(VectorizedRowBatch batch) throws HiveException {

      if (!groupingSetsPresent) {
        doProcessBatch(batch, false, null);
        return;
      }

      // We drive the doProcessBatch logic with the same batch but a different
      // grouping set id and null variation each time.
      // PERFORMANCE NOTE: We do not try to reuse columns and generate the KeyWrappers anew...

      final int size = groupingSets.length;
      for (int i = 0; i < size; i++) {

        // NOTE: We are overwriting the constant vector value...
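        // Each iteration writes one grouping set id into the dummy constant key column and
        // re-processes the same batch, with that set's null overrides masking the unused keys.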
        groupingSetsDummyVectorExpression.setLongValue(groupingSets[i]);
        groupingSetsDummyVectorExpression.evaluate(batch);

        doProcessBatch(batch, (i == 0), allGroupingSetsOverrideIsNulls[i]);
      }
    }

    /**
     * Evaluates the aggregators on the current batch.
     * The aggregationBatchInfo must have been prepared
     * by calling {@link #prepareBatchAggregationBufferSets} first.
     */
    protected void processAggregators(VectorizedRowBatch batch) throws HiveException {
      // We now have a vector of aggregation buffer sets to use for each row,
      // so we can start computing the aggregates.
      // If the number of distinct keys in the batch is 1 we can
      // use the optimized code path of aggregateInput.
      VectorAggregationBufferRow[] aggregationBufferSets =
          aggregationBatchInfo.getAggregationBuffers();
      if (aggregationBatchInfo.getDistinctBufferSetCount() == 1) {
        VectorAggregateExpression.AggregationBuffer[] aggregationBuffers =
            aggregationBufferSets[0].getAggregationBuffers();
        for (int i = 0; i < aggregators.length; ++i) {
          aggregators[i].aggregateInput(aggregationBuffers[i], batch);
        }
      } else {
        for (int i = 0; i < aggregators.length; ++i) {
          aggregators[i].aggregateInputSelection(
              aggregationBufferSets,
              i,
              batch);
        }
      }
    }

    /**
     * Allocates a new aggregation buffer set.
     */
    protected VectorAggregationBufferRow allocateAggregationBuffer() throws HiveException {
      VectorAggregateExpression.AggregationBuffer[] aggregationBuffers =
          new VectorAggregateExpression.AggregationBuffer[aggregators.length];
      for (int i = 0; i < aggregators.length; ++i) {
        aggregationBuffers[i] = aggregators[i].getNewAggregationBuffer();
        aggregators[i].reset(aggregationBuffers[i]);
      }
      VectorAggregationBufferRow bufferSet =
          new VectorAggregationBufferRow(aggregationBuffers);
      return bufferSet;
    }
  }

  /**
   * Global aggregates (no GROUP BY clause, no keys).
   * This mode is very simple: there are no keys to consider, and it only flushes one row,
   * at closing. That one row must be flushed even if no input was seen (NULLs).
   */
  private class ProcessingModeGlobalAggregate extends ProcessingModeBase {

    /**
     * In global processing mode there is only one set of aggregation buffers.
     */
    private VectorAggregationBufferRow aggregationBuffers;

    @Override
    public void initialize(Configuration hconf) throws HiveException {
      aggregationBuffers = allocateAggregationBuffer();
      LOG.info("using global aggregation processing mode");
    }

    @Override
    public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet,
        boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException {
      for (int i = 0; i < aggregators.length; ++i) {
        aggregators[i].aggregateInput(aggregationBuffers.getAggregationBuffer(i), batch);
      }
    }

    @Override
    public void close(boolean aborted) throws HiveException {
      if (!aborted) {
        writeSingleRow(null, aggregationBuffers);
      }
    }
  }

  /**
   * Hash aggregate processing mode.
   */
  private class ProcessingModeHashAggregate extends ProcessingModeBase {

    /**
     * The global key-aggregation hash map.
     */
    private Map<KeyWrapper, VectorAggregationBufferRow> mapKeysAggregationBuffers;

    /**
     * Total per-hashtable-entry fixed memory (does not depend on key/agg values).
     */
    private long fixedHashEntrySize;

    /**
     * Average per-hashtable-entry variable size memory (depends on key/agg values).
     */
    private int avgVariableSize;

    /**
     * Number of entries added to the hashtable since the last check if it should flush.
     */
    private int numEntriesSinceCheck;

    /**
     * Sum of batch sizes processed (i.e. rows).
     */
    private long sumBatchSize;

    /**
     * Max number of entries in the vector group by aggregation hashtables.
     * Exceeding this will trigger a flush regardless of the memory pressure condition.
     */
    private int maxHtEntries = 1000000;

    /**
     * The number of new entries that must be added to the hashtable before a memory size check.
     */
    private int checkInterval = 10000;

    /**
     * Percent of entries to flush when the memory threshold is exceeded.
     */
    private float percentEntriesToFlush = 0.1f;

    /**
     * A soft reference used to detect memory pressure.
     */
    private SoftReference<Object> gcCanary = new SoftReference<Object>(new Object());

    /**
     * Counts the number of times the gcCanary died and was resurrected.
     */
    private long gcCanaryFlushes = 0L;

    /**
     * Count of rows since the last check for changing from aggregate to streaming mode.
     */
    private long lastModeCheckRowCount = 0;

    /**
     * Minimum factor by which the hash table must reduce the number of entries.
     * If this is not met, the processing switches to streaming mode.
     */
    private float minReductionHashAggr;

    /**
     * Number of rows processed between checks for the minReductionHashAggr factor.
     * TODO: there is overlap between numRowsCompareHashAggr and checkInterval
     */
    private long numRowsCompareHashAggr;

    @Override
    public void initialize(Configuration hconf) throws HiveException {
      // hconf is null in unit testing.
      if (null != hconf) {
        this.percentEntriesToFlush = HiveConf.getFloatVar(hconf,
            HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_FLUSH_PERCENT);
        this.checkInterval = HiveConf.getIntVar(hconf,
            HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_CHECKINTERVAL);
        this.maxHtEntries = HiveConf.getIntVar(hconf,
            HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_MAXENTRIES);
        this.minReductionHashAggr = HiveConf.getFloatVar(hconf,
            HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTION);
        this.numRowsCompareHashAggr = HiveConf.getIntVar(hconf,
            HiveConf.ConfVars.HIVEGROUPBYMAPINTERVAL);
      } else {
        this.percentEntriesToFlush =
            HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_FLUSH_PERCENT.defaultFloatVal;
        this.checkInterval =
            HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_CHECKINTERVAL.defaultIntVal;
        this.maxHtEntries =
            HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_MAXENTRIES.defaultIntVal;
        this.minReductionHashAggr =
            HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTION.defaultFloatVal;
        this.numRowsCompareHashAggr =
            HiveConf.ConfVars.HIVEGROUPBYMAPINTERVAL.defaultIntVal;
      }

      sumBatchSize = 0;

      mapKeysAggregationBuffers = new HashMap<KeyWrapper, VectorAggregationBufferRow>();
      computeMemoryLimits();
      LOG.debug("using hash aggregation processing mode");
    }

    @Override
    public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet,
        boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException {

      if (!groupingSetsPresent || isFirstGroupingSet) {

        // Evaluate the key expressions once.
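        // Only the first pass needs this: later grouping set passes reuse the same key
        // columns, and processBatch rewrites just the dummy grouping set id column.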
        for (int i = 0; i < keyExpressions.length; ++i) {
          keyExpressions[i].evaluate(batch);
        }
      }

      // First we traverse the batch to evaluate and prepare the KeyWrappers.
      // After this the KeyWrappers are properly set and the hash code is computed.
      if (!groupingSetsPresent) {
        keyWrappersBatch.evaluateBatch(batch);
      } else {
        keyWrappersBatch.evaluateBatchGroupingSets(batch, currentGroupingSetsOverrideIsNulls);
      }

      // Next we locate the aggregation buffer set for each key.
      prepareBatchAggregationBufferSets(batch);

      // Finally, evaluate the aggregators.
      processAggregators(batch);

      // Flush if memory limits were reached.
      // We keep flushing until the memory is under the threshold.
      int preFlushEntriesCount = numEntriesHashTable;
      while (shouldFlush(batch)) {
        flush(false);

        if (gcCanary.get() == null) {
          gcCanaryFlushes++;
          gcCanary = new SoftReference<Object>(new Object());
        }

        // Validate that some progress is being made.
        if (!(numEntriesHashTable < preFlushEntriesCount)) {
          if (LOG.isDebugEnabled()) {
            LOG.debug(String.format("Flush did not progress: %d entries before, %d entries after",
                preFlushEntriesCount,
                numEntriesHashTable));
          }
          break;
        }
        preFlushEntriesCount = numEntriesHashTable;
      }

      if (sumBatchSize == 0 && 0 != batch.size) {
        // Sample the first batch processed for variable sizes.
        updateAvgVariableSize(batch);
      }

      sumBatchSize += batch.size;
      lastModeCheckRowCount += batch.size;

      // Check if we should switch to streaming mode.
      checkHashModeEfficiency();
    }

    @Override
    public void close(boolean aborted) throws HiveException {
      if (!aborted) {
        flush(true);
      }
    }

    /**
     * Locates the aggregation buffer sets to use for each key in the current batch.
     * The keyWrappersBatch must have evaluated the current batch first.
     */
    private void prepareBatchAggregationBufferSets(VectorizedRowBatch batch) throws HiveException {
      // The aggregation batch vector needs to know when we start a new batch
      // to bump its internal version.
      aggregationBatchInfo.startBatch();

      if (batch.size == 0) {
        return;
      }

      // We now have to probe the global hash and find-or-allocate
      // the aggregation buffers to use for each key present in the batch.
      VectorHashKeyWrapper[] keyWrappers = keyWrappersBatch.getVectorHashKeyWrappers();

      final int n = keyExpressions.length == 0 ? 1 : batch.size;
      // Note: the row mapping is not relevant when
      // aggregationBatchInfo::getDistinctBufferSetCount() == 1.

      for (int i = 0; i < n; ++i) {
        VectorHashKeyWrapper kw = keyWrappers[i];
        VectorAggregationBufferRow aggregationBuffer = mapKeysAggregationBuffers.get(kw);
        if (null == aggregationBuffer) {
          // The probe failed, so we must allocate a set of aggregation buffers
          // and push the (keywrapper, buffers) pair into the hash.
          // It is very important to clone the keywrapper: the one we have from our
          // keyWrappersBatch is going to be reset/reused on the next batch.
          aggregationBuffer = allocateAggregationBuffer();
          mapKeysAggregationBuffers.put(kw.copyKey(), aggregationBuffer);
          numEntriesHashTable++;
          numEntriesSinceCheck++;
        }
        aggregationBatchInfo.mapAggregationBufferSet(aggregationBuffer, i);
      }
    }

    /**
     * Computes the memory limits for hash table flush (spill).
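     * The limit is maxHashTblMemory = maxMemory * memoryThreshold; shouldFlush() compares it
     * against the estimate numEntriesHashTable * (fixedHashEntrySize + avgVariableSize).
     * (For example, a threshold of 0.5 on a 4 GB heap yields a 2 GB hash table budget.)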
     */
    private void computeMemoryLimits() {
      JavaDataModel model = JavaDataModel.get();

      fixedHashEntrySize =
          model.hashMapEntry() +
          keyWrappersBatch.getKeysFixedSize() +
          aggregationBatchInfo.getAggregatorsFixedSize();

      MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean();
      maxMemory = memoryMXBean.getHeapMemoryUsage().getMax();
      memoryThreshold = conf.getMemoryThreshold();
      // Tests may leave this uninitialized, so better set it to 1.
      if (memoryThreshold == 0.0f) {
        memoryThreshold = 1.0f;
      }

      // Note: cast to long, not int; an int cast overflows for heaps larger than 2GB.
      maxHashTblMemory = (long)(maxMemory * memoryThreshold);

      if (LOG.isDebugEnabled()) {
        LOG.debug(String.format("maxMemory:%dMb (%d * %f) fixSize:%d (key:%d agg:%d)",
            maxHashTblMemory/1024/1024,
            maxMemory/1024/1024,
            memoryThreshold,
            fixedHashEntrySize,
            keyWrappersBatch.getKeysFixedSize(),
            aggregationBatchInfo.getAggregatorsFixedSize()));
      }
    }

    /**
     * Flushes the entries in the hash table by emitting output (forward).
     * When the parameter 'all' is true, all the entries are flushed.
     * @param all
     * @throws HiveException
     */
    private void flush(boolean all) throws HiveException {

      int entriesToFlush = all ? numEntriesHashTable :
          (int)(numEntriesHashTable * this.percentEntriesToFlush);
      int entriesFlushed = 0;

      if (LOG.isDebugEnabled()) {
        LOG.debug(String.format(
            "Flush %d %s entries:%d fixed:%d variable:%d (used:%dMb max:%dMb) gcCanary:%s",
            entriesToFlush, all ? "(all)" : "",
            numEntriesHashTable, fixedHashEntrySize, avgVariableSize,
            numEntriesHashTable * (fixedHashEntrySize + avgVariableSize)/1024/1024,
            maxHashTblMemory/1024/1024,
            gcCanary.get() == null ? "dead" : "alive"));
      }

      // Iterate the global (keywrapper, aggregationbuffers) map and emit a row for each key.
      Iterator<Map.Entry<KeyWrapper, VectorAggregationBufferRow>> iter =
          mapKeysAggregationBuffers.entrySet().iterator();
      while (iter.hasNext()) {
        Map.Entry<KeyWrapper, VectorAggregationBufferRow> pair = iter.next();

        writeSingleRow((VectorHashKeyWrapper) pair.getKey(), pair.getValue());

        if (!all) {
          iter.remove();
          --numEntriesHashTable;
          if (++entriesFlushed >= entriesToFlush) {
            break;
          }
        }
      }

      if (all) {
        mapKeysAggregationBuffers.clear();
        numEntriesHashTable = 0;
      }

      if (all && LOG.isDebugEnabled()) {
        LOG.debug(String.format("GC canary caused %d flushes", gcCanaryFlushes));
      }
    }

    /**
     * Returns true if the memory threshold for the hash table was reached.
     */
    private boolean shouldFlush(VectorizedRowBatch batch) {
      if (batch.size == 0) {
        return false;
      }
      // numEntriesSinceCheck is the number of entries added to the hash table
      // since the last time we checked the average variable size.
      if (numEntriesSinceCheck >= this.checkInterval) {
        // We're going to update the average variable row size by sampling the current batch.
        updateAvgVariableSize(batch);
        numEntriesSinceCheck = 0;
      }
      if (numEntriesHashTable > this.maxHtEntries ||
          numEntriesHashTable * (fixedHashEntrySize + avgVariableSize) > maxHashTblMemory) {
        return true;
      }
      if (gcCanary.get() == null) {
        return true;
      }

      return false;
    }

    /**
     * Updates the average variable size of the hash table entries.
     * The average is only updated by probing the batch that added the entry to the hash
     * table that caused the check threshold to be reached.
     */
    private void updateAvgVariableSize(VectorizedRowBatch batch) {
      int keyVariableSize = keyWrappersBatch.getVariableSize(batch.size);
      int aggVariableSize = aggregationBatchInfo.getVariableSize(batch.size);

      // This assumes the distribution of variable size keys/aggregates in the input
      // is the same as the distribution of variable sizes in the hash entries.
      avgVariableSize = (int)((avgVariableSize * sumBatchSize + keyVariableSize + aggVariableSize) /
          (sumBatchSize + batch.size));
    }

    /**
     * Checks if the hash table reduces the number of entries by at least the
     * minReductionHashAggr factor.
     * @throws HiveException
     */
    private void checkHashModeEfficiency() throws HiveException {
      if (lastModeCheckRowCount > numRowsCompareHashAggr) {
        lastModeCheckRowCount = 0;
        if (LOG.isDebugEnabled()) {
          LOG.debug(String.format("checkHashModeEfficiency: HT:%d RC:%d MIN:%d",
              numEntriesHashTable, sumBatchSize,
              (long)(sumBatchSize * minReductionHashAggr)));
        }
        if (numEntriesHashTable > sumBatchSize * minReductionHashAggr) {
          flush(true);

          changeToStreamingMode();
        }
      }
    }
  }

  /**
   * Streaming processing mode on ALREADY GROUPED data. Each input VectorizedRowBatch may
   * have a mix of different keys. Intermediate values are flushed each time the key changes.
   */
  private class ProcessingModeStreaming extends ProcessingModeBase {

    /**
     * The aggregation buffers used in streaming mode.
     */
    private VectorAggregationBufferRow currentStreamingAggregators;

    /**
     * The current key, used in streaming mode.
     */
    private VectorHashKeyWrapper streamingKey;

    /**
     * The keys that need to be flushed at the end of the current batch.
     */
    private final VectorHashKeyWrapper[] keysToFlush =
        new VectorHashKeyWrapper[VectorizedRowBatch.DEFAULT_SIZE];

    /**
     * The aggregates that need to be flushed at the end of the current batch.
     */
    private final VectorAggregationBufferRow[] rowsToFlush =
        new VectorAggregationBufferRow[VectorizedRowBatch.DEFAULT_SIZE];

    /**
     * A pool of VectorAggregationBufferRow to avoid repeated allocations.
     */
    private VectorUtilBatchObjectPool<VectorAggregationBufferRow>
        streamAggregationBufferRowPool;

    @Override
    public void initialize(Configuration hconf) throws HiveException {
      streamAggregationBufferRowPool = new VectorUtilBatchObjectPool<VectorAggregationBufferRow>(
          VectorizedRowBatch.DEFAULT_SIZE,
          new VectorUtilBatchObjectPool.IAllocator<VectorAggregationBufferRow>() {

            @Override
            public VectorAggregationBufferRow alloc() throws HiveException {
              return allocateAggregationBuffer();
            }

            @Override
            public void free(VectorAggregationBufferRow t) {
              // Nothing to do
            }
          });
      LOG.info("using unsorted streaming aggregation processing mode");
    }

    @Override
    public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet,
        boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException {

      if (!groupingSetsPresent || isFirstGroupingSet) {

        // Evaluate the key expressions once.
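        // As in hash mode, only the first grouping set pass evaluates the keys; later
        // passes change just the dummy grouping set id column.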
        for (int i = 0; i < keyExpressions.length; ++i) {
          keyExpressions[i].evaluate(batch);
        }
      }

      // First we traverse the batch to evaluate and prepare the KeyWrappers.
      // After this the KeyWrappers are properly set and the hash code is computed.
      if (!groupingSetsPresent) {
        keyWrappersBatch.evaluateBatch(batch);
      } else {
        keyWrappersBatch.evaluateBatchGroupingSets(batch, currentGroupingSetsOverrideIsNulls);
      }

      VectorHashKeyWrapper[] batchKeys = keyWrappersBatch.getVectorHashKeyWrappers();

      if (streamingKey == null) {
        // This is the first batch we process after switching from hash mode.
        currentStreamingAggregators = streamAggregationBufferRowPool.getFromPool();
        streamingKey = (VectorHashKeyWrapper) batchKeys[0].copyKey();
      }

      aggregationBatchInfo.startBatch();
      int flushMark = 0;

      for (int i = 0; i < batch.size; ++i) {
        if (!batchKeys[i].equals(streamingKey)) {
          // We've encountered a new key, so we must save the current one.
          // We can't forward yet; the aggregators have not been evaluated.
          rowsToFlush[flushMark] = currentStreamingAggregators;
          if (keysToFlush[flushMark] == null) {
            keysToFlush[flushMark] = (VectorHashKeyWrapper) streamingKey.copyKey();
          } else {
            streamingKey.duplicateTo(keysToFlush[flushMark]);
          }

          currentStreamingAggregators = streamAggregationBufferRowPool.getFromPool();
          batchKeys[i].duplicateTo(streamingKey);
          ++flushMark;
        }
        aggregationBatchInfo.mapAggregationBufferSet(currentStreamingAggregators, i);
      }

      // Evaluate the aggregators.
      processAggregators(batch);

      // Now flush/forward all keys/rows, except the last (current) one.
      for (int i = 0; i < flushMark; ++i) {
        writeSingleRow(keysToFlush[i], rowsToFlush[i]);
        rowsToFlush[i].reset();
        streamAggregationBufferRowPool.putInPool(rowsToFlush[i]);
      }
    }

    @Override
    public void close(boolean aborted) throws HiveException {
      if (!aborted && null != streamingKey) {
        writeSingleRow(streamingKey, currentStreamingAggregators);
      }
    }
  }

  /**
   * Sorted reduce group batch processing mode. Each input VectorizedRowBatch will have the
   * same key. On endGroup (or close), the intermediate values are flushed.
   *
   * We build the output rows one-at-a-time in the output vectorized row batch (outputBatch)
   * in 2 steps:
   *
   *   1) Just after startGroup, we copy the group key to the next position in the output batch,
   *      but don't increment the size in the batch (yet). This is done with the copyGroupKey
   *      method of VectorGroupKeyHelper. The next position is outputBatch.size
   *
   *      We know the same key is used for the whole batch (i.e. repeating) since that is how
   *      vectorized reduce-shuffle feeds the batches to us.
   *
   *   2) Later at endGroup after reduce-shuffle has fed us all the input batches for the group,
   *      we fill in the aggregation columns in outputBatch at outputBatch.size. Our method
   *      writeGroupRow does this and finally increments outputBatch.size.
   *
   */
  private class ProcessingModeReduceMergePartial extends ProcessingModeBase {

    private boolean inGroup;
    private boolean first;

    /**
     * The group vector key helper.
     */
    VectorGroupKeyHelper groupKeyHelper;

    /**
     * The group vector aggregation buffers.
     */
    private VectorAggregationBufferRow groupAggregators;

    /**
     * Buffer to hold string values.
     */
    private DataOutputBuffer buffer;

    @Override
    public void initialize(Configuration hconf) throws HiveException {
      inGroup = false;

      // We do not include the dummy grouping set column in the output.
      // So we pass outputKeyLength instead of keyExpressions.length.
      groupKeyHelper = new VectorGroupKeyHelper(outputKeyLength);
      groupKeyHelper.init(keyExpressions);
      groupAggregators = allocateAggregationBuffer();
      buffer = new DataOutputBuffer();
      LOG.info("using sorted group batch aggregation processing mode");
    }

    @Override
    public void startGroup() throws HiveException {
      inGroup = true;
      first = true;
    }

    @Override
    public void endGroup() throws HiveException {
      if (inGroup && !first) {
        writeGroupRow(groupAggregators, buffer);
        groupAggregators.reset();
      }
      inGroup = false;
    }

    @Override
    public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet,
        boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException {
      assert(inGroup);
      if (first) {
        // Copy the group key to the output batch now. We'll copy in the aggregates
        // at the end of the group.
        first = false;

        // Evaluate the key expressions of just this first batch to get the correct key.
        for (int i = 0; i < outputKeyLength; i++) {
          keyExpressions[i].evaluate(batch);
        }

        groupKeyHelper.copyGroupKey(batch, outputBatch, buffer);
      }

      // Aggregate this batch.
      for (int i = 0; i < aggregators.length; ++i) {
        aggregators[i].aggregateInput(groupAggregators.getAggregationBuffer(i), batch);
      }
    }

    @Override
    public void close(boolean aborted) throws HiveException {
      if (!aborted && inGroup && !first) {
        writeGroupRow(groupAggregators, buffer);
      }
    }
  }

  /**
   * Current processing mode. Processing mode can change (e.g. hash -> streaming).
   */
  private transient IProcessingMode processingMode;

  private static final long serialVersionUID = 1L;

  public VectorGroupByOperator(CompilationOpContext ctx, VectorizationContext vContext,
      OperatorDesc conf) throws HiveException {
    this(ctx);
    GroupByDesc desc = (GroupByDesc) conf;
    this.conf = desc;
    vectorDesc = (VectorGroupByDesc) desc.getVectorDesc();
    keyExpressions = vectorDesc.getKeyExpressions();
    aggregators = vectorDesc.getAggregators();
    isVectorOutput = vectorDesc.isVectorOutput();
    vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames(),
        /* vContextEnvironment */ vContext);
  }

  /** Kryo ctor. */
  @VisibleForTesting
  public VectorGroupByOperator() {
    super();
  }

  public VectorGroupByOperator(CompilationOpContext ctx) {
    super(ctx);
  }

  private void setupGroupingSets() {

    groupingSetsPresent = conf.isGroupingSetsPresent();
    if (!groupingSetsPresent) {
      groupingSets = null;
      groupingSetsPosition = -1;
      groupingSetsDummyVectorExpression = null;
      allGroupingSetsOverrideIsNulls = null;
      return;
    }

    groupingSets = ArrayUtils.toPrimitive(conf.getListGroupingSets().toArray(new Integer[0]));
    groupingSetsPosition = conf.getGroupingSetPosition();

    allGroupingSetsOverrideIsNulls = new boolean[groupingSets.length][];

    int pos = 0;
    for (int groupingSet: groupingSets) {

      // Create the mapping corresponding to the grouping set.

      // Assume all columns are null, except the dummy column, which is always non-null.
      boolean[] groupingSetsOverrideIsNull = new boolean[keyExpressions.length];
      Arrays.fill(groupingSetsOverrideIsNull, true);
      groupingSetsOverrideIsNull[groupingSetsPosition] = false;

      // Add the keys of this grouping set.
      FastBitSet bitset = GroupByOperator.groupingSet2BitSet(groupingSet, groupingSetsPosition);
      for (int keyPos = bitset.nextClearBit(0); keyPos < groupingSetsPosition;
          keyPos = bitset.nextClearBit(keyPos + 1)) {
        groupingSetsOverrideIsNull[keyPos] = false;
      }

      allGroupingSetsOverrideIsNulls[pos] = groupingSetsOverrideIsNull;
      pos++;
    }

    // The last key column is the dummy grouping set id.
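    // (It was vectorized as a ConstantVectorExpression, which is what lets processBatch
    // overwrite its value cheaply on each grouping set pass.)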
    //
    // Figure out which (scratch) column was used so we can overwrite the dummy id.

    groupingSetsDummyVectorExpression =
        (ConstantVectorExpression) keyExpressions[groupingSetsPosition];
  }

  @Override
  protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);

    List<ObjectInspector> objectInspectors = new ArrayList<ObjectInspector>();

    List<ExprNodeDesc> keysDesc = conf.getKeys();
    try {

      List<String> outputFieldNames = conf.getOutputColumnNames();

      // The grouping id should be pruned; it is the last of the key columns.
      // See ColumnPrunerGroupByProc.
      outputKeyLength =
          conf.pruneGroupingSetId() ? keyExpressions.length - 1 : keyExpressions.length;

      keyOutputWriters = new VectorExpressionWriter[outputKeyLength];

      for (int i = 0; i < outputKeyLength; ++i) {
        keyOutputWriters[i] = VectorExpressionWriterFactory.
            genVectorExpressionWritable(keysDesc.get(i));
        objectInspectors.add(keyOutputWriters[i].getObjectInspector());
      }

      for (int i = 0; i < aggregators.length; ++i) {
        aggregators[i].init(conf.getAggregators().get(i));
        objectInspectors.add(aggregators[i].getOutputObjectInspector());
      }

      keyWrappersBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions);
      aggregationBatchInfo = new VectorAggregationBufferBatch();
      aggregationBatchInfo.compileAggregationBatchInfo(aggregators);

      LOG.info("VectorGroupByOperator is vector output {}", isVectorOutput);
      outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
          outputFieldNames, objectInspectors);
      if (isVectorOutput) {
        vrbCtx = new VectorizedRowBatchCtx();
        vrbCtx.init((StructObjectInspector) outputObjInspector,
            vOutContext.getScratchColumnTypeNames());
        outputBatch = vrbCtx.createVectorizedRowBatch();
        vectorAssignRow = new VectorAssignRow();
        vectorAssignRow.init((StructObjectInspector) outputObjInspector,
            vOutContext.getProjectedColumns());
      }

    } catch (HiveException he) {
      throw he;
    } catch (Throwable e) {
      throw new HiveException(e);
    }

    forwardCache = new Object[outputKeyLength + aggregators.length];

    setupGroupingSets();

    switch (vectorDesc.getProcessingMode()) {
    case GLOBAL:
      Preconditions.checkState(outputKeyLength == 0);
      Preconditions.checkState(!groupingSetsPresent);
      processingMode = this.new ProcessingModeGlobalAggregate();
      break;
    case HASH:
      processingMode = this.new ProcessingModeHashAggregate();
      break;
    case MERGE_PARTIAL:
      Preconditions.checkState(!groupingSetsPresent);
      processingMode = this.new ProcessingModeReduceMergePartial();
      break;
    case STREAMING:
      processingMode = this.new ProcessingModeStreaming();
      break;
    default:
      throw new RuntimeException("Unsupported vector GROUP BY processing mode " +
          vectorDesc.getProcessingMode().name());
    }
    processingMode.initialize(hconf);
  }

  /**
   * Changes the processing mode to streaming.
   * This is done at the request of the hash agg mode, if the number of keys
   * exceeds the minReductionHashAggr factor.
   * @throws HiveException
   */
  private void changeToStreamingMode() throws HiveException {
    processingMode = this.new ProcessingModeStreaming();
    processingMode.initialize(null);
    LOG.trace("switched to streaming mode");
  }

  @Override
  public void startGroup() throws HiveException {
    processingMode.startGroup();

    // We do not call startGroup on operators below because we are batching rows in
    // an output batch and the semantics will not work.
    // super.startGroup();
  }

  @Override
  public void endGroup() throws HiveException {
    processingMode.endGroup();

    // We do not call endGroup on operators below because we are batching rows in
    // an output batch and the semantics will not work.
    // super.endGroup();
  }

  @Override
  public void process(Object row, int tag) throws HiveException {
    VectorizedRowBatch batch = (VectorizedRowBatch) row;
    if (batch.size > 0) {
      processingMode.processBatch(batch);
    }
  }

  /**
   * Emits a single row, made from the key and the row aggregation buffers' values;
   * kw is null if keyExpressions.length is 0.
   * @param kw
   * @param agg
   * @throws HiveException
   */
  private void writeSingleRow(VectorHashKeyWrapper kw, VectorAggregationBufferRow agg)
      throws HiveException {
    int fi = 0;
    if (!isVectorOutput) {
      // Output row.
      for (int i = 0; i < outputKeyLength; ++i) {
        forwardCache[fi++] = keyWrappersBatch.getWritableKeyValue(
            kw, i, keyOutputWriters[i]);
      }
      for (int i = 0; i < aggregators.length; ++i) {
        forwardCache[fi++] = aggregators[i].evaluateOutput(agg.getAggregationBuffer(i));
      }
      forward(forwardCache, outputObjInspector);
    } else {
      // Output keys and aggregates into the output batch.
      for (int i = 0; i < outputKeyLength; ++i) {
        vectorAssignRow.assignRowColumn(outputBatch, outputBatch.size, fi++,
            keyWrappersBatch.getWritableKeyValue(kw, i, keyOutputWriters[i]));
      }
      for (int i = 0; i < aggregators.length; ++i) {
        vectorAssignRow.assignRowColumn(outputBatch, outputBatch.size, fi++,
            aggregators[i].evaluateOutput(agg.getAggregationBuffer(i)));
      }
      ++outputBatch.size;
      if (outputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
        flushOutput();
      }
    }
  }

  /**
   * Emits a (reduce) group row, made from the key (copied in at the beginning of the group)
   * and the row aggregation buffers' values.
   * @param agg
   * @param buffer
   * @throws HiveException
   */
  private void writeGroupRow(VectorAggregationBufferRow agg, DataOutputBuffer buffer)
      throws HiveException {
    int fi = outputKeyLength;   // Start after the group keys.
    for (int i = 0; i < aggregators.length; ++i) {
      vectorAssignRow.assignRowColumn(outputBatch, outputBatch.size, fi++,
          aggregators[i].evaluateOutput(agg.getAggregationBuffer(i)));
    }
    ++outputBatch.size;
    if (outputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
      flushOutput();
      buffer.reset();
    }
  }

  private void flushOutput() throws HiveException {
    forward(outputBatch, null);
    outputBatch.reset();
  }

  @Override
  public void closeOp(boolean aborted) throws HiveException {
    processingMode.close(aborted);
    if (!aborted && isVectorOutput && outputBatch.size > 0) {
      flushOutput();
    }
  }

  public VectorExpression[] getKeyExpressions() {
    return keyExpressions;
  }

  public void setKeyExpressions(VectorExpression[] keyExpressions) {
    this.keyExpressions = keyExpressions;
  }

  public VectorAggregateExpression[] getAggregators() {
    return aggregators;
  }

  public void setAggregators(VectorAggregateExpression[] aggregators) {
    this.aggregators = aggregators;
  }

  @Override
  public VectorizationContext getOuputVectorizationContext() {
    return vOutContext;
  }

  @Override
  public OperatorType getType() {
    return OperatorType.GROUPBY;
  }

  @Override
  public String getName() {
    return getOperatorName();
  }

  public static String getOperatorName() {
    return "GBY";
  }

}