// VectorAggregationBufferBatch.java example
//
// Explorer
// hive-master
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec.vector;

import java.util.Arrays;

import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
import org.apache.hadoop.hive.ql.util.JavaDataModel;

/**
 * Maps the rows of a batch to the aggregation buffer sets (one per key) that each
 * row aggregates into, and tracks the distinct buffer sets seen in the current batch.
 *
 * <p>As implied by the methods below, a batch is processed by calling
 * {@link #startBatch()} and then
 * {@link #mapAggregationBufferSet(VectorAggregationBufferRow, int)} for each row.
 */
public class VectorAggregationBufferBatch {

  /**
   * Batch sized array of aggregation buffer sets, indexed by row number.
   * The array is preallocated and reused for each batch, but the individual entries
   * will reference different aggregation buffer sets from batch to batch.
   * The array is not reset between batches: entries for rows that were not mapped
   * in the current batch still hold references from earlier batches.
   */
  private final VectorAggregationBufferRow[] aggregationBuffers;

  /**
   * Same as {@link #aggregationBuffers} but holding each buffer set only once,
   * in order of first appearance in the current batch. Only the first
   * {@link #distinctCount} entries are valid; the rest are stale.
   */
  private final VectorAggregationBufferRow[] distinctAggregationBuffers;

  /**
   * Versioning number, incremented on each batch. This allows the per-batch index to be
   * cached in the aggregation buffer sets themselves while still being able to
   * detect stale info.
   */
  private int version;

  /**
   * Number of distinct aggregation buffer sets (ie. keys) used in the current batch.
   */
  private int distinctCount;

  /**
   * Memory consumed by a set of aggregation buffers, as computed by
   * {@link #compileAggregationBatchInfo(VectorAggregateExpression[])}.
   */
  private long aggregatorsFixedSize;

  /**
   * Array of indexes for aggregators that have variable size.
   * Starts out empty so the accessors are safe to call before
   * {@link #compileAggregationBatchInfo(VectorAggregateExpression[])} has run.
   */
  private int[] variableSizeAggregators = new int[0];

  /**
   * Tells whether any of the compiled aggregators has a variable size.
   *
   * @return true if at least one aggregator reported a variable size; false otherwise,
   *         including when no aggregators have been compiled yet
   */
  public boolean getHasVariableSize() {
    return variableSizeAggregators.length > 0;
  }

  /**
   * Returns the fixed size consumed by the aggregation buffers.
   *
   * @return the fixed size computed by the last call to
   *         {@link #compileAggregationBatchInfo(VectorAggregateExpression[])},
   *         or 0 if it was never called
   */
  public long getAggregatorsFixedSize() {
    return aggregatorsFixedSize;
  }

  /**
   * Returns the row-indexed array of aggregation buffer sets for the current batch.
   * The internal array is returned directly (no copy). Entries for rows that were not
   * mapped in the current batch are stale from previous batches.
   *
   * @return the internal row to buffer set array
   */
  public VectorAggregationBufferRow[] getAggregationBuffers() {
    return aggregationBuffers;
  }

  /**
   * Returns the number of distinct aggregation buffer sets (ie. keys) in the current batch.
   *
   * @return the count of distinct buffer sets mapped since the last {@link #startBatch()}
   */
  public int getDistinctBufferSetCount () {
    return distinctCount;
  }


  /**
   * Preallocates both internal arrays with {@link VectorizedRowBatch#DEFAULT_SIZE} slots.
   */
  public VectorAggregationBufferBatch() {
    aggregationBuffers = new VectorAggregationBufferRow[VectorizedRowBatch.DEFAULT_SIZE];
    distinctAggregationBuffers = new VectorAggregationBufferRow[VectorizedRowBatch.DEFAULT_SIZE];
  }

  /**
   * Begins a new batch: increments the version, which makes any version cached on a
   * buffer set by a previous batch stale, and resets the distinct buffer set count.
   */
  public void startBatch() {
    version++;
    distinctCount = 0;
  }

  /**
   * Assigns the given aggregation buffer set to a given batch row (by row number).
   * If the version cached on the buffer set differs from the current batch version,
   * this is the first time the set is seen in this batch: it is stamped with the
   * current version and its position among the distinct sets, and recorded as distinct.
   *
   * @param bufferSet the aggregation buffer set the row aggregates into
   * @param row the row number within the batch
   */
  public void mapAggregationBufferSet(VectorAggregationBufferRow bufferSet, int row) {
    if (version != bufferSet.getVersion()) {
      bufferSet.setVersionAndIndex(version, distinctCount);
      distinctAggregationBuffers[distinctCount] = bufferSet;
      ++distinctCount;
    }
    aggregationBuffers[row] = bufferSet;
  }

  /**
   * Computes the fixed memory size of one set of aggregation buffers and records which
   * aggregators have a variable size component.
   *
   * @param aggregators the aggregate expressions, in the order their buffers are indexed
   */
  public void compileAggregationBatchInfo(VectorAggregateExpression[] aggregators) {
    JavaDataModel model = JavaDataModel.get();
    // Sized for the worst case (every aggregator variable sized); trimmed at the end.
    int[] variableIndexes = new int[aggregators.length];
    int variableCount = 0;

    // Aligned footprint of an object with two primitive1 fields and one reference
    // (presumably the buffer row object itself -- confirm against VectorAggregationBufferRow).
    aggregatorsFixedSize = JavaDataModel.alignUp(
        model.object() +
        model.primitive1()*2 +
        model.ref(),
        model.memoryAlign());

    // Plus the object array that holds one buffer per aggregator.
    aggregatorsFixedSize += model.lengthForObjectArrayOfSize(aggregators.length);
    for (int i = 0; i < aggregators.length; ++i) {
      VectorAggregateExpression aggregator = aggregators[i];
      aggregatorsFixedSize += aggregator.getAggregationBufferFixedSize();
      if (aggregator.hasVariableSize()) {
        variableIndexes[variableCount] = i;
        ++variableCount;
      }
    }
    this.variableSizeAggregators = Arrays.copyOf(variableIndexes, variableCount);
  }

  /**
   * Estimates the variable size for {@code batchSize} rows: the variable sizes of all
   * variable sized aggregation buffers are summed over the distinct buffer sets of the
   * current batch, averaged per distinct set, and scaled by {@code batchSize}.
   *
   * @param batchSize the number of rows to scale the per-key average by
   * @return the estimated variable size; 0 when the current batch has no distinct
   *         buffer sets; saturated at {@link Integer#MAX_VALUE} if the estimate
   *         does not fit in an int
   */
  public int getVariableSize(int batchSize) {
    // Nothing mapped in this batch: avoid dividing by zero below.
    if (distinctCount == 0) {
      return 0;
    }
    // Accumulate in a long so the sum and the product below cannot overflow an int.
    long totalVariableSize = 0;
    for (int r = 0; r < distinctCount; ++r) {
      VectorAggregationBufferRow buf = distinctAggregationBuffers[r];
      for (int aggregatorIndex : variableSizeAggregators) {
        totalVariableSize += buf.getAggregationBuffer(aggregatorIndex).getVariableSize();
      }
    }
    long estimate = (totalVariableSize * batchSize) / distinctCount;
    return (int) Math.min(estimate, Integer.MAX_VALUE);
  }

}