/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.operator.aggregation.builder;
import com.facebook.presto.memory.AbstractAggregatedMemoryContext;
import com.facebook.presto.memory.LocalMemoryContext;
import com.facebook.presto.operator.HashCollisionsCounter;
import com.facebook.presto.operator.MergeHashSort;
import com.facebook.presto.operator.OperatorContext;
import com.facebook.presto.operator.aggregation.AccumulatorFactory;
import com.facebook.presto.spi.Page;
import com.facebook.presto.spi.type.Type;
import com.facebook.presto.spiller.Spiller;
import com.facebook.presto.spiller.SpillerFactory;
import com.facebook.presto.sql.gen.JoinCompiler;
import com.facebook.presto.sql.planner.plan.AggregationNode;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.util.concurrent.ListenableFuture;
import io.airlift.units.DataSize;

import java.util.Iterator;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.ExecutionException;

import static com.google.common.base.Preconditions.checkState;
import static com.google.common.util.concurrent.Futures.immediateFuture;
import static io.airlift.concurrent.MoreFutures.getFutureValue;
import static java.lang.Math.max;
import static java.util.Objects.requireNonNull;
/**
 * {@link HashAggregationBuilder} that spills the contents of its in-memory
 * {@link InMemoryHashAggregationBuilder} to disk whenever memory usage exceeds
 * {@code memoryLimitBeforeSpill}. The final result is produced either directly
 * from memory (when nothing was spilled), by merging the spilled runs together
 * with the remaining in-memory data, or — when the in-memory part is too large —
 * by spilling everything and merging from disk only.
 *
 * <p>Not thread safe: intended to be driven by a single operator thread. The
 * only concurrency is the background spill tracked by {@code spillInProgress};
 * callers must observe {@link #isBlocked()} before feeding more pages.
 */
public class SpillableHashAggregationBuilder
        implements HashAggregationBuilder
{
    // replaced with a fresh (empty) instance each time its content is handed off to a spill
    private InMemoryHashAggregationBuilder hashAggregationBuilder;
    private final SpillerFactory spillerFactory;
    private final List<AccumulatorFactory> accumulatorFactories;
    private final AggregationNode.Step step;
    private final int expectedGroups;
    private final List<Type> groupByTypes;
    private final List<Integer> groupByChannels;
    private final Optional<Integer> hashChannel;
    private final OperatorContext operatorContext;
    // 0 or negative disables spilling entirely (see shouldSpill())
    private final long memorySizeBeforeSpill;
    private final long memoryLimitForMergeWithMemory;
    private Optional<Spiller> spiller = Optional.empty();
    private Optional<MergingHashAggregationBuilder> merger = Optional.empty();
    private Optional<MergeHashSort> mergeHashSort = Optional.empty();
    // completed future when no spill is running; carries the failure of the last spill, if any
    private ListenableFuture<?> spillInProgress = immediateFuture(null);
    private final LocalMemoryContext aggregationMemoryContext;
    private final LocalMemoryContext spillMemoryContext;
    private final JoinCompiler joinCompiler;

    // hash collision statistics accumulated across rebuilt hashAggregationBuilder instances
    private long hashCollisions;
    private double expectedHashCollisions;

    public SpillableHashAggregationBuilder(
            List<AccumulatorFactory> accumulatorFactories,
            AggregationNode.Step step,
            int expectedGroups,
            List<Type> groupByTypes,
            List<Integer> groupByChannels,
            Optional<Integer> hashChannel,
            OperatorContext operatorContext,
            DataSize memoryLimitBeforeSpill,
            DataSize memoryLimitForMergeWithMemory,
            SpillerFactory spillerFactory,
            JoinCompiler joinCompiler)
    {
        this.accumulatorFactories = requireNonNull(accumulatorFactories, "accumulatorFactories is null");
        this.step = requireNonNull(step, "step is null");
        this.expectedGroups = expectedGroups;
        this.groupByTypes = requireNonNull(groupByTypes, "groupByTypes is null");
        this.groupByChannels = requireNonNull(groupByChannels, "groupByChannels is null");
        this.hashChannel = requireNonNull(hashChannel, "hashChannel is null");
        this.operatorContext = requireNonNull(operatorContext, "operatorContext is null");
        this.memorySizeBeforeSpill = requireNonNull(memoryLimitBeforeSpill, "memoryLimitBeforeSpill is null").toBytes();
        this.memoryLimitForMergeWithMemory = requireNonNull(memoryLimitForMergeWithMemory, "memoryLimitForMergeWithMemory is null").toBytes();
        this.spillerFactory = requireNonNull(spillerFactory, "spillerFactory is null");
        this.joinCompiler = requireNonNull(joinCompiler, "joinCompiler is null");

        AbstractAggregatedMemoryContext systemMemoryContext = operatorContext.getSystemMemoryContext();
        this.aggregationMemoryContext = systemMemoryContext.newLocalMemoryContext();
        this.spillMemoryContext = systemMemoryContext.newLocalMemoryContext();

        rebuildHashAggregationBuilder();
    }

    @Override
    public void processPage(Page page)
    {
        checkState(hasPreviousSpillCompletedSuccessfully(), "Previous spill hasn't yet finished");

        hashAggregationBuilder.processPage(page);
        if (shouldSpill(getSizeInMemory())) {
            spillToDisk();
        }
    }

    @Override
    public void updateMemory()
    {
        aggregationMemoryContext.setBytes(getSizeInMemory());
        if (spillInProgress.isDone()) {
            // memory transferred to the spill thread in spillToDisk() has been released
            spillMemoryContext.setBytes(0L);
        }
    }

    /**
     * Returns the current memory footprint of the in-memory aggregation,
     * including the memory that would be needed to sort its group ids for spilling.
     */
    public long getSizeInMemory()
    {
        // TODO: we could skip memory reservation for hashAggregationBuilder.getGroupIdsSortingSize()
        // if before building result from hashAggregationBuilder we would convert it to "read only" version.
        // Read only version of GroupByHash from hashAggregationBuilder could be compacted by dropping
        // most of it's field, freeing up some memory that could be used for sorting.
        return hashAggregationBuilder.getSizeInMemory() + hashAggregationBuilder.getGroupIdsSortingSize();
    }

    @Override
    public void recordHashCollisions(HashCollisionsCounter hashCollisionsCounter)
    {
        hashCollisionsCounter.recordHashCollision(hashCollisions, expectedHashCollisions);
        hashCollisions = 0;
        expectedHashCollisions = 0;
    }

    @Override
    public boolean isFull()
    {
        // never reports full: when memory runs out this builder spills instead of blocking
        return false;
    }

    @Override
    public ListenableFuture<?> isBlocked()
    {
        return spillInProgress;
    }

    private boolean hasPreviousSpillCompletedSuccessfully()
    {
        if (isBlocked().isDone()) {
            // check for exception from previous spill for early failure
            getFutureValue(spillInProgress);
            return true;
        }
        else {
            return false;
        }
    }

    private boolean shouldSpill(long memorySize)
    {
        return (memorySizeBeforeSpill > 0 && memorySize > memorySizeBeforeSpill);
    }

    private boolean shouldMergeWithMemory(long memorySize)
    {
        return memorySize < memoryLimitForMergeWithMemory;
    }

    @Override
    public Iterator<Page> buildResult()
    {
        checkState(hasPreviousSpillCompletedSuccessfully(), "Previous spill hasn't yet finished");

        if (!spiller.isPresent()) {
            // nothing was spilled; the in-memory builder holds the complete result
            return hashAggregationBuilder.buildResult();
        }

        if (shouldMergeWithMemory(getSizeInMemory())) {
            return mergeFromDiskAndMemory();
        }

        // The in-memory part is too large to merge directly: spill it and merge
        // everything from disk. getFutureValue() blocks until the spill completes,
        // restores the interrupt flag only on InterruptedException and unwraps the
        // cause of ExecutionException, so a plain spill failure does not clobber
        // this thread's interrupt status.
        getFutureValue(spillToDisk());
        return mergeFromDisk();
    }

    @Override
    public void close()
    {
        // close all resources even if an earlier close() throws
        try {
            merger.ifPresent(MergingHashAggregationBuilder::close);
        }
        finally {
            try {
                spiller.ifPresent(Spiller::close);
            }
            finally {
                mergeHashSort.ifPresent(MergeHashSort::close);
            }
        }
    }

    private ListenableFuture<?> spillToDisk()
    {
        checkState(hasPreviousSpillCompletedSuccessfully(), "Previous spill hasn't yet finished");
        hashAggregationBuilder.setOutputPartial();

        if (!spiller.isPresent()) {
            spiller = Optional.of(spillerFactory.create(
                    hashAggregationBuilder.buildTypes(),
                    operatorContext.getSpillContext(),
                    operatorContext.getSystemMemoryContext().newAggregatedMemoryContext()));
        }

        long spillMemoryUsage = getSizeInMemory();

        // start spilling process with current content of the hashAggregationBuilder builder...
        spillInProgress = spiller.get().spill(hashAggregationBuilder.buildHashSortedResult());
        // ... and immediately create new hashAggregationBuilder so effectively memory ownership
        // over hashAggregationBuilder is transferred from this thread to a spilling thread
        rebuildHashAggregationBuilder();

        // First decrease memory usage of aggregation context...
        aggregationMemoryContext.setBytes(getSizeInMemory());
        // And then transfer this memory to spill context
        // TODO: is there an easy way to do this atomically?
        spillMemoryContext.setBytes(spillMemoryUsage);

        return spillInProgress;
    }

    private Iterator<Page> mergeFromDiskAndMemory()
    {
        checkState(spiller.isPresent());

        hashAggregationBuilder.setOutputPartial();
        mergeHashSort = Optional.of(new MergeHashSort(operatorContext.getSystemMemoryContext().newAggregatedMemoryContext()));

        // merge spilled runs together with the (still in-memory) remainder
        Iterator<Page> mergedSpilledPages = mergeHashSort.get().merge(
                groupByTypes,
                hashAggregationBuilder.buildIntermediateTypes(),
                ImmutableList.<Iterator<Page>>builder()
                        .addAll(spiller.get().getSpills())
                        .add(hashAggregationBuilder.buildHashSortedResult())
                        .build());

        // the merge may only use the memory left over after the in-memory data's reservation
        return mergeSortedPages(mergedSpilledPages, max(memorySizeBeforeSpill - memoryLimitForMergeWithMemory, 1L));
    }

    private Iterator<Page> mergeFromDisk()
    {
        checkState(spiller.isPresent());

        mergeHashSort = Optional.of(new MergeHashSort(operatorContext.getSystemMemoryContext().newAggregatedMemoryContext()));

        Iterator<Page> mergedSpilledPages = mergeHashSort.get().merge(
                groupByTypes,
                hashAggregationBuilder.buildIntermediateTypes(),
                spiller.get().getSpills());

        return mergeSortedPages(mergedSpilledPages, memorySizeBeforeSpill);
    }

    private Iterator<Page> mergeSortedPages(Iterator<Page> sortedPages, long memorySizeBeforeSpill)
    {
        merger = Optional.of(new MergingHashAggregationBuilder(
                accumulatorFactories,
                step,
                expectedGroups,
                groupByTypes,
                hashChannel,
                operatorContext,
                sortedPages,
                operatorContext.getSystemMemoryContext().newLocalMemoryContext(),
                memorySizeBeforeSpill,
                hashAggregationBuilder.getKeyChannels(),
                joinCompiler));

        return merger.get().buildResult();
    }

    private void rebuildHashAggregationBuilder()
    {
        // carry over collision statistics before discarding the old builder
        if (hashAggregationBuilder != null) {
            hashCollisions += hashAggregationBuilder.getHashCollisions();
            expectedHashCollisions += hashAggregationBuilder.getExpectedHashCollisions();
        }

        this.hashAggregationBuilder = new InMemoryHashAggregationBuilder(
                accumulatorFactories,
                step,
                expectedGroups,
                groupByTypes,
                groupByChannels,
                hashChannel,
                operatorContext,
                // memory limit of 0: the new builder never triggers its own spill
                DataSize.succinctBytes(0),
                joinCompiler);
    }
}