/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.operator.aggregation;

import com.facebook.presto.array.IntBigArray;
import com.facebook.presto.array.LongBigArray;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.block.Block;
import com.facebook.presto.spi.block.BlockBuilder;
import com.facebook.presto.spi.block.BlockBuilderStatus;
import com.facebook.presto.spi.block.InterleavedBlockBuilder;
import com.facebook.presto.spi.type.Type;
import com.facebook.presto.type.TypeUtils;
import com.google.common.collect.ImmutableList;
import io.airlift.units.DataSize;
import org.openjdk.jol.info.ClassLayout;

import static com.facebook.presto.ExceededMemoryLimitException.exceededLocalLimit;
import static com.facebook.presto.spi.StandardErrorCode.GENERIC_INSUFFICIENT_RESOURCES;
import static com.facebook.presto.spi.type.BigintType.BIGINT;
import static com.google.common.base.Preconditions.checkArgument;
import static io.airlift.units.DataSize.Unit.MEGABYTE;
import static it.unimi.dsi.fastutil.HashCommon.arraySize;
import static it.unimi.dsi.fastutil.HashCommon.murmurHash3;
import static java.util.Objects.requireNonNull;

/**
 * A value-to-count map backed by an open-addressing hash table with linear
 * probing. Each distinct value is appended to the {@code values} block builder;
 * {@code hashPositions} maps a hash slot to that value's position in the
 * builder, and {@code counts} holds the count at the same position. Growing
 * past roughly 4MB of retained size fails with a local memory limit error.
 */
public class TypedHistogram
{
    private static final int INSTANCE_SIZE = ClassLayout.parseClass(TypedHistogram.class).instanceSize();
    // rehash once the table reaches 90% occupancy
    private static final float FILL_RATIO = 0.9f;
    private static final long FOUR_MEGABYTES = new DataSize(4, MEGABYTE).toBytes();

    private int hashCapacity;
    private int maxFill;
    private int mask;

    private final Type type;
    private final BlockBuilder values;
    private IntBigArray hashPositions;
    private final LongBigArray counts;

    public TypedHistogram(Type type, int expectedSize)
    {
        this.type = type;
        checkArgument(expectedSize > 0, "expectedSize must be greater than zero");

        hashCapacity = arraySize(expectedSize, FILL_RATIO);
        maxFill = calculateMaxFill(hashCapacity);
        mask = hashCapacity - 1;
        values = this.type.createBlockBuilder(new BlockBuilderStatus(), hashCapacity);
        // empty slots are marked with -1 (the array's default fill value)
        hashPositions = new IntBigArray(-1);
        hashPositions.ensureCapacity(hashCapacity);
        counts = new LongBigArray();
        counts.ensureCapacity(hashCapacity);
    }

    // deserializes a block of alternating value/count pairs, as produced by serialize()
    public TypedHistogram(Block block, Type type, int expectedSize)
    {
        this(type, expectedSize);
        requireNonNull(block, "block is null");
        for (int i = 0; i < block.getPositionCount(); i += 2) {
            add(i, block, BIGINT.getLong(block, i + 1));
        }
    }

    public long getEstimatedSize()
    {
        return INSTANCE_SIZE + values.getRetainedSizeInBytes() + counts.sizeOf() + hashPositions.sizeOf();
    }

    private Block getValues()
    {
        return values.build();
    }

    private LongBigArray getCounts()
    {
        return counts;
    }

    // writes alternating value/count pairs, which the Block-based constructor reads back
    public Block serialize()
    {
        Block valuesBlock = values.build();
        BlockBuilder blockBuilder = new InterleavedBlockBuilder(ImmutableList.of(type, BIGINT), new BlockBuilderStatus(), valuesBlock.getPositionCount() * 2);
        for (int i = 0; i < valuesBlock.getPositionCount(); i++) {
            type.appendTo(valuesBlock, i, blockBuilder);
            BIGINT.writeLong(blockBuilder, counts.get(i));
        }
        return blockBuilder.build();
    }
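    // Merges another histogram by re-adding each of its distinct values with its
    // accumulated count; each value costs one probe sequence into this table.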
    public void addAll(TypedHistogram other)
    {
        Block otherValues = other.getValues();
        LongBigArray otherCounts = other.getCounts();
        for (int i = 0; i < otherValues.getPositionCount(); i++) {
            long count = otherCounts.get(i);
            if (count > 0) {
                add(i, otherValues, count);
            }
        }
    }

    public void add(int position, Block block, long count)
    {
        int hashPosition = getHashPosition(TypeUtils.hashPosition(type, block, position), mask);

        // look for an empty slot or a slot containing this key
        while (true) {
            if (hashPositions.get(hashPosition) == -1) {
                break;
            }

            if (type.equalTo(block, position, values, hashPositions.get(hashPosition))) {
                counts.add(hashPositions.get(hashPosition), count);
                return;
            }

            // increment position and mask to handle wrap around
            hashPosition = (hashPosition + 1) & mask;
        }

        addNewGroup(hashPosition, position, block, count);
    }

    private void addNewGroup(int hashPosition, int position, Block block, long count)
    {
        hashPositions.set(hashPosition, values.getPositionCount());
        counts.set(values.getPositionCount(), count);
        type.appendTo(block, position, values);

        // increase capacity, if necessary
        if (values.getPositionCount() >= maxFill) {
            rehash();
        }
        if (getEstimatedSize() > FOUR_MEGABYTES) {
            throw exceededLocalLimit(new DataSize(4, MEGABYTE));
        }
    }

    private void rehash()
    {
        long newCapacityLong = hashCapacity * 2L;
        if (newCapacityLong > Integer.MAX_VALUE) {
            throw new PrestoException(GENERIC_INSUFFICIENT_RESOURCES, "Size of hash table cannot exceed 1 billion entries");
        }
        int newCapacity = (int) newCapacityLong;

        int newMask = newCapacity - 1;
        IntBigArray newHashPositions = new IntBigArray(-1);
        newHashPositions.ensureCapacity(newCapacity);
        for (int i = 0; i < values.getPositionCount(); i++) {
            // find an empty slot for the address
            int hashPosition = getHashPosition(TypeUtils.hashPosition(type, values, i), newMask);
            while (newHashPositions.get(hashPosition) != -1) {
                hashPosition = (hashPosition + 1) & newMask;
            }

            // record the mapping
            newHashPositions.set(hashPosition, i);
        }

        hashCapacity = newCapacity;
        mask = newMask;
        maxFill = calculateMaxFill(newCapacity);
        hashPositions = newHashPositions;

        this.counts.ensureCapacity(maxFill);
    }

    private static int getHashPosition(long rawHash, int mask)
    {
        return ((int) murmurHash3(rawHash)) & mask;
    }

    private static int calculateMaxFill(int hashSize)
    {
        checkArgument(hashSize > 0, "hashSize must be greater than 0");
        int maxFill = (int) Math.ceil(hashSize * FILL_RATIO);
        if (maxFill == hashSize) {
            maxFill--;
        }
        checkArgument(hashSize > maxFill, "hashSize must be larger than maxFill");
        return maxFill;
    }
}
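/*
 * Usage sketch: one plausible way an aggregation could drive this structure.
 * The input block, the second histogram, and the choice of VARCHAR below are
 * hypothetical stand-ins for illustration; only the TypedHistogram calls are
 * taken from the class above.
 *
 *   TypedHistogram histogram = new TypedHistogram(VARCHAR, 10);
 *   for (int position = 0; position < input.getPositionCount(); position++) {
 *       histogram.add(position, input, 1L);       // count one occurrence per row
 *   }
 *   histogram.addAll(otherHistogram);             // merge a partial result
 *   Block serialized = histogram.serialize();     // alternating value/count pairs
 *   TypedHistogram copy = new TypedHistogram(serialized, VARCHAR, 10);
 */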