/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.presto.operator; import com.facebook.presto.array.LongBigArray; import com.facebook.presto.spi.Page; import com.facebook.presto.spi.PageBuilder; import com.facebook.presto.spi.block.Block; import com.facebook.presto.spi.type.Type; import com.facebook.presto.sql.gen.JoinCompiler; import com.facebook.presto.type.BigintOperators; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; import io.airlift.slice.XxHash64; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; import org.openjdk.jmh.annotations.Measurement; import org.openjdk.jmh.annotations.Mode; import org.openjdk.jmh.annotations.OperationsPerInvocation; import org.openjdk.jmh.annotations.OutputTimeUnit; import org.openjdk.jmh.annotations.Param; import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.annotations.Warmup; import org.openjdk.jmh.runner.Runner; import org.openjdk.jmh.runner.RunnerException; import org.openjdk.jmh.runner.options.Options; import org.openjdk.jmh.runner.options.OptionsBuilder; import org.openjdk.jmh.runner.options.VerboseMode; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Optional; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; import static com.facebook.presto.spi.type.BigintType.BIGINT; import static it.unimi.dsi.fastutil.HashCommon.arraySize; @SuppressWarnings("MethodMayBeStatic") @State(Scope.Thread) @OutputTimeUnit(TimeUnit.NANOSECONDS) @Fork(2) @Warmup(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS) @Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS) @BenchmarkMode(Mode.AverageTime) public class BenchmarkGroupByHash { private static final int POSITIONS = 10_000_000; private static final String GROUP_COUNT_STRING = "3000000"; private static final int GROUP_COUNT = Integer.parseInt(GROUP_COUNT_STRING); private static final int EXPECTED_SIZE = 10_000; private static final JoinCompiler JOIN_COMPILER = new JoinCompiler(); @Benchmark @OperationsPerInvocation(POSITIONS) public Object groupByHashPreCompute(BenchmarkData data) { GroupByHash groupByHash = new MultiChannelGroupByHash(data.getTypes(), data.getChannels(), data.getHashChannel(), EXPECTED_SIZE, false, JOIN_COMPILER); data.getPages().forEach(groupByHash::getGroupIds); ImmutableList.Builder<Page> pages = ImmutableList.builder(); PageBuilder pageBuilder = new PageBuilder(groupByHash.getTypes()); for (int groupId = 0; groupId < groupByHash.getGroupCount(); groupId++) { pageBuilder.declarePosition(); groupByHash.appendValuesTo(groupId, pageBuilder, 0); if (pageBuilder.isFull()) { pages.add(pageBuilder.build()); pageBuilder.reset(); } } pages.add(pageBuilder.build()); return pageBuilder.build(); } @Benchmark @OperationsPerInvocation(POSITIONS) public Object addPagePreCompute(BenchmarkData data) { GroupByHash groupByHash = new MultiChannelGroupByHash(data.getTypes(), data.getChannels(), data.getHashChannel(), EXPECTED_SIZE, false, JOIN_COMPILER); data.getPages().forEach(groupByHash::addPage); ImmutableList.Builder<Page> pages = ImmutableList.builder(); PageBuilder pageBuilder = new PageBuilder(groupByHash.getTypes()); for (int groupId = 0; groupId < groupByHash.getGroupCount(); groupId++) { pageBuilder.declarePosition(); groupByHash.appendValuesTo(groupId, pageBuilder, 0); if (pageBuilder.isFull()) { pages.add(pageBuilder.build()); pageBuilder.reset(); } } pages.add(pageBuilder.build()); return pageBuilder.build(); } @Benchmark @OperationsPerInvocation(POSITIONS) public Object bigintGroupByHash(SingleChannelBenchmarkData data) { GroupByHash groupByHash = new BigintGroupByHash(0, data.getHashEnabled(), EXPECTED_SIZE); data.getPages().forEach(groupByHash::addPage); ImmutableList.Builder<Page> pages = ImmutableList.builder(); PageBuilder pageBuilder = new PageBuilder(groupByHash.getTypes()); for (int groupId = 0; groupId < groupByHash.getGroupCount(); groupId++) { pageBuilder.declarePosition(); groupByHash.appendValuesTo(groupId, pageBuilder, 0); if (pageBuilder.isFull()) { pages.add(pageBuilder.build()); pageBuilder.reset(); } } pages.add(pageBuilder.build()); return pageBuilder.build(); } @Benchmark @OperationsPerInvocation(POSITIONS) public long baseline(BaselinePagesData data) { int hashSize = arraySize(GROUP_COUNT, 0.9f); int mask = hashSize - 1; long[] table = new long[hashSize]; Arrays.fill(table, -1); long groupIds = 0; for (Page page : data.getPages()) { Block block = page.getBlock(0); int positionCount = block.getPositionCount(); for (int position = 0; position < positionCount; position++) { long value = block.getLong(position, 0); int tablePosition = (int) (value & mask); while (table[tablePosition] != -1 && table[tablePosition] != value) { tablePosition++; } if (table[tablePosition] == -1) { table[tablePosition] = value; groupIds++; } } } return groupIds; } @Benchmark @OperationsPerInvocation(POSITIONS) public long baselineBigArray(BaselinePagesData data) { int hashSize = arraySize(GROUP_COUNT, 0.9f); int mask = hashSize - 1; LongBigArray table = new LongBigArray(-1); table.ensureCapacity(hashSize); long groupIds = 0; for (Page page : data.getPages()) { Block block = page.getBlock(0); int positionCount = block.getPositionCount(); for (int position = 0; position < positionCount; position++) { long value = BIGINT.getLong(block, position); int tablePosition = (int) XxHash64.hash(value) & mask; while (table.get(tablePosition) != -1 && table.get(tablePosition) != value) { tablePosition++; } if (table.get(tablePosition) == -1) { table.set(tablePosition, value); groupIds++; } } } return groupIds; } private static List<Page> createPages(int positionCount, int groupCount, List<Type> types, boolean hashEnabled) { int channelCount = types.size(); ImmutableList.Builder<Page> pages = ImmutableList.builder(); if (hashEnabled) { types = ImmutableList.copyOf(Iterables.concat(types, ImmutableList.of(BIGINT))); } PageBuilder pageBuilder = new PageBuilder(types); for (int position = 0; position < positionCount; position++) { int rand = ThreadLocalRandom.current().nextInt(groupCount); pageBuilder.declarePosition(); for (int numChannel = 0; numChannel < channelCount; numChannel++) { BIGINT.writeLong(pageBuilder.getBlockBuilder(numChannel), rand); } if (hashEnabled) { BIGINT.writeLong(pageBuilder.getBlockBuilder(channelCount), BigintOperators.hashCode(rand)); } if (pageBuilder.isFull()) { pages.add(pageBuilder.build()); pageBuilder.reset(); } } pages.add(pageBuilder.build()); return pages.build(); } @SuppressWarnings("FieldMayBeFinal") @State(Scope.Thread) public static class BaselinePagesData { @Param("1") private int channelCount = 1; @Param("false") private boolean hashEnabled; @Param(GROUP_COUNT_STRING) private int groupCount; private List<Page> pages; @Setup public void setup() { pages = createPages(POSITIONS, groupCount, ImmutableList.of(BIGINT), hashEnabled); } public List<Page> getPages() { return pages; } } @SuppressWarnings("FieldMayBeFinal") @State(Scope.Thread) public static class SingleChannelBenchmarkData { @Param("1") private int channelCount = 1; @Param({"true", "false"}) private boolean hashEnabled = true; @Param(GROUP_COUNT_STRING) private int groupCount = GROUP_COUNT; private List<Page> pages; private List<Type> types; private int[] channels; @Setup public void setup() { pages = createPages(POSITIONS, GROUP_COUNT, ImmutableList.of(BIGINT), hashEnabled); types = Collections.nCopies(1, BIGINT); channels = new int[1]; for (int i = 0; i < 1; i++) { channels[i] = i; } } public List<Page> getPages() { return pages; } public List<Type> getTypes() { return types; } public boolean getHashEnabled() { return hashEnabled; } } @SuppressWarnings("FieldMayBeFinal") @State(Scope.Thread) public static class BenchmarkData { @Param({ "1", "5", "10", "15", "20" }) private int channelCount = 1; // todo add more group counts when JMH support programmatic ability to set OperationsPerInvocation @Param(GROUP_COUNT_STRING) private int groupCount = GROUP_COUNT; @Param({"true", "false"}) private boolean hashEnabled; private List<Page> pages; private Optional<Integer> hashChannel; private List<Type> types; private int[] channels; @Setup public void setup() { pages = createPages(POSITIONS, groupCount, Collections.nCopies(channelCount, BIGINT), hashEnabled); hashChannel = hashEnabled ? Optional.of(channelCount) : Optional.empty(); types = Collections.nCopies(channelCount, BIGINT); channels = new int[channelCount]; for (int i = 0; i < channelCount; i++) { channels[i] = i; } } public List<Page> getPages() { return pages; } public Optional<Integer> getHashChannel() { return hashChannel; } public List<Type> getTypes() { return types; } public int[] getChannels() { return channels; } } public static void main(String[] args) throws RunnerException { // assure the benchmarks are valid before running BenchmarkData data = new BenchmarkData(); data.setup(); new BenchmarkGroupByHash().groupByHashPreCompute(data); new BenchmarkGroupByHash().addPagePreCompute(data); SingleChannelBenchmarkData singleChannelBenchmarkData = new SingleChannelBenchmarkData(); singleChannelBenchmarkData.setup(); new BenchmarkGroupByHash().bigintGroupByHash(singleChannelBenchmarkData); Options options = new OptionsBuilder() .verbosity(VerboseMode.NORMAL) .include(".*" + BenchmarkGroupByHash.class.getSimpleName() + ".*") .build(); new Runner(options).run(); } }