/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.raptor.storage.organization;

import com.facebook.presto.raptor.metadata.Table;
import com.google.common.collect.ImmutableSet;
import io.airlift.units.DataSize;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.Set;

import static com.facebook.presto.raptor.storage.organization.ShardOrganizerUtil.createOrganizationSet;
import static com.facebook.presto.raptor.storage.organization.ShardOrganizerUtil.getShardsByDaysBuckets;
import static com.google.common.base.Preconditions.checkArgument;
import static java.util.Comparator.comparing;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.toCollection;

/**
 * Packs shards into compaction sets whose combined uncompressed size and
 * combined row count stay within the configured per-shard limits.
 */
public class CompactionSetCreator
{
    private final DataSize maxShardSize;
    private final long maxShardRows;

    /**
     * @param maxShardSize limit on the summed uncompressed size of the shards placed in one compaction set
     * @param maxShardRows limit on the summed row count of the shards placed in one compaction set; must be positive
     * @throws IllegalArgumentException if {@code maxShardRows} is not greater than zero
     * @throws NullPointerException if {@code maxShardSize} is null
     */
    public CompactionSetCreator(DataSize maxShardSize, long maxShardRows)
    {
        // The row limit is validated before the size limit is null-checked.
        checkArgument(maxShardRows > 0, "maxShardRows must be > 0");
        this.maxShardSize = requireNonNull(maxShardSize, "maxShardSize is null");
        this.maxShardRows = maxShardRows;
    }

    /**
     * Creates compaction sets for the given shards.
     * <p>
     * The caller is expected to pass a pre-filtered collection: every shard
     * handed to this method is considered for compaction. The shards are first
     * partitioned by {@code getShardsByDaysBuckets}, and compaction sets are
     * then built independently within each partition.
     *
     * @param tableInfo table the shards belong to
     * @param shards candidate shards
     * @return the compaction sets built across all partitions
     */
    public Set<OrganizationSet> createCompactionSets(Table tableInfo, Collection<ShardIndexInfo> shards)
    {
        ImmutableSet.Builder<OrganizationSet> result = ImmutableSet.builder();
        for (Collection<ShardIndexInfo> group : getShardsByDaysBuckets(tableInfo, shards)) {
            Set<ShardIndexInfo> distinctShards = ImmutableSet.copyOf(group);
            result.addAll(buildCompactionSets(tableInfo, distinctShards));
        }
        return result.build();
    }

    /**
     * Walks the shards of one partition in sorted order and accumulates them
     * into a pending set. As soon as the next shard would push the pending set
     * over the size limit or the row limit, the pending set is closed and a
     * fresh one is started with that shard.
     */
    private Set<OrganizationSet> buildCompactionSets(Table tableInfo, Set<ShardIndexInfo> shardIndexInfos)
    {
        long tableId = tableInfo.getTableId();
        List<ShardIndexInfo> ordered = shardIndexInfos.stream()
                .sorted(getShardIndexInfoComparator(tableInfo))
                .collect(toCollection(ArrayList::new));

        ImmutableSet.Builder<OrganizationSet> result = ImmutableSet.builder();
        ImmutableSet.Builder<ShardIndexInfo> pending = ImmutableSet.builder();
        long pendingBytes = 0;
        long pendingRows = 0;

        for (ShardIndexInfo shard : ordered) {
            long shardBytes = shard.getUncompressedSize();
            long shardRows = shard.getRowCount();

            boolean fits = (pendingBytes + shardBytes <= maxShardSize.toBytes())
                    && (pendingRows + shardRows <= maxShardRows);
            if (!fits) {
                // Close the pending set and start over with the current shard
                addIfMultipleShards(result, tableId, pending.build());
                pending = ImmutableSet.builder();
                pendingBytes = 0;
                pendingRows = 0;
            }

            pending.add(shard);
            pendingBytes += shardBytes;
            pendingRows += shardRows;
        }

        // Whatever is still pending forms the last candidate set of this partition
        addIfMultipleShards(result, tableId, pending.build());
        return result.build();
    }

    /**
     * Registers an organization set for the candidates, but only when there is
     * more than one of them; empty and single-shard candidates are dropped.
     */
    private static void addIfMultipleShards(ImmutableSet.Builder<OrganizationSet> target, long tableId, Set<ShardIndexInfo> candidates)
    {
        if (candidates.size() > 1) {
            target.add(createOrganizationSet(tableId, candidates));
        }
    }

    /**
     * Chooses the order in which shards are packed. Tables with a temporal
     * column order shards by temporal range (minimum tuple first, then maximum
     * tuple); all other tables order shards by uncompressed size.
     * <p>
     * For temporal tables the comparator unwraps each shard's temporal range
     * without a presence check, so it assumes every compared shard has one.
     */
    private static Comparator<ShardIndexInfo> getShardIndexInfoComparator(Table tableInfo)
    {
        if (tableInfo.getTemporalColumnId().isPresent()) {
            Comparator<ShardRange> rangeOrder = comparing(ShardRange::getMinTuple)
                    .thenComparing(ShardRange::getMaxTuple);
            return comparing(info -> info.getTemporalRange().get(), rangeOrder);
        }
        return comparing(ShardIndexInfo::getUncompressedSize);
    }
}