/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.presto.raptor.storage.organization; import com.facebook.presto.raptor.metadata.MetadataDao; import com.facebook.presto.raptor.metadata.ShardMetadata; import com.facebook.presto.raptor.metadata.Table; import com.facebook.presto.raptor.metadata.TableColumn; import com.facebook.presto.spi.type.TimestampType; import com.facebook.presto.spi.type.Type; import com.google.common.base.Joiner; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMultimap; import com.google.common.collect.Multimaps; import org.skife.jdbi.v2.IDBI; import java.sql.Connection; import java.sql.JDBCType; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.time.Duration; import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.OptionalInt; import java.util.Set; import java.util.UUID; import static com.facebook.presto.raptor.metadata.DatabaseShardManager.maxColumn; import static com.facebook.presto.raptor.metadata.DatabaseShardManager.minColumn; import static com.facebook.presto.raptor.metadata.DatabaseShardManager.shardIndexTable; import static com.facebook.presto.raptor.storage.ColumnIndexStatsUtils.jdbcType; import static com.facebook.presto.spi.type.DateType.DATE; import static com.google.common.base.Preconditions.checkArgument; import static 
com.google.common.base.Verify.verify;
import static com.google.common.collect.Iterables.getOnlyElement;
import static com.google.common.collect.Iterables.partition;
import static com.google.common.collect.Maps.uniqueIndex;
import static io.airlift.slice.Slices.wrappedBuffer;
import static java.lang.String.format;
import static java.util.Collections.nCopies;
import static java.util.stream.Collectors.toSet;

/**
 * Static helpers for shard organization: loading per-shard index statistics
 * (temporal and sort-column min/max ranges) from the shard index table, and
 * partitioning shards into organization candidate sets.
 */
public class ShardOrganizerUtil
{
    private ShardOrganizerUtil() {}

    /**
     * Loads {@link ShardIndexInfo} for the given shards from the table's shard
     * index table, including the temporal column range (if the table has a
     * temporal column) and, optionally, the sort-column range.
     * <p>
     * Shards whose required range columns contain NULLs are silently skipped,
     * since they are not eligible for organization.
     *
     * @param includeSortColumns whether to also fetch min/max of the table's sort columns
     */
    public static Collection<ShardIndexInfo> getOrganizationEligibleShards(
            IDBI dbi,
            MetadataDao metadataDao,
            Table tableInfo,
            Collection<ShardMetadata> shards,
            boolean includeSortColumns)
    {
        Map<Long, ShardMetadata> shardsById = uniqueIndex(shards, ShardMetadata::getShardId);
        long tableId = tableInfo.getTableId();

        ImmutableList.Builder<String> columnsBuilder = ImmutableList.builder();
        columnsBuilder.add("shard_id");

        // include temporal columns if present
        Optional<TableColumn> temporalColumn = Optional.empty();
        if (tableInfo.getTemporalColumnId().isPresent()) {
            long temporalColumnId = tableInfo.getTemporalColumnId().getAsLong();
            temporalColumn = Optional.of(metadataDao.getTableColumn(tableId, temporalColumnId));
            columnsBuilder.add(minColumn(temporalColumnId), maxColumn(temporalColumnId));
        }

        // include sort columns if needed
        Optional<List<TableColumn>> sortColumns = Optional.empty();
        if (includeSortColumns) {
            sortColumns = Optional.of(metadataDao.listSortColumns(tableId));
            for (TableColumn column : sortColumns.get()) {
                columnsBuilder.add(minColumn(column.getColumnId()), maxColumn(column.getColumnId()));
            }
        }

        String columnToSelect = Joiner.on(",\n").join(columnsBuilder.build());

        ImmutableList.Builder<ShardIndexInfo> indexInfoBuilder = ImmutableList.builder();
        try (Connection connection = dbi.open().getConnection()) {
            // query in batches of 1000 shard ids to keep the IN-list bounded
            for (List<ShardMetadata> partitionedShards : partition(shards, 1000)) {
                String shardIds = Joiner.on(",").join(nCopies(partitionedShards.size(), "?"));

                String sql = format("" +
                        "SELECT %s\n" +
                        "FROM %s\n" +
                        "WHERE shard_id IN (%s)",
                        columnToSelect,
                        shardIndexTable(tableId),
                        shardIds);

                try (PreparedStatement statement = connection.prepareStatement(sql)) {
                    for (int i = 0; i < partitionedShards.size(); i++) {
                        statement.setLong(i + 1, partitionedShards.get(i).getShardId());
                    }
                    try (ResultSet resultSet = statement.executeQuery()) {
                        while (resultSet.next()) {
                            long shardId = resultSet.getLong("shard_id");

                            Optional<ShardRange> sortRange = Optional.empty();
                            if (includeSortColumns) {
                                sortRange = getShardRange(sortColumns.get(), resultSet);
                                // NULL in any sort column: shard not eligible, skip it
                                if (!sortRange.isPresent()) {
                                    continue;
                                }
                            }

                            Optional<ShardRange> temporalRange = Optional.empty();
                            if (temporalColumn.isPresent()) {
                                temporalRange = getShardRange(ImmutableList.of(temporalColumn.get()), resultSet);
                                // NULL temporal range: shard not eligible, skip it
                                if (!temporalRange.isPresent()) {
                                    continue;
                                }
                            }

                            ShardMetadata shardMetadata = shardsById.get(shardId);
                            indexInfoBuilder.add(toShardIndexInfo(shardMetadata, temporalRange, sortRange));
                        }
                    }
                }
            }
        }
        catch (SQLException e) {
            // Throwables.propagate is deprecated; rethrow with the cause preserved
            throw new RuntimeException(e);
        }
        return indexInfoBuilder.build();
    }

    /** Combines shard metadata with the ranges read from the index table. */
    private static ShardIndexInfo toShardIndexInfo(
            ShardMetadata shardMetadata,
            Optional<ShardRange> temporalRange,
            Optional<ShardRange> sortRange)
    {
        return new ShardIndexInfo(
                shardMetadata.getTableId(),
                shardMetadata.getBucketNumber(),
                shardMetadata.getShardUuid(),
                shardMetadata.getRowCount(),
                shardMetadata.getUncompressedSize(),
                sortRange,
                temporalRange);
    }

    /**
     * Groups shards into candidate sets for organization:
     * <ul>
     * <li>neither bucketed nor temporal: all shards form one set</li>
     * <li>bucketed only: one set per bucket number</li>
     * <li>temporal: one set per day (shards without a temporal range are dropped),
     *     further split by bucket number if the table is also bucketed</li>
     * </ul>
     */
    public static Collection<Collection<ShardIndexInfo>> getShardsByDaysBuckets(Table tableInfo, Collection<ShardIndexInfo> shards)
    {
        // Neither bucketed nor temporal, no partitioning required
        if (!tableInfo.getBucketCount().isPresent() && !tableInfo.getTemporalColumnId().isPresent()) {
            return ImmutableList.of(shards);
        }

        // if only bucketed, partition by bucket number
        if (tableInfo.getBucketCount().isPresent() && !tableInfo.getTemporalColumnId().isPresent()) {
            return Multimaps.index(shards, shard -> shard.getBucketNumber().getAsInt()).asMap().values();
        }

        // if temporal, partition into days first
        ImmutableMultimap.Builder<Long, ShardIndexInfo> shardsByDaysBuilder = ImmutableMultimap.builder();
        shards.stream()
                .filter(shard -> shard.getTemporalRange().isPresent())
                .forEach(shard -> {
                    long day = determineDay(shard.getTemporalRange().get());
                    shardsByDaysBuilder.put(day, shard);
                });

        Collection<Collection<ShardIndexInfo>> byDays = shardsByDaysBuilder.build().asMap().values();

        // if table is bucketed further partition by bucket number
        if (!tableInfo.getBucketCount().isPresent()) {
            return byDays;
        }

        ImmutableList.Builder<Collection<ShardIndexInfo>> sets = ImmutableList.builder();
        for (Collection<ShardIndexInfo> s : byDays) {
            sets.addAll(Multimaps.index(s, ShardIndexInfo::getBucketNumber).asMap().values());
        }
        return sets.build();
    }

    /**
     * Assigns a temporal range to a single day. Only DATE and TIMESTAMP ranges
     * are supported; a DATE value is already a day number.
     */
    private static long determineDay(ShardRange temporalRange)
    {
        Tuple min = temporalRange.getMinTuple();
        Tuple max = temporalRange.getMaxTuple();
        verify(min.getTypes().equals(max.getTypes()));

        Type type = getOnlyElement(min.getTypes());
        verify(type.equals(DATE) || type.equals(TimestampType.TIMESTAMP));

        if (type.equals(DATE)) {
            return ((Integer) getOnlyElement(min.getValues())).longValue();
        }

        Long minValue = (Long) getOnlyElement(min.getValues());
        Long maxValue = (Long) getOnlyElement(max.getValues());
        return determineDay(minValue, maxValue);
    }

    /**
     * Assigns a millisecond range to a single day (days since epoch):
     * same day wins; a span over more than two days takes the first full day;
     * a span over exactly two days takes the day covering more of the range.
     */
    private static long determineDay(long rangeStart, long rangeEnd)
    {
        long startDay = Duration.ofMillis(rangeStart).toDays();
        long endDay = Duration.ofMillis(rangeEnd).toDays();
        if (startDay == endDay) {
            return startDay;
        }

        if ((endDay - startDay) > 1) {
            // range spans multiple days, return the first full day
            return startDay + 1;
        }

        // range spans two days, return the day that has the larger time range
        long millisInStartDay = Duration.ofDays(endDay).toMillis() - rangeStart;
        long millisInEndDay = rangeEnd - Duration.ofDays(endDay).toMillis();
        return (millisInStartDay >= millisInEndDay) ? startDay : endDay;
    }

    /**
     * Reads the min/max values of the given columns from the current result-set
     * row. Returns empty if any of the values is SQL NULL.
     */
    private static Optional<ShardRange> getShardRange(List<TableColumn> columns, ResultSet resultSet)
            throws SQLException
    {
        ImmutableList.Builder<Object> minValuesBuilder = ImmutableList.builder();
        ImmutableList.Builder<Object> maxValuesBuilder = ImmutableList.builder();
        ImmutableList.Builder<Type> typeBuilder = ImmutableList.builder();

        for (TableColumn tableColumn : columns) {
            long columnId = tableColumn.getColumnId();
            Type type = tableColumn.getDataType();

            Object min = getValue(resultSet, type, minColumn(columnId));
            Object max = getValue(resultSet, type, maxColumn(columnId));

            if (min == null || max == null) {
                return Optional.empty();
            }

            minValuesBuilder.add(min);
            maxValuesBuilder.add(max);
            typeBuilder.add(type);
        }

        List<Type> types = typeBuilder.build();
        Tuple minTuple = new Tuple(types, minValuesBuilder.build());
        Tuple maxTuple = new Tuple(types, maxValuesBuilder.build());

        return Optional.of(ShardRange.of(minTuple, maxTuple));
    }

    /** Reads a single column value, mapping SQL NULL to Java {@code null}. */
    private static Object getValue(ResultSet resultSet, Type type, String columnName)
            throws SQLException
    {
        JDBCType jdbcType = jdbcType(type);
        Object value = getValue(resultSet, type, columnName, jdbcType);
        return resultSet.wasNull() ? null : value;
    }

    private static Object getValue(ResultSet resultSet, Type type, String columnName, JDBCType jdbcType)
            throws SQLException
    {
        switch (jdbcType) {
            case BOOLEAN:
                return resultSet.getBoolean(columnName);
            case INTEGER:
                return resultSet.getInt(columnName);
            case BIGINT:
                return resultSet.getLong(columnName);
            case DOUBLE:
                return resultSet.getDouble(columnName);
            case VARBINARY:
                // getBytes returns null for SQL NULL; guard to avoid an NPE in
                // wrappedBuffer (the caller maps null to "value absent")
                byte[] bytes = resultSet.getBytes(columnName);
                return bytes == null ? null : wrappedBuffer(bytes).toStringUtf8();
        }
        throw new IllegalArgumentException("Unhandled type: " + type);
    }

    /**
     * Builds an {@link OrganizationSet} from shards that must all share the
     * same bucket number.
     *
     * @throws IllegalArgumentException if the shards span more than one bucket
     */
    static OrganizationSet createOrganizationSet(long tableId, Set<ShardIndexInfo> shardsToCompact)
    {
        Set<UUID> uuids = shardsToCompact.stream()
                .map(ShardIndexInfo::getShardUuid)
                .collect(toSet());

        Set<OptionalInt> bucketNumber = shardsToCompact.stream()
                .map(ShardIndexInfo::getBucketNumber)
                .collect(toSet());

        checkArgument(bucketNumber.size() == 1);
        return new OrganizationSet(tableId, uuids, getOnlyElement(bucketNumber));
    }
}