/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.raptor.storage.organization;
import com.facebook.presto.raptor.metadata.MetadataDao;
import com.facebook.presto.raptor.metadata.ShardMetadata;
import com.facebook.presto.raptor.metadata.Table;
import com.facebook.presto.raptor.metadata.TableColumn;
import com.facebook.presto.spi.type.TimestampType;
import com.facebook.presto.spi.type.Type;
import com.google.common.base.Joiner;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.Multimaps;
import org.skife.jdbi.v2.IDBI;
import java.sql.Connection;
import java.sql.JDBCType;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.time.Duration;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.Set;
import java.util.UUID;
import static com.facebook.presto.raptor.metadata.DatabaseShardManager.maxColumn;
import static com.facebook.presto.raptor.metadata.DatabaseShardManager.minColumn;
import static com.facebook.presto.raptor.metadata.DatabaseShardManager.shardIndexTable;
import static com.facebook.presto.raptor.storage.ColumnIndexStatsUtils.jdbcType;
import static com.facebook.presto.spi.type.DateType.DATE;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Verify.verify;
import static com.google.common.collect.Iterables.getOnlyElement;
import static com.google.common.collect.Iterables.partition;
import static com.google.common.collect.Maps.uniqueIndex;
import static io.airlift.slice.Slices.wrappedBuffer;
import static java.lang.String.format;
import static java.util.Collections.nCopies;
import static java.util.stream.Collectors.toSet;
public class ShardOrganizerUtil
{
// Static utility holder: the private constructor prevents instantiation.
private ShardOrganizerUtil() {}
/**
 * Reads min/max index statistics for the given shards from the table's shard
 * index table and returns a {@link ShardIndexInfo} for every shard whose
 * requested ranges are fully populated.
 * <p>
 * The temporal column range is requested whenever the table declares a temporal
 * column; sort column ranges are requested only when {@code includeSortColumns}
 * is set. A shard is skipped when any min or max value of a requested range is
 * NULL in the index table.
 *
 * @param dbi used to open the connection on which the index table is queried
 * @param metadataDao used to resolve the temporal column and the sort columns
 * @param tableInfo table whose shards are inspected
 * @param shards candidate shards; an empty collection yields an empty result
 *        without touching the database
 * @param includeSortColumns whether sort column ranges are loaded and required
 * @return index information for the eligible shards
 * @throws IllegalArgumentException if two shards share a shard id (raised by {@code uniqueIndex})
 * @throws RuntimeException wrapping any {@link SQLException} raised while querying
 */
public static Collection<ShardIndexInfo> getOrganizationEligibleShards(
        IDBI dbi,
        MetadataDao metadataDao,
        Table tableInfo,
        Collection<ShardMetadata> shards,
        boolean includeSortColumns)
{
    // Nothing to look up: skip the metadata reads and the database connection
    if (shards.isEmpty()) {
        return ImmutableList.of();
    }

    Map<Long, ShardMetadata> shardsById = uniqueIndex(shards, ShardMetadata::getShardId);
    long tableId = tableInfo.getTableId();

    ImmutableList.Builder<String> columnsBuilder = ImmutableList.builder();
    columnsBuilder.add("shard_id");

    // include temporal columns if present
    Optional<TableColumn> temporalColumn = Optional.empty();
    if (tableInfo.getTemporalColumnId().isPresent()) {
        long temporalColumnId = tableInfo.getTemporalColumnId().getAsLong();
        temporalColumn = Optional.of(metadataDao.getTableColumn(tableId, temporalColumnId));
        columnsBuilder.add(minColumn(temporalColumnId), maxColumn(temporalColumnId));
    }

    // include sort columns if needed
    Optional<List<TableColumn>> sortColumns = Optional.empty();
    if (includeSortColumns) {
        sortColumns = Optional.of(metadataDao.listSortColumns(tableId));
        for (TableColumn column : sortColumns.get()) {
            columnsBuilder.add(minColumn(column.getColumnId()), maxColumn(column.getColumnId()));
        }
    }
    String columnToSelect = Joiner.on(",\n").join(columnsBuilder.build());

    ImmutableList.Builder<ShardIndexInfo> indexInfoBuilder = ImmutableList.builder();
    try (Connection connection = dbi.open().getConnection()) {
        // query in batches so each statement carries at most 1000 bind parameters
        for (List<ShardMetadata> partitionedShards : partition(shards, 1000)) {
            String shardIds = Joiner.on(",").join(nCopies(partitionedShards.size(), "?"));

            String sql = format("" +
                            "SELECT %s\n" +
                            "FROM %s\n" +
                            "WHERE shard_id IN (%s)",
                    columnToSelect, shardIndexTable(tableId), shardIds);

            try (PreparedStatement statement = connection.prepareStatement(sql)) {
                // JDBC parameter indexes are 1-based
                for (int i = 0; i < partitionedShards.size(); i++) {
                    statement.setLong(i + 1, partitionedShards.get(i).getShardId());
                }
                try (ResultSet resultSet = statement.executeQuery()) {
                    while (resultSet.next()) {
                        long shardId = resultSet.getLong("shard_id");

                        // a shard with an incomplete sort range is not eligible
                        Optional<ShardRange> sortRange = Optional.empty();
                        if (includeSortColumns) {
                            sortRange = getShardRange(sortColumns.get(), resultSet);
                            if (!sortRange.isPresent()) {
                                continue;
                            }
                        }

                        // a shard with an incomplete temporal range is not eligible
                        Optional<ShardRange> temporalRange = Optional.empty();
                        if (temporalColumn.isPresent()) {
                            temporalRange = getShardRange(ImmutableList.of(temporalColumn.get()), resultSet);
                            if (!temporalRange.isPresent()) {
                                continue;
                            }
                        }

                        ShardMetadata shardMetadata = shardsById.get(shardId);
                        indexInfoBuilder.add(toShardIndexInfo(shardMetadata, temporalRange, sortRange));
                    }
                }
            }
        }
    }
    catch (SQLException e) {
        throw Throwables.propagate(e);
    }
    return indexInfoBuilder.build();
}
/**
 * Builds a {@link ShardIndexInfo} from a shard's metadata and its optional index ranges.
 *
 * @param shardMetadata source of the table id, bucket number, shard UUID, row count and uncompressed size
 * @param temporalRange range of the temporal column, if one was loaded
 * @param sortRange range of the sort columns, if one was loaded
 * @return the combined index information
 */
private static ShardIndexInfo toShardIndexInfo(ShardMetadata shardMetadata, Optional<ShardRange> temporalRange, Optional<ShardRange> sortRange)
{
return new ShardIndexInfo(
shardMetadata.getTableId(),
shardMetadata.getBucketNumber(),
shardMetadata.getShardUuid(),
shardMetadata.getRowCount(),
shardMetadata.getUncompressedSize(),
// note: passed as sort range first, then temporal range -- the reverse of this method's parameter order
sortRange,
temporalRange);
}
/**
 * Splits shards into groups according to the table layout.
 * <ul>
 * <li>neither bucketed nor temporal: a single group holding all shards</li>
 * <li>bucketed only: one group per bucket number</li>
 * <li>temporal: one group per day, further split by bucket number when the
 * table is also bucketed; shards without a temporal range are dropped</li>
 * </ul>
 *
 * @param tableInfo table the shards belong to
 * @param shards shards to group
 * @return the groups of shards
 */
public static Collection<Collection<ShardIndexInfo>> getShardsByDaysBuckets(Table tableInfo, Collection<ShardIndexInfo> shards)
{
    boolean bucketed = tableInfo.getBucketCount().isPresent();
    boolean temporal = tableInfo.getTemporalColumnId().isPresent();

    if (!temporal) {
        if (!bucketed) {
            // no partitioning required at all
            return ImmutableList.of(shards);
        }
        // only bucketed: group by bucket number
        return Multimaps.index(shards, shard -> shard.getBucketNumber().getAsInt()).asMap().values();
    }

    // temporal: group by day first, ignoring shards that carry no temporal range
    ImmutableMultimap.Builder<Long, ShardIndexInfo> dayIndex = ImmutableMultimap.builder();
    for (ShardIndexInfo shard : shards) {
        if (shard.getTemporalRange().isPresent()) {
            long day = determineDay(shard.getTemporalRange().get());
            dayIndex.put(day, shard);
        }
    }
    Collection<Collection<ShardIndexInfo>> shardsPerDay = dayIndex.build().asMap().values();

    if (bucketed) {
        // split every day group once more by bucket number
        ImmutableList.Builder<Collection<ShardIndexInfo>> groups = ImmutableList.builder();
        for (Collection<ShardIndexInfo> sameDay : shardsPerDay) {
            groups.addAll(Multimaps.index(sameDay, ShardIndexInfo::getBucketNumber).asMap().values());
        }
        return groups.build();
    }
    return shardsPerDay;
}
/**
 * Determines the day a shard's temporal range is assigned to.
 * <p>
 * The range must consist of exactly one column of type DATE or TIMESTAMP.
 * For DATE the minimum value is used directly as the day; for TIMESTAMP the
 * day is derived from the minimum and maximum values.
 *
 * @param temporalRange single-column temporal range of a shard
 * @return the day assigned to the range
 */
private static long determineDay(ShardRange temporalRange)
{
    Tuple lower = temporalRange.getMinTuple();
    Tuple upper = temporalRange.getMaxTuple();
    verify(lower.getTypes().equals(upper.getTypes()));

    Type temporalType = getOnlyElement(lower.getTypes());
    boolean isDate = temporalType.equals(DATE);
    verify(isDate || temporalType.equals(TimestampType.TIMESTAMP));

    Object lowerValue = getOnlyElement(lower.getValues());
    if (isDate) {
        // DATE values are held as Integer; the minimum is taken as the day
        return ((Integer) lowerValue).longValue();
    }

    // TIMESTAMP values are held as Long
    Long rangeStart = (Long) lowerValue;
    Long rangeEnd = (Long) getOnlyElement(upper.getValues());
    return determineDay(rangeStart, rangeEnd);
}
/**
 * Picks the day (counted from the epoch, negative before it) that best
 * represents a range of epoch-millisecond timestamps.
 * <ul>
 * <li>range within one day: that day</li>
 * <li>range touching more than two days: the first full day, i.e. the day after the start day</li>
 * <li>range touching exactly two days: the day holding the larger share of the range, ties going to the start day</li>
 * </ul>
 * Days are computed with floor division so that timestamps before the epoch
 * land on the correct (negative) day; truncating division would fold the day
 * before the epoch into day 0.
 *
 * @param rangeStart inclusive start of the range in milliseconds since the epoch
 * @param rangeEnd inclusive end of the range in milliseconds since the epoch
 * @return the selected day number
 */
private static long determineDay(long rangeStart, long rangeEnd)
{
    long millisPerDay = Duration.ofDays(1).toMillis();
    long startDay = Math.floorDiv(rangeStart, millisPerDay);
    long endDay = Math.floorDiv(rangeEnd, millisPerDay);
    if (startDay == endDay) {
        return startDay;
    }

    if ((endDay - startDay) > 1) {
        // range spans multiple days, return the first full day
        return startDay + 1;
    }

    // range spans two days, return the day that has the larger time range
    long endDayStartMillis = Duration.ofDays(endDay).toMillis();
    long millisInStartDay = endDayStartMillis - rangeStart;
    long millisInEndDay = rangeEnd - endDayStartMillis;
    return (millisInStartDay >= millisInEndDay) ? startDay : endDay;
}
/**
 * Reads the min and max index values of the given columns from the current
 * row and combines them into a {@link ShardRange}.
 *
 * @param columns columns whose min/max values make up the range, in order
 * @param resultSet result set positioned on the row to read
 * @return the range, or empty if any min or max value is NULL
 * @throws SQLException if reading from the result set fails
 */
private static Optional<ShardRange> getShardRange(List<TableColumn> columns, ResultSet resultSet)
        throws SQLException
{
    ImmutableList.Builder<Type> columnTypes = ImmutableList.builder();
    ImmutableList.Builder<Object> lowerBounds = ImmutableList.builder();
    ImmutableList.Builder<Object> upperBounds = ImmutableList.builder();

    for (TableColumn column : columns) {
        long id = column.getColumnId();
        Type columnType = column.getDataType();

        Object lower = getValue(resultSet, columnType, minColumn(id));
        Object upper = getValue(resultSet, columnType, maxColumn(id));

        // one missing bound makes the whole range unusable
        boolean incomplete = (lower == null) || (upper == null);
        if (incomplete) {
            return Optional.empty();
        }

        lowerBounds.add(lower);
        upperBounds.add(upper);
        columnTypes.add(columnType);
    }

    List<Type> types = columnTypes.build();
    Tuple lowerTuple = new Tuple(types, lowerBounds.build());
    Tuple upperTuple = new Tuple(types, upperBounds.build());
    ShardRange range = ShardRange.of(lowerTuple, upperTuple);
    return Optional.of(range);
}
/**
 * Reads a column of the current row, mapping SQL NULL to {@code null}.
 *
 * @param resultSet result set positioned on the row to read
 * @param type type of the column, used to choose the JDBC accessor
 * @param columnName label of the column to read
 * @return the column value, or {@code null} if the column was SQL NULL
 * @throws SQLException if reading from the result set fails
 */
private static Object getValue(ResultSet resultSet, Type type, String columnName)
        throws SQLException
{
    Object rawValue = getValue(resultSet, type, columnName, jdbcType(type));
    // primitive accessors report SQL NULL as 0/false, so wasNull() is the authority
    if (resultSet.wasNull()) {
        return null;
    }
    return rawValue;
}
/**
 * Reads a column of the current row with the accessor matching the JDBC type.
 * <p>
 * For the primitive accessors a SQL NULL is reported as {@code false} or
 * {@code 0}; callers must consult {@link ResultSet#wasNull()} afterwards.
 * VARBINARY values are decoded as UTF-8 strings and yield {@code null} for
 * SQL NULL.
 *
 * @param resultSet result set positioned on the row to read
 * @param type column type, only used in the error message
 * @param columnName label of the column to read
 * @param jdbcType JDBC type selecting the accessor
 * @return the column value
 * @throws SQLException if reading from the result set fails
 * @throws IllegalArgumentException if the JDBC type is not supported
 */
private static Object getValue(ResultSet resultSet, Type type, String columnName, JDBCType jdbcType)
        throws SQLException
{
    switch (jdbcType) {
        case BOOLEAN:
            return resultSet.getBoolean(columnName);
        case INTEGER:
            return resultSet.getInt(columnName);
        case BIGINT:
            return resultSet.getLong(columnName);
        case DOUBLE:
            return resultSet.getDouble(columnName);
        case VARBINARY: {
            // getBytes returns null for SQL NULL; wrapping null would throw
            // before the caller gets a chance to check wasNull()
            byte[] bytes = resultSet.getBytes(columnName);
            return (bytes == null) ? null : wrappedBuffer(bytes).toStringUtf8();
        }
        default:
            throw new IllegalArgumentException("Unhandled type: " + type);
    }
}
/**
 * Creates an {@link OrganizationSet} from shards that all share one bucket number.
 *
 * @param tableId id of the table the shards belong to
 * @param shardsToCompact shards forming the set
 * @return the organization set holding the shard UUIDs and their common bucket number
 * @throws IllegalArgumentException if the shards do not have exactly one distinct bucket number
 */
static OrganizationSet createOrganizationSet(long tableId, Set<ShardIndexInfo> shardsToCompact)
{
    Set<UUID> shardUuids = shardsToCompact.stream()
            .map(shard -> shard.getShardUuid())
            .collect(toSet());

    Set<OptionalInt> distinctBuckets = shardsToCompact.stream()
            .map(shard -> shard.getBucketNumber())
            .collect(toSet());

    // all shards of a set must come from the same bucket
    boolean singleBucket = distinctBuckets.size() == 1;
    checkArgument(singleBucket);

    OptionalInt bucket = getOnlyElement(distinctBuckets);
    return new OrganizationSet(tableId, shardUuids, bucket);
}
}