/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.raptor.storage.organization;
import com.facebook.presto.raptor.metadata.ForMetadata;
import com.facebook.presto.raptor.metadata.MetadataDao;
import com.facebook.presto.raptor.metadata.ShardManager;
import com.facebook.presto.raptor.metadata.ShardMetadata;
import com.facebook.presto.raptor.metadata.Table;
import com.facebook.presto.raptor.storage.StorageManagerConfig;
import com.facebook.presto.spi.NodeManager;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Maps;
import io.airlift.log.Logger;
import io.airlift.units.Duration;
import org.skife.jdbi.v2.IDBI;
import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;
import javax.inject.Inject;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.AtomicBoolean;
import static com.facebook.presto.raptor.storage.organization.ShardOrganizerUtil.createOrganizationSet;
import static com.facebook.presto.raptor.storage.organization.ShardOrganizerUtil.getOrganizationEligibleShards;
import static com.facebook.presto.raptor.storage.organization.ShardOrganizerUtil.getShardsByDaysBuckets;
import static com.facebook.presto.raptor.util.DatabaseUtil.onDemandDao;
import static com.google.common.collect.Sets.difference;
import static com.google.common.collect.Sets.newConcurrentHashSet;
import static io.airlift.concurrent.MoreFutures.allAsList;
import static io.airlift.concurrent.Threads.daemonThreadsNamed;
import static java.lang.Math.max;
import static java.util.Objects.requireNonNull;
import static java.util.concurrent.Executors.newScheduledThreadPool;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static java.util.concurrent.TimeUnit.MINUTES;
import static java.util.concurrent.TimeUnit.SECONDS;
import static java.util.stream.Collectors.toList;
import static java.util.stream.Collectors.toSet;
/**
 * Periodically discovers the tables on the current node that are due for shard
 * organization and enqueues the resulting {@link OrganizationSet}s with the
 * {@link ShardOrganizer}.
 *
 * <p>Discovery runs on a single-threaded scheduled executor. Tables whose
 * organization sets are still outstanding are tracked in {@code tablesInProgress}
 * so they are not picked up again until those sets complete.
 */
public class ShardOrganizationManager
{
    private static final Logger log = Logger.get(ShardOrganizationManager.class);

    private final ScheduledExecutorService discoveryService = newScheduledThreadPool(1, daemonThreadsNamed("shard-organization-discovery"));
    private final AtomicBoolean started = new AtomicBoolean();

    private final IDBI dbi;
    private final MetadataDao metadataDao;
    private final ShardOrganizerDao organizerDao;
    private final ShardManager shardManager;

    private final boolean enabled;
    private final long organizationIntervalMillis;

    private final String currentNodeIdentifier;
    private final ShardOrganizer organizer;

    // IDs of tables that currently have organization sets enqueued by this manager
    private final Set<Long> tablesInProgress = newConcurrentHashSet();

    /**
     * Injection constructor: resolves the current node identifier from the
     * {@link NodeManager} and reads the enabled flag and organization interval
     * from the {@link StorageManagerConfig}.
     */
    @Inject
    public ShardOrganizationManager(
            @ForMetadata IDBI dbi,
            NodeManager nodeManager,
            ShardManager shardManager,
            ShardOrganizer organizer,
            StorageManagerConfig config)
    {
        this(dbi,
                nodeManager.getCurrentNode().getNodeIdentifier(),
                shardManager,
                organizer,
                config.isOrganizationEnabled(),
                config.getOrganizationInterval());
    }

    /**
     * @param dbi database handle used to create the metadata and organizer DAOs
     * @param currentNodeIdentifier identifier of the node whose shards are organized
     * @param shardManager source of the shards assigned to the node
     * @param organizer receives the organization sets created by this manager
     * @param enabled when {@code false}, {@link #start()} does nothing
     * @param organizationInterval minimum time between two organization runs of the
     * same table; rounded to milliseconds and clamped to at least 1 ms
     * @throws NullPointerException if any reference argument is null
     */
    public ShardOrganizationManager(
            IDBI dbi,
            String currentNodeIdentifier,
            ShardManager shardManager,
            ShardOrganizer organizer,
            boolean enabled,
            Duration organizationInterval)
    {
        this.dbi = requireNonNull(dbi, "dbi is null");
        this.metadataDao = onDemandDao(dbi, MetadataDao.class);
        this.organizerDao = onDemandDao(dbi, ShardOrganizerDao.class);

        this.organizer = requireNonNull(organizer, "organizer is null");
        this.shardManager = requireNonNull(shardManager, "shardManager is null");
        this.currentNodeIdentifier = requireNonNull(currentNodeIdentifier, "currentNodeIdentifier is null");

        this.enabled = enabled;

        requireNonNull(organizationInterval, "organizationInterval is null");
        this.organizationIntervalMillis = max(1, organizationInterval.roundTo(MILLISECONDS));
    }

    /**
     * Starts the periodic discovery task. Does nothing when organization is
     * disabled or when the manager has already been started.
     */
    @PostConstruct
    public void start()
    {
        if (!enabled || started.getAndSet(true)) {
            return;
        }
        startDiscovery();
    }

    /**
     * Stops the discovery executor, interrupting a discovery pass that is running.
     */
    @PreDestroy
    public void shutdown()
    {
        discoveryService.shutdownNow();
    }

    /**
     * Schedules the discovery task with a fixed delay of five minutes between
     * passes. Every pass first sleeps for a random 1 to 299 seconds.
     */
    private void startDiscovery()
    {
        discoveryService.scheduleWithFixedDelay(() -> {
            try {
                // jitter to avoid overloading database
                SECONDS.sleep(ThreadLocalRandom.current().nextLong(1, 5 * 60));

                log.info("Running shard organizer...");
                submitJobs(discoverAndInitializeTablesToOrganize());
            }
            catch (InterruptedException e) {
                // restore the flag so the executor can observe the interruption
                Thread.currentThread().interrupt();
            }
            catch (Throwable t) {
                // an exception escaping the task would cancel all future executions
                log.error(t, "Error running shard organizer");
            }
        }, 0, 5, MINUTES);
    }

    /**
     * Determines which tables should be organized now on the current node.
     *
     * <p>As a side effect, inserts an organization record for the current node
     * for every eligible table that does not have one yet.
     *
     * @return IDs of the eligible tables that either have no organization record
     * for this node yet or for which {@code shouldRunOrganization} holds
     */
    @VisibleForTesting
    Set<Long> discoverAndInitializeTablesToOrganize()
    {
        Set<Long> enabledTableIds = metadataDao.getOrganizationEligibleTables();

        Set<TableOrganizationInfo> tableOrganizationInfo = organizerDao.getNodeTableOrganizationInfo(currentNodeIdentifier);
        Map<Long, TableOrganizationInfo> organizationInfos = Maps.uniqueIndex(tableOrganizationInfo, TableOrganizationInfo::getTableId);

        // If this is the first time organizing a table, initialize the organization info for it
        difference(enabledTableIds, organizationInfos.keySet())
                .forEach(tableId -> organizerDao.insertNode(currentNodeIdentifier, tableId));

        ImmutableSet.Builder<Long> tableIds = ImmutableSet.builder();
        for (Long tableId : enabledTableIds) {
            TableOrganizationInfo info = organizationInfos.get(tableId);
            if (info == null || shouldRunOrganization(info)) {
                tableIds.add(tableId);
            }
        }
        return tableIds.build();
    }

    /**
     * Runs organization for each of the given tables. A failure for one table is
     * logged and does not prevent the remaining tables from being processed.
     */
    private void submitJobs(Set<Long> tableIds)
    {
        for (long tableId : tableIds) {
            // stop promptly once the discovery thread has been interrupted (e.g. by shutdown)
            if (Thread.currentThread().isInterrupted()) {
                return;
            }
            try {
                runOrganization(tableId);
            }
            catch (RuntimeException e) {
                // isolate the failure so one bad table cannot starve the tables after it
                log.error(e, "Error running shard organizer for table ID %s", tableId);
            }
        }
    }

    /**
     * Creates organization sets from the table's shards on the current node that
     * the organizer does not already report as in progress, and enqueues them.
     *
     * <p>The table is marked as in progress until all enqueued sets complete, at
     * which point the time this run started is recorded as the table's last start
     * time. If enqueueing fails, the in-progress marker is removed again so the
     * table can be retried by a later discovery pass.
     */
    private void runOrganization(long tableId)
    {
        Set<ShardMetadata> shardMetadatas = shardManager.getNodeShards(currentNodeIdentifier, tableId);
        Table tableInfo = metadataDao.getTableInformation(tableId);
        Set<ShardMetadata> filteredShards = shardMetadatas.stream()
                .filter(shard -> !organizer.inProgress(shard.getShardUuid()))
                .collect(toSet());

        Collection<ShardIndexInfo> indexInfos = getOrganizationEligibleShards(dbi, metadataDao, tableInfo, filteredShards, true);
        Set<OrganizationSet> organizationSets = createOrganizationSets(tableInfo, indexInfos);

        if (organizationSets.isEmpty()) {
            return;
        }

        log.info("Created %s organization set(s) from %s shards for table ID %s", organizationSets.size(), filteredShards.size(), tableId);

        long lastStartTime = System.currentTimeMillis();
        tablesInProgress.add(tableId);

        boolean enqueued = false;
        try {
            ImmutableList.Builder<CompletableFuture<?>> futures = ImmutableList.builder();
            for (OrganizationSet organizationSet : organizationSets) {
                futures.add(organizer.enqueue(organizationSet));
            }
            allAsList(futures.build())
                    .whenComplete((value, throwable) -> {
                        tablesInProgress.remove(tableId);
                        try {
                            organizerDao.updateLastStartTime(currentNodeIdentifier, tableId, lastStartTime);
                        }
                        catch (RuntimeException e) {
                            // nobody observes the future returned by whenComplete, so log here
                            log.error(e, "Error updating last organization start time for table ID %s", tableId);
                        }
                    });
            enqueued = true;
        }
        finally {
            if (!enqueued) {
                // no completion callback was attached, so nothing else would clear the marker
                tablesInProgress.remove(tableId);
            }
        }
    }

    /**
     * @return {@code false} if the table is currently in progress; otherwise
     * {@code true} if the table has no recorded last start time or at least the
     * organization interval has elapsed since it
     */
    private boolean shouldRunOrganization(TableOrganizationInfo info)
    {
        // skip if organization is in progress for this table
        if (tablesInProgress.contains(info.getTableId())) {
            return false;
        }

        if (!info.getLastStartTimeMillis().isPresent()) {
            return true;
        }

        return (System.currentTimeMillis() - info.getLastStartTimeMillis().getAsLong()) >= organizationIntervalMillis;
    }

    /**
     * Groups the shards with {@code getShardsByDaysBuckets} and, within each
     * group, builds organization sets from runs of shards with overlapping sort
     * ranges.
     *
     * @return the union of the organization sets of all groups; possibly empty
     */
    @VisibleForTesting
    static Set<OrganizationSet> createOrganizationSets(Table tableInfo, Collection<ShardIndexInfo> shards)
    {
        return getShardsByDaysBuckets(tableInfo, shards).stream()
                .map(indexInfos -> getOverlappingOrganizationSets(tableInfo, indexInfos))
                .flatMap(Collection::stream)
                .collect(toSet());
    }

    /**
     * Builds organization sets from runs of shards whose sort ranges overlap.
     *
     * <p>Shards are ordered by ascending minimum tuple and, for equal minimums, by
     * descending maximum tuple. The sorted list is then scanned once: a shard joins
     * the current run when its range overlaps the range of the run's reference
     * shard and is not adjacent to it; otherwise the current run is closed and a
     * new one starts at that shard. Only runs with more than one shard produce an
     * organization set.
     *
     * <p>Every shard is expected to have a sort range; {@code getSortRange().get()}
     * is called unconditionally.
     *
     * @return the organization sets; empty when there are fewer than two shards
     */
    private static Set<OrganizationSet> getOverlappingOrganizationSets(Table tableInfo, Collection<ShardIndexInfo> shards)
    {
        if (shards.size() <= 1) {
            return ImmutableSet.of();
        }

        // Sort by low marker for the range
        List<ShardIndexInfo> sortedShards = shards.stream()
                .sorted((o1, o2) -> {
                    ShardRange sortRange1 = o1.getSortRange().get();
                    ShardRange sortRange2 = o2.getSortRange().get();
                    return ComparisonChain.start()
                            .compare(sortRange1.getMinTuple(), sortRange2.getMinTuple())
                            .compare(sortRange2.getMaxTuple(), sortRange1.getMaxTuple())
                            .result();
                })
                .collect(toList());

        Set<OrganizationSet> organizationSets = new HashSet<>();
        ImmutableSet.Builder<ShardIndexInfo> builder = ImmutableSet.builder();

        builder.add(sortedShards.get(0));
        int previousRange = 0;
        int nextRange = previousRange + 1;
        while (nextRange < sortedShards.size()) {
            ShardRange sortRange1 = sortedShards.get(previousRange).getSortRange().get();
            ShardRange sortRange2 = sortedShards.get(nextRange).getSortRange().get();

            if (sortRange1.overlaps(sortRange2) && !sortRange1.adjacent(sortRange2)) {
                builder.add(sortedShards.get(nextRange));
                // keep the enclosing range as the reference; otherwise advance to the newly added shard
                if (!sortRange1.encloses(sortRange2)) {
                    previousRange = nextRange;
                }
            }
            else {
                // the run ended: emit it if it has more than one shard, then start a new run here
                Set<ShardIndexInfo> indexInfos = builder.build();
                if (indexInfos.size() > 1) {
                    organizationSets.add(createOrganizationSet(tableInfo.getTableId(), indexInfos));
                }
                builder = ImmutableSet.builder();
                previousRange = nextRange;
                builder.add(sortedShards.get(previousRange));
            }
            nextRange++;
        }

        // emit the final run
        Set<ShardIndexInfo> indexInfos = builder.build();
        if (indexInfos.size() > 1) {
            organizationSets.add(createOrganizationSet(tableInfo.getTableId(), indexInfos));
        }
        return organizationSets;
    }
}