/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.raptor.storage;
import com.facebook.presto.client.NodeVersion;
import com.facebook.presto.metadata.PrestoNode;
import com.facebook.presto.raptor.NodeSupplier;
import com.facebook.presto.raptor.metadata.BucketNode;
import com.facebook.presto.raptor.metadata.ColumnInfo;
import com.facebook.presto.raptor.metadata.Distribution;
import com.facebook.presto.raptor.metadata.MetadataDao;
import com.facebook.presto.raptor.metadata.ShardManager;
import com.facebook.presto.raptor.storage.BucketBalancer.BucketAssignment;
import com.facebook.presto.raptor.storage.BucketBalancer.ClusterState;
import com.facebook.presto.spi.Node;
import com.facebook.presto.testing.TestingNodeManager;
import com.facebook.presto.type.TypeRegistry;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Multiset;
import io.airlift.units.DataSize;
import io.airlift.units.Duration;
import org.skife.jdbi.v2.DBI;
import org.skife.jdbi.v2.Handle;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import java.net.URI;
import java.util.List;
import java.util.OptionalLong;
import java.util.stream.Collectors;
import static com.facebook.presto.raptor.metadata.Distribution.serializeColumnTypes;
import static com.facebook.presto.raptor.metadata.SchemaDaoUtil.createTablesWithRetry;
import static com.facebook.presto.raptor.metadata.TestDatabaseShardManager.createShardManager;
import static com.facebook.presto.spi.type.BigintType.BIGINT;
import static com.google.common.base.Preconditions.checkArgument;
import static io.airlift.testing.Assertions.assertGreaterThanOrEqual;
import static io.airlift.testing.Assertions.assertLessThanOrEqual;
import static java.util.concurrent.TimeUnit.DAYS;
import static org.testng.Assert.assertEquals;
@Test(singleThreaded = true)
public class TestBucketBalancer
{
private static final List<String> AVAILABLE_WORKERS = ImmutableList.of("node1", "node2", "node3", "node4", "node5");
private DBI dbi;
private Handle dummyHandle;
private ShardManager shardManager;
private TestingNodeManager nodeManager;
private MetadataDao metadataDao;
private BucketBalancer balancer;
@BeforeMethod
public void setup()
throws Exception
{
TypeRegistry typeRegistry = new TypeRegistry();
dbi = new DBI("jdbc:h2:mem:test" + System.nanoTime());
dbi.registerMapper(new Distribution.Mapper(typeRegistry));
dummyHandle = dbi.open();
createTablesWithRetry(dbi);
metadataDao = dbi.onDemand(MetadataDao.class);
nodeManager = new TestingNodeManager(AVAILABLE_WORKERS.stream()
.map(TestBucketBalancer::createTestingNode)
.collect(Collectors.toList()));
NodeSupplier nodeSupplier = nodeManager::getWorkerNodes;
shardManager = createShardManager(dbi, nodeSupplier);
balancer = new BucketBalancer(nodeSupplier, shardManager, true, new Duration(1, DAYS), true, true, "test");
}
@AfterMethod(alwaysRun = true)
public void teardown()
{
if (dummyHandle != null) {
dummyHandle.close();
}
}
@Test
public void testSingleDistributionUnbalanced()
throws Exception
{
long distributionId = createDistribution("distA", 16);
createBucketedTable("testA", distributionId);
createBucketedTable("testB", distributionId);
createAssignments(distributionId, AVAILABLE_WORKERS, 10, 3, 1, 1, 1);
assertBalancing(balancer, 6);
}
@Test
public void testSingleDistributionSlightlyUnbalanced()
throws Exception
{
long distributionId = createDistribution("distA", 16);
createBucketedTable("testA", distributionId);
createBucketedTable("testB", distributionId);
createAssignments(distributionId, AVAILABLE_WORKERS, 4, 4, 3, 3, 2);
assertBalancing(balancer, 1);
}
@Test
public void testSingleDistributionBalanced()
throws Exception
{
long distributionId = createDistribution("distA", 16);
createBucketedTable("testA", distributionId);
createBucketedTable("testB", distributionId);
createAssignments(distributionId, AVAILABLE_WORKERS, 4, 3, 3, 3, 3);
assertBalancing(balancer, 0);
}
@Test
public void testSingleDistributionUnbalancedWithDeadNode()
throws Exception
{
long distributionId = createDistribution("distA", 16);
createBucketedTable("testA", distributionId);
createBucketedTable("testB", distributionId);
ImmutableList<String> nodes = ImmutableList.<String>builder().addAll(AVAILABLE_WORKERS).add("node6").build();
createAssignments(distributionId, nodes, 11, 1, 1, 1, 1, 1);
assertBalancing(balancer, 8);
}
@Test
public void testSingleDistributionUnbalancedWithNewNode()
throws Exception
{
long distributionId = createDistribution("distA", 16);
createBucketedTable("testA", distributionId);
createBucketedTable("testB", distributionId);
createAssignments(distributionId, AVAILABLE_WORKERS, 12, 1, 1, 1, 1);
nodeManager.addNode(createTestingNode("node6"));
assertBalancing(balancer, 9);
}
@Test
public void testMultipleDistributionUnbalanced()
throws Exception
{
long distributionA = createDistribution("distA", 17);
createBucketedTable("testA", distributionA);
createAssignments(distributionA, AVAILABLE_WORKERS, 11, 3, 1, 1, 1);
long distributionB = createDistribution("distB", 10);
createBucketedTable("testB", distributionB);
createAssignments(distributionB, AVAILABLE_WORKERS, 8, 2, 0, 0, 0);
long distributionC = createDistribution("distC", 4);
createBucketedTable("testC", distributionC);
createAssignments(distributionC, AVAILABLE_WORKERS, 2, 2, 0, 0, 0);
assertBalancing(balancer, 15);
}
@Test
public void testMultipleDistributionUnbalancedWithDiskSpace()
throws Exception
{
long distributionA = createDistribution("distA", 4);
createBucketedTable("testA", distributionA, DataSize.valueOf("4B"));
createAssignments(distributionA, AVAILABLE_WORKERS, 1, 1, 1, 1, 0);
long distributionB = createDistribution("distB", 4);
createBucketedTable("testB", distributionB, DataSize.valueOf("4B"));
createAssignments(distributionB, AVAILABLE_WORKERS, 1, 1, 1, 0, 1);
long distributionC = createDistribution("distC", 2);
createBucketedTable("testC", distributionC, DataSize.valueOf("2B"));
createAssignments(distributionC, AVAILABLE_WORKERS, 0, 0, 0, 2, 0);
assertBalancing(balancer, 1);
assertEquals(balancer.fetchClusterState().getAssignedBytes().values()
.stream()
.distinct()
.count(), 1);
}
@Test
public void testMultipleDistributionUnbalancedWithDiskSpace2()
throws Exception
{
long distributionA = createDistribution("distA", 4);
createBucketedTable("testA", distributionA, DataSize.valueOf("4B"));
createAssignments(distributionA, AVAILABLE_WORKERS, 1, 1, 1, 1, 0);
long distributionB = createDistribution("distB", 4);
createBucketedTable("testB", distributionB, DataSize.valueOf("4B"));
createAssignments(distributionB, AVAILABLE_WORKERS, 2, 1, 1, 0, 0);
assertBalancing(balancer, 1);
}
@Test
public void testMultipleDistributionUnbalancedWorstCase()
throws Exception
{
// we will end up with only one bucket on node1
long distributionA = createDistribution("distA", 4);
createBucketedTable("testA", distributionA, DataSize.valueOf("4B"));
createAssignments(distributionA, AVAILABLE_WORKERS, 4, 0, 0, 0, 0);
long distributionB = createDistribution("distB", 4);
createBucketedTable("testB", distributionB, DataSize.valueOf("4B"));
createAssignments(distributionB, AVAILABLE_WORKERS, 4, 0, 0, 0, 0);
long distributionC = createDistribution("distC", 4);
createBucketedTable("testC", distributionC, DataSize.valueOf("4B"));
createAssignments(distributionC, AVAILABLE_WORKERS, 4, 0, 0, 0, 0);
long distributionD = createDistribution("distD", 4);
createBucketedTable("testD", distributionD, DataSize.valueOf("4B"));
createAssignments(distributionD, AVAILABLE_WORKERS, 4, 0, 0, 0, 0);
long distributionE = createDistribution("distE", 4);
createBucketedTable("testE", distributionE, DataSize.valueOf("4B"));
createAssignments(distributionE, AVAILABLE_WORKERS, 4, 0, 0, 0, 0);
assertBalancing(balancer, 15);
}
private static void assertBalancing(BucketBalancer balancer, int expectedMoves)
{
int actualMoves = balancer.balance();
assertEquals(actualMoves, expectedMoves);
// check that number of buckets per node is within bounds
ClusterState clusterState = balancer.fetchClusterState();
for (Distribution distribution : clusterState.getDistributionAssignments().keySet()) {
Multiset<String> allocationCounts = HashMultiset.create();
clusterState.getDistributionAssignments().get(distribution).stream()
.map(BucketAssignment::getNodeIdentifier)
.forEach(allocationCounts::add);
double bucketsPerNode = (1.0 * allocationCounts.size()) / clusterState.getActiveNodes().size();
for (String node : allocationCounts) {
assertGreaterThanOrEqual(allocationCounts.count(node), (int) Math.floor(bucketsPerNode), node + " has fewer buckets than expected");
assertLessThanOrEqual(allocationCounts.count(node), (int) Math.ceil(bucketsPerNode), node + " has more buckets than expected");
}
}
// check stability
assertEquals(balancer.balance(), 0);
}
private long createDistribution(String distributionName, int bucketCount)
{
MetadataDao dao = dbi.onDemand(MetadataDao.class);
long distributionId = dao.insertDistribution(distributionName, serializeColumnTypes(ImmutableList.of(BIGINT)), bucketCount);
shardManager.createBuckets(distributionId, bucketCount);
return distributionId;
}
private long createBucketedTable(String tableName, long distributionId)
{
return createBucketedTable(tableName, distributionId, DataSize.valueOf("0B"));
}
private long createBucketedTable(String tableName, long distributionId, DataSize compressedSize)
{
MetadataDao dao = dbi.onDemand(MetadataDao.class);
long tableId = dao.insertTable("test", tableName, false, false, distributionId, 0);
List<ColumnInfo> columnsA = ImmutableList.of(new ColumnInfo(1, BIGINT));
shardManager.createTable(tableId, columnsA, false, OptionalLong.empty());
metadataDao.updateTableStats(tableId, 1024, 1024 * 1024 * 1024, compressedSize.toBytes(), compressedSize.toBytes() * 2);
return tableId;
}
private List<BucketNode> createAssignments(long distributionId, List<String> nodes, int... buckets)
{
checkArgument(nodes.size() == buckets.length);
ImmutableList.Builder<BucketNode> assignments = ImmutableList.builder();
int bucketNumber = 0;
for (int i = 0; i < buckets.length; i++) {
for (int j = 0; j < buckets[i]; j++) {
shardManager.updateBucketAssignment(distributionId, bucketNumber, nodes.get(i));
assignments.add(bucketNode(bucketNumber, nodes.get(i)));
bucketNumber++;
}
}
return assignments.build();
}
private static BucketNode bucketNode(int bucketNumber, String nodeIdentifier)
{
return new BucketNode(bucketNumber, nodeIdentifier);
}
private static Node createTestingNode(String nodeIdentifier)
{
return new PrestoNode(nodeIdentifier, URI.create("http://test"), NodeVersion.UNKNOWN, false);
}
}