/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.executiongraph.failover;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.JobManagerOptions;
import org.apache.flink.metrics.groups.UnregisteredMetricsGroup;
import org.apache.flink.runtime.JobException;
import org.apache.flink.runtime.checkpoint.StandaloneCheckpointRecoveryFactory;
import org.apache.flink.runtime.client.JobExecutionException;
import org.apache.flink.runtime.executiongraph.ExecutionGraph;
import org.apache.flink.runtime.executiongraph.ExecutionGraphBuilder;
import org.apache.flink.runtime.executiongraph.ExecutionVertex;
import org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy;
import org.apache.flink.runtime.instance.SlotProvider;
import org.apache.flink.runtime.io.network.partition.ResultPartitionType;
import org.apache.flink.runtime.jobgraph.DistributionPattern;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobgraph.JobVertex;
import org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup;
import org.apache.flink.runtime.testingUtils.TestingUtils;
import org.apache.flink.runtime.testtasks.NoOpInvokable;
import org.apache.flink.util.TestLogger;
import org.junit.Test;
import java.util.Iterator;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.mock;
/**
* Tests that make sure that the building of pipelined connected failover regions works
* correctly.
*/
public class PipelinedFailoverRegionBuildingTest extends TestLogger {
/**
* Tests that validates that a graph with single unconnected vertices works correctly.
*
* <pre>
* (v1)
*
* (v2)
*
* (v3)
*
* ...
* </pre>
*/
@Test
public void testIndividualVertices() throws Exception {
final JobVertex source1 = new JobVertex("source1");
source1.setInvokableClass(NoOpInvokable.class);
source1.setParallelism(2);
final JobVertex source2 = new JobVertex("source2");
source2.setInvokableClass(NoOpInvokable.class);
source2.setParallelism(2);
final JobGraph jobGraph = new JobGraph("test job", source1, source2);
final ExecutionGraph eg = createExecutionGraph(jobGraph);
RestartPipelinedRegionStrategy failoverStrategy = (RestartPipelinedRegionStrategy) eg.getFailoverStrategy();
FailoverRegion sourceRegion11 = failoverStrategy.getFailoverRegion(eg.getJobVertex(source1.getID()).getTaskVertices()[0]);
FailoverRegion sourceRegion12 = failoverStrategy.getFailoverRegion(eg.getJobVertex(source1.getID()).getTaskVertices()[1]);
FailoverRegion targetRegion21 = failoverStrategy.getFailoverRegion(eg.getJobVertex(source2.getID()).getTaskVertices()[0]);
FailoverRegion targetRegion22 = failoverStrategy.getFailoverRegion(eg.getJobVertex(source2.getID()).getTaskVertices()[1]);
assertTrue(sourceRegion11 != sourceRegion12);
assertTrue(sourceRegion12 != targetRegion21);
assertTrue(targetRegion21 != targetRegion22);
}
/**
* Tests that validates that embarrassingly parallel chains of vertices work correctly.
*
* <pre>
* (a1) --> (b1)
*
* (a2) --> (b2)
*
* (a3) --> (b3)
*
* ...
* </pre>
*/
@Test
public void testEmbarrassinglyParallelCase() throws Exception {
int parallelism = 10000;
final JobVertex vertex1 = new JobVertex("vertex1");
vertex1.setInvokableClass(NoOpInvokable.class);
vertex1.setParallelism(parallelism);
final JobVertex vertex2 = new JobVertex("vertex2");
vertex2.setInvokableClass(NoOpInvokable.class);
vertex2.setParallelism(parallelism);
final JobVertex vertex3 = new JobVertex("vertex3");
vertex3.setInvokableClass(NoOpInvokable.class);
vertex3.setParallelism(parallelism);
vertex2.connectNewDataSetAsInput(vertex1, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);
vertex3.connectNewDataSetAsInput(vertex2, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);
final JobGraph jobGraph = new JobGraph("test job", vertex1, vertex2, vertex3);
final ExecutionGraph eg = createExecutionGraph(jobGraph);
RestartPipelinedRegionStrategy failoverStrategy = (RestartPipelinedRegionStrategy) eg.getFailoverStrategy();
FailoverRegion preRegion1 = failoverStrategy.getFailoverRegion(eg.getJobVertex(vertex1.getID()).getTaskVertices()[0]);
FailoverRegion preRegion2 = failoverStrategy.getFailoverRegion(eg.getJobVertex(vertex2.getID()).getTaskVertices()[0]);
FailoverRegion preRegion3 = failoverStrategy.getFailoverRegion(eg.getJobVertex(vertex3.getID()).getTaskVertices()[0]);
assertTrue(preRegion1 == preRegion2);
assertTrue(preRegion2 == preRegion3);
for (int i = 1; i < parallelism; ++i) {
FailoverRegion region1 = failoverStrategy.getFailoverRegion(eg.getJobVertex(vertex1.getID()).getTaskVertices()[i]);
FailoverRegion region2 = failoverStrategy.getFailoverRegion(eg.getJobVertex(vertex2.getID()).getTaskVertices()[i]);
FailoverRegion region3 = failoverStrategy.getFailoverRegion(eg.getJobVertex(vertex3.getID()).getTaskVertices()[i]);
assertTrue(region1 == region2);
assertTrue(region2 == region3);
assertTrue(preRegion1 != region1);
}
}
/**
* Tests that validates that a single pipelined component via a sequence of all-to-all
* connections works correctly.
*
* <pre>
* (a1) -+-> (b1) -+-> (c1)
* X X
* (a2) -+-> (b2) -+-> (c2)
* X X
* (a3) -+-> (b3) -+-> (c3)
*
* ...
* </pre>
*/
@Test
public void testOneComponentViaTwoExchanges() throws Exception {
final JobVertex vertex1 = new JobVertex("vertex1");
vertex1.setInvokableClass(NoOpInvokable.class);
vertex1.setParallelism(3);
final JobVertex vertex2 = new JobVertex("vertex2");
vertex2.setInvokableClass(NoOpInvokable.class);
vertex2.setParallelism(5);
final JobVertex vertex3 = new JobVertex("vertex3");
vertex3.setInvokableClass(NoOpInvokable.class);
vertex3.setParallelism(2);
vertex2.connectNewDataSetAsInput(vertex1, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
vertex3.connectNewDataSetAsInput(vertex2, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
final JobGraph jobGraph = new JobGraph("test job", vertex1, vertex2, vertex3);
final ExecutionGraph eg = createExecutionGraph(jobGraph);
RestartPipelinedRegionStrategy failoverStrategy = (RestartPipelinedRegionStrategy) eg.getFailoverStrategy();
FailoverRegion region1 = failoverStrategy.getFailoverRegion(eg.getJobVertex(vertex1.getID()).getTaskVertices()[1]);
FailoverRegion region2 = failoverStrategy.getFailoverRegion(eg.getJobVertex(vertex2.getID()).getTaskVertices()[4]);
FailoverRegion region3 = failoverStrategy.getFailoverRegion(eg.getJobVertex(vertex3.getID()).getTaskVertices()[0]);
assertTrue(region1 == region2);
assertTrue(region2 == region3);
}
/**
* Tests that validates that a single pipelined component via a cascade of joins
* works correctly.
*
* <p>Non-parallelized view:
* <pre>
* (1)--+
* +--(5)-+
* (2)--+ |
* +--(7)
* (3)--+ |
* +--(6)-+
* (4)--+
* ...
* </pre>
*/
@Test
public void testOneComponentViaCascadeOfJoins() throws Exception {
final JobVertex vertex1 = new JobVertex("vertex1");
vertex1.setInvokableClass(NoOpInvokable.class);
vertex1.setParallelism(8);
final JobVertex vertex2 = new JobVertex("vertex2");
vertex2.setInvokableClass(NoOpInvokable.class);
vertex2.setParallelism(8);
final JobVertex vertex3 = new JobVertex("vertex3");
vertex3.setInvokableClass(NoOpInvokable.class);
vertex3.setParallelism(8);
final JobVertex vertex4 = new JobVertex("vertex4");
vertex4.setInvokableClass(NoOpInvokable.class);
vertex4.setParallelism(8);
final JobVertex vertex5 = new JobVertex("vertex5");
vertex5.setInvokableClass(NoOpInvokable.class);
vertex5.setParallelism(4);
final JobVertex vertex6 = new JobVertex("vertex6");
vertex6.setInvokableClass(NoOpInvokable.class);
vertex6.setParallelism(4);
final JobVertex vertex7 = new JobVertex("vertex7");
vertex7.setInvokableClass(NoOpInvokable.class);
vertex7.setParallelism(2);
vertex5.connectNewDataSetAsInput(vertex1, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
vertex5.connectNewDataSetAsInput(vertex2, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
vertex6.connectNewDataSetAsInput(vertex3, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
vertex6.connectNewDataSetAsInput(vertex4, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
vertex7.connectNewDataSetAsInput(vertex5, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
vertex7.connectNewDataSetAsInput(vertex6, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
final JobGraph jobGraph = new JobGraph("test job", vertex1, vertex2, vertex3, vertex4, vertex5, vertex6, vertex7);
final ExecutionGraph eg = createExecutionGraph(jobGraph);
RestartPipelinedRegionStrategy failoverStrategy = (RestartPipelinedRegionStrategy) eg.getFailoverStrategy();
Iterator<ExecutionVertex> evs = eg.getAllExecutionVertices().iterator();
FailoverRegion preRegion = failoverStrategy.getFailoverRegion(evs.next());
while (evs.hasNext()) {
FailoverRegion region = failoverStrategy.getFailoverRegion(evs.next());
assertTrue(preRegion == region);
}
}
/**
* Tests that validates that a single pipelined component instance from one source
* works correctly.
*
* <p>Non-parallelized view:
* <pre>
* +--(1)
* +--(5)-+
* | +--(2)
* (7)--+
* | +--(3)
* +--(6)-+
* +--(4)
* ...
* </pre>
*/
@Test
public void testOneComponentInstanceFromOneSource() throws Exception {
final JobVertex vertex1 = new JobVertex("vertex1");
vertex1.setInvokableClass(NoOpInvokable.class);
vertex1.setParallelism(8);
final JobVertex vertex2 = new JobVertex("vertex2");
vertex2.setInvokableClass(NoOpInvokable.class);
vertex2.setParallelism(8);
final JobVertex vertex3 = new JobVertex("vertex3");
vertex3.setInvokableClass(NoOpInvokable.class);
vertex3.setParallelism(8);
final JobVertex vertex4 = new JobVertex("vertex4");
vertex4.setInvokableClass(NoOpInvokable.class);
vertex4.setParallelism(8);
final JobVertex vertex5 = new JobVertex("vertex5");
vertex5.setInvokableClass(NoOpInvokable.class);
vertex5.setParallelism(4);
final JobVertex vertex6 = new JobVertex("vertex6");
vertex6.setInvokableClass(NoOpInvokable.class);
vertex6.setParallelism(4);
final JobVertex vertex7 = new JobVertex("vertex7");
vertex7.setInvokableClass(NoOpInvokable.class);
vertex7.setParallelism(2);
vertex1.connectNewDataSetAsInput(vertex5, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
vertex2.connectNewDataSetAsInput(vertex5, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
vertex3.connectNewDataSetAsInput(vertex6, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
vertex4.connectNewDataSetAsInput(vertex6, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
vertex5.connectNewDataSetAsInput(vertex7, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
vertex6.connectNewDataSetAsInput(vertex7, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
final JobGraph jobGraph = new JobGraph("test job", vertex7, vertex5, vertex6, vertex1, vertex2, vertex3, vertex4);
final ExecutionGraph eg = createExecutionGraph(jobGraph);
RestartPipelinedRegionStrategy failoverStrategy = (RestartPipelinedRegionStrategy) eg.getFailoverStrategy();
Iterator<ExecutionVertex> evs = eg.getAllExecutionVertices().iterator();
FailoverRegion preRegion = failoverStrategy.getFailoverRegion(evs.next());
while (evs.hasNext()) {
FailoverRegion region = failoverStrategy.getFailoverRegion(evs.next());
assertTrue(preRegion == region);
}
}
/**
* <pre>
* (a1) -+-> (b1) -+-> (c1)
* X
* (a2) -+-> (b2) -+-> (c2)
* X
* (a3) -+-> (b3) -+-> (c3)
*
* ^ ^
* | |
* (pipelined) (blocking)
*
* </pre>
*/
@Test
public void testTwoComponentsViaBlockingExchange() throws Exception {
final JobVertex vertex1 = new JobVertex("vertex1");
vertex1.setInvokableClass(NoOpInvokable.class);
vertex1.setParallelism(3);
final JobVertex vertex2 = new JobVertex("vertex2");
vertex2.setInvokableClass(NoOpInvokable.class);
vertex2.setParallelism(2);
final JobVertex vertex3 = new JobVertex("vertex3");
vertex3.setInvokableClass(NoOpInvokable.class);
vertex3.setParallelism(2);
vertex2.connectNewDataSetAsInput(vertex1, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
vertex3.connectNewDataSetAsInput(vertex2, DistributionPattern.POINTWISE, ResultPartitionType.BLOCKING);
final JobGraph jobGraph = new JobGraph("test job", vertex1, vertex2, vertex3);
final ExecutionGraph eg = createExecutionGraph(jobGraph);
RestartPipelinedRegionStrategy failoverStrategy = (RestartPipelinedRegionStrategy) eg.getFailoverStrategy();
FailoverRegion region1 = failoverStrategy.getFailoverRegion(eg.getJobVertex(vertex1.getID()).getTaskVertices()[1]);
FailoverRegion region2 = failoverStrategy.getFailoverRegion(eg.getJobVertex(vertex2.getID()).getTaskVertices()[0]);
FailoverRegion region31 = failoverStrategy.getFailoverRegion(eg.getJobVertex(vertex3.getID()).getTaskVertices()[0]);
FailoverRegion region32 = failoverStrategy.getFailoverRegion(eg.getJobVertex(vertex3.getID()).getTaskVertices()[1]);
assertTrue(region1 == region2);
assertTrue(region2 != region31);
assertTrue(region32 != region31);
}
/**
* <pre>
* (a1) -+-> (b1) -+-> (c1)
* X X
* (a2) -+-> (b2) -+-> (c2)
* X X
* (a3) -+-> (b3) -+-> (c3)
*
* ^ ^
* | |
* (pipelined) (blocking)
* </pre>
*/
@Test
public void testTwoComponentsViaBlockingExchange2() throws Exception {
final JobVertex vertex1 = new JobVertex("vertex1");
vertex1.setInvokableClass(NoOpInvokable.class);
vertex1.setParallelism(3);
final JobVertex vertex2 = new JobVertex("vertex2");
vertex2.setInvokableClass(NoOpInvokable.class);
vertex2.setParallelism(2);
final JobVertex vertex3 = new JobVertex("vertex3");
vertex3.setInvokableClass(NoOpInvokable.class);
vertex3.setParallelism(2);
vertex2.connectNewDataSetAsInput(vertex1, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
vertex3.connectNewDataSetAsInput(vertex2, DistributionPattern.ALL_TO_ALL, ResultPartitionType.BLOCKING);
final JobGraph jobGraph = new JobGraph("test job", vertex1, vertex2, vertex3);
final ExecutionGraph eg = createExecutionGraph(jobGraph);
RestartPipelinedRegionStrategy failoverStrategy = (RestartPipelinedRegionStrategy) eg.getFailoverStrategy();
FailoverRegion region1 = failoverStrategy.getFailoverRegion(eg.getJobVertex(vertex1.getID()).getTaskVertices()[1]);
FailoverRegion region2 = failoverStrategy.getFailoverRegion(eg.getJobVertex(vertex2.getID()).getTaskVertices()[0]);
FailoverRegion region31 = failoverStrategy.getFailoverRegion(eg.getJobVertex(vertex3.getID()).getTaskVertices()[0]);
FailoverRegion region32 = failoverStrategy.getFailoverRegion(eg.getJobVertex(vertex3.getID()).getTaskVertices()[1]);
assertTrue(region1 == region2);
assertTrue(region2 != region31);
assertTrue(region32 != region31);
}
/**
* Cascades of joins with partially blocking, partially pipelined exchanges:
* <pre>
* (1)--+
* +--(5)-+
* (2)--+ |
* (block)
* |
* +--(7)
* |
* (block)
* (3)--+ |
* +--(6)-+
* (4)--+
* ...
* </pre>
*
* Component 1: 1, 2, 5; component 2: 3,4,6; component 3: 7
*/
@Test
public void testMultipleComponentsViaCascadeOfJoins() throws Exception {
final JobVertex vertex1 = new JobVertex("vertex1");
vertex1.setInvokableClass(NoOpInvokable.class);
vertex1.setParallelism(8);
final JobVertex vertex2 = new JobVertex("vertex2");
vertex2.setInvokableClass(NoOpInvokable.class);
vertex2.setParallelism(8);
final JobVertex vertex3 = new JobVertex("vertex3");
vertex3.setInvokableClass(NoOpInvokable.class);
vertex3.setParallelism(8);
final JobVertex vertex4 = new JobVertex("vertex4");
vertex4.setInvokableClass(NoOpInvokable.class);
vertex4.setParallelism(8);
final JobVertex vertex5 = new JobVertex("vertex5");
vertex5.setInvokableClass(NoOpInvokable.class);
vertex5.setParallelism(4);
final JobVertex vertex6 = new JobVertex("vertex6");
vertex6.setInvokableClass(NoOpInvokable.class);
vertex6.setParallelism(4);
final JobVertex vertex7 = new JobVertex("vertex7");
vertex7.setInvokableClass(NoOpInvokable.class);
vertex7.setParallelism(2);
vertex5.connectNewDataSetAsInput(vertex1, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
vertex5.connectNewDataSetAsInput(vertex2, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
vertex6.connectNewDataSetAsInput(vertex3, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
vertex6.connectNewDataSetAsInput(vertex4, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
vertex7.connectNewDataSetAsInput(vertex5, DistributionPattern.ALL_TO_ALL, ResultPartitionType.BLOCKING);
vertex7.connectNewDataSetAsInput(vertex6, DistributionPattern.ALL_TO_ALL, ResultPartitionType.BLOCKING);
final JobGraph jobGraph = new JobGraph("test job", vertex1, vertex2, vertex3, vertex4, vertex5, vertex6, vertex7);
final ExecutionGraph eg = createExecutionGraph(jobGraph);
RestartPipelinedRegionStrategy failoverStrategy = (RestartPipelinedRegionStrategy) eg.getFailoverStrategy();
FailoverRegion region1 = failoverStrategy.getFailoverRegion(eg.getJobVertex(vertex1.getID()).getTaskVertices()[0]);
FailoverRegion region2 = failoverStrategy.getFailoverRegion(eg.getJobVertex(vertex2.getID()).getTaskVertices()[5]);
FailoverRegion region5 = failoverStrategy.getFailoverRegion(eg.getJobVertex(vertex5.getID()).getTaskVertices()[2]);
assertTrue(region1 == region2);
assertTrue(region1 == region5);
FailoverRegion region3 = failoverStrategy.getFailoverRegion(eg.getJobVertex(vertex3.getID()).getTaskVertices()[0]);
FailoverRegion region4 = failoverStrategy.getFailoverRegion(eg.getJobVertex(vertex4.getID()).getTaskVertices()[5]);
FailoverRegion region6 = failoverStrategy.getFailoverRegion(eg.getJobVertex(vertex6.getID()).getTaskVertices()[2]);
assertTrue(region3 == region4);
assertTrue(region3 == region6);
FailoverRegion region71 = failoverStrategy.getFailoverRegion(eg.getJobVertex(vertex7.getID()).getTaskVertices()[0]);
FailoverRegion region72 = failoverStrategy.getFailoverRegion(eg.getJobVertex(vertex7.getID()).getTaskVertices()[1]);
assertTrue(region71 != region72);
assertTrue(region1 != region71);
assertTrue(region1 != region72);
assertTrue(region3 != region71);
assertTrue(region3 != region72);
}
@Test
public void testDiamondWithMixedPipelinedAndBlockingExchanges() throws Exception {
final JobVertex vertex1 = new JobVertex("vertex1");
vertex1.setInvokableClass(NoOpInvokable.class);
vertex1.setParallelism(8);
final JobVertex vertex2 = new JobVertex("vertex2");
vertex2.setInvokableClass(NoOpInvokable.class);
vertex2.setParallelism(8);
final JobVertex vertex3 = new JobVertex("vertex3");
vertex3.setInvokableClass(NoOpInvokable.class);
vertex3.setParallelism(8);
final JobVertex vertex4 = new JobVertex("vertex4");
vertex4.setInvokableClass(NoOpInvokable.class);
vertex4.setParallelism(8);
vertex2.connectNewDataSetAsInput(vertex1, DistributionPattern.ALL_TO_ALL, ResultPartitionType.BLOCKING);
vertex3.connectNewDataSetAsInput(vertex1, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
vertex4.connectNewDataSetAsInput(vertex2, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
vertex4.connectNewDataSetAsInput(vertex3, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
final JobGraph jobGraph = new JobGraph("test job", vertex1, vertex2, vertex3, vertex4);
final ExecutionGraph eg = createExecutionGraph(jobGraph);
RestartPipelinedRegionStrategy failoverStrategy = (RestartPipelinedRegionStrategy) eg.getFailoverStrategy();
Iterator<ExecutionVertex> evs = eg.getAllExecutionVertices().iterator();
FailoverRegion preRegion = failoverStrategy.getFailoverRegion(evs.next());
while (evs.hasNext()) {
FailoverRegion region = failoverStrategy.getFailoverRegion(evs.next());
assertTrue(preRegion == region);
}
}
/**
* This test checks that are strictly co-located vertices are in the same failover region,
* even through they are connected via a blocking pattern.
* This is currently an assumption / limitation of the scheduler.
*/
@Test
public void testBlockingAllToAllTopologyWithCoLocation() throws Exception {
final JobVertex source = new JobVertex("source");
source.setInvokableClass(NoOpInvokable.class);
source.setParallelism(10);
final JobVertex target = new JobVertex("target");
target.setInvokableClass(NoOpInvokable.class);
target.setParallelism(13);
target.connectNewDataSetAsInput(source, DistributionPattern.ALL_TO_ALL, ResultPartitionType.BLOCKING);
final SlotSharingGroup sharingGroup = new SlotSharingGroup();
source.setSlotSharingGroup(sharingGroup);
target.setSlotSharingGroup(sharingGroup);
source.setStrictlyCoLocatedWith(target);
final JobGraph jobGraph = new JobGraph("test job", source, target);
final ExecutionGraph eg = createExecutionGraph(jobGraph);
RestartPipelinedRegionStrategy failoverStrategy = (RestartPipelinedRegionStrategy) eg.getFailoverStrategy();
FailoverRegion region1 = failoverStrategy.getFailoverRegion(eg.getJobVertex(source.getID()).getTaskVertices()[0]);
FailoverRegion region2 = failoverStrategy.getFailoverRegion(eg.getJobVertex(target.getID()).getTaskVertices()[0]);
// we use 'assertTrue' here rather than 'assertEquals' because we want to test
// for referential equality, to be on the safe side
assertTrue(region1 == region2);
}
/**
* This test checks that are strictly co-located vertices are in the same failover region,
* even through they are connected via a blocking pattern.
* This is currently an assumption / limitation of the scheduler.
*/
@Test
public void testPipelinedOneToOneTopologyWithCoLocation() throws Exception {
final JobVertex source = new JobVertex("source");
source.setInvokableClass(NoOpInvokable.class);
source.setParallelism(10);
final JobVertex target = new JobVertex("target");
target.setInvokableClass(NoOpInvokable.class);
target.setParallelism(10);
target.connectNewDataSetAsInput(source, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);
final SlotSharingGroup sharingGroup = new SlotSharingGroup();
source.setSlotSharingGroup(sharingGroup);
target.setSlotSharingGroup(sharingGroup);
source.setStrictlyCoLocatedWith(target);
final JobGraph jobGraph = new JobGraph("test job", source, target);
final ExecutionGraph eg = createExecutionGraph(jobGraph);
RestartPipelinedRegionStrategy failoverStrategy = (RestartPipelinedRegionStrategy) eg.getFailoverStrategy();
FailoverRegion sourceRegion1 = failoverStrategy.getFailoverRegion(eg.getJobVertex(source.getID()).getTaskVertices()[0]);
FailoverRegion sourceRegion2 = failoverStrategy.getFailoverRegion(eg.getJobVertex(source.getID()).getTaskVertices()[1]);
FailoverRegion targetRegion1 = failoverStrategy.getFailoverRegion(eg.getJobVertex(target.getID()).getTaskVertices()[0]);
FailoverRegion targetRegion2 = failoverStrategy.getFailoverRegion(eg.getJobVertex(target.getID()).getTaskVertices()[1]);
// we use 'assertTrue' here rather than 'assertEquals' because we want to test
// for referential equality, to be on the safe side
assertTrue(sourceRegion1 == sourceRegion2);
assertTrue(sourceRegion2 == targetRegion1);
assertTrue(targetRegion1 == targetRegion2);
}
// ------------------------------------------------------------------------
// utilities
// ------------------------------------------------------------------------
private ExecutionGraph createExecutionGraph(JobGraph jobGraph) throws JobException, JobExecutionException {
// configure the pipelined failover strategy
final Configuration jobManagerConfig = new Configuration();
jobManagerConfig.setString(
JobManagerOptions.EXECUTION_FAILOVER_STRATEGY,
FailoverStrategyLoader.PIPELINED_REGION_RESTART_STRATEGY_NAME);
return ExecutionGraphBuilder.buildGraph(
null,
jobGraph,
jobManagerConfig,
TestingUtils.defaultExecutor(),
TestingUtils.defaultExecutor(),
mock(SlotProvider.class),
PipelinedFailoverRegionBuildingTest.class.getClassLoader(),
new StandaloneCheckpointRecoveryFactory(),
Time.seconds(10),
new NoRestartStrategy(),
new UnregisteredMetricsGroup(),
1000,
log);
}
}