/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.test.iterative.nephele;
import java.io.BufferedReader;
import java.util.Collection;
import eu.stratosphere.nephele.jobgraph.DistributionPattern;
import eu.stratosphere.runtime.io.channels.ChannelType;
import eu.stratosphere.test.util.RecordAPITestBase;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import eu.stratosphere.api.common.aggregators.LongSumAggregator;
import eu.stratosphere.api.common.operators.util.UserCodeClassWrapper;
import eu.stratosphere.api.common.typeutils.TypeComparatorFactory;
import eu.stratosphere.api.common.typeutils.TypePairComparatorFactory;
import eu.stratosphere.api.common.typeutils.TypeSerializerFactory;
import eu.stratosphere.api.java.record.functions.MapFunction;
import eu.stratosphere.api.java.record.io.CsvInputFormat;
import eu.stratosphere.api.java.record.io.CsvOutputFormat;
import eu.stratosphere.api.java.record.io.FileOutputFormat;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.nephele.jobgraph.JobGraph;
import eu.stratosphere.nephele.jobgraph.JobGraphDefinitionException;
import eu.stratosphere.nephele.jobgraph.JobInputVertex;
import eu.stratosphere.nephele.jobgraph.JobOutputVertex;
import eu.stratosphere.nephele.jobgraph.JobTaskVertex;
import eu.stratosphere.pact.runtime.iterative.convergence.WorksetEmptyConvergenceCriterion;
import eu.stratosphere.pact.runtime.iterative.task.IterationHeadPactTask;
import eu.stratosphere.pact.runtime.iterative.task.IterationIntermediatePactTask;
import eu.stratosphere.pact.runtime.iterative.task.IterationTailPactTask;
import eu.stratosphere.api.java.typeutils.runtime.record.RecordComparatorFactory;
import eu.stratosphere.api.java.typeutils.runtime.record.RecordPairComparatorFactory;
import eu.stratosphere.api.java.typeutils.runtime.record.RecordSerializerFactory;
import eu.stratosphere.pact.runtime.shipping.ShipStrategyType;
import eu.stratosphere.pact.runtime.task.BuildSecondCachedMatchDriver;
import eu.stratosphere.pact.runtime.task.DriverStrategy;
import eu.stratosphere.pact.runtime.task.CollectorMapDriver;
import eu.stratosphere.pact.runtime.task.JoinWithSolutionSetSecondDriver;
import eu.stratosphere.pact.runtime.task.GroupReduceDriver;
import eu.stratosphere.pact.runtime.task.chaining.ChainedCollectorMapDriver;
import eu.stratosphere.pact.runtime.task.util.LocalStrategy;
import eu.stratosphere.pact.runtime.task.util.TaskConfig;
import eu.stratosphere.test.recordJobs.graph.WorksetConnectedComponents.MinimumComponentIDReduce;
import eu.stratosphere.test.recordJobs.graph.WorksetConnectedComponents.NeighborWithComponentIDJoin;
import eu.stratosphere.test.recordJobs.graph.WorksetConnectedComponents.UpdateComponentIdMatch;
import eu.stratosphere.test.testdata.ConnectedComponentsData;
import eu.stratosphere.types.LongValue;
import eu.stratosphere.types.Record;
import eu.stratosphere.util.Collector;
/**
* Tests the various variants of iteration state updates for workset iterations:
* - unified solution set and workset tail update
* - separate solution set and workset tail updates
* - intermediate workset update and solution set tail
* - intermediate solution set update and workset tail
*/
@RunWith(Parameterized.class)
public class ConnectedComponentsNepheleITCase extends RecordAPITestBase {
private static final long SEED = 0xBADC0FFEEBEEFL;
private static final int NUM_VERTICES = 1000;
private static final int NUM_EDGES = 10000;
private static final int ITERATION_ID = 1;
private static final long MEM_PER_CONSUMER = 3;
protected String verticesPath;
protected String edgesPath;
protected String resultPath;
public ConnectedComponentsNepheleITCase(Configuration config) {
super(config);
}
@Parameters
public static Collection<Object[]> getConfigurations() {
Configuration config1 = new Configuration();
config1.setInteger("testcase", 1);
Configuration config2 = new Configuration();
config2.setInteger("testcase", 2);
Configuration config3 = new Configuration();
config3.setInteger("testcase", 3);
Configuration config4 = new Configuration();
config4.setInteger("testcase", 4);
return toParameterList(config1, config2, config3, config4);
}
@Override
protected void preSubmit() throws Exception {
verticesPath = createTempFile("vertices.txt", ConnectedComponentsData.getEnumeratingVertices(NUM_VERTICES));
edgesPath = createTempFile("edges.txt", ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED));
resultPath = getTempFilePath("results");
}
@Override
protected JobGraph getJobGraph() throws Exception {
int dop = 4;
int maxIterations = 100;
int type = config.getInteger("testcase", 0);
switch (type) {
case 1:
return createJobGraphUnifiedTails(verticesPath, edgesPath, resultPath, dop, maxIterations);
case 2:
return createJobGraphSeparateTails(verticesPath, edgesPath, resultPath, dop, maxIterations);
case 3:
return createJobGraphIntermediateWorksetUpdateAndSolutionSetTail(verticesPath, edgesPath, resultPath, dop,
maxIterations);
case 4:
return createJobGraphSolutionSetUpdateAndWorksetTail(verticesPath, edgesPath, resultPath, dop,
maxIterations);
default:
throw new RuntimeException("Broken test configuration");
}
}
@Override
protected void postSubmit() throws Exception {
for (BufferedReader reader : getResultReader(resultPath)) {
ConnectedComponentsData.checkOddEvenResult(reader);
}
}
public static final class IdDuplicator extends MapFunction {
private static final long serialVersionUID = 1L;
@Override
public void map(Record record, Collector<Record> out) throws Exception {
record.setField(1, record.getField(0, LongValue.class));
out.collect(record);
}
}
// -----------------------------------------------------------------------------------------------------------------
// Invariant vertices across all variants
// -----------------------------------------------------------------------------------------------------------------
private static JobInputVertex createVerticesInput(JobGraph jobGraph, String verticesPath, int numSubTasks,
TypeSerializerFactory<?> serializer,
TypeComparatorFactory<?> comparator) {
@SuppressWarnings("unchecked")
CsvInputFormat verticesInFormat = new CsvInputFormat(' ', LongValue.class);
JobInputVertex verticesInput = JobGraphUtils.createInput(verticesInFormat, verticesPath, "VerticesInput",
jobGraph, numSubTasks, numSubTasks);
TaskConfig verticesInputConfig = new TaskConfig(verticesInput.getConfiguration());
{
verticesInputConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
verticesInputConfig.setOutputSerializer(serializer);
// chained mapper that duplicates the id
TaskConfig chainedMapperConfig = new TaskConfig(new Configuration());
chainedMapperConfig.setStubWrapper(new UserCodeClassWrapper<IdDuplicator>(IdDuplicator.class));
chainedMapperConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);
chainedMapperConfig.setInputLocalStrategy(0, LocalStrategy.NONE);
chainedMapperConfig.setInputSerializer(serializer, 0);
chainedMapperConfig.setOutputSerializer(serializer);
chainedMapperConfig.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH);
chainedMapperConfig.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH);
chainedMapperConfig.setOutputComparator(comparator, 0);
chainedMapperConfig.setOutputComparator(comparator, 1);
verticesInputConfig.addChainedTask(ChainedCollectorMapDriver.class, chainedMapperConfig, "ID Duplicator");
}
return verticesInput;
}
private static JobInputVertex createEdgesInput(JobGraph jobGraph, String edgesPath, int numSubTasks,
TypeSerializerFactory<?> serializer,
TypeComparatorFactory<?> comparator) {
// edges
@SuppressWarnings("unchecked")
CsvInputFormat edgesInFormat = new CsvInputFormat(' ', LongValue.class, LongValue.class);
JobInputVertex edgesInput = JobGraphUtils.createInput(edgesInFormat, edgesPath, "EdgesInput", jobGraph,
numSubTasks, numSubTasks);
TaskConfig edgesInputConfig = new TaskConfig(edgesInput.getConfiguration());
{
edgesInputConfig.setOutputSerializer(serializer);
edgesInputConfig.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH);
edgesInputConfig.setOutputComparator(comparator, 0);
}
return edgesInput;
}
private static JobTaskVertex createIterationHead(JobGraph jobGraph, int numSubTasks,
TypeSerializerFactory<?> serializer,
TypeComparatorFactory<?> comparator,
TypePairComparatorFactory<?, ?> pairComparator) {
JobTaskVertex head = JobGraphUtils.createTask(IterationHeadPactTask.class, "Join With Edges (Iteration Head)",
jobGraph, numSubTasks, numSubTasks);
TaskConfig headConfig = new TaskConfig(head.getConfiguration());
{
headConfig.setIterationId(ITERATION_ID);
// initial input / workset
headConfig.addInputToGroup(0);
headConfig.setInputSerializer(serializer, 0);
headConfig.setInputComparator(comparator, 0);
headConfig.setInputLocalStrategy(0, LocalStrategy.NONE);
headConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(0);
// regular plan input (second input to the join)
headConfig.addInputToGroup(1);
headConfig.setInputSerializer(serializer, 1);
headConfig.setInputComparator(comparator, 1);
headConfig.setInputLocalStrategy(1, LocalStrategy.NONE);
headConfig.setInputCached(1, true);
headConfig.setInputMaterializationMemory(1, MEM_PER_CONSUMER * JobGraphUtils.MEGABYTE);
// initial solution set input
headConfig.addInputToGroup(2);
headConfig.setInputSerializer(serializer, 2);
headConfig.setInputComparator(comparator, 2);
headConfig.setInputLocalStrategy(2, LocalStrategy.NONE);
headConfig.setIterationHeadSolutionSetInputIndex(2);
headConfig.setSolutionSetSerializer(serializer);
headConfig.setSolutionSetComparator(comparator);
// back channel / iterations
headConfig.setIsWorksetIteration();
headConfig.setBackChannelMemory(MEM_PER_CONSUMER * JobGraphUtils.MEGABYTE);
headConfig.setSolutionSetMemory(MEM_PER_CONSUMER * JobGraphUtils.MEGABYTE);
// output into iteration
headConfig.setOutputSerializer(serializer);
headConfig.addOutputShipStrategy(ShipStrategyType.PARTITION_HASH);
headConfig.setOutputComparator(comparator, 0);
// final output
TaskConfig headFinalOutConfig = new TaskConfig(new Configuration());
headFinalOutConfig.setOutputSerializer(serializer);
headFinalOutConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
headConfig.setIterationHeadFinalOutputConfig(headFinalOutConfig);
// the sync
headConfig.setIterationHeadIndexOfSyncOutput(2);
// the driver
headConfig.setDriver(BuildSecondCachedMatchDriver.class);
headConfig.setDriverStrategy(DriverStrategy.HYBRIDHASH_BUILD_SECOND);
headConfig.setStubWrapper(
new UserCodeClassWrapper<NeighborWithComponentIDJoin>(NeighborWithComponentIDJoin.class));
headConfig.setDriverComparator(comparator, 0);
headConfig.setDriverComparator(comparator, 1);
headConfig.setDriverPairComparator(pairComparator);
headConfig.setMemoryDriver(MEM_PER_CONSUMER * JobGraphUtils.MEGABYTE);
headConfig.addIterationAggregator(
WorksetEmptyConvergenceCriterion.AGGREGATOR_NAME, LongSumAggregator.class);
}
return head;
}
private static JobTaskVertex createIterationIntermediate(JobGraph jobGraph, int numSubTasks,
TypeSerializerFactory<?> serializer,
TypeComparatorFactory<?> comparator) {
// --------------- the intermediate (reduce to min id) ---------------
JobTaskVertex intermediate = JobGraphUtils.createTask(IterationIntermediatePactTask.class,
"Find Min Component-ID", jobGraph, numSubTasks, numSubTasks);
TaskConfig intermediateConfig = new TaskConfig(intermediate.getConfiguration());
{
intermediateConfig.setIterationId(ITERATION_ID);
intermediateConfig.addInputToGroup(0);
intermediateConfig.setInputSerializer(serializer, 0);
intermediateConfig.setInputComparator(comparator, 0);
intermediateConfig.setInputLocalStrategy(0, LocalStrategy.SORT);
intermediateConfig.setMemoryInput(0, MEM_PER_CONSUMER * JobGraphUtils.MEGABYTE);
intermediateConfig.setFilehandlesInput(0, 64);
intermediateConfig.setSpillingThresholdInput(0, 0.85f);
intermediateConfig.setOutputSerializer(serializer);
intermediateConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
intermediateConfig.setDriver(GroupReduceDriver.class);
intermediateConfig.setDriverStrategy(DriverStrategy.SORTED_GROUP_REDUCE);
intermediateConfig.setDriverComparator(comparator, 0);
intermediateConfig.setStubWrapper(
new UserCodeClassWrapper<MinimumComponentIDReduce>(MinimumComponentIDReduce.class));
}
return intermediate;
}
private static JobOutputVertex createOutput(JobGraph jobGraph, String resultPath, int numSubTasks,
TypeSerializerFactory<?> serializer) {
JobOutputVertex output = JobGraphUtils.createFileOutput(jobGraph, "Final Output", numSubTasks, numSubTasks);
TaskConfig outputConfig = new TaskConfig(output.getConfiguration());
{
outputConfig.addInputToGroup(0);
outputConfig.setInputSerializer(serializer, 0);
outputConfig.setStubWrapper(new UserCodeClassWrapper<CsvOutputFormat>(CsvOutputFormat.class));
outputConfig.setStubParameter(FileOutputFormat.FILE_PARAMETER_KEY, resultPath);
Configuration outputUserConfig = outputConfig.getStubParameters();
outputUserConfig.setString(CsvOutputFormat.RECORD_DELIMITER_PARAMETER, "\n");
outputUserConfig.setString(CsvOutputFormat.FIELD_DELIMITER_PARAMETER, " ");
outputUserConfig.setClass(CsvOutputFormat.FIELD_TYPE_PARAMETER_PREFIX + 0, LongValue.class);
outputUserConfig.setInteger(CsvOutputFormat.RECORD_POSITION_PARAMETER_PREFIX + 0, 0);
outputUserConfig.setClass(CsvOutputFormat.FIELD_TYPE_PARAMETER_PREFIX + 1, LongValue.class);
outputUserConfig.setInteger(CsvOutputFormat.RECORD_POSITION_PARAMETER_PREFIX + 1, 1);
outputUserConfig.setInteger(CsvOutputFormat.NUM_FIELDS_PARAMETER, 2);
}
return output;
}
private static JobOutputVertex createFakeTail(JobGraph jobGraph, int numSubTasks) {
JobOutputVertex fakeTailOutput =
JobGraphUtils.createFakeOutput(jobGraph, "FakeTailOutput", numSubTasks, numSubTasks);
return fakeTailOutput;
}
private static JobOutputVertex createSync(JobGraph jobGraph, int numSubTasks, int maxIterations) {
JobOutputVertex sync = JobGraphUtils.createSync(jobGraph, numSubTasks);
TaskConfig syncConfig = new TaskConfig(sync.getConfiguration());
syncConfig.setNumberOfIterations(maxIterations);
syncConfig.setIterationId(ITERATION_ID);
syncConfig.addIterationAggregator(WorksetEmptyConvergenceCriterion.AGGREGATOR_NAME,
LongSumAggregator.class);
syncConfig.setConvergenceCriterion(WorksetEmptyConvergenceCriterion.AGGREGATOR_NAME,
WorksetEmptyConvergenceCriterion.class);
return sync;
}
// -----------------------------------------------------------------------------------------------------------------
// Unified solution set and workset tail update
// -----------------------------------------------------------------------------------------------------------------
public JobGraph createJobGraphUnifiedTails(
String verticesPath, String edgesPath, String resultPath, int numSubTasks, int maxIterations)
throws JobGraphDefinitionException
{
// -- init -------------------------------------------------------------------------------------------------
final TypeSerializerFactory<?> serializer = RecordSerializerFactory.get();
@SuppressWarnings("unchecked")
final TypeComparatorFactory<?> comparator =
new RecordComparatorFactory(new int[] { 0 }, new Class[] { LongValue.class }, new boolean[] { true });
final TypePairComparatorFactory<?, ?> pairComparator = RecordPairComparatorFactory.get();
JobGraph jobGraph = new JobGraph("Connected Components (Unified Tails)");
// -- invariant vertices -----------------------------------------------------------------------------------
JobInputVertex vertices = createVerticesInput(jobGraph, verticesPath, numSubTasks, serializer, comparator);
JobInputVertex edges = createEdgesInput(jobGraph, edgesPath, numSubTasks, serializer, comparator);
JobTaskVertex head = createIterationHead(jobGraph, numSubTasks, serializer, comparator, pairComparator);
JobTaskVertex intermediate = createIterationIntermediate(jobGraph, numSubTasks, serializer, comparator);
TaskConfig intermediateConfig = new TaskConfig(intermediate.getConfiguration());
JobOutputVertex output = createOutput(jobGraph, resultPath, numSubTasks, serializer);
JobOutputVertex fakeTail = createFakeTail(jobGraph, numSubTasks);
JobOutputVertex sync = createSync(jobGraph, numSubTasks, maxIterations);
// --------------- the tail (solution set join) ---------------
JobTaskVertex tail = JobGraphUtils.createTask(IterationTailPactTask.class, "IterationTail", jobGraph,
numSubTasks, numSubTasks);
TaskConfig tailConfig = new TaskConfig(tail.getConfiguration());
{
tailConfig.setIterationId(ITERATION_ID);
tailConfig.setIsWorksetIteration();
tailConfig.setIsWorksetUpdate();
tailConfig.setIsSolutionSetUpdate();
tailConfig.setIsSolutionSetUpdateWithoutReprobe();
// inputs and driver
tailConfig.addInputToGroup(0);
tailConfig.setInputSerializer(serializer, 0);
// output
tailConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
tailConfig.setOutputSerializer(serializer);
// the driver
tailConfig.setDriver(JoinWithSolutionSetSecondDriver.class);
tailConfig.setDriverStrategy(DriverStrategy.HYBRIDHASH_BUILD_SECOND);
tailConfig.setDriverComparator(comparator, 0);
tailConfig.setDriverPairComparator(pairComparator);
tailConfig.setStubWrapper(new UserCodeClassWrapper<UpdateComponentIdMatch>(UpdateComponentIdMatch.class));
}
// -- edges ------------------------------------------------------------------------------------------------
JobGraphUtils.connect(vertices, head, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
JobGraphUtils.connect(edges, head, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
JobGraphUtils.connect(vertices, head, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
JobGraphUtils.connect(head, intermediate, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
intermediateConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, numSubTasks);
JobGraphUtils.connect(intermediate, tail, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
tailConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, 1);
JobGraphUtils.connect(head, output, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
JobGraphUtils.connect(tail, fakeTail, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
JobGraphUtils.connect(head, sync, ChannelType.NETWORK, DistributionPattern.POINTWISE);
vertices.setVertexToShareInstancesWith(head);
edges.setVertexToShareInstancesWith(head);
intermediate.setVertexToShareInstancesWith(head);
tail.setVertexToShareInstancesWith(head);
output.setVertexToShareInstancesWith(head);
sync.setVertexToShareInstancesWith(head);
fakeTail.setVertexToShareInstancesWith(tail);
return jobGraph;
}
public JobGraph createJobGraphSeparateTails(
String verticesPath, String edgesPath, String resultPath, int numSubTasks, int maxIterations)
throws JobGraphDefinitionException
{
// -- init -------------------------------------------------------------------------------------------------
final TypeSerializerFactory<?> serializer = RecordSerializerFactory.get();
@SuppressWarnings("unchecked")
final TypeComparatorFactory<?> comparator =
new RecordComparatorFactory(new int[] { 0 }, new Class[] { LongValue.class }, new boolean[] { true });
final TypePairComparatorFactory<?, ?> pairComparator = RecordPairComparatorFactory.get();
JobGraph jobGraph = new JobGraph("Connected Components (Unified Tails)");
// input
JobInputVertex vertices = createVerticesInput(jobGraph, verticesPath, numSubTasks, serializer, comparator);
JobInputVertex edges = createEdgesInput(jobGraph, edgesPath, numSubTasks, serializer, comparator);
// head
JobTaskVertex head = createIterationHead(jobGraph, numSubTasks, serializer, comparator, pairComparator);
TaskConfig headConfig = new TaskConfig(head.getConfiguration());
headConfig.setWaitForSolutionSetUpdate();
// intermediate
JobTaskVertex intermediate = createIterationIntermediate(jobGraph, numSubTasks, serializer, comparator);
TaskConfig intermediateConfig = new TaskConfig(intermediate.getConfiguration());
// output and auxiliaries
JobOutputVertex output = createOutput(jobGraph, resultPath, numSubTasks, serializer);
JobOutputVertex ssFakeTail = createFakeTail(jobGraph, numSubTasks);
JobOutputVertex wsFakeTail = createFakeTail(jobGraph, numSubTasks);
JobOutputVertex sync = createSync(jobGraph, numSubTasks, maxIterations);
// ------------------ the intermediate (ss join) ----------------------
JobTaskVertex ssJoinIntermediate = JobGraphUtils.createTask(IterationIntermediatePactTask.class,
"Solution Set Join", jobGraph, numSubTasks, numSubTasks);
TaskConfig ssJoinIntermediateConfig = new TaskConfig(ssJoinIntermediate.getConfiguration());
{
ssJoinIntermediateConfig.setIterationId(ITERATION_ID);
// inputs
ssJoinIntermediateConfig.addInputToGroup(0);
ssJoinIntermediateConfig.setInputSerializer(serializer, 0);
// output
ssJoinIntermediateConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
ssJoinIntermediateConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
ssJoinIntermediateConfig.setOutputComparator(comparator, 0);
ssJoinIntermediateConfig.setOutputComparator(comparator, 1);
ssJoinIntermediateConfig.setOutputSerializer(serializer);
// driver
ssJoinIntermediateConfig.setDriver(JoinWithSolutionSetSecondDriver.class);
ssJoinIntermediateConfig.setDriverStrategy(DriverStrategy.HYBRIDHASH_BUILD_SECOND);
ssJoinIntermediateConfig.setDriverComparator(comparator, 0);
ssJoinIntermediateConfig.setDriverPairComparator(pairComparator);
ssJoinIntermediateConfig.setStubWrapper(
new UserCodeClassWrapper<UpdateComponentIdMatch>(UpdateComponentIdMatch.class));
}
// -------------------------- ss tail --------------------------------
JobTaskVertex ssTail = JobGraphUtils.createTask(IterationTailPactTask.class, "IterationSolutionSetTail",
jobGraph, numSubTasks, numSubTasks);
TaskConfig ssTailConfig = new TaskConfig(ssTail.getConfiguration());
{
ssTailConfig.setIterationId(ITERATION_ID);
ssTailConfig.setIsSolutionSetUpdate();
ssTailConfig.setIsWorksetIteration();
// inputs and driver
ssTailConfig.addInputToGroup(0);
ssTailConfig.setInputSerializer(serializer, 0);
ssTailConfig.setInputAsynchronouslyMaterialized(0, true);
ssTailConfig.setInputMaterializationMemory(0, MEM_PER_CONSUMER * JobGraphUtils.MEGABYTE);
// output
ssTailConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
ssTailConfig.setOutputSerializer(serializer);
// the driver
ssTailConfig.setDriver(CollectorMapDriver.class);
ssTailConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);
ssTailConfig.setStubWrapper(new UserCodeClassWrapper<DummyMapper>(DummyMapper.class));
}
// -------------------------- ws tail --------------------------------
JobTaskVertex wsTail = JobGraphUtils.createTask(IterationTailPactTask.class, "IterationWorksetTail",
jobGraph, numSubTasks, numSubTasks);
TaskConfig wsTailConfig = new TaskConfig(wsTail.getConfiguration());
{
wsTailConfig.setIterationId(ITERATION_ID);
wsTailConfig.setIsWorksetIteration();
wsTailConfig.setIsWorksetUpdate();
// inputs and driver
wsTailConfig.addInputToGroup(0);
wsTailConfig.setInputSerializer(serializer, 0);
// output
wsTailConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
wsTailConfig.setOutputSerializer(serializer);
// the driver
wsTailConfig.setDriver(CollectorMapDriver.class);
wsTailConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);
wsTailConfig.setStubWrapper(new UserCodeClassWrapper<DummyMapper>(DummyMapper.class));
}
// --------------- the wiring ---------------------
JobGraphUtils.connect(vertices, head, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
JobGraphUtils.connect(edges, head, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
JobGraphUtils.connect(vertices, head, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
JobGraphUtils.connect(head, intermediate, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
intermediateConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, numSubTasks);
JobGraphUtils.connect(intermediate, ssJoinIntermediate, ChannelType.NETWORK, DistributionPattern.POINTWISE);
ssJoinIntermediateConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, 1);
JobGraphUtils.connect(ssJoinIntermediate, ssTail, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
ssTailConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, 1);
JobGraphUtils.connect(ssJoinIntermediate, wsTail, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
wsTailConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, 1);
JobGraphUtils.connect(head, output, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
JobGraphUtils.connect(ssTail, ssFakeTail, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
JobGraphUtils.connect(wsTail, wsFakeTail, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
JobGraphUtils.connect(head, sync, ChannelType.NETWORK, DistributionPattern.POINTWISE);
vertices.setVertexToShareInstancesWith(head);
edges.setVertexToShareInstancesWith(head);
intermediate.setVertexToShareInstancesWith(head);
ssJoinIntermediate.setVertexToShareInstancesWith(head);
wsTail.setVertexToShareInstancesWith(head);
output.setVertexToShareInstancesWith(head);
sync.setVertexToShareInstancesWith(head);
ssTail.setVertexToShareInstancesWith(wsTail);
ssFakeTail.setVertexToShareInstancesWith(ssTail);
wsFakeTail.setVertexToShareInstancesWith(wsTail);
return jobGraph;
}
public JobGraph createJobGraphIntermediateWorksetUpdateAndSolutionSetTail(
String verticesPath, String edgesPath, String resultPath, int numSubTasks, int maxIterations)
throws JobGraphDefinitionException {
// -- init -------------------------------------------------------------------------------------------------
final TypeSerializerFactory<?> serializer = RecordSerializerFactory.get();
@SuppressWarnings("unchecked")
final TypeComparatorFactory<?> comparator =
new RecordComparatorFactory(new int[] { 0 }, new Class[] { LongValue.class }, new boolean[] { true });
final TypePairComparatorFactory<?, ?> pairComparator = RecordPairComparatorFactory.get();
JobGraph jobGraph = new JobGraph("Connected Components (Intermediate Workset Update, Solution Set Tail)");
// input
JobInputVertex vertices = createVerticesInput(jobGraph, verticesPath, numSubTasks, serializer, comparator);
JobInputVertex edges = createEdgesInput(jobGraph, edgesPath, numSubTasks, serializer, comparator);
// head
JobTaskVertex head = createIterationHead(jobGraph, numSubTasks, serializer, comparator, pairComparator);
TaskConfig headConfig = new TaskConfig(head.getConfiguration());
headConfig.setWaitForSolutionSetUpdate();
// intermediate
JobTaskVertex intermediate = createIterationIntermediate(jobGraph, numSubTasks, serializer, comparator);
TaskConfig intermediateConfig = new TaskConfig(intermediate.getConfiguration());
// output and auxiliaries
JobOutputVertex output = createOutput(jobGraph, resultPath, numSubTasks, serializer);
JobOutputVertex fakeTail = createFakeTail(jobGraph, numSubTasks);
JobOutputVertex sync = createSync(jobGraph, numSubTasks, maxIterations);
// ------------------ the intermediate (ws update) ----------------------
JobTaskVertex wsUpdateIntermediate =
JobGraphUtils.createTask(IterationIntermediatePactTask.class, "WorksetUpdate", jobGraph,
numSubTasks, numSubTasks);
TaskConfig wsUpdateConfig = new TaskConfig(wsUpdateIntermediate.getConfiguration());
{
wsUpdateConfig.setIterationId(ITERATION_ID);
wsUpdateConfig.setIsWorksetIteration();
wsUpdateConfig.setIsWorksetUpdate();
// inputs
wsUpdateConfig.addInputToGroup(0);
wsUpdateConfig.setInputSerializer(serializer, 0);
// output
wsUpdateConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
wsUpdateConfig.setOutputComparator(comparator, 0);
wsUpdateConfig.setOutputSerializer(serializer);
// driver
wsUpdateConfig.setDriver(JoinWithSolutionSetSecondDriver.class);
wsUpdateConfig.setDriverStrategy(DriverStrategy.HYBRIDHASH_BUILD_SECOND);
wsUpdateConfig.setDriverComparator(comparator, 0);
wsUpdateConfig.setDriverPairComparator(pairComparator);
wsUpdateConfig.setStubWrapper(new UserCodeClassWrapper<UpdateComponentIdMatch>(
UpdateComponentIdMatch.class));
}
// -------------------------- ss tail --------------------------------
JobTaskVertex ssTail =
JobGraphUtils.createTask(IterationTailPactTask.class, "IterationSolutionSetTail", jobGraph,
numSubTasks, numSubTasks);
TaskConfig ssTailConfig = new TaskConfig(ssTail.getConfiguration());
{
ssTailConfig.setIterationId(ITERATION_ID);
ssTailConfig.setIsSolutionSetUpdate();
ssTailConfig.setIsWorksetIteration();
// inputs and driver
ssTailConfig.addInputToGroup(0);
ssTailConfig.setInputSerializer(serializer, 0);
// output
ssTailConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
ssTailConfig.setOutputSerializer(serializer);
// the driver
ssTailConfig.setDriver(CollectorMapDriver.class);
ssTailConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);
ssTailConfig.setStubWrapper(new UserCodeClassWrapper<DummyMapper>(DummyMapper.class));
}
// edges
JobGraphUtils.connect(vertices, head, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
JobGraphUtils.connect(edges, head, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
JobGraphUtils.connect(vertices, head, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
JobGraphUtils.connect(head, intermediate, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
intermediateConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, numSubTasks);
JobGraphUtils.connect(intermediate, wsUpdateIntermediate, ChannelType.NETWORK,
DistributionPattern.POINTWISE);
wsUpdateConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, 1);
JobGraphUtils.connect(wsUpdateIntermediate, ssTail, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
ssTailConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, 1);
JobGraphUtils.connect(head, output, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
JobGraphUtils.connect(ssTail, fakeTail, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
JobGraphUtils.connect(head, sync, ChannelType.NETWORK, DistributionPattern.POINTWISE);
vertices.setVertexToShareInstancesWith(head);
edges.setVertexToShareInstancesWith(head);
intermediate.setVertexToShareInstancesWith(head);
wsUpdateIntermediate.setVertexToShareInstancesWith(head);
ssTail.setVertexToShareInstancesWith(head);
output.setVertexToShareInstancesWith(head);
sync.setVertexToShareInstancesWith(head);
fakeTail.setVertexToShareInstancesWith(ssTail);
return jobGraph;
}
// -----------------------------------------------------------------------------------------------------------------
// Intermediate solution set update and workset tail
// -----------------------------------------------------------------------------------------------------------------
public JobGraph createJobGraphSolutionSetUpdateAndWorksetTail(
String verticesPath, String edgesPath, String resultPath, int numSubTasks, int maxIterations)
throws JobGraphDefinitionException {
// -- init -------------------------------------------------------------------------------------------------
final TypeSerializerFactory<?> serializer = RecordSerializerFactory.get();
@SuppressWarnings("unchecked")
final TypeComparatorFactory<?> comparator =
new RecordComparatorFactory(new int[] { 0 }, new Class[] { LongValue.class }, new boolean[] { true });
final TypePairComparatorFactory<?, ?> pairComparator = RecordPairComparatorFactory.get();
JobGraph jobGraph = new JobGraph("Connected Components (Intermediate Solution Set Update, Workset Tail)");
// input
JobInputVertex vertices = createVerticesInput(jobGraph, verticesPath, numSubTasks, serializer, comparator);
JobInputVertex edges = createEdgesInput(jobGraph, edgesPath, numSubTasks, serializer, comparator);
// head
JobTaskVertex head = createIterationHead(jobGraph, numSubTasks, serializer, comparator, pairComparator);
// intermediate
JobTaskVertex intermediate = createIterationIntermediate(jobGraph, numSubTasks, serializer, comparator);
TaskConfig intermediateConfig = new TaskConfig(intermediate.getConfiguration());
// output and auxiliaries
JobOutputVertex output = createOutput(jobGraph, resultPath, numSubTasks, serializer);
JobOutputVertex fakeTail = createFakeTail(jobGraph, numSubTasks);
JobOutputVertex sync = createSync(jobGraph, numSubTasks, maxIterations);
// ------------------ the intermediate (ss update) ----------------------
JobTaskVertex ssJoinIntermediate = JobGraphUtils.createTask(IterationIntermediatePactTask.class,
"Solution Set Update", jobGraph, numSubTasks, numSubTasks);
TaskConfig ssJoinIntermediateConfig = new TaskConfig(ssJoinIntermediate.getConfiguration());
{
ssJoinIntermediateConfig.setIterationId(ITERATION_ID);
ssJoinIntermediateConfig.setIsSolutionSetUpdate();
ssJoinIntermediateConfig.setIsSolutionSetUpdateWithoutReprobe();
// inputs
ssJoinIntermediateConfig.addInputToGroup(0);
ssJoinIntermediateConfig.setInputSerializer(serializer, 0);
// output
ssJoinIntermediateConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
ssJoinIntermediateConfig.setOutputComparator(comparator, 0);
ssJoinIntermediateConfig.setOutputSerializer(serializer);
// driver
ssJoinIntermediateConfig.setDriver(JoinWithSolutionSetSecondDriver.class);
ssJoinIntermediateConfig.setDriverStrategy(DriverStrategy.HYBRIDHASH_BUILD_SECOND);
ssJoinIntermediateConfig.setDriverComparator(comparator, 0);
ssJoinIntermediateConfig.setDriverPairComparator(pairComparator);
ssJoinIntermediateConfig.setStubWrapper(new UserCodeClassWrapper<UpdateComponentIdMatch>(UpdateComponentIdMatch.class));
}
// -------------------------- ws tail --------------------------------
JobTaskVertex wsTail = JobGraphUtils.createTask(IterationTailPactTask.class, "IterationWorksetTail",
jobGraph, numSubTasks, numSubTasks);
TaskConfig wsTailConfig = new TaskConfig(wsTail.getConfiguration());
{
wsTailConfig.setIterationId(ITERATION_ID);
wsTailConfig.setIsWorksetIteration();
wsTailConfig.setIsWorksetUpdate();
// inputs and driver
wsTailConfig.addInputToGroup(0);
wsTailConfig.setInputSerializer(serializer, 0);
// output
wsTailConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
wsTailConfig.setOutputSerializer(serializer);
// the driver
wsTailConfig.setDriver(CollectorMapDriver.class);
wsTailConfig.setDriverStrategy(DriverStrategy.COLLECTOR_MAP);
wsTailConfig.setStubWrapper(new UserCodeClassWrapper<DummyMapper>(DummyMapper.class));
}
// --------------- the wiring ---------------------
JobGraphUtils.connect(vertices, head, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
JobGraphUtils.connect(edges, head, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
JobGraphUtils.connect(vertices, head, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
JobGraphUtils.connect(head, intermediate, ChannelType.NETWORK, DistributionPattern.BIPARTITE);
intermediateConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, numSubTasks);
JobGraphUtils.connect(intermediate, ssJoinIntermediate, ChannelType.NETWORK, DistributionPattern.POINTWISE);
ssJoinIntermediateConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, 1);
JobGraphUtils.connect(ssJoinIntermediate, wsTail, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
wsTailConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, 1);
JobGraphUtils.connect(head, output, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
JobGraphUtils.connect(wsTail, fakeTail, ChannelType.IN_MEMORY, DistributionPattern.POINTWISE);
JobGraphUtils.connect(head, sync, ChannelType.NETWORK, DistributionPattern.POINTWISE);
vertices.setVertexToShareInstancesWith(head);
edges.setVertexToShareInstancesWith(head);
intermediate.setVertexToShareInstancesWith(head);
ssJoinIntermediate.setVertexToShareInstancesWith(head);
wsTail.setVertexToShareInstancesWith(head);
output.setVertexToShareInstancesWith(head);
sync.setVertexToShareInstancesWith(head);
fakeTail.setVertexToShareInstancesWith(wsTail);
return jobGraph;
}
public static final class DummyMapper extends MapFunction {
private static final long serialVersionUID = 1L;
@Override
public void map(Record rec, Collector<Record> out) {
out.collect(rec);
}
}
}