/***********************************************************************************************************************
*
* Copyright (C) 2010 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
**********************************************************************************************************************/
package eu.stratosphere.pact.common.plan;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import eu.stratosphere.pact.common.contract.Contract;
import eu.stratosphere.pact.common.contract.FileDataSink;
import eu.stratosphere.pact.common.contract.FileDataSource;
import eu.stratosphere.pact.common.io.SequentialInputFormat;
import eu.stratosphere.pact.common.io.SequentialOutputFormat;
import eu.stratosphere.util.dag.GraphModule;
import eu.stratosphere.util.dag.GraphPrinter;
import eu.stratosphere.util.dag.GraphTraverseListener;
import eu.stratosphere.util.dag.NodePrinter;
import eu.stratosphere.util.dag.OneTimeTraverser;
/**
* The PactModule is a subgraph of a {@link PactProgram} with an arbitrary but
* well-defined number of inputs and outputs. It is designed to facilitate
* modularization and thus to increase the maintainability of large
* PactPrograms. While the interface of the module is defined by its number of inputs and
* outputs, the actual implementation consists of several interconnected {@link Contract}s
* that are connected to the inputs and outputs of the PactModule.
*/
public class PactModule extends GraphModule<Contract, FileDataSource, FileDataSink> implements Visitable<Contract> {
/**
*
*/
private static final long serialVersionUID = -3942891957375333697L;
/**
* Initializes a PactModule having the given name, number of inputs, and
* number of outputs.
*
* @param name
* the name of the PactModule
* @param numberOfInputs
* the number of inputs
* @param numberOfOutputs
* the number of outputs.
*/
public PactModule(final String name, final int numberOfInputs, final int numberOfOutputs) {
super(name, new FileDataSource[numberOfInputs], new FileDataSink[numberOfOutputs],
ContractNavigator.INSTANCE);
for (int index = 0; index < this.inputNodes.length; index++)
this.inputNodes[index] =
new FileDataSource(SequentialInputFormat.class, String.format("%s %d", name, index));
for (int index = 0; index < this.outputNodes.length; index++)
this.outputNodes[index] =
new FileDataSink(SequentialOutputFormat.class, String.format("%s %d", name, index));
}
/**
* Traverses the pact plan, starting from the data outputs that were added
* to this program.
*
* @see eu.stratosphere.pact.common.plan.Visitable#accept(eu.stratosphere.pact.common.plan.Visitor)
*/
@Override
public void accept(final Visitor<Contract> visitor) {
final OneTimeVisitor<Contract> oneTimeVisitor = new OneTimeVisitor<Contract>(visitor);
for (final Contract output : this.getAllOutputs())
output.accept(oneTimeVisitor);
}
@Override
public String toString() {
final GraphPrinter<Contract> dagPrinter = new GraphPrinter<Contract>();
dagPrinter.setNodePrinter(new NodePrinter<Contract>() {
@Override
public String toString(final Contract node) {
final int inputIndex = Arrays.asList(PactModule.this.inputNodes).indexOf(node);
if (inputIndex != -1)
return String.format("Input %d", inputIndex);
final int outputIndex = Arrays.asList(PactModule.this.outputNodes).indexOf(node);
if (outputIndex != -1)
return String.format("Output %d", outputIndex);
return String.format("%s [%s]", node.getClass().getSimpleName(), node.getName());
}
});
dagPrinter.setWidth(40);
return dagPrinter.toString(this.getAllOutputs(), ContractNavigator.INSTANCE);
}
/**
* Wraps the graph given by the sinks and referenced contracts in a
* PactModule.
*
* @param name
* the name of the PactModule
* @param sinks
* all sinks that span the graph to wrap
* @return a PactModule representing the given graph
*/
public static PactModule valueOf(final String name, final Collection<Contract> sinks) {
final List<Contract> inputs = new ArrayList<Contract>();
OneTimeTraverser.INSTANCE.traverse(sinks, ContractNavigator.INSTANCE,
new GraphTraverseListener<Contract>() {
@Override
public void nodeTraversed(final Contract node) {
final List<List<Contract>> contractInputs = ContractUtil.getInputs(node);
if (contractInputs.size() == 0)
inputs.add(node);
else
for (final List<Contract> input : contractInputs)
if (input.size() == 0)
inputs.add(node);
};
});
final PactModule module = new PactModule(name, inputs.size(), sinks.size());
int sinkIndex = 0;
for (final Contract sink : sinks) {
if (sink instanceof FileDataSink)
module.outputNodes[sinkIndex] = (FileDataSink) sink;
else
module.getOutput(sinkIndex).addInput(sink);
sinkIndex++;
}
for (int index = 0; index < inputs.size();) {
final Contract node = inputs.get(index);
final List<List<Contract>> contractInputs = ContractUtil.getInputs(node);
if (contractInputs.isEmpty())
module.inputNodes[index++] = (FileDataSource) node;
else {
for (int unconnectedIndex = 0; unconnectedIndex < contractInputs.size(); unconnectedIndex++)
if (contractInputs.get(unconnectedIndex).isEmpty())
contractInputs.get(unconnectedIndex).add(module.getInput(index++));
ContractUtil.setInputs(node, contractInputs);
}
}
return module;
}
/**
* Wraps the graph given by the sinks and referenced contracts in a
* PactModule.
*
* @param name
* the name of the PactModule
* @param sinks
* all sinks that span the graph to wrap
* @return a PactModule representing the given graph
*/
public static PactModule valueOf(final String name, final Contract... sinks) {
return valueOf(name, Arrays.asList(sinks));
}
}