/***********************************************************************************************************************
*
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
**********************************************************************************************************************/
package eu.stratosphere.pact.common.plan;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import eu.stratosphere.api.common.Program;
import eu.stratosphere.api.common.io.FileInputFormat;
import eu.stratosphere.api.common.io.FileOutputFormat;
import eu.stratosphere.api.common.operators.FileDataSink;
import eu.stratosphere.api.common.operators.FileDataSource;
import eu.stratosphere.api.common.operators.GenericDataSink;
import eu.stratosphere.api.common.operators.GenericDataSource;
import eu.stratosphere.api.common.operators.Operator;
import eu.stratosphere.api.common.operators.util.OperatorUtil;
import eu.stratosphere.util.Visitable;
import eu.stratosphere.util.Visitor;
import eu.stratosphere.util.dag.GraphModule;
import eu.stratosphere.util.dag.GraphPrinter;
import eu.stratosphere.util.dag.GraphTraverseListener;
import eu.stratosphere.util.dag.NodePrinter;
import eu.stratosphere.util.dag.OneTimeTraverser;
/**
 * A PactModule is a subgraph of a {@link Program} that exposes a fixed number of inputs and outputs.
 * It exists to support modularization and thereby keep large programs maintainable. From the outside
 * only the inputs and outputs of the module are visible; the implementation behind them is a set of
 * interconnected {@link Operator}s that are wired to those inputs and outputs.
 */
public class PactModule extends GraphModule<Operator, GenericDataSource<?>, GenericDataSink> implements
        Visitable<Operator> {

    /**
     * Creates a module with the given number of inputs and outputs.
     * <p>
     * Every input slot is pre-filled with a {@link FileDataSource} created from
     * <code>"file:///&lt;index&gt;"</code> and <code>"Source &lt;index&gt;"</code>; every output slot is
     * pre-filled with a {@link FileDataSink} created from <code>"file:///&lt;index&gt;"</code> and
     * <code>"Sink &lt;index&gt;"</code>.
     *
     * @param numberOfInputs
     *        the number of inputs
     * @param numberOfOutputs
     *        the number of outputs
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    public PactModule(final int numberOfInputs, final int numberOfOutputs) {
        super(numberOfInputs, numberOfOutputs, OperatorNavigator.INSTANCE);

        int slot = 0;
        while (slot < numberOfInputs) {
            final String location = String.format("file:///%d", slot);
            final String label = "Source " + slot;
            this.setInput(slot, new FileDataSource((Class) FileInputFormat.class, location, label));
            slot++;
        }

        slot = 0;
        while (slot < numberOfOutputs) {
            final String location = String.format("file:///%d", slot);
            final String label = "Sink " + slot;
            this.setOutput(slot, new FileDataSink((Class) FileOutputFormat.class, location, label));
            slot++;
        }
    }

    /**
     * Lets the given visitor walk the operators of this module, starting at every output returned by
     * {@code getAllOutputs()}. The visitor is wrapped in a {@code OneTimeVisitor} before it is handed
     * to the outputs (presumably so that operators reachable from several outputs are visited once).
     *
     * @param visitor
     *        the visitor to apply
     * @see Visitable#accept(Visitor)
     */
    @Override
    public void accept(final Visitor<Operator> visitor) {
        final OneTimeVisitor<Operator> guardedVisitor = new OneTimeVisitor<Operator>(visitor);
        for (final Operator sink : this.getAllOutputs()) {
            sink.accept(guardedVisitor);
        }
    }

    /**
     * Renders the graph spanned by the outputs of this module with a {@link GraphPrinter} of width 40.
     * Module inputs are printed as {@code Input <index>}, module outputs as {@code Output <index>},
     * and every other operator as {@code <simple class name> [<operator name>]}.
     *
     * @return the textual representation of the module graph
     */
    @Override
    public String toString() {
        final GraphPrinter<Operator> printer = new GraphPrinter<Operator>();
        final NodePrinter<Operator> labeler = new NodePrinter<Operator>() {
            @Override
            public String toString(final Operator node) {
                final int inputSlot = PactModule.this.inputNodes.indexOf(node);
                if (inputSlot == -1) {
                    final int outputSlot = PactModule.this.outputNodes.indexOf(node);
                    if (outputSlot == -1) {
                        // Neither a module input nor a module output: an inner operator.
                        return String.format("%s [%s]", node.getClass().getSimpleName(), node.getName());
                    }
                    return String.format("Output %d", outputSlot);
                }
                return String.format("Input %d", inputSlot);
            }
        };
        printer.setNodePrinter(labeler);
        printer.setWidth(40);
        return printer.toString(this.getAllOutputs(), OperatorNavigator.INSTANCE);
    }

    /**
     * Wraps the graph spanned by the given sinks and all operators reachable from them in a PactModule.
     * <p>
     * The module gets one output per given sink and one input per open end found in the graph. A sink
     * that already is a {@link GenericDataSink} becomes the module output itself; any other operator is
     * added as input to the module's pre-created output. An operator without any input slot is installed
     * as the module input itself; an operator with unconnected input slots gets one module input added to
     * each of those slots.
     *
     * @param sinks
     *        all sinks that span the graph to wrap
     * @return a PactModule representing the given graph
     * @throws ClassCastException
     *         if an operator without any input slot is not a {@link GenericDataSource}
     */
    public static PactModule valueOf(final Collection<? extends Operator> sinks) {
        final List<Operator> openNodes = collectOpenNodes(sinks);
        final PactModule module = new PactModule(openNodes.size(), sinks.size());

        // Outputs: reuse real sinks, hook everything else up to the pre-created output.
        int outputSlot = 0;
        for (final Operator sink : sinks) {
            if (!(sink instanceof GenericDataSink)) {
                module.getOutput(outputSlot).addInput(sink);
            } else {
                module.setOutput(outputSlot, (GenericDataSink) sink);
            }
            outputSlot++;
        }

        // Inputs: a node with several unconnected slots occupies that many entries of openNodes,
        // and the inner loop below consumes exactly as many module inputs for it.
        int inputSlot = 0;
        while (inputSlot < openNodes.size()) {
            final Operator node = openNodes.get(inputSlot);
            final List<List<Operator>> slots = OperatorUtil.getInputs(node);
            if (slots.isEmpty()) {
                module.setInput(inputSlot, (GenericDataSource<?>) node);
                inputSlot++;
                continue;
            }
            for (final List<Operator> slot : slots) {
                if (slot.isEmpty()) {
                    slot.add(module.getInput(inputSlot++));
                }
            }
            OperatorUtil.setInputs(node, slots);
        }
        return module;
    }

    /**
     * Wraps the graph spanned by the given sinks and all operators reachable from them in a PactModule.
     * Delegates to {@link #valueOf(Collection)} with the sinks viewed as a list.
     *
     * @param sinks
     *        all sinks that span the graph to wrap
     * @return a PactModule representing the given graph
     */
    public static PactModule valueOf(final Operator... sinks) {
        final List<Operator> sinkList = Arrays.asList(sinks);
        return PactModule.valueOf(sinkList);
    }

    /**
     * Traverses the graph spanned by the given sinks and records its open ends: an operator without
     * any input slot is recorded once, an operator with input slots is recorded once per slot that
     * has no operator connected.
     */
    private static List<Operator> collectOpenNodes(final Collection<? extends Operator> sinks) {
        final List<Operator> openNodes = new ArrayList<Operator>();
        final GraphTraverseListener<Operator> collector = new GraphTraverseListener<Operator>() {
            @Override
            public void nodeTraversed(final Operator node) {
                final List<List<Operator>> slots = OperatorUtil.getInputs(node);
                if (slots.isEmpty()) {
                    openNodes.add(node);
                    return;
                }
                for (final List<Operator> slot : slots) {
                    if (slot.isEmpty()) {
                        openNodes.add(node);
                    }
                }
            }
        };
        OneTimeTraverser.INSTANCE.traverse(sinks, OperatorNavigator.INSTANCE, collector);
        return openNodes;
    }
}