/***********************************************************************************************************************
*
* Copyright (C) 2010 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
**********************************************************************************************************************/
package eu.stratosphere.api.plan;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collection;
import eu.stratosphere.api.record.operators .GenericDataSink;
import eu.stratosphere.util.Visitable;
import eu.stratosphere.util.Visitor;
import eu.stratosphere.pact.generic.contract.Operator;
/**
* This class encapsulates a Pact program (which is a form of data flow), together with some parameters, like e.g.
* a name and a default degree of parallelism.
* The program (data flow) is references by this plan by holding it sinks, from which a traversal reaches all connected
* nodes.
*/
public class Plan implements Visitable<Operator> {
/**
* A collection of all sinks in the plan. Since the plan is traversed from the sinks to the sources, this
* collection must contain all the sinks.
*/
protected final Collection<GenericDataSink> sinks;
/**
* The name of the pact job.
*/
protected final String jobName;
/**
* The default parallelism to use for nodes that have no explicitly specified parallelism.
*/
protected int defaultParallelism = -1;
/**
* The maximal number of machines to use in the job.
*/
protected int maxNumberMachines;;
// ------------------------------------------------------------------------
/**
* Creates a new Pact plan with the given name, describing the Pact data flow that ends at the
* given data sinks.
* <p>
* If not all of the sinks of a data flow are given to the plan, the flow might
* not be translated entirely.
*
* @param sinks The collection will the sinks of the plan.
* @param jobName The name to display for the job.
*/
public Plan(Collection<GenericDataSink> sinks, String jobName) {
this.sinks = sinks;
this.jobName = jobName;
}
/**
* Creates a new Pact plan with the given name, containing initially a single data sink.
* <p>
* If not all of the sinks of a data flow are given to the plan, the flow might
* not be translated entirely.
*
* @param sink The data sink of the data flow.
* @param jobName The name to display for the job.
*/
public Plan(GenericDataSink sink, String jobName) {
this.sinks = new ArrayList<GenericDataSink>();
this.sinks.add(sink);
this.jobName = jobName;
}
/**
* Creates a new Pact plan, describing the Pact data flow that ends at the
* given data sinks. The display name for the job is generated using a timestamp.
* <p>
* If not all of the sinks of a data flow are given to the plan, the flow might
* not be translated entirely.
*
* @param sinks The collection will the sinks of the plan.
*/
public Plan(Collection<GenericDataSink> sinks) {
this(sinks, "PACT Job at " + Calendar.getInstance().getTime());
}
/**
* Creates a new Pact plan with the given name, containing initially a single data sink.
* The display name for the job is generated using a timestamp.
* <p>
* If not all of the sinks of a data flow are given to the plan, the flow might
* not be translated entirely.
*
* @param sink The data sink of the data flow.
*/
public Plan(GenericDataSink sink) {
this(sink, "PACT Job at " + Calendar.getInstance().getTime());
}
// ------------------------------------------------------------------------
/**
* Adds a data sink to the set of sinks in this program.
*
* @param sink The data sink to add.
*/
public void addDataSink(GenericDataSink sink) {
if (!this.sinks.contains(sink)) {
this.sinks.add(sink);
}
}
/**
* Gets all the data sinks of this PACT program.
*
* @return All sinks of the program.
*/
public Collection<GenericDataSink> getDataSinks() {
return this.sinks;
}
/**
* Gets the name of this PACT program.
*
* @return The name of the program.
*/
public String getJobName() {
return this.jobName;
}
/**
* Gets the maximum number of machines to be used for this job.
*
* @return The maximum number of machines to be used for this job.
*/
public int getMaxNumberMachines() {
return this.maxNumberMachines;
}
/**
* Sets the maximum number of machines to be used for this job.
*
* @param maxNumberMachines The the maximum number to set.
*/
public void setMaxNumberMachines(int maxNumberMachines) {
this.maxNumberMachines = maxNumberMachines;
}
/**
* Gets the default degree of parallelism for this plan. That degree is always used when a Pact
* is not explicitly given a degree of parallelism,
*
* @return The default parallelism for the plan.
*/
public int getDefaultParallelism() {
return this.defaultParallelism;
}
/**
* Sets the default degree of parallelism for this plan. That degree is always used when a Pact
* is not explicitly given a degree of parallelism,
*
* @param defaultParallelism The default parallelism for the plan.
*/
public void setDefaultParallelism(int defaultParallelism) {
this.defaultParallelism = defaultParallelism;
}
/**
* Gets the postPassClassName from this Plan.
*
* @return The postPassClassName.
*/
public String getPostPassClassName() {
return "eu.stratosphere.compiler.postpass.GenericPactRecordPostPass";
}
// ------------------------------------------------------------------------
/**
* Traverses the pact plan depth first from all data sinks on towards the sources.
*
* @see Visitable#accept(Visitor)
*/
@Override
public void accept(Visitor<Operator> visitor) {
for (GenericDataSink sink : this.sinks) {
sink.accept(visitor);
}
}
}