/**
* Copyright 2007-2008 University Of Southern California
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.isi.pegasus.planner.refiner;
import edu.isi.pegasus.common.logging.LogManager;
import edu.isi.pegasus.planner.classes.ADag;
import edu.isi.pegasus.planner.classes.Job;
import edu.isi.pegasus.planner.classes.PCRelation;
import edu.isi.pegasus.planner.classes.PegasusBag;
import edu.isi.pegasus.planner.cluster.Clusterer;
import edu.isi.pegasus.planner.cluster.ClustererException;
import edu.isi.pegasus.planner.cluster.ClustererFactory;
import edu.isi.pegasus.planner.parser.dax.DAX2LabelGraph;
import edu.isi.pegasus.planner.partitioner.ClustererCallback;
import edu.isi.pegasus.planner.partitioner.Partitioner;
import edu.isi.pegasus.planner.partitioner.graph.GraphNode;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
/**
* This collapses nodes with the same logical name that are scheduled on the
* same pool into fewer fat nodes. The idea is to merge jobs that take only a
* few seconds to run into a single larger job, reducing overall runtime by
* cutting down the number of Condor/Globus interactions and the delays they
* incur.
* Note that the current merging of edges for the collapsed jobs is not the
* best implementation; it will be revised once the graph structure is
* corrected.
*
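* <p>
* A minimal usage sketch (illustrative only; it assumes a fully populated
* {@link edu.isi.pegasus.planner.classes.PegasusBag} and a scheduled
* {@link edu.isi.pegasus.planner.classes.ADag}, with the clustering
* techniques picked up from the planner options in the bag):
* <pre>
*     NodeCollapser nc = new NodeCollapser( bag );
*     ADag clustered   = nc.cluster( dag ); //applies each comma separated
*                                           //clustering action in turn
* </pre>
*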
* @author Karan Vahi vahi@isi.edu
* @author Mei-Hui Su mei@isi.edu
*
* @version $Revision$
*/
public class NodeCollapser extends Engine {
/**
* The handle to the logger object.
*/
protected LogManager mLogger;
/**
* The directory where the stdin files of the fat jobs are created.
* It should be the submit file directory that the user specifies at
* runtime.
*/
private String mDirectory;
/**
* The internal map that contains the adjacency list representation of the
* graph referred to by the workflow. This is temporary until the main ADag
* data structure is corrected.
*/
private Map mGraph;
/**
* The bag of initialization objects.
*/
private PegasusBag mBag;
/**
* The overloaded constructor.
*
* @param bag the bag of initialization objects.
*
*/
public NodeCollapser( PegasusBag bag ) {
super( bag );
mBag = bag;
mLogger = bag.getLogger();
mGraph = new HashMap();
mPOptions = bag.getPlannerOptions();
setDirectory( mPOptions.getSubmitDirectory() );
}
/**
* Sets the directory where the stdin files are to be generated.
*
* @param directory the path to the directory to which it needs to be set.
*/
public void setDirectory(String directory){
mDirectory = (directory == null)?
//user did not specify a submit file dir
//use the default i.e. the current directory
".":
//user specified directory picked up
directory;
}
/**
* Clusters the jobs in the workflow. It applies a series of clustering
* actions on the graph, as specified by the user at runtime.
*
* For each clustering action, the graph is first partitioned,
* and then sent to the appropriate clustering module for clustering.
*
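* For example, if the user specified "horizontal,label" (assuming, as in
* Pegasus, that these are technique names known to the ClustererFactory),
* the tokenizer loop below amounts to:
* <pre>
*     ADag result = cluster( dag, "horizontal" );
*     result      = cluster( result, "label" );
* </pre>
*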
* @param dag the scheduled dag that has to be clustered.
*
* @return ADag containing the collapsed scheduled workflow.
*
* @throws ClustererException in case of error while clustering
*/
public ADag cluster( ADag dag ) throws ClustererException{
//load the appropriate partitioner and clusterer
String types = mPOptions.getClusteringTechnique();
//sanity check
if( types == null){
//return the original DAG only
mLogger.log( "No clustering actions specified. Returning original DAG",
LogManager.DEBUG_MESSAGE_LEVEL);
return dag;
}
//tokenize and get the types
ADag clusteredDAG = dag;
for( StringTokenizer st = new StringTokenizer( types, ","); st.hasMoreTokens(); ){
clusteredDAG = this.cluster( clusteredDAG, st.nextToken() );
}
return clusteredDAG;
}
/**
* Clusters the jobs in the workflow. The graph is first partitioned,
* and then sent to the appropriate clustering module for clustering.
*
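* A direct invocation sketch (assuming "label" is a technique name known
* to the ClustererFactory, and nc is an initialized NodeCollapser):
* <pre>
*     ADag labelClustered = nc.cluster( dag, "label" );
* </pre>
*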
* @param dag the scheduled dag that has to be clustered.
* @param type the type of clustering to do.
*
* @return ADag containing the collapsed scheduled workflow.
*
* @throws ClustererException in case of error while clustering
*/
public ADag cluster( ADag dag, String type ) throws ClustererException{
//convert the graph representation to a more manageable
//and traversable data structure that is handed to the
//partitioning code
Map<String,String> nameIDMap = new HashMap<String,String>();
Job job;
for( Iterator<GraphNode> it = dag.jobIterator(); it.hasNext(); ){
//populate the job name to logical id mapping
GraphNode node = it.next();
job = (Job)node.getContent();
nameIDMap.put( job.getName(), job.getLogicalID() );
}
mGraph = edgeList2Graph( dag, nameIDMap );
//we need to build up a partitioner graph structure to do
//the partitioning on the graph. Use the callback mechanism
//developed for the partitioner code and populate it
//from the existing graph structure
DAX2LabelGraph d2g = new DAX2LabelGraph( );
d2g.initialize(mBag, mPOptions.getDAX() );
//set the appropriate key that is to be used for picking up the labels
d2g.setLabelKey( mProps.getClustererLabelKey() );
//no need to pass any attributes
d2g.cbDocument( null );
for( Iterator<GraphNode> it = dag.jobIterator(); it.hasNext(); ){
//pass the jobs to the callback
GraphNode node = it.next();
d2g.cbJob( (Job)node.getContent() );
}
//pass the relations
for( Iterator it = mGraph.entrySet().iterator(); it.hasNext(); ){
Map.Entry entry = (Map.Entry)it.next();
d2g.cbParents( (String)entry.getKey(), (List)entry.getValue() );
}
//finished populating
d2g.cbDone();
//get the graph map
mGraph = (Map)d2g.getConstructedObject();
//get the fake dummy root node
GraphNode root = (GraphNode)mGraph.get( DAX2LabelGraph.DUMMY_NODE_ID );
Partitioner p = ClustererFactory.loadPartitioner( mProps, type, root, mGraph );
mLogger.log( "Partitioner loaded is " + p.description(),
LogManager.CONFIG_MESSAGE_LEVEL );
Clusterer c = ClustererFactory.loadClusterer( dag, mBag, type );
mLogger.log( "Clusterer loaded is "+ c.description(),
LogManager.CONFIG_MESSAGE_LEVEL );
ClustererCallback cb = new ClustererCallback();
cb.initialize( mProps, c);
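//the callback forwards each partition identified by the partitioner
//to the clusterer, which collapses the jobs in that partition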
//start the partitioner and let the fun begin!
p.determinePartitions( cb );
ADag clusteredDAG = c.getClusteredDAG();
if( mLogger.getLevel() == LogManager.TRACE_MESSAGE_LEVEL ){
//print out the clustered DAG
//before returning
mLogger.log( "Clustered DAG by clusterer " + c.description() + " is " + clusteredDAG,
LogManager.TRACE_MESSAGE_LEVEL );
}
return clusteredDAG;
}
/**
* Returns an adjacency list representation of the graph underlying the
* workflow. The map is keyed by a child's logical id, with the value
* being the list of logical ids of its parents.
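* <p>
* A tiny illustration (job names purely hypothetical): for a workflow
* with edges A -&gt; C and B -&gt; C, the returned map holds a single
* entry
* <pre>
*     { id(C) : [ id(A), id(B) ] }
* </pre>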
*
* @param dag the workflow
* @param nameIDMap map with the key as the job name and value as the
*                  logical id
*
* @return the adjacency list map.
*/
protected Map edgeList2Graph(ADag dag, Map nameIDMap){
Map map = new HashMap();
for( Iterator<GraphNode> it = dag.nodeIterator(); it.hasNext(); ){
    GraphNode node = it.next();
    Job child      = (Job)node.getContent();
    Object childID = nameIDMap.get( child.getID() );
    for( GraphNode parentNode : node.getParents() ){
        Job parent = (Job)parentNode.getContent();
        //fetch the child's parent list, creating it on first encounter
        List l = (List)map.get( childID );
        if( l == null ){
            l = new java.util.LinkedList();
            map.put( childID, l );
        }
        l.add( nameIDMap.get( parent.getID() ) );
    }
}
return map;
}
/**
* Returns an adjacency list representation of the graph referred to by
* the collection of edges. The map is keyed by a child, with the value
* being the list of its parents.
*
* @param relations the collection of <code>PCRelation</code> objects
*                  (edges) to convert.
* @param nameIDMap map with the key as the job name and value as the
*                  logical id
*
* @return the adjacency list map.
* @deprecated as part of PM-747
*/
protected Map edgeList2Graph(Collection<PCRelation> relations, Map nameIDMap){
Map map = new HashMap();
for( Iterator it = relations.iterator(); it.hasNext(); ){
    PCRelation rel = (PCRelation)it.next();
    Object childID = nameIDMap.get( rel.child );
    //fetch the child's parent list, creating it on first encounter
    List l = (List)map.get( childID );
    if( l == null ){
        l = new java.util.LinkedList();
        map.put( childID, l );
    }
    l.add( nameIDMap.get( rel.parent ) );
}
return map;
}
}//end of NodeCollapser