/**
* Copyright 2007-2008 University Of Southern California
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.isi.pegasus.planner.refiner.createdir;
import edu.isi.pegasus.planner.classes.ADag;
import edu.isi.pegasus.planner.classes.Job;
import edu.isi.pegasus.planner.classes.TransferJob;
import edu.isi.pegasus.planner.classes.PegasusBag;
import edu.isi.pegasus.common.logging.LogManager;
import edu.isi.pegasus.planner.catalog.site.classes.FileServer;
import edu.isi.pegasus.planner.classes.DAGJob;
import edu.isi.pegasus.planner.classes.DAXJob;
import edu.isi.pegasus.planner.partitioner.graph.GraphNode;
import edu.isi.pegasus.planner.refiner.DeployWorkerPackage;
import java.util.BitSet;
import java.util.Collection;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* This strategy for adding create dir jobs to the workflow only adds the minimum
* number of edges from the create dir job to the compute jobs in the workflow.
*
 * The strategy involves walking the graph in BFS order, and updating a bit set
 * associated with each job based on the BitSets of the parent jobs. The BitSet
 * indicates whether an edge exists from the create dir job to an ancestor of the node.
 *
 * For a node, the bit set is the union of all the parents' BitSets. The BFS traversal
 * ensures that a node's bit set is only updated once all of its parents have
 * been processed.
*
*
* @author Karan Vahi
*
* @version $Revision$
*/
public class Minimal extends AbstractStrategy {

    /**
     * Initializes the strategy.
     *
     * @param bag  bag of initialization objects
     * @param impl the implementation instance that creates the create dir jobs
     */
    public void initialize( PegasusBag bag, Implementation impl ){
        super.initialize( bag, impl );
    }

    /**
     * Modifies the workflow to add create directory nodes. The workflow passed
     * is a workflow where the jobs have been mapped to sites.
     *
     * The strategy involves walking the graph in BFS order, and updating a
     * bit set associated with each job based on the BitSets of the parent jobs.
     * The BitSet indicates whether an edge exists from the create dir job to an
     * ancestor of the node.
     *
     * For a node, the bit set is the union of all the parents' BitSets. The BFS
     * traversal ensures that a node's bit set is only updated once all of its
     * parents have been processed.
     *
     * @param dag the workflow to which the nodes have to be added.
     *
     * @return the modified workflow
     */
    public ADag addCreateDirectoryNodes( ADag dag ){
        //PM-747 no need for conversion as ADag now implements Graph interface
        return this.addCreateDirectoryNodes( dag, this.getCreateDirSites( dag ) );
    }

    /**
     * Adds create dir nodes to the workflow, adding only the minimal number of
     * edges from the create dir jobs to the jobs in the workflow.
     *
     * The strategy involves walking the graph in BFS order, and updating a
     * bit set associated with each job based on the BitSets of the parent jobs.
     * The BitSet indicates whether an edge exists from the create dir job to an
     * ancestor of the node.
     *
     * For a node, the bit set is the union of all the parents' BitSets. The BFS
     * traversal ensures that a node's bit set is only updated once all of its
     * parents have been processed.
     *
     * @param workflow the workflow
     * @param sites    the staging sites the workflow refers to.
     *
     * @return the workflow with the create dir nodes and edges added
     */
    public ADag addCreateDirectoryNodes( ADag workflow, Set<String> sites ) {
        //the number of sites dictates the size of the BitSet associated with each job.
        //each site is assigned a fixed bit index.
        Map<String, Integer> siteToBitIndexMap = new HashMap<String, Integer>();
        int bitSetSize = sites.size();
        int i = 0;
        for( String site : sites ){
            siteToBitIndexMap.put( site, i++ );
        }

        //create the create dir jobs required but don't add them to the workflow
        //till the edges are figured out.
        //for each execution pool add a create directory node.
        Map<GraphNode, List<GraphNode>> createDirChildrenMap = new HashMap<GraphNode, List<GraphNode>>();
        //maps a site to the associated create dir node
        Map<String, GraphNode> createDirMap = new HashMap<String, GraphNode>();
        for( String site : sites ){
            String jobName = getCreateDirJobName( workflow, site );
            Job newJob = mImpl.makeCreateDirJob( site,
                                                 jobName,
                                                 mSiteStore.getExternalWorkDirectoryURL( site, FileServer.OPERATION.put ) );
            mLogger.log( "Creating create dir node " + jobName, LogManager.DEBUG_MESSAGE_LEVEL );
            GraphNode node = new GraphNode( newJob.getID() );
            node.setContent( newJob );
            createDirChildrenMap.put( node, new LinkedList<GraphNode>() );
            createDirMap.put( site, node );
        }

        //we use an identity hash map to associate the nodes with the bitmaps
        Map<GraphNode, BitSet> nodeBitMap = new IdentityHashMap<GraphNode, BitSet>( workflow.size() );

        //do a BFS walk over the workflow
        for( Iterator<GraphNode> it = workflow.iterator(); it.hasNext(); ){
            GraphNode node = it.next();
            BitSet set = new BitSet( bitSetSize );
            Job job = (Job)node.getContent();
            String site = getAssociatedCreateDirSite( job );

            //check if for stage out jobs there are any parents specified
            //or not.
            if( job instanceof TransferJob && job.getJobType() == Job.STAGE_OUT_JOB ){
                Collection<GraphNode> parents = node.getParents();
                boolean skip = false;
                if( parents.isEmpty() ){
                    //means we have a stage out job only. probably the workflow
                    //was fully reduced in data reuse
                    skip = true;
                }
                if( parents.size() == 1 ){
                    for( GraphNode parent : parents ){
                        if( parent.getID().startsWith( DeployWorkerPackage.DEPLOY_WORKER_PREFIX ) ){
                            //PM-1128 we only have a single parent to a stage out job that is a
                            //stage worker job. the stage out job is deleting outputs of jobs
                            //deleted in data reuse
                            skip = true;
                        }
                    }
                }
                if( skip ){
                    //means we have a stage out job only. probably the workflow
                    //was fully reduced in data reuse
                    mLogger.log( "Not considering job for create dir edges - " + job.getID(), LogManager.DEBUG_MESSAGE_LEVEL );
                    nodeBitMap.put( node, set );
                    continue;
                }
            }

            //the set is a union of all the parents' sets
            for( GraphNode parent : node.getParents() ){
                BitSet pSet = nodeBitMap.get( parent );
                set.or( pSet );
            }

            if( site == null ){
                //only ok for transfer jobs (e.g. stage worker) and replica registration jobs
                if( job instanceof TransferJob || job.getJobType() == Job.REPLICA_REG_JOB ){
                    mLogger.log( "Not adding edge to create dir job for job " + job.getID(),
                                 LogManager.DEBUG_MESSAGE_LEVEL );
                    nodeBitMap.put( node, set );
                    continue;
                }
                else{
                    throw new RuntimeException( "Job not associated with staging site " + job.getID() );
                }
            }

            Integer value = siteToBitIndexMap.get( site );
            if( value == null ){
                //the job's create dir site was never seen while creating the
                //create dir jobs. construct an informative error listing the parents.
                StringBuilder parents = new StringBuilder();
                parents.append( "{" );
                for( GraphNode parent : node.getParents() ){
                    parents.append( parent.getID() ).append( "," );
                }
                parents.append( "}" );
                throw new RuntimeException( "Create dir site " + site + " for job " + job.getID() +
                                            " with parents " + parents +
                                            " is not present in staging sites for workflow " + createDirMap.keySet() );
            }
            int index = value;
            if( !set.get( index ) ){
                //none of the ancestors have an edge from the create dir job for
                //the site. need to add an edge.
                GraphNode parent = createDirMap.get( site );
                mLogger.log( "Need to add edge " + parent.getID() + " -> " + job.getID(),
                             LogManager.DEBUG_MESSAGE_LEVEL );
                createDirChildrenMap.get( parent ).add( node );
                //edge has been added. set the bit to true
                set.set( index );
            }

            //set the bitset of createdirs for the node
            nodeBitMap.put( node, set );
        }

        //for each create dir job add it to the workflow
        //and connect the edges
        for( Map.Entry<GraphNode, List<GraphNode>> entry : createDirChildrenMap.entrySet() ){
            GraphNode createDirNode = entry.getKey();
            List<GraphNode> children = entry.getValue();
            mLogger.log( "Adding node to the workflow " + createDirNode.getID(),
                         LogManager.DEBUG_MESSAGE_LEVEL );
            for( GraphNode child : children ){
                createDirNode.addChild( child );
                child.addParent( createDirNode );
            }
            workflow.addNode( createDirNode );
        }

        return workflow;
    }

    /**
     * Returns the associated site that the job is dependent on.
     * This is the site whose create dir job should be a parent or an ancestor
     * of the job.
     *
     * @param job the job for which we need the associated create dir site.
     *
     * @return the site, or null for transfer jobs that have no non third party
     *         site associated (e.g. stage worker jobs).
     */
    private String getAssociatedCreateDirSite( Job job ) {
        String site = null;
        if( job.getJobType() == Job.CHMOD_JOB ){
            site = job.getStagingSiteHandle();
        }
        else{
            //the parent in case of a transfer job
            //is the non third party site
            site = ( job instanceof TransferJob ) ?
                        ((TransferJob)job).getNonThirdPartySite() :
                        job.getStagingSiteHandle();
            if( site == null ){
                //only ok for stage worker jobs
                if( job instanceof TransferJob ){
                    mLogger.log( "Not adding edge to create dir job for job " + job.getID(),
                                 LogManager.DEBUG_MESSAGE_LEVEL );
                    return site;
                }
            }
        }
        return site;
    }

    /**
     * Returns whether a dependency on a create dir job should be added for a job.
     *
     * The dependency is put in only for transfer jobs that stage in data,
     * for jobs running on remote (non local) sites, and for compute jobs
     * (or sub workflow DAX/DAG jobs) running on the local site.
     *
     * @param job the job to check
     *
     * @return true if a dependency should be added, false otherwise
     */
    public boolean addDependency( Job job ){
        int type = job.getJobType();
        boolean local = job.getSiteHandle().equals( "local" );
        if( ( job instanceof TransferJob && type != Job.STAGE_OUT_JOB )
            || ( !local
                 || ( type == Job.COMPUTE_JOB /*|| type == Job.STAGED_COMPUTE_JOB*/ || job instanceof DAXJob || job instanceof DAGJob ) ) ){
            return true;
        }
        return false;
    }
}