/**
 * Copyright 2007-2008 University Of Southern California
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.isi.pegasus.planner.refiner;

import edu.isi.pegasus.common.logging.LogManager;
import edu.isi.pegasus.common.util.PegasusURL;
import edu.isi.pegasus.common.util.Separator;
import edu.isi.pegasus.planner.catalog.site.classes.FileServer;
import edu.isi.pegasus.planner.catalog.site.classes.SiteCatalogEntry;
import edu.isi.pegasus.planner.catalog.transformation.TransformationCatalogEntry;
import edu.isi.pegasus.planner.catalog.transformation.classes.TCType;
import edu.isi.pegasus.planner.classes.ADag;
import edu.isi.pegasus.planner.classes.DAGJob;
import edu.isi.pegasus.planner.classes.DAXJob;
import edu.isi.pegasus.planner.classes.Job;
import edu.isi.pegasus.planner.classes.PegasusBag;
import edu.isi.pegasus.planner.classes.TransferJob;
import edu.isi.pegasus.planner.namespace.Pegasus;
import edu.isi.pegasus.planner.partitioner.graph.GraphNode;
import edu.isi.pegasus.planner.refiner.createdir.AbstractStrategy;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.BitSet;
import java.util.Collection;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * This refiner adds the leaf cleanup jobs to the workflow. The strategy is
 * symmetric to the one used for adding the create dir jobs to the workflow.
 *
 * @author Karan Vahi
 * @version $Revision$
 * @see CreateDirectory
 */
public class RemoveDirectory extends Engine {

    /**
     * The logical name of the transformation that removes directories on the
     * remote execution sites.
     */
    public static final String TRANSFORMATION_NAME = "cleanup";

    /**
     * The basename of the executable used to remove directories. Despite the
     * historical field name, the cleanup jobs run pegasus-transfer.
     */
    public static final String REMOVE_DIR_EXECUTABLE_BASENAME = "pegasus-transfer";

    /**
     * The transformation namespace for the remove dir jobs.
     */
    public static final String TRANSFORMATION_NAMESPACE = "pegasus";

    /**
     * The version number of the transformation for the remove dir jobs.
     */
    public static final String TRANSFORMATION_VERSION = null;

    /**
     * The derivation namespace for the remove dir jobs.
     */
    public static final String DERIVATION_NAMESPACE = "pegasus";

    /**
     * The logical name of the derivation that removes directories on the
     * remote execution sites.
     */
    public static final String DERIVATION_NAME = "cleanup";

    /**
     * The version number of the derivations for the remove dir jobs.
     */
    public static final String DERIVATION_VERSION = "1.0";

    /**
     * The prefix used for the names of the leaf cleanup nodes.
     */
    public static final String CLEANUP_PREFIX = "cleanup_";

    /**
     * The concrete dag so far, for which the cleanup nodes need to be generated.
     */
    private ADag mConcDag;

    /**
     * Boolean indicating whether we need to transfer the cleanup executable
     * from the submit host.
     */
    private boolean mTransferFromSubmitHost;
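    // Note on the combined name built below: with a null TRANSFORMATION_VERSION,
    // Separator.combine() presumably yields a name of the form
    // "pegasus::cleanup" (the usual namespace::name:version convention with
    // the version elided). This is stated as an assumption about the
    // Separator utility, not verified behavior.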
    /**
     * A convenience method to return the complete transformation name being
     * used to construct jobs in this class.
     *
     * @return the complete transformation name
     */
    public static String getCompleteTranformationName() {
        return Separator.combine( TRANSFORMATION_NAMESPACE,
                                  TRANSFORMATION_NAME,
                                  TRANSFORMATION_VERSION );
    }

    /**
     * The submit directory for the workflow.
     */
    private String mSubmitDirectory;

    /**
     * The job prefix that needs to be applied to the job url basenames.
     */
    protected String mJobPrefix;

    /**
     * The overloaded constructor that sets the dag for which the cleanup
     * nodes have to be generated.
     *
     * @param concDag         the concrete dag for which cleanup is required.
     * @param bag             the bag of initialization objects
     * @param submitDirectory the submit directory for the cleanup workflow
     */
    public RemoveDirectory( ADag concDag, PegasusBag bag, String submitDirectory ) {
        super( bag );
        mConcDag = concDag;
        mTransferFromSubmitHost = bag.getPegasusProperties().transferWorkerPackage();
        mSubmitDirectory = submitDirectory;
        mJobPrefix = bag.getPlannerOptions().getJobnamePrefix();
    }

    /**
     * Modifies the workflow to add remove directory nodes. The workflow
     * passed is a workflow where the jobs have already been mapped to sites.
     *
     * The strategy involves walking the graph in a bottom up BFS order, and
     * updating a bit set associated with each job based on the BitSets of its
     * children jobs. The BitSet indicates whether an edge already exists from
     * a descendant of the node to the remove dir job for a particular site.
     *
     * For a node, the bit set is the union of all the children's BitSets.
     * The BFS traversal ensures that the bitset of a node is only updated
     * once its children have been processed.
     *
     * @param dag the workflow to which the nodes have to be added.
     *
     * @return the workflow with the remove directory nodes added
     */
    public ADag addRemoveDirectoryNodes( ADag dag ) {
        //PM-747 no need for conversion as ADag now implements Graph interface
        return this.addRemoveDirectoryNodes( dag, this.getCreateDirSites( dag ) );
    }

    /**
     * Adds remove directory nodes to the workflow.
     *
     * The strategy involves walking the graph in a bottom up BFS order, and
     * updating a bit set associated with each job based on the BitSets of its
     * children jobs. The BitSet indicates whether an edge already exists from
     * a descendant of the node to the remove dir job for a particular site.
     *
     * For a node, the bit set is the union of all the children's BitSets.
     * The BFS traversal ensures that the bitset of a node is only updated
     * once its children have been processed.
     *
     * @param workflow the workflow
     * @param sites    the staging sites the workflow refers to.
     *
     * @return the workflow with the remove directory nodes added
     */
    public ADag addRemoveDirectoryNodes( ADag workflow, Set<String> sites ) {
        //the number of sites dictates the size of the BitSet associated with each job.
        Map<String, Integer> siteToBitIndexMap = new HashMap<String, Integer>();
        int bitSetSize = sites.size();
        int i = 0;
        for( String site : sites ) {
            siteToBitIndexMap.put( site, i++ );
        }

        //create the remove dir jobs required, but don't add them to the
        //workflow till the edges are figured out.
        //for each execution site add a remove directory node.
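        // A worked example of the strategy (illustrative, not from the code):
        // with staging sites {A, B} mapped to bit indices {0, 1}, a leaf job
        // staged on A gets an edge to A's remove dir node and the bitset {0}.
        // Its parent on A inherits {0} from the union of its children's
        // bitsets, so no redundant edge to A's node is added; a parent on B
        // sees bit 1 unset, gains an edge to B's node, and ends with {0, 1}.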
        Map<GraphNode, Set<GraphNode>> removeDirParentsMap = new HashMap<GraphNode, Set<GraphNode>>();
        //maps a site to the associated remove dir node
        Map<String, GraphNode> removeDirMap = new HashMap<String, GraphNode>();
        for( String site : sites ) {
            String jobName = getRemoveDirJobName( workflow, site );
            Job newJob = this.makeRemoveDirJob( site, jobName );
            mLogger.log( "Creating remove directory node " + jobName,
                         LogManager.DEBUG_MESSAGE_LEVEL );
            GraphNode node = new GraphNode( newJob.getID() );
            node.setContent( newJob );
            removeDirParentsMap.put( node, new LinkedHashSet<GraphNode>() );
            removeDirMap.put( site, node );
        }

        //we use an identity hash map to associate the nodes with the bitsets
        Map<GraphNode, BitSet> nodeBitMap = new IdentityHashMap<GraphNode, BitSet>( workflow.size() );

        //do a bottom up BFS walk over the workflow
        for( Iterator<GraphNode> it = workflow.bottomUpIterator(); it.hasNext(); ) {
            GraphNode node = it.next();
            BitSet set = new BitSet( bitSetSize );
            Job job = (Job) node.getContent();
            String site = getAssociatedCreateDirSite( job );

            //PM-795 for each DAX|DAG job in the workflow, we need to add
            //a dependency to all the leaf cleanup jobs
            if( job instanceof DAXJob || job instanceof DAGJob ) {
                for( Map.Entry<GraphNode, Set<GraphNode>> entry : removeDirParentsMap.entrySet() ) {
                    GraphNode removeDirNode = entry.getKey();
                    Set<GraphNode> parents = entry.getValue();
                    mLogger.log( "Need to add edge for DAX|DAG job " + job.getID() + " -> " + removeDirNode.getID(),
                                 LogManager.DEBUG_MESSAGE_LEVEL );
                    parents.add( node );
                }
            }

            //check whether any parents are specified for the stage out jobs
            if( job instanceof TransferJob && job.getJobType() == Job.STAGE_OUT_JOB ) {
                Collection<GraphNode> parents = node.getParents();
                boolean skip = false;
                if( parents.isEmpty() ) {
                    //means we have a stage out job only. probably the workflow
                    //was fully reduced in data reuse
                    skip = true;
                }
                if( parents.size() == 1 ) {
                    for( GraphNode parent : parents ) {
                        if( parent.getID().startsWith( DeployWorkerPackage.DEPLOY_WORKER_PREFIX ) ) {
                            //PM-1128 the only parent of the stage out job is a
                            //stage worker job. the stage out job is deleting
                            //outputs of jobs deleted in data reuse
                            skip = true;
                        }
                    }
                }
                if( skip ) {
                    mLogger.log( "Not considering job for remove dir edges - " + job.getID(),
                                 LogManager.DEBUG_MESSAGE_LEVEL );
                    nodeBitMap.put( node, set );
                    continue;
                }
            }

            if( job.getJobType() == Job.CREATE_DIR_JOB ) {
                //no need to do anything for the create dir jobs
                continue;
            }

            //the set is a union of all the children's sets
            for( GraphNode child : node.getChildren() ) {
                BitSet cSet = nodeBitMap.get( child );
                set.or( cSet );
            }

            if( site == null ) {
                //only ok for stage worker and replica registration jobs
                if( job instanceof TransferJob || job.getJobType() == Job.REPLICA_REG_JOB ) {
                    mLogger.log( "Not adding edge to leaf cleanup job for job " + job.getID(),
                                 LogManager.DEBUG_MESSAGE_LEVEL );
                    nodeBitMap.put( node, set );
                    continue;
                }
                else {
                    throw new RuntimeException( "Job not associated with staging site " + job.getID() );
                }
            }

            Object value = siteToBitIndexMap.get( site );
            if( value == null ) {
                throw new RuntimeException( "Remove dir site " + site + " for job " + job.getID() +
                                            " is not present in staging sites for workflow " + removeDirMap.keySet() );
            }
            int index = (Integer) value;
            if( !set.get( index ) ) {
                //none of the node's descendants have an edge to the remove
                //dir node for the site. need to add an edge.
                GraphNode child = removeDirMap.get( site );
                mLogger.log( "Need to add edge " + job.getID() + " -> " + child.getID(),
                             LogManager.DEBUG_MESSAGE_LEVEL );
                removeDirParentsMap.get( child ).add( node );

                //edge has been added. set the bit to true
                set.set( index );
            }

            //set the bitset of remove dirs for the node
            nodeBitMap.put( node, set );
        }
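        // At this point each remove dir node's parent set is minimal: only
        // the bottom-most jobs per staging site (those none of whose
        // descendants touch that site) appear as parents, plus any DAX|DAG
        // jobs, which conservatively precede every leaf cleanup node.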
        //for each leaf cleanup job add it to the workflow
        //and connect the edges
        for( Map.Entry<GraphNode, Set<GraphNode>> entry : removeDirParentsMap.entrySet() ) {
            GraphNode removeDirNode = entry.getKey();
            Set<GraphNode> parents = entry.getValue();
            mLogger.log( "Adding node to the workflow " + removeDirNode.getID(),
                         LogManager.DEBUG_MESSAGE_LEVEL );
            for( GraphNode parent : parents ) {
                removeDirNode.addParent( parent );
                parent.addChild( removeDirNode );
            }
            workflow.addNode( removeDirNode );
        }

        return workflow;
    }

    /**
     * Retrieves the sites for which the create dir jobs need to be created.
     * It returns all the sites where the compute jobs have been scheduled.
     *
     * @param dag the workflow
     *
     * @return a Set containing the siteIDs of the sites where the
     *         dag has to be run.
     */
    protected Set<String> getCreateDirSites( ADag dag ) {
        return AbstractStrategy.getCreateDirSites( dag );
    }

    /**
     * Returns the name of the remove directory job that is to be assigned.
     * The name takes the workflow name into account, as that is the only
     * thing that can guarantee uniqueness of names in case of deferred
     * planning.
     *
     * @param dag  the dag for which the cleanup DAG is being generated.
     * @param site the execution site for which the remove directory job
     *             is responsible.
     *
     * @return String corresponding to the name of the job.
     */
    private String getRemoveDirJobName( ADag dag, String site ) {
        StringBuffer sb = new StringBuffer();

        //append the cleanup prefix
        sb.append( DeployWorkerPackage.CLEANUP_PREFIX );

        //append the job prefix if specified in options at runtime
        if( mJobPrefix != null ) {
            sb.append( mJobPrefix );
        }

        sb.append( dag.getLabel() ).append( "_" ).
           append( dag.getIndex() ).append( "_" );
        sb.append( site );

        return sb.toString();
    }

    /**
     * Creates a remove directory job that removes the workflow's work
     * directory on the remote execution site. The externally accessible URL
     * and the internal path of the directory are retrieved from the site
     * store.
     *
     * @param site    the execution site for which the remove dir job is to
     *                be created.
     * @param jobName the name that is to be assigned to the job.
     *
     * @return the remove dir job.
     */
    public Job makeRemoveDirJob( String site, String jobName ) {
        List<String> urls  = new LinkedList<String>();
        List<String> files = new LinkedList<String>();

        //the externally accessible url to the directory/workspace for the workflow
        urls.add( mSiteStore.getExternalWorkDirectoryURL( site, FileServer.OPERATION.put ) );
        files.add( mSiteStore.getInternalWorkDirectory( site, null ) );

        return makeRemoveDirJob( site, jobName, urls, files );
    }
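    // Illustrative example (hypothetical values): for a site "condorpool"
    // whose work directory is /scratch/run0001, the two-argument variant
    // above might pair the external URL
    // "gsiftp://staging.example.org/scratch/run0001" with the internal path
    // "/scratch/run0001". The URL is what pegasus-transfer removes; the path
    // is only used for the submit directory safety check further below.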
    /**
     * Creates a remove directory job that removes directories on the remote
     * site using the pegasus-transfer executable.
     *
     * @param site    the site from where the directories need to be removed.
     * @param jobName the name that is to be assigned to the job.
     * @param urls    the list of urls for the files to be cleaned up.
     *
     * @return the remove dir job.
     */
    public Job makeRemoveDirJob( String site, String jobName, List<String> urls ) {
        return this.makeRemoveDirJob( site, jobName, urls, null );
    }

    /**
     * Creates a remove directory job that removes directories on the remote
     * site using the pegasus-transfer executable.
     *
     * @param site    the site from where the directories need to be removed.
     * @param jobName the name that is to be assigned to the job.
     * @param urls    the list of urls for the files to be cleaned up.
     * @param files   the corresponding list of file paths.
     *
     * @return the remove dir job.
     */
    public Job makeRemoveDirJob( String site, String jobName, List<String> urls, List<String> files ) {
        Job newJob = new Job();
        List entries = null;
        String execPath = null;
        TransformationCatalogEntry entry = null;

        //PM-773 we only do the additional checks for leaf cleanup jobs
        boolean additionalChecks = ( files != null );
        if( additionalChecks && urls.size() != files.size() ) {
            throw new RuntimeException( "Mismatch in URLS and corresponding files " +
                                        urls.size() + "," + files.size() );
        }

        //the site where the cleanup job will run
        String eSite = "local";

        SiteCatalogEntry siteEntry = mSiteStore.lookup( site );
        for( String url : urls ) {
            if( url.startsWith( PegasusURL.FILE_URL_SCHEME ) ) {
                if( !siteEntry.isVisibleToLocalSite() ) {
                    //means the cleanup job should run on the staging site
                    mLogger.log( "Directory URL is a file url for site " + site + " " + urls,
                                 LogManager.DEBUG_MESSAGE_LEVEL );
                    eSite = site;
                }
            }
        }

        //PM-833 set the relative submit directory for the transfer
        //job based on the associated file factory
        newJob.setRelativeSubmitDirectory( this.mSubmitDirMapper.getRelativeDir( newJob ) );

        //PM-773
        if( additionalChecks ) {
            String submitDir = mPOptions.getSubmitDirectory();
            //check if the submit directory is the same as the file being asked to remove
            for( String file : files ) {
                if( submitDir.equals( file ) ) {
                    //if the staging site is local then it is a fatal error,
                    //else we log a warning
                    String error = "The submit directory and the scratch directory for the cleanup job match " + file;
                    if( site.equals( "local" ) ) {
                        error += " . This will result in the cleanup job removing the submit directory as the workflow is running.";
                        throw new RuntimeException( error );
                    }
                    else {
                        mLogger.log( error, LogManager.WARNING_MESSAGE_LEVEL );
                    }
                }
            }
        }

        SiteCatalogEntry ePool = mSiteStore.lookup( eSite );

        try {
            entries = mTCHandle.lookup( RemoveDirectory.TRANSFORMATION_NAMESPACE,
                                        RemoveDirectory.TRANSFORMATION_NAME,
                                        RemoveDirectory.TRANSFORMATION_VERSION,
                                        eSite,
                                        TCType.INSTALLED );
        }
        catch( Exception e ) {
            //overly broad catch. we only log the lookup failure at debug level
            mLogger.log( "Unable to retrieve entry from TC " + e.getMessage(),
                         LogManager.DEBUG_MESSAGE_LEVEL );
        }

        entry = ( entries == null ) ?
                this.defaultTCEntry( ePool ) : //try using a default one
                (TransformationCatalogEntry) entries.get( 0 );

        if( entry == null ) {
            //should throw a TC specific exception
            StringBuffer error = new StringBuffer();
            error.append( "Could not find entry in tc for lfn " ).
                  append( this.getCompleteTranformationName() ).
                  append( " at site " ).append( eSite );
            mLogger.log( error.toString(), LogManager.ERROR_MESSAGE_LEVEL );
            throw new RuntimeException( error.toString() );
        }

        if( mTransferFromSubmitHost ) {
            /*
            //we are using mkdir directly
            argString = " -p " + mPoolHandle.getExecPoolWorkDir( execPool );
            execPath = "mkdir";
            //path variable needs to be set
            newJob.envVariables.construct( "PATH", CreateDirectory.PATH_VALUE );
            */
            newJob.vdsNS.construct( Pegasus.GRIDSTART_KEY, "None" );

            StringBuffer sb = new StringBuffer();
            sb.append( mProps.getBinDir() ).
               append( File.separator ).append( RemoveDirectory.REMOVE_DIR_EXECUTABLE_BASENAME );

            execPath = sb.toString();
            newJob.condorVariables.construct( "transfer_executable", "true" );
        }
        else {
            execPath = entry.getPhysicalTransformation();
        }
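        //The code below writes the stdin file consumed by pegasus-transfer.
        //As an illustration (hypothetical values), a single directory
        //removal is expected to serialize along these lines:
        //[
        // {
        //   "id": 1, "type": "remove",
        //   "target": { "site_label": "condorpool",
        //               "url": "gsiftp://staging.example.org/scratch/run0001",
        //               "recursive": "True" }
        // }
        //]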
        //prepare the stdin for the cleanup job
        String stdIn = jobName + ".in";
        try {
            BufferedWriter writer;
            File directory = new File( this.mSubmitDirectory, newJob.getRelativeSubmitDirectory() );
            writer = new BufferedWriter( new FileWriter( new File( directory, stdIn ) ) );

            writer.write("[\n");
            int fileNum = 1;
            for( String url : urls ) {
                if( fileNum > 1 ) {
                    writer.write(" ,\n");
                }
                writer.write(" {\n");
                writer.write(" \"id\": " + fileNum + ",\n");
                writer.write(" \"type\": \"remove\",\n");
                writer.write(" \"target\": {");
                writer.write(" \"site_label\": \"" + site + "\",");
                writer.write(" \"url\": \"" + url + "\",");
                writer.write(" \"recursive\": \"True\"");
                writer.write(" }");
                writer.write(" }\n");

                //associate a credential if required
                newJob.addCredentialType( site, url );

                //increment the id so that subsequent entries are separated
                //by commas and get unique ids
                fileNum++;
            }
            writer.write("]\n");

            //closing the handle to the writer
            writer.close();
        }
        catch( IOException e ) {
            mLogger.log( "While writing the stdIn file " + e.getMessage(),
                         LogManager.ERROR_MESSAGE_LEVEL );
            throw new RuntimeException( "While writing the stdIn file " + stdIn, e );
        }

        //set the stdin file for the job
        newJob.setStdIn( stdIn );

        newJob.jobName = jobName;
        newJob.setTransformation( RemoveDirectory.TRANSFORMATION_NAMESPACE,
                                  RemoveDirectory.TRANSFORMATION_NAME,
                                  RemoveDirectory.TRANSFORMATION_VERSION );
        newJob.setDerivation( RemoveDirectory.DERIVATION_NAMESPACE,
                              RemoveDirectory.DERIVATION_NAME,
                              RemoveDirectory.DERIVATION_VERSION );
        newJob.executable = execPath;
        newJob.setSiteHandle( eSite );
        newJob.jobClass = Job.CLEANUP_JOB;
        newJob.jobID = jobName;
        newJob.setArguments( "" );

        //the profile information from the site catalog needs to be
        //assimilated into the job.
        newJob.updateProfiles( mSiteStore.lookup( newJob.getSiteHandle() ).getProfiles() );

        //add any notifications specified in the transformation
        //catalog for the job. JIRA PM-391
        newJob.addNotifications( entry );

        //the profile information from the transformation catalog needs to
        //be assimilated into the job, overriding the one from the site
        //catalog.
        newJob.updateProfiles( entry );

        //the profile information from the properties file is assimilated,
        //overriding the one from the transformation catalog.
        newJob.updateProfiles( mProps );

        return newJob;
    }
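    // Note: when no TC entry exists, defaultTCEntry() below falls back to a
    // path constructed from the site's PEGASUS_HOME (or VDS_HOME), e.g.
    // $PEGASUS_HOME/bin/pegasus-transfer. The home values themselves depend
    // on what the site catalog advertises for the site.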
    /**
     * Returns a default TC entry to be used in case an entry is not found in
     * the transformation catalog.
     *
     * @param site the SiteCatalogEntry for the site for which the default
     *             entry is required.
     *
     * @return the default entry.
     */
    private TransformationCatalogEntry defaultTCEntry( SiteCatalogEntry site ) {
        TransformationCatalogEntry defaultTCEntry = null;

        //check if PEGASUS_HOME is set
        String home = site.getPegasusHome();
        //if PEGASUS_HOME is not set, use VDS_HOME
        home = ( home == null ) ? site.getVDSHome() : home;

        mLogger.log( "Creating a default TC entry for " + RemoveDirectory.getCompleteTranformationName() +
                     " at site " + site.getSiteHandle(),
                     LogManager.DEBUG_MESSAGE_LEVEL );

        //if home is still null, we cannot create a default entry
        if( home == null ) {
            mLogger.log( "Unable to create a default entry for " + RemoveDirectory.getCompleteTranformationName(),
                         LogManager.DEBUG_MESSAGE_LEVEL );
            return defaultTCEntry;
        }

        //remove trailing / if specified
        home = ( home.charAt( home.length() - 1 ) == File.separatorChar ) ?
               home.substring( 0, home.length() - 1 ) :
               home;

        //construct the path to the executable
        StringBuffer path = new StringBuffer();
        path.append( home ).append( File.separator ).
             append( "bin" ).append( File.separator ).
             append( RemoveDirectory.REMOVE_DIR_EXECUTABLE_BASENAME );

        defaultTCEntry = new TransformationCatalogEntry( RemoveDirectory.TRANSFORMATION_NAMESPACE,
                                                         RemoveDirectory.TRANSFORMATION_NAME,
                                                         RemoveDirectory.TRANSFORMATION_VERSION );
        defaultTCEntry.setPhysicalTransformation( path.toString() );
        defaultTCEntry.setResourceId( site.getSiteHandle() );
        defaultTCEntry.setType( TCType.INSTALLED );
        defaultTCEntry.setSysInfo( site.getSysInfo() );

        //register back into the transformation catalog
        //so that we do not need to worry about creating it again
        try {
            mTCHandle.insert( defaultTCEntry, false );
        }
        catch( Exception e ) {
            //just log at debug level, as this is more of a performance
            //improvement than anything else
            mLogger.log( "Unable to register in the TC the default entry " +
                         defaultTCEntry.getLogicalTransformation() +
                         " for site " + site,
                         e,
                         LogManager.DEBUG_MESSAGE_LEVEL );
        }

        return defaultTCEntry;
    }

    /**
     * Returns the associated site that the job is dependent on. This is the
     * site whose create dir job should be a parent or an ancestor of the job.
     *
     * @param job the job for which we need the associated create dir site.
     *
     * @return the site
     */
    private String getAssociatedCreateDirSite( Job job ) {
        String site = null;
        if( job.getJobType() == Job.CHMOD_JOB ) {
            site = job.getStagingSiteHandle();
        }
        else {
            //in case of a transfer job the associated site
            //is the non third party site
            site = ( job instanceof TransferJob ) ?
                   ((TransferJob) job).getNonThirdPartySite() :
                   job.getStagingSiteHandle();
            if( site == null ) {
                //only ok for stage worker jobs
                if( job instanceof TransferJob ) {
                    mLogger.log( "Not adding edge to leaf cleanup job for job " + job.getID(),
                                 LogManager.DEBUG_MESSAGE_LEVEL );
                    return site;
                }
            }
        }
        return site;
    }
}
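// Illustrative usage sketch (hypothetical driver code, not part of this class):
//
//   RemoveDirectory remover = new RemoveDirectory( dag, bag, submitDir );
//   dag = remover.addRemoveDirectoryNodes( dag );
//
// The refiner assumes that site selection has already run, so that each job
// carries a staging site handle when the bottom up BFS walk executes.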