/**
* Copyright 2007-2008 University Of Southern California
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.isi.pegasus.planner.refiner;
import edu.isi.pegasus.planner.catalog.site.classes.FileServer;
import edu.isi.pegasus.planner.catalog.site.classes.SiteCatalogEntry;
import edu.isi.pegasus.common.logging.LogManager;
import edu.isi.pegasus.planner.classes.ADag;
import edu.isi.pegasus.planner.classes.FileTransfer;
import edu.isi.pegasus.planner.classes.NameValue;
import edu.isi.pegasus.planner.classes.PegasusFile;
import edu.isi.pegasus.planner.classes.ReplicaLocation;
import edu.isi.pegasus.planner.classes.Job;
import edu.isi.pegasus.planner.classes.PegasusBag;
import edu.isi.pegasus.planner.common.PegasusProperties;
import edu.isi.pegasus.planner.namespace.Pegasus;
import edu.isi.pegasus.planner.partitioner.graph.GraphNode;
import edu.isi.pegasus.planner.partitioner.graph.Graph;
import edu.isi.pegasus.planner.selector.ReplicaSelector;
import edu.isi.pegasus.planner.selector.replica.ReplicaSelectorFactory;
import edu.isi.pegasus.planner.transfer.Refiner;
import edu.isi.pegasus.planner.transfer.refiner.RefinerFactory;
import edu.isi.pegasus.planner.catalog.ReplicaCatalog;
import edu.isi.pegasus.planner.catalog.replica.ReplicaCatalogEntry;
import edu.isi.pegasus.common.util.FactoryException;
import edu.isi.pegasus.common.util.PegasusURL;
import edu.isi.pegasus.planner.catalog.replica.ReplicaFactory;
import edu.isi.pegasus.planner.catalog.site.classes.Directory;
import edu.isi.pegasus.planner.catalog.site.classes.FileServerType.OPERATION;
import edu.isi.pegasus.planner.classes.DAGJob;
import edu.isi.pegasus.planner.classes.DAXJob;
import edu.isi.pegasus.planner.classes.PlannerCache;
import edu.isi.pegasus.planner.mapper.SubmitMapper;
import edu.isi.pegasus.planner.common.PegasusConfiguration;
import edu.isi.pegasus.planner.mapper.SubmitMapperFactory;
import edu.isi.pegasus.planner.namespace.Dagman;
import edu.isi.pegasus.planner.mapper.OutputMapper;
import edu.isi.pegasus.planner.mapper.OutputMapperFactory;
import edu.isi.pegasus.planner.mapper.StagingMapper;
import edu.isi.pegasus.planner.mapper.StagingMapperFactory;
import edu.isi.pegasus.planner.mapper.output.Hashed;
import edu.isi.pegasus.planner.namespace.Metadata;
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;
import java.util.Properties;
/**
* The transfer engine, which on the basis of the pools on which the jobs are to
* run, adds nodes to transfer the data products.
*
* @author Karan Vahi
* @author Gaurang Mehta
* @version $Revision$
*
*/
public class TransferEngine extends Engine {
/**
* The MAX level is assigned as the level for deleted jobs.
* We can put it to Integer.MAX_VALUE, but it is rare that the number of levels
* in a workflow exceeds 1000.
*/
public static final int DELETED_JOBS_LEVEL = 1000;
/**
* The name of the Replica Catalog Implementer that is used to write out
* the workflow cache file in the submit directory.
*/
public static final String WORKFLOW_CACHE_FILE_IMPLEMENTOR = "FlushedCache";
/**
* The name of the source key for the Replica Catalog Implementer that serves as
* cache.
*/
public static final String WORKFLOW_CACHE_REPLICA_CATALOG_KEY = "file";
/**
* The property prefix for retrieving SRM properties.
*/
public static final String SRM_PROPERTIES_PREFIX = "pegasus.transfer.srm";
/**
* The suffix to retrieve the service url for SRM server.
*/
public static final String SRM_SERVICE_URL_PROPERTIES_SUFFIX = "service.url";
/**
* The suffix to retrieve the mount point for SRM server.
*/
public static final String SRM_MOUNT_POINT_PROPERTIES_SUFFIX = "mountpoint";
/**
* The name of the refiner for purposes of error logging.
* NOTE(review): the value is misspelled ("TranferEngine"); it is a public
* constant that appears in error/log output, so it is left unchanged here —
* confirm no consumer matches on the exact string before correcting.
*/
public static final String REFINER_NAME = "TranferEngine";
/**
* A map that associates the site name with the SRM server url and mount point.
*/
private Map<String, NameValue> mSRMServiceURLToMountPointMap;
/**
* The DAG object to which the transfer nodes are to be added. This is the
* reduced Dag, which is got from the Reduction Engine.
*/
private ADag mDag;
/**
* The bridge to the Replica Catalog.
*/
private ReplicaCatalogBridge mRCBridge;
/**
* The handle to the replica selector that is to used to select the various
* replicas.
*/
private ReplicaSelector mReplicaSelector;
/**
* The handle to the transfer refiner that adds the transfer nodes into the
* workflow.
*/
private Refiner mTXRefiner;
/**
* Holds all the jobs deleted by the reduction algorithm.
*/
private List<Job> mDeletedJobs;
/**
* A SimpleFile Replica Catalog, that tracks all the files that are being
* materialized as part of workflow execution.
*/
private PlannerCache mPlannerCache;
/**
* A Replica Catalog, that tracks all the GET URL's for the files on the
* staging sites.
*/
private ReplicaCatalog mWorkflowCache;
/**
* Handle to an OutputMapper that tells where to place the files on the
* output site.
*/
private OutputMapper mOutputMapper;
/**
* Handle to an Staging Mapper that tells where to place the files on the
* shared scratch space on the staging site.
*/
private StagingMapper mStagingMapper;
/**
* The working directory relative to the mount point of the execution pool.
* It is populated from the pegasus.dir.exec property from the properties file.
* If not specified then the work_dir is supposed to be the exec mount point
* of the execution pool.
*/
protected String mWorkDir;
/**
* This member variable if set causes the destination URL for the symlink jobs
* to have symlink:// url if the pool attribute associated with the pfn
* is same as a particular jobs execution pool.
*/
protected boolean mUseSymLinks;
/**
* A boolean indicating whether we are doing worker node execution or not.
*/
//private boolean mWorkerNodeExecution;
/**
* A boolean indicating whether to bypass first level staging for inputs
*/
private boolean mBypassStagingForInputs;
/**
* A boolean to track whether condor file io is used for the workflow or not.
*/
//private final boolean mSetupForCondorIO;
/**
* Handle to the planner configuration object, constructed in the
* constructor from the bag's logger.
*/
private PegasusConfiguration mPegasusConfiguration;
/**
* The output site where files need to be staged to.
*/
private final String mOutputSite;
/**
* Overloaded constructor.
*
* @param reducedDag the reduced workflow.
* @param bag bag of initialization objects
* @param deletedJobs list of all jobs deleted by reduction algorithm.
* @param deletedLeafJobs list of deleted leaf jobs by reduction algorithm.
* NOTE(review): this parameter is not referenced in
* the constructor body — confirm whether it is still needed.
*/
public TransferEngine( ADag reducedDag,
PegasusBag bag,
List<Job> deletedJobs ,
List<Job> deletedLeafJobs){
super( bag );
//load the submit directory mapper, and register it back into the bag
//so that downstream components retrieve the same instance
mSubmitDirMapper = SubmitMapperFactory.loadInstance( bag, new File(mPOptions.getSubmitDirectory()));
bag.add(PegasusBag.PEGASUS_SUBMIT_MAPPER, mSubmitDirMapper );
//same pattern for the staging mapper: load, then publish via the bag
mStagingMapper = StagingMapperFactory.loadInstance(bag);
bag.add(PegasusBag.PEGASUS_STAGING_MAPPER, mStagingMapper );
mUseSymLinks = mProps.getUseOfSymbolicLinks();
mSRMServiceURLToMountPointMap = constructSiteToSRMServerMap( mProps );
mDag = reducedDag;
mDeletedJobs = deletedJobs;
mBypassStagingForInputs = mProps.bypassFirstLevelStagingForInputs();
mPegasusConfiguration = new PegasusConfiguration( bag.getLogger() );
try{
mTXRefiner = RefinerFactory.loadInstance( reducedDag,
bag );
mReplicaSelector = ReplicaSelectorFactory.loadInstance(mProps);
}
catch(Exception e){
//wrap all the exceptions into a factory exception
throw new FactoryException("Transfer Engine ", e);
}
mOutputSite = mPOptions.getOutputSite();
mOutputMapper = OutputMapperFactory.loadInstance( reducedDag, bag);
//the workflow cache file is written out in the submit directory and
//closed in addTransferNodes()
mWorkflowCache = this.initializeWorkflowCacheFile( reducedDag );
//log some configuration messages
mLogger.log("Transfer Refiner loaded is [" + mTXRefiner.getDescription() +
"]",LogManager.CONFIG_MESSAGE_LEVEL);
mLogger.log("ReplicaSelector loaded is [" + mReplicaSelector.description() +
"]",LogManager.CONFIG_MESSAGE_LEVEL);
mLogger.log("Submit Directory Mapper loaded is [" + mSubmitDirMapper.description() +
"]",LogManager.CONFIG_MESSAGE_LEVEL);
mLogger.log("Staging Mapper loaded is [" + mStagingMapper.description() +
"]",LogManager.CONFIG_MESSAGE_LEVEL);
mLogger.log("Output Mapper loaded is [" + mOutputMapper.description() +
"]",LogManager.CONFIG_MESSAGE_LEVEL);
}
/**
* Determines whether a particular created transfer pair has to be binned
* for remote transfer or local.
*
* @param job the associated compute job.
* NOTE(review): this parameter is not used in the decision —
* confirm whether it can be dropped from the signature.
* @param stagingSite the staging site for the job
* @param ft the file transfer created
*
* @return true if the transfer should be executed remotely, false otherwise.
*/
private boolean runTransferRemotely(Job job , SiteCatalogEntry stagingSite, FileTransfer ft) {
boolean remote = false;
NameValue destTX = ft.getDestURL();
for( String sourceSite: ft.getSourceSites() ){
//traverse through all the URL's on that site
for( ReplicaCatalogEntry rce : ft.getSourceURLs(sourceSite) ){
String sourceURL = rce.getPFN();
//if the source URL is a FILE URL and
//source site matches the destination site
//then has to run remotely
if( sourceURL != null && sourceURL.startsWith( PegasusURL.FILE_URL_SCHEME ) ){
//sanity check to make sure source site
//matches destination site
if( sourceSite.equalsIgnoreCase( destTX.getKey()) ){
if( sourceSite.equalsIgnoreCase( stagingSite.getSiteHandle() ) && stagingSite.isVisibleToLocalSite() ){
//PM-1024 if the source also matches the job staging site
//then we do an extra check if the staging site is the same
//as the sourceSite, then we consider the auxillary.local attribute
//for the staging site
remote = false;
}
else{
//NOTE(review): this break exits only the inner URL loop;
//a later source site can still reset remote back to false.
//Confirm whether that ordering dependence is intended.
remote = true;
break;
}
}
else if( sourceSite.equals( "local") ){
remote = false;
}
}
}
}
return remote;
}
/**
 * Removes file URL sources from a FileTransfer whose associated site
 * does not match the site handle passed in. Only source URLs that use
 * the file URL scheme are candidates for removal.
 *
 * @param job  the job the transfer is associated with (used for logging).
 * @param ft   the FileTransfer whose source URLs are pruned in place.
 * @param site the site handle that file URL sources must match to be kept.
 *
 * @return true if at least one source URL was removed, false otherwise.
 */
public boolean removeFileURLFromSource( Job job, FileTransfer ft, String site ){
    boolean removed = false;
    for( String candidateSite : ft.getSourceSites() ){
        //walk the URL list for this site with an explicit iterator,
        //since entries may be removed while traversing
        Iterator<ReplicaCatalogEntry> urlIterator = ft.getSourceURLs( candidateSite ).iterator();
        while( urlIterator.hasNext() ){
            String pfn = urlIterator.next().getPFN();

            //non file URLs are always retained
            if( pfn == null || !pfn.startsWith( PegasusURL.FILE_URL_SCHEME ) ){
                continue;
            }
            //file URLs whose site matches the passed handle are retained
            if( candidateSite.equalsIgnoreCase( site ) ){
                continue;
            }

            //site attribute on the file URL does not match: drop it
            mLogger.log( "Removing source url " + pfn + " associated with site " + candidateSite +
                         " for job " + job.getID(),
                         LogManager.TRACE_MESSAGE_LEVEL );
            urlIterator.remove();
            removed = true;
        }
    }
    return removed;
}
/**
 * Returns whether to run a transfer job on the local site or not.
 *
 * @param site the site entry associated with the destination URL.
 * @param destinationURL the destination URL.
 * @param type the type of transfer job for which the URL is being constructed.
 *
 * @return true indicating the associated transfer job should run on the
 *         local site, false otherwise.
 */
public boolean runTransferOnLocalSite( SiteCatalogEntry site, String destinationURL, int type) {
    String siteHandle = site.getSiteHandle();

    //the local site itself, and any site whose filesystem is visible
    //to the local site (PM-1024), always transfer locally
    if( siteHandle.equals( "local" ) || site.isVisibleToLocalSite() ){
        return true;
    }

    //a refiner advertising a preference for transfer job placement wins
    if( mTXRefiner.refinerPreferenceForTransferJobLocation() ){
        return mTXRefiner.refinerPreferenceForLocalTransferJobs( type );
    }

    //explicit user preference for remote transfer
    if( mTXRefiner.runTransferRemotely( siteHandle, type ) ){
        return false;
    }

    //a file URL destination is only reachable on the remote site itself
    return !( destinationURL != null && destinationURL.startsWith( PegasusURL.FILE_URL_SCHEME ) );
}
/**
* Adds the transfer nodes to the workflow.
*
* @param rcb the bridge to the ReplicaCatalog.
* @param plannerCache an instance of the replica catalog that will
* store the locations of the files on the remote
* sites.
*/
public void addTransferNodes( ReplicaCatalogBridge rcb, PlannerCache plannerCache ) {
mRCBridge = rcb;
//share the submit directory mapper with the bridge
mRCBridge.mSubmitDirMapper = this.mSubmitDirMapper;
mPlannerCache = plannerCache;
Job currentJob;
String currentJobName;
Vector vOutPoolTX;
String msg;
String outputSite = mPOptions.getOutputSite();
//convert the dax to a graph representation and walk it
//in a top down manner
//PM-747 no need for conversion as ADag now implements Graph interface
Graph workflow = mDag;
//stage out is only performed when a non-empty output site is specified
boolean stageOut = (( outputSite != null ) && ( outputSite.trim().length() > 0 ));
//go through each job in turn
for( Iterator it = workflow.iterator(); it.hasNext(); ){
GraphNode node = ( GraphNode )it.next();
currentJob = (Job)node.getContent();
//PM-833 associate a directory with the job
//that is used to determine relative submit directory
currentJob.setRelativeSubmitDirectory( getRelativeSubmitDirectory( currentJob ) );
//set the node depth as the level
currentJob.setLevel( node.getDepth() );
currentJobName = currentJob.getName();
mLogger.log("",LogManager.DEBUG_MESSAGE_LEVEL);
msg = "Job being traversed is " + currentJobName;
mLogger.log(msg, LogManager.DEBUG_MESSAGE_LEVEL);
msg = "To be run at " + currentJob.executionPool;
mLogger.log(msg, LogManager.DEBUG_MESSAGE_LEVEL);
//getting the parents of that node
Collection<GraphNode> parents = node.getParents();
mLogger.log("Parents of job:" + node.parentsToString(),
LogManager.DEBUG_MESSAGE_LEVEL);
//add inter-site transfers and replica-catalog lookups for inputs
processParents(currentJob, parents);
//transfer the nodes output files
//to the output pool
if ( stageOut ) {
SiteCatalogEntry stagingSite = mSiteStore.lookup( currentJob.getStagingSiteHandle() );
if (stagingSite == null ) {
mLogMsg = this.poolNotFoundMsg( currentJob.getSiteHandle(), "vanilla");
mLogger.log( mLogMsg, LogManager.ERROR_MESSAGE_LEVEL );
throw new RuntimeException( mLogMsg );
}
//PM-590 Stricter checks
String stagingSiteURLPrefix = stagingSite.selectHeadNodeScratchSharedFileServerURLPrefix( FileServer.OPERATION.put );
if( stagingSiteURLPrefix == null ){
this.complainForHeadNodeURLPrefix( REFINER_NAME, stagingSite.getSiteHandle(), FileServer.OPERATION.put, currentJob );
}
boolean localTransfer = runTransferOnLocalSite(
stagingSite,
stagingSiteURLPrefix,
Job.STAGE_OUT_JOB);
vOutPoolTX = getFileTX(outputSite, currentJob, localTransfer );
mTXRefiner.addStageOutXFERNodes( currentJob, vOutPoolTX, rcb, localTransfer );
}
else{
//create the cache file always
//Pegasus Bug PM-32 and PM-356
trackInCaches( currentJob );
}
}
//we are done with the traversal.
//mTXRefiner.done();
//get the deleted leaf jobs o/p files to output pool
//only if output pool is specified
//should be moved upwards in the pool. redundancy at present
if (outputSite != null &&
outputSite.trim().length() > 0
&& !mDeletedJobs.isEmpty() ) {
mLogger.log( "Adding stage out jobs for jobs deleted from the workflow", LogManager.INFO_MESSAGE_LEVEL );
for( Iterator it = this.mDeletedJobs.iterator(); it.hasNext() ;) {
currentJob = (Job)it.next();
currentJob.setLevel( TransferEngine.DELETED_JOBS_LEVEL );
//for a deleted node, to transfer it's output
//the execution pool should be set to local i.e submit host
currentJob.setSiteHandle( "local" );
//PM-936 set the staging site for the deleted job
//to local site
currentJob.setStagingSiteHandle( "local" );
//for jobs deleted during data reuse we dont
//go through the staging site. they are transferred
//directly to the output site.
vOutPoolTX = getDeletedFileTX(outputSite, currentJob);
if( !vOutPoolTX.isEmpty() ){
//the job is deleted anyways. The files exist somewhere
//as mentioned in the Replica Catalog. We assume it is
//URL remotely accessible
boolean localTransfer = true;
mTXRefiner.addStageOutXFERNodes( currentJob, vOutPoolTX, rcb, localTransfer, true );
}
}
}
//we are done with the traversal.
mTXRefiner.done();
//close the handle to the workflow cache file if it is written
//not the planner cache file
this.mWorkflowCache.close();
}
/**
 * Returns the staging site associated with a job. The staging site is
 * read directly off the job, where it has been set earlier in planning.
 *
 * @param job the job for which to determine the staging site.
 *
 * @return the staging site handle for the job.
 */
public String getStagingSite( Job job ){
    return job.getStagingSiteHandle();
}
/**
* This gets the file transfer objects corresponding to the location of files
* found in the replica mechanism, and transfers it to the output pool asked
* by the user. If the output pool path and the one returned by the replica
* mechanism match then that object is not transferred.
*
* @param destSite this the output pool which the user specifies at runtime.
* @param job The Job object corresponding to the leaf job which was
* deleted by the Reduction algorithm
*
* @return Vector of <code>FileTransfer</code> objects
*
* @throws RuntimeException if no replica location is found for an output file.
*/
private Vector getDeletedFileTX( String destSite, Job job ) {
    Vector vFileTX = new Vector();
    SiteCatalogEntry outputSite = mSiteStore.lookup(destSite);
    for( Iterator it = job.getOutputFiles().iterator(); it.hasNext(); ){
        PegasusFile pf = (PegasusFile)it.next();
        String lfn = pf.getLFN();

        //PM-739 all output files for deleted jobs should have their
        //cleanup flag set to false. these output files are not
        //generated during the workflow, but are retrieved from a
        //location specified in the replica catalog.
        pf.setForCleanup( false );

        //we only have to get a deleted file that user wants to be transferred
        if( pf.getTransientTransferFlag() ){
            continue;
        }

        ReplicaLocation rl = mRCBridge.getFileLocs( lfn );
        //sanity check - the file must be discoverable in the replica catalog
        if( rl == null ){
            throw new RuntimeException( "Unable to find a physical filename (PFN) in the Replica Catalog for output file with logical filename (LFN) as " + lfn );
        }

        //where the file is placed (put) and later retrieved/registered (get)
        //on the output site
        String putDestURL = mOutputMapper.map( lfn, destSite, FileServer.OPERATION.put );
        String getDestURL = mOutputMapper.map( lfn, destSite, FileServer.OPERATION.get );

        //selLocs are all the locations found in ReplicaMechanism corr
        //to the pool pool
        ReplicaLocation selLocs = mReplicaSelector.selectAndOrderReplicas(rl,
                                       destSite,
                                       this.runTransferOnLocalSite( outputSite,putDestURL, Job.STAGE_OUT_JOB ));

        boolean alreadyAtDestination = false;
        FileTransfer ft = null;
        //checking through all the pfn's returned on the pool
        for ( Iterator selIt = selLocs.pfnIterator(); selIt.hasNext(); ) {
            ReplicaCatalogEntry selLoc = ( ReplicaCatalogEntry ) selIt.next();
            String sourceURL = selLoc.getPFN();

            //if a replica already resides at the destination,
            //no transfer is required for this lfn
            if (sourceURL.trim().equalsIgnoreCase(putDestURL.trim())){
                String msg = "The leaf file " + lfn +
                             " is already at the output pool " + destSite;
                mLogger.log(msg,LogManager.INFO_MESSAGE_LEVEL);
                alreadyAtDestination = true;
                break;
            }

            //NOTE(review): each iteration overwrites ft, so only the last
            //candidate replica is retained as the source — confirm whether
            //all candidates should instead be added as sources of a single
            //FileTransfer.
            ft = new FileTransfer( lfn, job.getName() );
            ft.addSource( selLoc.getResourceHandle() , sourceURL );
            ft.addDestination(destSite, putDestURL );
            ft.setURLForRegistrationOnDestination( getDestURL );
            ft.setSize( pf.getSize() );
            ft.setForCleanup( false );//PM-739
        }

        //add the constructed transfer (the last pfn). the null guard fixes
        //a latent defect where an empty candidate list resulted in a null
        //element being added to the returned vector
        if (!alreadyAtDestination && ft != null) {
            vFileTX.addElement(ft);
        }
    }
    return vFileTX;
}
/**
 * Processes a node's parents and determines whether transfer nodes are to
 * be added. Each input file of the job is searched for in the output files
 * of the parent nodes; files not produced by a parent are looked up in the
 * Replica Mechanism.
 *
 * @param job     the <code>Job</code> object containing all the details of
 *                the job.
 * @param parents list of <code>GraphNode</code> objects corresponding to
 *                the parent jobs of the job.
 */
private void processParents(Job job, Collection<GraphNode> parents) {
    Set inputFiles = job.getInputFiles();
    Vector rcSearchFiles = new Vector(); //vector of PegasusFile

    //all the output files produced by the parents
    Set<PegasusFile> parentOutputs = getOutputFiles( parents );

    //inter-site transfers of the parents' output files,
    //partitioned into locally-run and remotely-run transfers
    Collection[] interSiteFileTX = this.getInterpoolFileTX(job, parents);
    Collection localInterSiteTX = interSiteFileTX[0];
    Collection remoteInterSiteTX = interSiteFileTX[1];

    //only add transfer nodes when there is something to transfer
    if( !localInterSiteTX.isEmpty() ){
        mTXRefiner.addInterSiteTXNodes(job, localInterSiteTX, true );
    }
    if( !remoteInterSiteTX.isEmpty() ){
        mTXRefiner.addInterSiteTXNodes(job, remoteInterSiteTX, false );
    }

    //PM-976 every input file not generated by a parent job is looked up
    //in the replica catalog, regardless of its transfer flag
    for( Object o : inputFiles ){
        PegasusFile pf = (PegasusFile) o;
        if( !parentOutputs.contains( pf ) ){
            rcSearchFiles.addElement(pf);
        }
    }

    if( job instanceof DAXJob ){
        //for DAX jobs the method is always invoked,
        //as the DAX may just be referred to by its LFN
        getFilesFromRC( (DAXJob)job, rcSearchFiles);
    }
    else if ( !rcSearchFiles.isEmpty() ) {
        if( job instanceof DAGJob ){
            getFilesFromRC( (DAGJob)job, rcSearchFiles);
        }
        else{
            //get the locations from the RC
            getFilesFromRC(job, rcSearchFiles);
        }
    }
}
/**
* This gets the Vector of FileTransfer objects for the files which have to
* be transferred to one destination pool. The transient flag handling
* (files that do not need to be transferred) is delegated to
* constructFileTX, which returns null for such files.
*
* @param destPool the pool to which the files are to be transferred to.
* @param job the <code>Job</code> object of the job whose output files
* are needed at the destination pool.
* @param localTransfer boolean indicating that associated transfer job will run
* on local site.
*
* @return Vector of <code>FileTransfer</code> objects
*/
private Vector getFileTX(String destPool, Job job, boolean localTransfer ) {
    Vector vFileTX = new Vector();

    //check if there is a remote initialdir set for the job
    String path = job.vdsNS.getStringValue(
                                     Pegasus.REMOTE_INITIALDIR_KEY );

    for( Iterator it = job.getOutputFiles().iterator(); it.hasNext(); ){
        PegasusFile pf = (PegasusFile) it.next();
        //constructFileTX returns null for files that need no staging
        FileTransfer ft = this.constructFileTX( pf,
                                                job,
                                                destPool,
                                                path,
                                                localTransfer );
        if (ft != null) {
            vFileTX.add(ft);
        }
    }
    return vFileTX;
}
/**
* Constructs the FileTransfer object on the basis of the transiency
* information. If the transient flag for transfer is set, the destPutURL for the
* FileTransfer object would be the execution directory, as this is the entry
* that has to be registered in the ReplicaMechanism.
*
* The put/get URLs for the file on the staging site's shared scratch are
* always recorded in the planner and workflow caches, even when no
* FileTransfer object ends up being returned.
*
* @param pf the PegasusFile for which the transfer has to be done.
* @param job the associated job; its staging site handle determines where
* the file is placed after execution.
* @param destSiteHandle the output pool where the file should be transferred.
* @param path the path that a user specifies in the profile for key
* remote_initialdir that results in the workdir being
* changed for a job on a execution pool.
* @param localTransfer boolean indicating that associated transfer job will run
* on local site.
*
* @return the corresponding FileTransfer object, or null if both the
* transfer and registration flags are transient.
*
* @throws RuntimeException if either the staging or destination site is
* not found in the site store.
*/
private FileTransfer constructFileTX( PegasusFile pf,
Job job,
String destSiteHandle,
String path,
boolean localTransfer ) {
String stagingSiteHandle = job.getStagingSiteHandle();
String lfn = pf.getLFN();
FileTransfer ft = null;
SiteCatalogEntry stagingSite = mSiteStore.lookup( stagingSiteHandle );
SiteCatalogEntry destinationSite = mSiteStore.lookup( destSiteHandle );
if (stagingSite == null || destinationSite == null) {
mLogMsg = (stagingSite == null) ?
this.poolNotFoundMsg(stagingSiteHandle, "vanilla") :
this.poolNotFoundMsg(destSiteHandle, "vanilla");
mLogger.log( mLogMsg, LogManager.ERROR_MESSAGE_LEVEL );
throw new RuntimeException( mLogMsg );
}
//PM-833 figure out the addOn component just once per lfn
File addOn = mStagingMapper.mapToRelativeDirectory(job, stagingSite, lfn);
//the get and put URLs for the file on the staging site's shared scratch
String sharedScratchGetURL = this.getURLOnSharedScratch( stagingSite, job, OPERATION.get, addOn, lfn );
String sharedScratchPutURL = this.getURLOnSharedScratch( stagingSite, job, OPERATION.put, addOn, lfn );
//in the planner cache we track the output files put url on staging site
trackInPlannerCache( lfn, sharedScratchPutURL, stagingSiteHandle );
//in the workflow cache we track the output files put url on staging site
trackInWorkflowCache( lfn, sharedScratchGetURL, stagingSiteHandle );
//if both transfer and registration
//are transient return null
if (pf.getTransientRegFlag() && pf.getTransientTransferFlag()) {
return null;
}
//if only transient transfer flag
//means destPutURL and sourceURL
//are same and are equal to
//execution directory on stagingSiteHandle
if (pf.getTransientTransferFlag()) {
ft = new FileTransfer(lfn,job.getID(),pf.getFlags());
//set the transfer mode
ft.setSize( pf.getSize() );
ft.setTransferFlag(pf.getTransferFlag());
ft.addSource(stagingSiteHandle,sharedScratchGetURL);
ft.addDestination(stagingSiteHandle,sharedScratchGetURL);
ft.setURLForRegistrationOnDestination( sharedScratchGetURL );
ft.setMetadata( pf.getAllMetadata() );
}
//the source dir is the exec dir
//on exec pool and dest dir
//would be on the output pool
else {
//construct the source url depending on whether third party tx
//(local transfer uses the get URL, otherwise a file URL into the
//internal work directory on the staging site)
String sourceURL = localTransfer ?
sharedScratchGetURL :
"file://" + mSiteStore.getInternalWorkDirectory(stagingSiteHandle,path) +
File.separator + lfn;
ft = new FileTransfer(lfn, job.getID(), pf.getFlags());
ft.setSize( pf.getSize() );
//set the transfer mode
ft.setTransferFlag(pf.getTransferFlag());
ft.addSource(stagingSiteHandle,sourceURL);
//if the PegasusFile is already an instance of
//FileTransfer the user has specified the destination
//that they want to use in the DAX 3.0
if( pf instanceof FileTransfer ){
//not really supported in DAX 3.3?
ft.addDestination( ((FileTransfer)pf).removeDestURL() );
return ft;
}
ft.setMetadata( pf.getAllMetadata() );
//add all the possible destination urls iterating through
//the list of grid ftp servers associated with the dest pool.
/*
Directory storageDirectory = mSiteStore.lookup( destSiteHandle ).getHeadNodeStorageDirectory();
if( storageDirectory == null ){
throw new RuntimeException( "No Storage directory specified for site " + destSiteHandle );
}
//sanity check
if( !storageDirectory.hasFileServerForPUTOperations() ){
//no file servers for put operations
throw new RuntimeException( " No File Servers specified for PUT Operation on Shared Storage on Headnode for site " + destSiteHandle );
}
*/
for( String destURL : mOutputMapper.mapAll( lfn, destSiteHandle, OPERATION.put )){
//if the paths match of dest URI
//and execDirURL we return null
if (sharedScratchGetURL.equalsIgnoreCase(destURL)) {
/*ft = new FileTransfer(file, job);
ft.addSource(stagingSiteHandle, sharedScratchGetURL);*/
ft.addDestination(stagingSiteHandle, sharedScratchGetURL);
ft.setURLForRegistrationOnDestination( sharedScratchGetURL );
//make the transfer transient?
ft.setTransferFlag(PegasusFile.TRANSFER_NOT);
return ft;
}
ft.addDestination( destSiteHandle, destURL );
}
//construct a registration URL
ft.setURLForRegistrationOnDestination( mOutputMapper.map( lfn, destSiteHandle, FileServer.OPERATION.get , true ) );
}
return ft;
}
/**
 * Constructs a Registration URL for a LFN by delegating to the output
 * mapper. All storage file servers on the output site are assumed to
 * share the same external mount point.
 *
 * @param site the site handle.
 * @param lfn  the LFN for which the URL needs to be constructed.
 *
 * @return the URL.
 */
private String constructRegistrationURL( String site, String lfn ){
    String registrationURL = mOutputMapper.map( lfn, site, FileServer.OPERATION.get );
    return registrationURL;
}
/**
* Constructs a Registration URL for a LFN
*
* @param directory the storage directory
* @param site the site handle
* @param lfn the LFN for which the URL needs to be constructed
*
* @return the URL
*/
/*
private String constructRegistrationURL( Directory directory , String site, String lfn ){
//sanity check
if( !directory.hasFileServerForGETOperations() ){
//no file servers for GET operations
throw new RuntimeException( " No File Servers specified for GET Operation on Shared Storage for site " + site );
}
String url = null;
for( FileServer.OPERATION op : FileServer.OPERATION.operationsForGET() ){
for( Iterator it = directory.getFileServersIterator(op); it.hasNext();){
FileServer fs = (FileServer)it.next();
//assumption of same external mount point for each storage
//file server on output site
// url = this.getURLOnStageoutSite( fs, lfn );
url = mOutputMapper.map( lfn, site, FileServer.OPERATION.get );
return url;
}
}//end of different get operations
return url;
}
*/
/**
 * Generates an error message for a pool (site) that was not found in the
 * pool configuration file. The exact wording and spacing of the message
 * are preserved as-is, since the text appears in logs and thrown
 * RuntimeExceptions.
 *
 * @param poolName the name of pool that is not found.
 * @param universe the condor universe.
 *
 * @return the message.
 */
private String poolNotFoundMsg(String poolName, String universe) {
    return "Error: No matching entry to pool = " + poolName +
           " ,universe = " + universe +
           "\n found in the pool configuration file ";
}
/**
* This gets the Vector of FileTransfer objects for all the files which have
* to be transferred to the destination pool in case of Interpool transfers.
* Each FileTransfer object has the source and the destination URLs. the
* source URI is determined from the pool on which the jobs are executed.
*
* @param job the job with reference to which interpool file transfers
* need to be determined.
* @param parents Collection of <code>GraphNode</code> ojbects corresponding to the
* parent jobs of the job.
*
* @return array of Collection of <code>FileTransfer</code> objects
*/
private Collection<FileTransfer>[] getInterpoolFileTX(Job job, Collection<GraphNode>parents ) {
    String destSiteHandle = job.getStagingSiteHandle();
    //contains the remote_initialdir if specified for the job
    String destRemoteDir = job.vdsNS.getStringValue(
                                             Pegasus.REMOTE_INITIALDIR_KEY);

    SiteCatalogEntry destSite = mSiteStore.lookup( destSiteHandle );
    SiteCatalogEntry sourceSite;

    //index 0 collects transfers to be executed on the local site,
    //index 1 collects transfers to be executed remotely
    Collection[] result = new Collection[2];
    Collection<FileTransfer> localTransfers = new LinkedList();
    Collection<FileTransfer> remoteTransfers = new LinkedList();

    for ( GraphNode parent: parents ) {
        //get the parent job
        Job pJob = (Job)parent.getContent();
        sourceSite = mSiteStore.lookup( pJob.getStagingSiteHandle() );

        if( sourceSite.getSiteHandle().equalsIgnoreCase( destSiteHandle ) ){
            //no need to add transfers, as the parent job and child
            //job are run in the same directory on the pool
            continue;
        }

        String sourceURI = null;

        //PM-590 Stricter checks
        /* PM-833
        String thirdPartyDestPutURI = this.getURLOnSharedScratch( destSite, job, OPERATION.put, null );
        //definite inconsitency as url prefix and mount point
        //are not picked up from the same server
        boolean localTransfer = runTransferOnLocalSite( destSite, thirdPartyDestPutURI, Job.INTER_POOL_JOB );
        String destURI = localTransfer ?
            //construct for third party transfer
            thirdPartyDestPutURI :
            //construct for normal transfer
            "file://" + mSiteStore.getInternalWorkDirectory( destSiteHandle, destRemoteDir );
        */

        //walk the outputs of the parent job; only those that this job
        //consumes as inputs need an inter-site transfer
        for (Iterator fileIt = pJob.getOutputFiles().iterator(); fileIt.hasNext(); ){
            PegasusFile pf = (PegasusFile) fileIt.next();
            String outFile = pf.getLFN();

            if( job.getInputFiles().contains( pf ) ){
                //PM-833 figure out the addOn component just once per lfn
                String lfn = pf.getLFN();
                File addOn = mStagingMapper.mapToRelativeDirectory(job, destSite, lfn);
                String thirdPartyDestPutURL = this.getURLOnSharedScratch(destSite, job, OPERATION.put, addOn, lfn);

                //definite inconsistency as url prefix and mount point
                //are not picked up from the same server
                boolean localTransfer = runTransferOnLocalSite( destSite, thirdPartyDestPutURL, Job.INTER_POOL_JOB );
                String destURL = localTransfer ?
                    //construct for third party transfer
                    thirdPartyDestPutURL :
                    //construct for normal transfer
                    "file://" + mSiteStore.getInternalWorkDirectory( destSiteHandle, destRemoteDir ) +
                    File.separator + addOn +
                    File.separator + lfn;

                String sourceURL = null;
                /* PM-833 String destURL = destURI + File.separator + outFile;
                String thirdPartyDestURL = thirdPartyDestPutURI + File.separator +
                    outFile;
                */
                FileTransfer ft = new FileTransfer(outFile,pJob.jobName);
                ft.setSize( pf.getSize() );
                ft.addDestination(destSiteHandle,destURL);

                //for intersite transfers we need to track in transient rc
                //for the cleanup algorithm
                //only the destination is tracked as source will have been
                //tracked for the parent jobs
                trackInPlannerCache( outFile, thirdPartyDestPutURL, destSiteHandle );

                //in the workflow cache we track the get URL for the outfile
                String thirdPartyDestGetURL = this.getURLOnSharedScratch( destSite, job, OPERATION.get, addOn, outFile );
                trackInWorkflowCache( outFile, thirdPartyDestGetURL, destSiteHandle );

                //add all the possible source urls iterating through
                //the list of grid ftp servers associated with the dest pool.
                //NOTE(review): 'first' is written below but never read - looks
                //like a leftover from an earlier implementation.
                boolean first = true;

                Directory parentScratchDir = mSiteStore.lookup( pJob.getStagingSiteHandle() ).getDirectory( Directory.TYPE.shared_scratch );
                if( parentScratchDir == null ){
                    throw new RuntimeException( "Unable to determine the scratch dir for site " + pJob.getStagingSiteHandle() );
                }

                //retrieve all the file servers matching the get operations
                for( FileServer.OPERATION op : FileServer.OPERATION.operationsForGET() ){
                    for( Iterator it1 = parentScratchDir.getFileServersIterator(op); it1.hasNext(); ){
                        FileServer server = ( FileServer)it1.next();
                        //definite inconsistency as url prefix and mount point
                        //are not picked up from the same server
                        sourceURI = server.getURLPrefix();

                        //sourceURI += server.getMountPoint();
                        sourceURI += mSiteStore.getExternalWorkDirectory(server, pJob.getSiteHandle());

                        sourceURL = sourceURI + File.separator + outFile;

                        if(!(sourceURL.equalsIgnoreCase(thirdPartyDestPutURL))){
                            //add the source url only if it does not match to
                            //the third party destination url
                            ft.addSource(pJob.getStagingSiteHandle(), sourceURL);
                        }
                        first = false;
                    }
                }
                //a FileTransfer is valid only if at least one source was added
                if( ft.isValid() ){
                    if( localTransfer ){
                        localTransfers.add(ft);
                    }
                    else{
                        remoteTransfers.add(ft);
                    }
                }
            }
        }
    }

    result[0] = localTransfers;
    result[1] = remoteTransfers;
    return result;
}
/**
 * Special Handling for a DAGJob for retrieving files from the Replica Catalog.
 *
 * Locates the DAG file in the Replica Catalog and sets the resolved local
 * path on the job. All input files of the job are dropped, since DAG files
 * are not staged in - part of the reason is that we have little control over
 * where the DAGJob runs.
 *
 * @param job the DAGJob
 * @param searchFiles file that need to be looked in the Replica Catalog.
 *
 * @throws RuntimeException if no replica location exists for the DAG LFN,
 *         or the selected PFN is neither an absolute path nor a file URL.
 */
private void getFilesFromRC( DAGJob job, Collection searchFiles ){
    //at the moment dag files are not staged in.
    //remove all files from the input set of the job.
    job.getInputFiles().clear();

    String lfn = job.getDAGLFN();
    ReplicaLocation rl = mRCBridge.getFileLocs( lfn );

    if (rl == null) { //flag an error
        throw new RuntimeException(
                "TransferEngine.java: Can't determine a location to " +
                "transfer input file for DAG lfn " + lfn + " for job " +
                job.getName());
    }

    ReplicaCatalogEntry selLoc = mReplicaSelector.selectReplica( rl,
                                                                 job.getSiteHandle(),
                                                                 true );
    String pfn = selLoc.getPFN();

    //the selected location has to resolve to a local file path
    String dag = null;
    if( pfn.startsWith( File.separator ) ){
        //already an absolute path
        dag = pfn;
    }
    else if( pfn.startsWith( PegasusURL.FILE_URL_SCHEME ) ){
        //strip the file scheme to get at the path
        dag = new PegasusURL( pfn ).getPath();
    }
    else{
        throw new RuntimeException( "Invalid URL Specified for DAG Job " + job.getName() + " -> " + pfn );
    }

    job.setDAGFile(dag);

    //set the directory if specified
    job.setDirectory((String) job.dagmanVariables.removeKey(Dagman.DIRECTORY_EXTERNAL_KEY));
}
/**
 * Special Handling for a DAXJob for retrieving files from the Replica Catalog.
 *
 * Looks up the DAX LFN in the Replica Catalog; if a local path can be
 * resolved, it is passed directly in the job arguments (and the DAX file is
 * dropped from staging), else the bare LFN is passed and the job needs a
 * basename option set.
 *
 * @param job the DAXJob
 * @param searchFiles file that need to be looked in the Replica Catalog.
 */
private void getFilesFromRC( DAXJob job, Collection searchFiles ){
    String lfn = job.getDAXLFN();
    PegasusFile daxFile = new PegasusFile( lfn );

    //PM-667 sanity check - make sure the DAX LFN is tracked as an
    //input file of the job and looked up in the replica catalog
    if( !job.getInputFiles().contains( daxFile )){
        daxFile.setTransferFlag( PegasusFile.TRANSFER_MANDATORY );
        job.getInputFiles().add( daxFile );
        searchFiles.add( daxFile );
    }

    //try to resolve a direct local path to the DAX file. This ensures
    //that SUBDAXGenerator can figure out the path to the dag file that
    //will be created for the job. Else the dax job needs to have a
    //--basename option passed.
    String dax = null;
    ReplicaLocation rl = mRCBridge.getFileLocs( lfn );
    if (rl != null) {
        ReplicaCatalogEntry selLoc = mReplicaSelector.selectReplica( rl,
                                                                     job.getSiteHandle(),
                                                                     true );
        String pfn = selLoc.getPFN();
        //some extra checks to ensure paths
        if( pfn.startsWith( File.separator ) ){
            dax = pfn;
        }
        else if( pfn.startsWith( PegasusURL.FILE_URL_SCHEME ) ){
            dax = new PegasusURL( pfn ).getPath();
        }
    }

    if( dax == null ){
        //PM-667 fall back to the lfn instead of the full path to the dax;
        //the user then needs to have a basename option set for the DAX job
        dax = lfn;
    }
    else{
        //we have a local path to the DAX. drop the daxFile from the
        //input files for the job and from the searchFiles.
        if( job.getInputFiles().contains( daxFile )){
            logRemoval( job, daxFile, "Job Input files ", job.getInputFiles().remove( daxFile ) );
        }
        if( searchFiles.contains( daxFile ) ){
            logRemoval( job, daxFile, "Job Search Files", searchFiles.remove( daxFile ) );
        }
    }

    //append the dax to the job arguments
    String arguments = job.getArguments() + " --dax " + dax;
    job.setArguments( arguments );
    mLogger.log( "Set arguments for DAX job " + job.getID()+ " to " + arguments,
                 LogManager.DEBUG_MESSAGE_LEVEL );

    //delegate the remaining input files to the generic handling
    this.getFilesFromRC( (Job)job, searchFiles );
}
/**
 * It looks up the RCEngine Hashtable to lookup the locations for the
 * files and adds nodes to transfer them. If a file is not found to be in
 * the Replica Catalog the Transfer Engine flags an error and exits.
 *
 * @param job the <code>Job</code>object for whose ipfile have
 *            to search the Replica Mechanism for.
 * @param searchFiles Vector containing the PegasusFile objects corresponding
 *                    to the files that need to have their mapping looked
 *                    up from the Replica Mechanism.
 */
private void getFilesFromRC( Job job, Collection searchFiles ) {
    //Vector vFileTX = new Vector();
    //Collection<FileTransfer> symLinkFileTransfers = new LinkedList();
    Collection<FileTransfer> localFileTransfers = new LinkedList();
    Collection<FileTransfer> remoteFileTransfers = new LinkedList();

    String jobName = job.logicalName;
    String stagingSiteHandle = job.getStagingSiteHandle();
    String executionSiteHandle = job.getSiteHandle();
    //contains the remote_initialdir if specified for the job
    String eRemoteDir = job.vdsNS.getStringValue(
                                             Pegasus.REMOTE_INITIALDIR_KEY);
    SiteCatalogEntry stagingSite = mSiteStore.lookup( stagingSiteHandle );
    //we are using the pull mode for data transfer
    String scheme = "file";

    //sAbsPath would be just the source directory absolute path
    //dAbsPath would be just the destination directory absolute path
    //sDirURL would be the url to the source directory.
    //dDirPutURL would be the url to the destination directory
    //and is always a networked url.
    /* for PM-833
    String dDirPutURL = this.getURLOnSharedScratch( stagingSite, job, OPERATION.put, null );
    String dDirGetURL = this.getURLOnSharedScratch( stagingSite, job, OPERATION.get, null );
    String sDirURL = null;
    String sAbsPath = null;
    String dAbsPath = mSiteStore.getInternalWorkDirectory( stagingSiteHandle, eRemoteDir );

    //file dest dir is destination dir accessed as a file URL
    String fileDestDir = scheme + "://" + dAbsPath;

    //check if the execution pool is third party or not
    boolean runTransferOnLocalSite = runTransferOnLocalSite( stagingSite, dDirPutURL, Job.STAGE_IN_JOB);
    String destDir = ( runTransferOnLocalSite ) ?
        //use the full networked url to the directory
        dDirPutURL
        :
        //use the default pull mode
        fileDestDir;
    */

    for( Iterator it = searchFiles.iterator(); it.hasNext(); ){
        String sourceURL = null,destPutURL = null, destGetURL =null;
        PegasusFile pf = (PegasusFile) it.next();
        List pfns = null;
        ReplicaLocation rl = null;

        String lfn = pf.getLFN();
        NameValue nv = null;

        //PM-833 figure out the addOn component just once per lfn
        File addOn = mStagingMapper.mapToRelativeDirectory(job, stagingSite, lfn);
        destPutURL = this.getURLOnSharedScratch( stagingSite, job, OPERATION.put, addOn, lfn );
        destGetURL = this.getURLOnSharedScratch( stagingSite, job, OPERATION.get, addOn, lfn );
        String sDirURL = null;
        String sAbsPath = null;
        String dAbsPath = mSiteStore.getInternalWorkDirectory( stagingSiteHandle, eRemoteDir ) + File.separator + addOn;

        //file dest dir is destination dir accessed as a file URL
        String fileDestDir = scheme + "://" + dAbsPath;

        //check if the execution pool is third party or not
        boolean runTransferOnLocalSite = runTransferOnLocalSite( stagingSite, destPutURL, Job.STAGE_IN_JOB);
        String destDir = ( runTransferOnLocalSite ) ?
            //use the full networked url to the directory
            destPutURL
            :
            //use the default pull mode
            fileDestDir;

        //see if the pf is in fact an instance of FileTransfer
        if( pf instanceof FileTransfer ){
            //that means we should be having the source url already.
            //nv contains both the source pool and the url.
            //This happens in case of AI Planner or transfer of executables
            nv = ((FileTransfer)pf).getSourceURL();

            NameValue destNV = ((FileTransfer)pf).removeDestURL();

            //PM-833 we have to explicitly set the remote executable
            //especially for the staging of executables in sharedfs
            if( lfn.equalsIgnoreCase( job.getStagedExecutableBaseName() )){
                job.setRemoteExecutable( dAbsPath + File.separator + lfn );
            }

            /* PM-833
            if( destNV == null ){
                //the source URL was specified in the DAX
                //no transfer of executables case
                throw new RuntimeException( "Unreachable code . Signifies error in internal logic " );
            }
            else{
            */
            //staging of executables case
            //PM-833 destPutURL = destNV.getValue();
            destPutURL = (runTransferOnLocalSite( stagingSite, destPutURL, Job.STAGE_IN_JOB))?
                //the destination URL is already third party
                //enabled. use as it is
                destPutURL:
                //explicitly convert to file URL scheme
                scheme + "://" + new PegasusURL( destPutURL ).getPath();

            //for time being for this case the get url is same as put url
            destGetURL = destPutURL;
            //PM-833 }
        }
        else{
            //query the replica services and get hold of pfn
            rl = mRCBridge.getFileLocs( lfn );
            pfns = (rl == null) ? null: rl.getPFNList();
        }

        if ( pfns == null && nv == null ) {
            //check to see if the input file is optional
            if(pf.fileOptional()){
                //no need to add a transfer node for it if no location found

                //remove the PegasusFile object from the list of
                //input files for the job, only if file is not a checkpoint file
                if ( !pf.isCheckpointFile()){
                    job.getInputFiles().remove( pf );
                }
                continue;
            }

            //flag an error. this is when we don't get any replica location
            //from any source
            throw new RuntimeException(
                    "TransferEngine.java: Can't determine a location to " +
                    "transfer input file for lfn " + lfn + " for job " +
                    job.getName());
        }

        FileTransfer ft = (pf instanceof FileTransfer) ?
            (FileTransfer)pf:
            new FileTransfer( lfn, jobName, pf.getFlags() );

        //make sure the type information is set in file transfer
        ft.setType( pf.getType() );
        ft.setSize( pf.getSize() );

        //the transfer mode for the file needs to be
        //propagated for optional transfers.
        ft.setTransferFlag(pf.getTransferFlag());

        //PM-1190 associate metadata with the FileTransfer
        ft.setMetadata( pf.getAllMetadata());

        ReplicaLocation candidateLocations = null;
        if( nv != null ){
            //we have the replica already selected as a result
            //of executable staging
            List rces = new LinkedList();
            rces.add( new ReplicaCatalogEntry( nv.getValue(), nv.getKey() ));
            rl = new ReplicaLocation( lfn, rces );
        }

        //PM-1190 add any retrieved metadata from the replica catalog
        //to the associated PegasusFile that is associated with the compute jobs
        //(rl is guaranteed non null here - either from the bridge or
        //constructed above; the null case already threw earlier)
        pf.addMetadata( rl.getAllMetadata() );

        //select from the various replicas
        candidateLocations = mReplicaSelector.selectAndOrderReplicas( rl,
                                                                      executionSiteHandle,
                                                                      runTransferOnLocalSite );
        if( candidateLocations.getPFNCount() == 0 ){
            StringBuilder error = new StringBuilder();
            error.append( "Unable to select a Physical Filename (PFN) for file with logical filename (LFN) as ").
                  append( rl.getLFN() ).append( " for preferred site " ).append( executionSiteHandle ).
                  append( "with runTransferOnLocalSite - ").append( runTransferOnLocalSite ).
                  append( " amongst ").append( rl.getPFNList() );
            throw new RuntimeException( error.toString() );
        }

        //check if we need to replace url prefix for
        //symbolic linking
        boolean symLinkSelectedLocation = false;

        //is set to false later, on basis of property value
        boolean bypassFirstLevelStaging = true;

        int candidateNum = 0;
        //PM-1082 we want to select only one destination put URL
        //with preference for symlinks
        //assign to destPutURL to take care of executable staging
        String preferredDestPutURL = destPutURL;

        for( ReplicaCatalogEntry selLoc : candidateLocations.getPFNList()){
            candidateNum++;

            //NOTE: intentional assignment (not comparison) inside the condition
            if ( symLinkSelectedLocation =
                ( mUseSymLinks &&
                  selLoc.getResourceHandle().equals( job.getStagingSiteHandle() ) &&
                  !pf.isExecutable() //PM-1086 symlink only data files as chmod fails on symlinked file
                ) ) {
                //resolve any srm url's that are specified
                selLoc = replaceSourceProtocolFromURL( selLoc );
            }

            //get the file to the job's execution pool
            //this is assuming that there are no directory paths
            //in the pfn!!!
            sDirURL = selLoc.getPFN().substring( 0, selLoc.getPFN().lastIndexOf(File.separator) );

            //try to get the directory absolute path
            //yes i know that we sending the url to directory
            //not the file.
            sAbsPath = new PegasusURL( sDirURL ).getPath();

            //the final source and destination url's to the file
            sourceURL = selLoc.getPFN();

            if( destPutURL == null ||
                symLinkSelectedLocation){ //PM-1082 if a destination has to be symlinked always recompute

                //no staging of executables case.
                //we construct destination URL to file.
                /* PM-833
                StringBuffer destPFN = new StringBuffer();
                if( symLinkSelectedLocation ){
                    //we use the file URL location to dest dir
                    //in case we are symlinking
                    //destPFN.append( fileDestDir );
                    destPFN.append( this.replaceProtocolFromURL( destDir ) );
                }
                else{
                    //we use whatever destDir was set to earlier
                    destPFN.append( destDir );
                }
                destPFN.append( File.separator).append( lfn );
                destPutURL = destPFN.toString();
                preferredDestPutURL = destPutURL;
                destGetURL = dDirGetURL + File.separator + lfn;
                */
                if( symLinkSelectedLocation ){
                    //we use the file URL location to dest dir
                    //in case we are symlinking
                    //destPFN.append( fileDestDir );
                    destPutURL = this.replaceProtocolFromURL( destPutURL );
                }
                //ensures symlinked location gets picked up
                preferredDestPutURL = destPutURL;
            }

            //we have all the chopped up combos of the urls.
            //do some funky matching on the basis of the fact
            //that each pool has one shared filesystem

            //match the source and dest 3rd party urls or
            //match the directory url knowing that lfn and
            //(source and dest pool) are same
            try{
                //PM-833if(sourceURL.equalsIgnoreCase(dDirPutURL + File.separator + lfn)||
                if(sourceURL.equalsIgnoreCase( destPutURL )||
                    ( selLoc.getResourceHandle().equalsIgnoreCase( stagingSiteHandle ) &&
                      lfn.equals( sourceURL.substring(sourceURL.lastIndexOf(File.separator) + 1)) &&
                      //sAbsPath.equals( dAbsPath )
                      new File( sAbsPath ).getCanonicalPath().equals( new File( dAbsPath).getCanonicalPath())
                    )
                  ){
                    //do not need to add any transfer node
                    StringBuffer message = new StringBuffer( );

                    message.append( sAbsPath ).append( " same as " ).append( dAbsPath );
                    mLogger.log( message.toString() , LogManager.DEBUG_MESSAGE_LEVEL );
                    message = new StringBuffer();
                    message.append( " Not transferring ip file as ").append( lfn ).
                            append( " for job " ).append( job.jobName ).append( " to site " ).append( stagingSiteHandle );

                    mLogger.log( message.toString() , LogManager.DEBUG_MESSAGE_LEVEL );
                    continue;
                }
            }catch( IOException ioe ){
                /*ignore - canonical path resolution failed; fall through
                  and treat the locations as different */
            }

            //add locations of input data on the remote site to the transient RC
            bypassFirstLevelStaging = this.bypassStagingForInputFile( selLoc , pf , job );
            if( bypassFirstLevelStaging ){
                //only the files for which we bypass first level staging , we
                //store them in the planner cache as a GET URL and associate with the compute site
                //PM-698 . we have to clone since original site attribute will be different
                ReplicaCatalogEntry rce = (ReplicaCatalogEntry) selLoc.clone();
                rce.setResourceHandle( executionSiteHandle );
                trackInPlannerCache( lfn, rce, OPERATION.get );

                if( candidateNum == 1 ){
                    //PM-1014 we only track the first candidate in the workflow cache
                    //i.e the cache file written out in the submit directory
                    trackInWorkflowCache( lfn, sourceURL, selLoc.getResourceHandle() );
                }
                //ensure the input file does not get cleaned up by the
                //InPlace cleanup algorithm
                pf.setForCleanup( false );
                continue;
            }
            else{
                //track the location where the data is staged as
                //part of the first level staging
                //we always store the thirdparty url
                //trackInCaches( lfn, destPutURL, job.getSiteHandle() );
                trackInPlannerCache( lfn,
                                     destPutURL,
                                     job.getStagingSiteHandle());

                if( candidateNum == 1 ){
                    //PM-1014 we only track the first candidate in the workflow cache
                    //i.e the cache file written out in the submit directory
                    trackInWorkflowCache( lfn,
                                          destGetURL,
                                          job.getStagingSiteHandle());
                }
            }

            //PM-1014 we want to track all candidate locations
            ft.addSource( selLoc);
        } //end of traversal of all candidate locations

        //PM-1082 we want to add only one destination URL
        //with preference for symlink destination URL
        if(preferredDestPutURL == null){
            throw new RuntimeException( "Unable to determine a destination put URL on staging site " + stagingSiteHandle +
                                        " for file " + lfn + " for job " + job.getID() );
        }
        else{
            ft.addDestination(stagingSiteHandle,preferredDestPutURL);
        }

        if ( !bypassFirstLevelStaging ) {
            //no bypass of input file staging. we need to add
            //data stage in nodes for the lfn
            if( symLinkSelectedLocation || //symlinks can run only on staging site
                !runTransferOnLocalSite ||
                runTransferRemotely( job, stagingSite, ft ) ){ //check on the basis of constructed source URL whether to run remotely

                if( removeFileURLFromSource( job, ft, stagingSiteHandle ) ){
                    //PM-1082 remote transfers ft can still have file url's
                    //not matching the staging site
                    //sanity check
                    if( ft.getSourceURLCount() == 0 ){
                        throw new RuntimeException( "No source URL's available for stage-in( remote ) transfers for file " +
                                                    ft + " for job " + job.getID());
                    }
                }
                //all symlink transfers and user specified remote transfers
                remoteFileTransfers.add(ft);
            }
            else{
                localFileTransfers.add(ft);
            }
        }

        //we need to set destPutURL to null
        //NOTE(review): destPutURL is re-initialized at the top of each loop
        //iteration, so this assignment appears redundant - kept as-is.
        destPutURL = null;
    }

    //call addTransferNode
    if (!localFileTransfers.isEmpty() || !remoteFileTransfers.isEmpty()) {
        mTXRefiner.addStageInXFERNodes(job, localFileTransfers, remoteFileTransfers );
    }
}
/**
 * Replaces the SRM URL scheme from the url, and replaces it with the
 * file url scheme, returning a new object if replacement happens.
 * The original object passed as a parameter still remains the same.
 *
 * @param rce the <code>ReplicaCatalogEntry</code> object whose url need to be
 *            replaced.
 *
 * @return the object with the url replaced.
 */
protected ReplicaCatalogEntry replaceSourceProtocolFromURL( ReplicaCatalogEntry rce ) {
    String pfn = rce.getPFN();

    //a file URL needs no replacement. sanity check.
    if( pfn.startsWith( PegasusURL.FILE_URL_SCHEME ) ){
        return rce;
    }

    /* special handling for SRM urls - try to replace the configured
       service URL prefix with the internal mount point for the site */
    StringBuilder replaced = new StringBuilder();
    NameValue nv = mSRMServiceURLToMountPointMap.get( rce.getResourceHandle() );
    if( nv != null ){
        String urlPrefix = nv.getKey();
        if( pfn.startsWith( urlPrefix ) ){
            //swap the prefix for the mount point
            replaced.append( PegasusURL.FILE_URL_SCHEME ).append( "//" ).
                     append( nv.getValue() ).
                     append( pfn.substring( urlPrefix.length(), pfn.length() ) );
            mLogger.log( "Replaced pfn " + pfn + " with " + replaced.toString() ,
                         LogManager.TRACE_MESSAGE_LEVEL );
        }
    }

    if( replaced.length() == 0 ){
        //no SRM replacement took place. still do the FILE replacement
        //manually on the basis of the path component of the url
        replaced.append( PegasusURL.FILE_URL_SCHEME ).append( "//" ).
                 append( new PegasusURL( pfn ).getPath() );
    }

    //we do not need a full clone, just the PFN plus the attributes
    ReplicaCatalogEntry result = new ReplicaCatalogEntry( replaced.toString(),
                                                          rce.getResourceHandle() );
    for( Iterator it = rce.getAttributeIterator(); it.hasNext();){
        String key = (String)it.next();
        result.addAttribute( key, rce.getAttribute( key ) );
    }
    return result;
}
/**
 * Replaces the URL scheme of the passed pfn with the symlink url scheme and
 * returns the result as a new String. file URLs have their scheme swapped
 * directly; for other schemes only the path component is retained.
 *
 * @param pfn the pfn that needs to be replaced
 *
 * @return the replaced PFN
 */
protected String replaceProtocolFromURL( String pfn ) {
    StringBuilder result = new StringBuilder( PegasusURL.SYMLINK_URL_SCHEME );
    if( pfn.startsWith(PegasusURL.FILE_URL_SCHEME) ){
        //special handling for FILE URL's as
        //utility hostname functions dont hold up
        result.append( pfn.substring( PegasusURL.FILE_URL_SCHEME.length() ) );
    }
    else{
        //we want to skip out the hostname and keep only the path
        result.append( "//" ).append( new PegasusURL( pfn ).getPath() );
    }
    return result.toString();
}
/**
 * Constructs a Map by parsing the relevant SRM
 * pegasus properties.
 *
 * For example, if users have the following specified in properties file
 * <pre>
 * pegasus.transfer.srm.ligo-cit.service.url srm://osg-se.ligo.caltech.edu:10443/srm/v2/server?SFN=/mnt/hadoop
 * pegasus.transfer.srm.ligo-cit.service.mountpoint /mnt/hadoop
 * </pre>
 *
 * then, a Map is created that associates ligo-cit with a NameValue object
 * containing the service url and mount point.
 *
 * Malformed keys (no site component) and sites with a service URL but no
 * mount point are skipped with a warning.
 *
 * @param props the <code>PegasusProperties</code> object
 *
 * @return Map that maps a site name to a NameValue object that has the
 *         URL prefix and the mount point
 */
private Map<String, NameValue> constructSiteToSRMServerMap( PegasusProperties props ) {
    Map<String, NameValue> m = new HashMap();

    //first strip off prefix from properties and get matching subset
    Properties siteProps = props.matchingSubset( TransferEngine.SRM_PROPERTIES_PREFIX, false );

    //retrieve all the sites for which SRM servers are specified
    Map<String, String> urlPrefixes = new HashMap(); //associates site name to url prefix
    Map<String, String> mountPoints = new HashMap(); //associates site name to mount point
    for( String key : siteProps.stringPropertyNames() ){
        //the site name is the component before the first dot
        int dotIndex = key.indexOf( '.' );
        if( dotIndex == -1 ){
            //malformed property with no site component. skip it instead
            //of triggering a StringIndexOutOfBoundsException
            mLogger.log( "Ignoring malformed SRM property key " + key,
                         LogManager.WARNING_MESSAGE_LEVEL );
            continue;
        }
        String site = key.substring( 0, dotIndex );

        if( key.endsWith( TransferEngine.SRM_SERVICE_URL_PROPERTIES_SUFFIX ) ){
            urlPrefixes.put( site, siteProps.getProperty( key ) );
        }
        else if( key.endsWith( TransferEngine.SRM_MOUNT_POINT_PROPERTIES_SUFFIX ) ){
            mountPoints.put( site, siteProps.getProperty( key ) );
        }
    }

    //now merge the information into m and return
    for( Map.Entry<String, String> entry : urlPrefixes.entrySet() ){
        String site = entry.getKey();
        String mountPoint = mountPoints.get( site );
        if( mountPoint == null ){
            mLogger.log( "Mount Point for SRM server not specified in properties for site " + site,
                         LogManager.WARNING_MESSAGE_LEVEL );
            continue;
        }
        m.put( site, new NameValue( entry.getValue(), mountPoint ) );
    }

    mLogger.log( "SRM Server map is " + m,
                 LogManager.DEBUG_MESSAGE_LEVEL );
    return m;
}
/**
 * Aggregates the output files of all the jobs contained in the passed
 * graph nodes.
 *
 * @param nodes List<GraphNode> containing the jobs
 *
 * @return Set of PegasusFile objects
 */
private Set<PegasusFile> getOutputFiles( Collection<GraphNode> nodes ) {
    Set<PegasusFile> outputs = new HashSet();
    for( Iterator<GraphNode> it = nodes.iterator(); it.hasNext(); ){
        Job j = (Job)it.next().getContent();
        outputs.addAll( j.getOutputFiles() );
    }
    return outputs;
}
/**
 * Tracks the files created by a job in both the planner and workflow cache.
 * The planner cache stores the put URL's and the GET URL is stored in the
 * workflow cache.
 *
 * @param job the job whose output files need to be tracked.
 *
 * @throws RuntimeException if the staging site for the job cannot be found
 *         in the site store.
 */
private void trackInCaches( Job job ){
    SiteCatalogEntry stagingSiteEntry = mSiteStore.lookup( job.getStagingSiteHandle() );
    if ( stagingSiteEntry == null ) {
        this.poolNotFoundMsg( job.getStagingSiteHandle(), "vanilla" ) ;
        mLogger.log( mLogMsg, LogManager.ERROR_MESSAGE_LEVEL );
        throw new RuntimeException( mLogMsg );
    }

    //note: the original implementation also read the remote_initialdir
    //from job.vdsNS into a local that was never used - dropped here.
    for( Iterator it = job.getOutputFiles().iterator(); it.hasNext(); ){
        PegasusFile pf = (PegasusFile) it.next();
        String lfn = pf.getLFN();

        //PM-833 figure out the addOn component just once per lfn
        File addOn = mStagingMapper.mapToRelativeDirectory(job, stagingSiteEntry, lfn);

        //the put URL is tracked in the planner cache
        String stagingSitePutURL = this.getURLOnSharedScratch( stagingSiteEntry, job, OPERATION.put, addOn, lfn);
        trackInPlannerCache( lfn, stagingSitePutURL, stagingSiteEntry.getSiteHandle() );

        //the get URL is tracked in the workflow cache
        String stagingSiteGetURL = this.getURLOnSharedScratch( stagingSiteEntry, job, OPERATION.get, addOn, lfn);
        trackInWorkflowCache( lfn, stagingSiteGetURL, stagingSiteEntry.getSiteHandle() );
    }
}
/**
 * Convenience wrapper that inserts an entry into the planner cache as a
 * put URL.
 *
 * @param lfn the logical name of the file.
 * @param pfn the pfn
 * @param site the site handle
 */
private void trackInPlannerCache( String lfn, String pfn, String site ){
    this.trackInPlannerCache( lfn, pfn, site, OPERATION.put );
}
/**
 * Inserts a replica catalog entry into the planner cache with the
 * designated URL type.
 *
 * @param lfn the logical name of the file.
 * @param rce replica catalog entry
 * @param type the type of url
 */
private void trackInPlannerCache( String lfn, ReplicaCatalogEntry rce, OPERATION type ){
    mPlannerCache.insert( lfn, rce, type );
}
/**
 * Inserts an entry into the planner cache with the designated URL type.
 *
 * @param lfn the logical name of the file.
 * @param pfn the pfn
 * @param site the site handle
 * @param type the type of url
 */
private void trackInPlannerCache( String lfn, String pfn, String site, OPERATION type ){
    mPlannerCache.insert( lfn, pfn, site, type );
}
/**
 * Inserts an entry into the workflow cache that is to be written out to the
 * submit directory.
 *
 * @param lfn the logical name of the file.
 * @param pfn the pfn
 * @param site the site handle
 */
private void trackInWorkflowCache( String lfn, String pfn, String site ){
    mWorkflowCache.insert( lfn, pfn, site );
}
/**
 * Returns a URL on the shared scratch of the staging site
 *
 * @param entry the SiteCatalogEntry for the associated stagingsite
 * @param job the job
 * @param operation the FileServer operation for which we need the URL
 * @param addOn the job specific relative directory component under the
 *              shared scratch, as determined by the staging mapper
 * @param lfn the LFN can be null to get the path to the directory
 *
 * @return the URL
 */
private String getURLOnSharedScratch( SiteCatalogEntry entry ,
                                      Job job,
                                      FileServer.OPERATION operation ,
                                      File addOn,
                                      String lfn ){
    //delegate to the staging mapper, which encapsulates the placement policy
    return mStagingMapper.map(job, addOn, entry, operation, lfn );
}
/**
 * Returns a URL on the shared scratch of the staging site
 *
 * @param entry the SiteCatalogEntry for the associated stagingsite
 * @param job the job
 * @param operation the FileServer operation for which we need the URL
 * @param lfn the LFN can be null to get the path to the directory
 *
 * @return the URL
 *
 * @throws RuntimeException if no file server supporting the operation is
 *         configured on the shared scratch of the site.
 */
private String getURLOnSharedScratchOriginal( SiteCatalogEntry entry ,
                                              Job job,
                                              FileServer.OPERATION operation ,
                                              String lfn ){
    FileServer getServer = entry.selectHeadNodeScratchSharedFileServer( operation );

    if( getServer == null ){
        //this always throws a RuntimeException
        this.complainForScratchFileServer(job, operation, entry.getSiteHandle());
        //defensive guard: if complainForScratchFileServer ever returned
        //normally, the code below would throw an NPE instead of a
        //meaningful error
        throw new RuntimeException( "No " + operation + " file server on shared scratch of site " +
                                    entry.getSiteHandle() );
    }

    StringBuilder url = new StringBuilder();
    url.append( getServer.getURLPrefix() ).
        append( mSiteStore.getExternalWorkDirectory(getServer, entry.getSiteHandle() ));
    if( lfn != null ){
        url.append( File.separatorChar ).append( lfn );
    }
    return url.toString();
}
/**
 * Complains for a missing head node file server on a site for a job.
 * Always throws.
 *
 * @param job the job
 * @param operation the operation
 * @param site the site
 */
private void complainForScratchFileServer( Job job,
                                           FileServer.OPERATION operation,
                                           String site) {
    //delegate, identifying the job by its id
    complainForScratchFileServer( job.getID(), operation, site);
}
/**
 * Complains for a missing head node file server on a site for a job.
 * Always throws a RuntimeException carrying the constructed message.
 *
 * @param jobname the name of the job, may be null
 * @param operation the file server operation
 * @param site the site
 */
private void complainForScratchFileServer( String jobname,
                                           FileServer.OPERATION operation,
                                           String site) {
    //optional job identification component of the message
    String jobPart = ( jobname == null ) ? "" : "For job (" + jobname + ").";
    throw new RuntimeException( "[" + REFINER_NAME + "] " + jobPart +
            " File Server not specified for shared-scratch filesystem for site: " + site );
}
/**
 * Initializes a Replica Catalog Instance that is used to store
 * the GET URL's for all files on the staging site ( inputs staged and outputs
 * created ).
 *
 * @param dag the workflow being planned
 *
 * @return handle to transient catalog
 */
private ReplicaCatalog initializeWorkflowCacheFile( ADag dag ){
    mLogger.log("Initialising Workflow Cache File in the Submit Directory",
                LogManager.DEBUG_MESSAGE_LEVEL );

    //path to the cache file inside the submit directory
    String file = mPOptions.getSubmitDirectory() + File.separatorChar +
                  getCacheFileName( dag );

    //pick up the replica catalog properties and point the
    //implementation at the cache file
    Properties cacheProps = mProps.getVDSProperties().matchingSubset(
                                                          ReplicaCatalog.c_prefix,
                                                          false );
    cacheProps.setProperty( WORKFLOW_CACHE_REPLICA_CATALOG_KEY, file );

    try{
        return ReplicaFactory.loadInstance( WORKFLOW_CACHE_FILE_IMPLEMENTOR,
                                            cacheProps);
    }
    catch( Exception e ){
        throw new RuntimeException( "Unable to initialize Workflow Cache File in the Submit Directory " + file,
                                    e );
    }
}
/**
 * Constructs the basename to the cache file that is to be used
 * to log the transient files. The basename is dependant on whether the
 * basename prefix has been specified at runtime or not.
 *
 * @param adag the ADag object containing the workflow that is being
 *             concretized.
 *
 * @return the name of the cache file
 */
private String getCacheFileName(ADag adag){
    String bprefix = mPOptions.getBasenamePrefix();
    String prefix = ( bprefix != null ) ?
        //the prefix is not null, use it
        bprefix :
        //generate the prefix from the name of the dag
        adag.getLabel() + "-" + adag.getIndex();
    //append the suffix
    return prefix + ".cache";
}
/**
 * Returns a boolean indicating whether to bypass first level staging for a
 * file or not.
 *
 * @param entry a ReplicaCatalogEntry matching the selected replica location.
 * @param file the corresponding Pegasus File object
 * @param job the associated job
 *
 * @return boolean indicating whether we need to enable bypass or not
 */
private boolean bypassStagingForInputFile( ReplicaCatalogEntry entry , PegasusFile file, Job job ) {
    //bypass is only considered when the user has it configured and we
    //are in pegasus lite mode (worker node execution)
    if( !this.mBypassStagingForInputs ||
        !mPegasusConfiguration.jobSetupForWorkerNodeExecution(job) ){
        return false;
    }

    boolean isFileURL = entry.getPFN().startsWith( PegasusURL.FILE_URL_SCHEME);
    String fileSite = entry.getResourceHandle();

    if( mPegasusConfiguration.jobSetupForCondorIO(job, mProps) ){
        //additional check for condor io: we need to inspect the URL and
        //it's location. only file urls on the local site are eligible,
        //and since in condor io we cannot remap the destination URL the
        //PFN has to end with the lfn to enable bypass
        return isFileURL &&
               fileSite.equals( "local" ) &&
               entry.getPFN().endsWith( file.getLFN() );
    }

    //for the non shared fs case we can bypass all url's safely other
    //than file urls; file urls only when the file site is the compute site
    return isFileURL ?
           fileSite.equalsIgnoreCase( job.getSiteHandle() ) :
           true;
}
/**
* Helped method for logging removal message. If removed is true, then logged on
* debug else logged as warning.
*
* @param job the job
* @param file the file to be removed
* @param prefix prefix for log message
* @param removed whether removal was successful or not.
*
*/
private void logRemoval( Job job, PegasusFile file, String prefix, boolean removed) {
StringBuilder message = new StringBuilder();
message.append( prefix ).append( " : " );
if( removed ){
message.append( "Removed file " ).append( file.getLFN() ).append( " for job " ).
append( job.getID() );
mLogger.log( message.toString() , LogManager.DEBUG_MESSAGE_LEVEL );
}
else{
//warn
message.append( "Unable to remove file " ).append( file.getLFN() ).append( " for job " ).
append( job.getID() );
mLogger.log( message.toString() , LogManager.WARNING_MESSAGE_LEVEL );
}
}
/**
* Returns the relative submit directory for the job from the top level
* submit directory where workflow files are written.
*
* @param job
* @return
*/
protected String getRelativeSubmitDirectory(Job job) {
String relative = null;
try {
File f = mSubmitDirMapper.getRelativeDir(job);
mLogger.log("Directory for job " + job.getID() + " is " + f,
LogManager.DEBUG_MESSAGE_LEVEL );
relative = f.getPath();
} catch ( Exception ex) {
throw new RuntimeException( "Error while determining relative submit dir for job " + job.getID() , ex);
}
return relative;
}
}