/**
* Copyright 2007-2008 University Of Southern California
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.isi.pegasus.planner.transfer.implementation;
import edu.isi.pegasus.planner.classes.TransferJob;
import edu.isi.pegasus.planner.classes.NameValue;
import edu.isi.pegasus.planner.classes.FileTransfer;
import edu.isi.pegasus.planner.classes.Profile;
import edu.isi.pegasus.common.logging.LogManager;
import edu.isi.pegasus.planner.namespace.Pegasus;
import edu.isi.pegasus.planner.catalog.transformation.classes.TCType;
import edu.isi.pegasus.planner.catalog.transformation.TransformationCatalogEntry;
import edu.isi.pegasus.planner.catalog.replica.ReplicaCatalogEntry;
import edu.isi.pegasus.common.util.Separator;
import edu.isi.pegasus.planner.classes.Job;
import static edu.isi.pegasus.planner.classes.Job.COMPUTE_JOB;
import static edu.isi.pegasus.planner.classes.Job.INTER_POOL_JOB;
import static edu.isi.pegasus.planner.classes.Job.STAGE_IN_JOB;
import static edu.isi.pegasus.planner.classes.Job.STAGE_OUT_JOB;
import java.io.FileWriter;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
import java.io.File;
import edu.isi.pegasus.planner.classes.PegasusBag;
import edu.isi.pegasus.planner.namespace.Dagman;
import edu.isi.pegasus.planner.namespace.Metadata;
import edu.isi.pegasus.planner.selector.ReplicaSelector;
/**
* The implementation that creates transfer jobs referring to the python based
* transfer script distributed with Pegasus since version 3.0
*
* <p>
* Transfer is distributed as part of the Pegasus worker package and can be found at
* $PEGASUS_HOME/bin/pegasus-transfer.
*
* <p>
* It leads to the creation of the setup chmod jobs to the workflow, that appear
* as parents to compute jobs in case the transfer implementation does not
* preserve the X bit on the file being transferred. This is required for
* staging of executables as part of the workflow. The setup jobs are only added
* as children to the stage in jobs.
* <p>
* In order to use the transfer implementation implemented by this class, the
* property <code>pegasus.transfer.*.impl</code> must be set to
* value <code>Transfer</code>.
*
* The arguments with which the pegasus-transfer client is invoked can be specified
* <pre>
* - by specifying the property pegasus.transfer.arguments
* - associating the Pegasus profile key transfer.arguments
* </pre>
*
* @author Karan Vahi
* @version $Revision$
*/
public class Transfer extends AbstractMultipleFTPerXFERJob {

    /**
     * The transformation namespace for the transfer job.
     */
    public static final String TRANSFORMATION_NAMESPACE = "pegasus";

    /**
     * The name of the underlying transformation that is queried for in the
     * Transformation Catalog.
     */
    public static final String TRANSFORMATION_NAME = "transfer";

    /**
     * The version number for the transfer job.
     */
    public static final String TRANSFORMATION_VERSION = null;

    /**
     * The derivation namespace for the transfer job.
     */
    public static final String DERIVATION_NAMESPACE = "pegasus";

    /**
     * The name of the underlying derivation.
     */
    public static final String DERIVATION_NAME = "transfer";

    /**
     * The derivation version number for the transfer job.
     */
    public static final String DERIVATION_VERSION = "1.0";

    /**
     * The default number of threads pegasus-transfer uses.
     */
    public static final int DEFAULT_NUMBER_OF_THREADS = 2;

    /**
     * A short description of the transfer implementation.
     */
    public static final String DESCRIPTION = "Python based Transfer Script";

    /**
     * The executable basename for the transfer executable.
     */
    public static final String EXECUTABLE_BASENAME = "pegasus-transfer";

    /**
     * The overloaded constructor, that is called by the Factory to load the
     * class.
     *
     * @param bag  the bag of initialization objects.
     */
    public Transfer( PegasusBag bag ){
        super( bag );
    }

    /**
     * Return a boolean indicating whether the transfers to be done always in
     * a third party transfer mode. A value of false, results in the
     * direct or peer to peer transfers being done.
     * <p>
     * A value of false does not preclude third party transfers. They still can
     * be done, by setting the property "pegasus.transfer.*.thirdparty.sites".
     *
     * @return boolean indicating whether to always use third party transfers
     *         or not.
     */
    public boolean useThirdPartyTransferAlways(){
        return false;
    }

    /**
     * Returns a boolean indicating whether the transfer protocol being used by
     * the implementation preserves the X Bit or not while staging.
     *
     * @return boolean
     */
    public boolean doesPreserveXBit(){
        return false;
    }

    /**
     * Returns a textual description of the transfer implementation.
     *
     * @return a short textual description
     */
    public String getDescription(){
        return Transfer.DESCRIPTION;
    }

    /**
     * Retrieves the transformation catalog entry for the executable that is
     * being used to transfer the files in the implementation.
     *
     * @param siteHandle  the handle of the site where the transformation is
     *                    to be searched.
     * @param jobClass    the job Class for the newly added job. Can be one of the
     *                    following:
     *                              stage-in
     *                              stage-out
     *                              inter-pool transfer
     *                              stage-in worker transfer
     *
     * @return  the transformation catalog entry if found, else null.
     */
    public TransformationCatalogEntry getTransformationCatalogEntry(String siteHandle, int jobClass ){
        if( jobClass == Job.STAGE_IN_WORKER_PACKAGE_JOB && !siteHandle.equalsIgnoreCase( "local") ){
            //PM-538
            //construct an entry for the local site and transfer it.
            return this.defaultTCEntry( Transfer.TRANSFORMATION_NAMESPACE,
                                        Transfer.TRANSFORMATION_NAME,
                                        Transfer.TRANSFORMATION_VERSION,
                                        Transfer.EXECUTABLE_BASENAME,
                                        "local" );
        }

        List tcentries = null;
        try {
            //namespace and version are null for time being
            tcentries = mTCHandle.lookup( Transfer.TRANSFORMATION_NAMESPACE,
                                          Transfer.TRANSFORMATION_NAME,
                                          Transfer.TRANSFORMATION_VERSION,
                                          siteHandle,
                                          TCType.INSTALLED );
        } catch (Exception e) {
            mLogger.log(
                "Unable to retrieve entry from TC for " + getCompleteTCName()
                + " Cause:" + e, LogManager.DEBUG_MESSAGE_LEVEL );
        }

        //guard against both a null result and an empty list. previously only
        //null was checked, and an empty list from the lookup would have
        //triggered an IndexOutOfBoundsException on get(0)
        return ( tcentries == null || tcentries.isEmpty() ) ?
               //attempt to create a default entry on the basis of
               //PEGASUS_HOME defined in the site catalog
               this.defaultTCEntry( Transfer.TRANSFORMATION_NAMESPACE,
                                    Transfer.TRANSFORMATION_NAME,
                                    Transfer.TRANSFORMATION_VERSION,
                                    Transfer.EXECUTABLE_BASENAME,
                                    siteHandle ) :
               //get what was returned in the transformation catalog
               (TransformationCatalogEntry) tcentries.get(0);
    }

    /**
     * An optional method that allows the derived classes to do their own
     * post processing on the the transfer job before it is returned to
     * the calling module.
     *
     * @param job  the <code>TransferJob</code> that has been created.
     */
    public void postProcess( TransferJob job ){
        if( job.getJobType() == Job.STAGE_IN_WORKER_PACKAGE_JOB ){
            //all stage worker jobs are classified as stage in jobs
            //for further use in the planner
            job.setJobType( Job.STAGE_IN_JOB );

            if( !job.getSiteHandle().equalsIgnoreCase( "local" ) ){
                //PM-538
                //executable for remote stage worker jobs is transferred
                //from local site.
                job.condorVariables.setExecutableForTransfer();
            }
        }

        //associate DAGMan categories with these jobs to enable
        //throttling in properties file
        if( !job.dagmanVariables.containsKey( Dagman.CATEGORY_KEY ) ){
            job.dagmanVariables.construct( Dagman.CATEGORY_KEY, getDAGManCategory( job.getJobType() ) );
        }
    }

    /**
     * Returns the environment profiles that are required for the default
     * entry to sensibly work. Tries to retrieve the following variables
     *
     * <pre>
     * PEGASUS_HOME
     * GLOBUS_LOCATION
     * </pre>
     *
     * @param site  the site where the job is going to run.
     *
     * @return List of environment variables, else empty list if none are found
     */
    protected List getEnvironmentVariables( String site ){
        List<Profile> result = new ArrayList<Profile>( 2 );

        String pegasusHome = mSiteStore.getEnvironmentVariable( site, "PEGASUS_HOME" );
        if( pegasusHome != null ){
            //PEGASUS_HOME is set for the site
            result.add( new Profile( Profile.ENV, "PEGASUS_HOME", pegasusHome ) );
        }

        String globus = mSiteStore.getEnvironmentVariable( site, "GLOBUS_LOCATION" );
        if( globus != null && globus.length() > 1 ){
            result.add( new Profile( Profile.ENV, "GLOBUS_LOCATION", globus ) );
        }

        return result;
    }

    /**
     * Returns the namespace of the derivation that this implementation
     * refers to.
     *
     * @return the namespace of the derivation.
     */
    protected String getDerivationNamespace(){
        return Transfer.DERIVATION_NAMESPACE;
    }

    /**
     * Returns the logical name of the derivation that this implementation
     * refers to.
     *
     * @return the name of the derivation.
     */
    protected String getDerivationName(){
        return Transfer.DERIVATION_NAME;
    }

    /**
     * Returns the version of the derivation that this implementation
     * refers to.
     *
     * @return the version of the derivation.
     */
    protected String getDerivationVersion(){
        return Transfer.DERIVATION_VERSION;
    }

    /**
     * It constructs the arguments to the transfer executable that need to be passed
     * to the executable referred to in this transfer mode.
     *
     * @param job  the object containing the transfer node.
     * @return the argument string
     */
    protected String generateArgumentString(TransferJob job) {
        StringBuilder sb = new StringBuilder();

        int threads = Transfer.DEFAULT_NUMBER_OF_THREADS;
        if( job.vdsNS.containsKey( Pegasus.TRANSFER_THREADS_KEY ) ){
            try{
                threads = Integer.parseInt( job.vdsNS.getStringValue( Pegasus.TRANSFER_THREADS_KEY ) );
            }
            catch( Exception e ){
                //log and fall back to the default thread count
                mLogger.log( "Invalid value picked up for Pegasus profile " + Pegasus.TRANSFER_THREADS_KEY + " transfer job " + job.getID(),
                             LogManager.ERROR_MESSAGE_LEVEL );
            }
        }
        sb.append( " --threads " ).
           append( threads ).append( " " );

        if( job.vdsNS.containsKey( Pegasus.TRANSFER_ARGUMENTS_KEY ) ){
            //the profile key is consumed here, hence removed from the job
            sb.append(
                job.vdsNS.removeKey( Pegasus.TRANSFER_ARGUMENTS_KEY )
            );
        }

        return sb.toString();
    }

    /**
     * Writes to a FileWriter stream the stdin which goes into the magic script
     * via standard input
     *
     * @param job          the transfer job
     * @param writer       the writer to the stdin file.
     * @param files        Collection of <code>FileTransfer</code> objects containing
     *                     the information about source and destURL's.
     * @param stagingSite  the site where the data will be populated by first
     *                     level staging jobs.
     * @param jobClass     the job Class for the newly added job. Can be one of the
     *                     following:
     *                              stage-in
     *                              stage-out
     *                              inter-pool transfer
     *
     * @throws Exception
     */
    protected void writeStdInAndAssociateCredentials(TransferJob job, FileWriter writer, Collection files, String stagingSite, int jobClass ) throws
              Exception {

        // format is a JSON list
        writer.write("[\n");

        int num = 1;
        for( Object o : files ){
            FileTransfer ft = (FileTransfer) o;
            Collection<String> sourceSites = ft.getSourceSites( );
            NameValue dest = ft.getDestURL( true );

            //write to the file one URL pair at a time
            StringBuilder urlPair = new StringBuilder( );

            // Wow, annoying to require double quotes all over the place...
            if (num > 1) {
                urlPair.append(" ,\n");
            }
            urlPair.append(" { \"type\": \"transfer\",\n");
            urlPair.append("   \"lfn\": ").append("\"").append(ft.getLFN()).append("\"").append(",\n");
            urlPair.append("   \"id\": ").append(num).append(",\n");

            //PM-1190 dump any metadata that planner knows of about the file
            Metadata m = ft.getAllMetadata();
            if( !m.isEmpty() ){
                urlPair.append( "   ").append( "\"attributes\": {");
                for( Iterator<String> mit = m.getProfileKeyIterator(); mit.hasNext(); ){
                    String key = mit.next();
                    urlPair.append( "\n" ).append( "      ");
                    urlPair.append( "\"").append( key ).append( "\"" ).append( ":" ).append( "\"" ).append( m.get(key)).append( "\"").
                            append(",");
                }
                //remove trailing ,
                urlPair = urlPair.deleteCharAt( urlPair.length() - 1 );
                urlPair.append( "\n" ).append( "   ").append( "}").append( "," ).append( "\n" );
            }

            urlPair.append("   \"src_urls\": [");

            boolean notFirst = false;
            for( String sourceSite: sourceSites ){
                //traverse through all the URL's on that site
                for( ReplicaCatalogEntry url : ft.getSourceURLs(sourceSite) ){
                    if( notFirst ){
                        urlPair.append(",");
                    }
                    String prio =  (String) url.getAttribute( ReplicaSelector.PRIORITY_KEY);
                    urlPair.append("\n     {");
                    urlPair.append(" \"site_label\": \"").append(sourceSite).append("\",");
                    urlPair.append(" \"url\": \"").append( url.getPFN() ).append("\"");
                    if( prio != null ){
                        urlPair.append(",");
                        urlPair.append(" \"priority\": ").append( prio );
                    }
                    urlPair.append(" }");
                    notFirst = true;
                    // and the credential for the source url
                    job.addCredentialType( sourceSite, url.getPFN() );
                }
            }

            urlPair.append("\n   ],\n");
            urlPair.append("   \"dest_urls\": [\n");
            urlPair.append("     {");
            urlPair.append(" \"site_label\": \"").append(dest.getKey()).append("\",");
            urlPair.append(" \"url\": \"").append(dest.getValue()).append("\"");
            urlPair.append(" }\n");
            urlPair.append("   ]");
            urlPair.append(" }\n"); // end of this transfer

            writer.write( urlPair.toString() );
            writer.flush();
            num++;

            //associate any credential required , with destination URL
            job.addCredentialType( dest.getKey(), dest.getValue() );
        }

        writer.write("]\n");
    }

    /**
     * Returns the complete name for the transformation.
     *
     * @return the complete name.
     */
    protected String getCompleteTCName(){
        return Separator.combine( Transfer.TRANSFORMATION_NAMESPACE,
                                  Transfer.TRANSFORMATION_NAME,
                                  Transfer.TRANSFORMATION_VERSION );
    }

    /**
     * Returns the dagman category for transfer job
     *
     * @param type job type
     *
     * @return the DAGMan category name to throttle this job class under
     */
    protected String getDAGManCategory( int type ) {
        String category = null;
        switch (type){
            case STAGE_IN_JOB:
                category = "stage-in";
                break;

            case STAGE_OUT_JOB:
                category = "stage-out";
                break;

            case INTER_POOL_JOB:
                category = "stage-inter";
                break;

            default:
                category = "transfer";
        }
        return category;
    }
}