/**
* Copyright 2007-2008 University Of Southern California
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.isi.pegasus.planner.code.gridstart;
import edu.isi.pegasus.planner.catalog.site.classes.SiteStore;
import edu.isi.pegasus.common.logging.LogManager;
import edu.isi.pegasus.planner.common.PegasusProperties;
import edu.isi.pegasus.planner.code.GridStart;
import edu.isi.pegasus.planner.code.POSTScript;
import edu.isi.pegasus.planner.classes.ADag;
import edu.isi.pegasus.planner.classes.Job;
import edu.isi.pegasus.planner.classes.AggregatedJob;
import edu.isi.pegasus.planner.classes.PegasusFile;
import edu.isi.pegasus.planner.classes.TransferJob;
import edu.isi.pegasus.planner.classes.PegasusBag;
import edu.isi.pegasus.planner.transfer.SLS;
import edu.isi.pegasus.planner.namespace.Pegasus;
import java.io.File;
import java.io.FileInputStream;
import java.util.Collection;
import java.util.Iterator;
import java.util.Set;
import java.io.IOException;
import java.io.FileWriter;
import java.io.InputStream;
import java.io.OutputStream;
import edu.isi.pegasus.planner.classes.PlannerOptions;
import edu.isi.pegasus.planner.cluster.JobAggregator;
import edu.isi.pegasus.planner.namespace.Condor;
/**
* This class ends up running the job directly on the grid, without wrapping
* it in any other launcher executable.
* It ends up connecting the jobs stdio and stderr to condor commands to
* ensure they are sent back to the submit host.
*
*
* @author Karan Vahi vahi@isi.edu
* @version $Revision$
*/
public class NoGridStart implements GridStart {
private PegasusBag mBag;
private ADag mDAG;
/**
* The basename of the class that is implmenting this. Could have
* been determined by reflection.
*/
public static final String CLASSNAME = "NoGridStart";
/**
* The SHORTNAME for this implementation.
*/
public static final String SHORT_NAME = "none";
/**
* The LogManager object which is used to log all the messages.
*/
protected LogManager mLogger;
/**
* The object holding all the properties pertaining to Pegasus.
*/
protected PegasusProperties mProps;
/**
* The submit directory where the submit files are being generated for
* the workflow.
*/
protected String mSubmitDir;
/**
* The argument string containing the arguments with which the exitcode
* is invoked on kickstart output.
*/
protected String mExitParserArguments;
/**
* A boolean indicating whether to generate lof files or not.
*/
protected boolean mGenerateLOF;
/**
* A boolean indicating whether to have worker node execution or not.
*/
//protected boolean mWorkerNodeExecution;
/**
* The handle to the SLS implementor
*/
protected SLS mSLS;
/**
* The options passed to the planner.
*/
protected PlannerOptions mPOptions;
/**
* Handle to the site catalog store.
*/
//protected PoolInfoProvider mSiteHandle;
protected SiteStore mSiteStore;
/**
* An instance variable to track if enabling is happening as part of a clustered job.
* See Bug 21 comments on Pegasus Bugzilla
*/
protected boolean mEnablingPartOfAggregatedJob;
/**
* Boolean indicating whether worker package staging is enabled or not.
*/
protected boolean mWorkerPackageStagingEnabled;
/**
* Initializes the GridStart implementation.
*
* @param bag the bag of objects that is used for initialization.
* @param dag the concrete dag so far.
*/
public void initialize( PegasusBag bag, ADag dag ){
mBag = bag;
mDAG = dag;
mLogger = bag.getLogger();
mSiteStore = bag.getHandleToSiteStore();
mPOptions = bag.getPlannerOptions();
mSubmitDir = mPOptions.getSubmitDirectory();
mProps = bag.getPegasusProperties();
mGenerateLOF = mProps.generateLOFFiles();
mWorkerPackageStagingEnabled = mProps.transferWorkerPackage();
// mExitParserArguments = getExitCodeArguments();
/* JIRA PM-495
mWorkerNodeExecution = mProps.executeOnWorkerNode();
if( mWorkerNodeExecution ){
//load SLS
mSLS = SLSFactory.loadInstance( bag );
}
*/
mEnablingPartOfAggregatedJob = false;
}
/**
* Enables a collection of jobs and puts them into an AggregatedJob.
* The assumption here is that all the jobs are being enabled by the same
* implementation. It enables the jobs and puts them into the AggregatedJob
* that is passed to it.
*
* @param aggJob the AggregatedJob into which the collection has to be
* integrated.
* @param jobs the collection of jobs (Job) that need to be enabled.
*
* @return the AggregatedJob containing the enabled jobs.
* @see #enable(Job,boolean)
*/
public AggregatedJob enable(AggregatedJob aggJob,Collection jobs){
//sanity check for the arguments
if( aggJob.strargs != null && aggJob.strargs.length() > 0){
//construct( aggJob, "arguments", aggJob.strargs);
// the arguments are no longer set as condor profiles
// they are now set to the corresponding profiles in
// the Condor Code Generator only.
aggJob.setArguments( aggJob.strargs );
}
//we do not want the jobs being clustered to be enabled
//for worker node execution just yet.
mEnablingPartOfAggregatedJob = true;
for (Iterator it = jobs.iterator(); it.hasNext(); ) {
Job job = (Job)it.next();
//always pass isGlobus true as always
//interested only in executable strargs
this.enable(job, true);
aggJob.add(job);
}
//set the flag back to false
mEnablingPartOfAggregatedJob = false;
return aggJob;
}
/**
* Enables a job to run on the grid. This also determines how the
* stdin,stderr and stdout of the job are to be propogated.
* To grid enable a job, the job may need to be wrapped into another
* job, that actually launches the job. It usually results in the job
* description passed being modified modified.
*
* @param job the <code>Job</code> object containing the job description
* of the job that has to be enabled on the grid.
* @param isGlobusJob is <code>true</code>, if the job generated a
* line <code>universe = globus</code>, and thus runs remotely.
* Set to <code>false</code>, if the job runs on the submit
* host in any way.
*
* @return boolean true if enabling was successful,else false.
*/
public boolean enable( AggregatedJob job,boolean isGlobusJob){
//get hold of the JobAggregator determined for this clustered job
//during clustering
JobAggregator aggregator = job.getJobAggregator();
if( aggregator == null ){
throw new RuntimeException( "Clustered job not associated with a job aggregator " + job.getID() );
}
boolean first = true;
for (Iterator it = job.constituentJobsIterator(); it.hasNext(); ) {
Job constituentJob = (Job)it.next();
//earlier was set in SeqExec JobAggregator in the enable function
constituentJob.vdsNS.construct( Pegasus.GRIDSTART_KEY,
this.getVDSKeyValue() );
if(first){
first = false;
}
else{
//we need to pass -H to kickstart
//to suppress the header creation
constituentJob.vdsNS.construct(Pegasus.GRIDSTART_ARGUMENTS_KEY,"-H");
}
//always pass isGlobus true as always
//interested only in executable strargs
//due to the fact that seqexec does not allow for setting environment
//per constitutent constituentJob, we cannot set the postscript removal option
this.enable( constituentJob, isGlobusJob );
}
//all the constitutent jobs are enabled.
//get the job aggregator to render the job
//to it's executable form
aggregator.makeAbstractAggregatedJobConcrete( job );
//set the flag back to false
//mEnablingPartOfAggregatedJob = false;
//the aggregated job itself needs to be enabled via NoGridStart
this.enable( (Job)job, isGlobusJob);
return true;
}
/**
* Enables a job to run on the grid by launching it directly. It ends
* up running the executable directly without going through any intermediate
* launcher executable. It connects the stdio, and stderr to underlying
* condor mechanisms so that they are transported back to the submit host.
*
* @param job the <code>Job</code> object containing the job description
* of the job that has to be enabled on the grid.
* @param isGlobusJob is <code>true</code>, if the job generated a
* line <code>universe = globus</code>, and thus runs remotely.
* Set to <code>false</code>, if the job runs on the submit
* host in any way.
*
* @return boolean true if enabling was successful,else false in case when
* the path to kickstart could not be determined on the site where
* the job is scheduled.
*/
public boolean enable(Job job, boolean isGlobusJob) {
//take care of relative submit directory if specified
String submitDir = mSubmitDir + mSeparator;
// String submitDir = getSubmitDirectory( mSubmitDir , job) + mSeparator;
// the arguments are no longer set as condor profiles
// they are now set to the corresponding profiles in
// the Condor Code Generator only.
job.setRemoteExecutable( handleTransferOfExecutable( job ) );
//JIRA PM-543
//set the directory key with the job
if( requiresToSetDirectory( job ) ){
job.setDirectory( this.getDirectory( job ) );
}
/*
//the executable path and arguments are put
//in the Condor namespace and not printed to the
//file so that they can be overriden if desired
//later through profiles and key transfer_executable
construct(job,"executable", handleTransferOfExecutable( job ) );
//sanity check for the arguments
if(job.strargs != null && job.strargs.length() > 0){
construct(job, "arguments", job.strargs);
}
*/
// handle stdin
if (job.stdIn.length() > 0) {
//PM-833 for planner added auxillary jobs pick the .in file from
//right submit directory
if (job.logicalName.equals(
edu.isi.pegasus.planner.transfer.implementation.Transfer.TRANSFORMATION_NAME)
|| job.logicalName.equals(edu.isi.pegasus.planner.refiner.cleanup.Cleanup.TRANSFORMATION_NAME )
|| job.logicalName.equals( edu.isi.pegasus.planner.refiner.createdir.DefaultImplementation.TRANSFORMATION_NAME )
|| job.logicalName.equals(edu.isi.pegasus.planner.cluster.aggregator.SeqExec.
COLLAPSE_LOGICAL_NAME)
|| job.logicalName.equals(edu.isi.pegasus.planner.cluster.aggregator.MPIExec.
COLLAPSE_LOGICAL_NAME)
) {
//condor needs to pick up the constituentJob stdin and
//transfer it to the remote end
construct( job, "input" , job.getFileFullPath( submitDir, ".in") );
}
else{
construct(job,"input",submitDir + job.stdIn);
}
if (isGlobusJob) {
//this needs to be true as you want the stdin
//to be transfered to the remote execution
//pool, as in case of the transfer script.
//it needs to be set if the stdin is already
//prepopulated at the remote side which
//it is not.
construct(job,"transfer_input","true");
}
}
if (job.stdOut.length() > 0) {
//handle stdout
construct(job,"output",job.stdOut);
if (isGlobusJob) {
construct(job,"transfer_output","false");
}
} else {
// transfer output back to submit host, if unused
construct(job,"output", job.getFileFullPath( submitDir, ".out") );
if (isGlobusJob) {
construct(job,"transfer_output","true");
}
}
if (job.stdErr.length() > 0) {
//handle stderr
construct(job,"error",job.stdErr);
if (isGlobusJob) {
construct(job,"transfer_error","false");
}
} else {
// transfer error back to submit host, if unused
construct(job,"error", job.getFileFullPath( submitDir, ".err"));
if (isGlobusJob) {
construct(job,"transfer_error","true");
}
}
if( mGenerateLOF ){
//but generate lof files nevertheless
//inefficient check here again. just a prototype
//we need to generate -S option only for non transfer jobs
//generate the list of filenames file for the input and output files.
if (! (job instanceof TransferJob)) {
generateListofFilenamesFile( job.getInputFiles(),
job,
".in.lof");
}
//for cleanup jobs no generation of stats for output files
if (job.getJobType() != Job.CLEANUP_JOB) {
generateListofFilenamesFile(job.getOutputFiles(),
job,
".out.lof");
}
}///end of mGenerateLOF
return true;
}
/**
* It changes the paths to the executable depending on whether we want to
* transfer the executable or not. Currently, the transfer_executable is only
* handled for staged compute jobs, where Pegasus is staging the binaries
* to the remote site.
*
* @param job the <code>Job</code> containing the job description.
*
* @return the path that needs to be set as the executable key. If
* transfer_executable is not set the path to the executable is
* returned as is.
*/
protected String handleTransferOfExecutable( Job job ) {
Condor cvar = job.condorVariables;
String path = job.executable;
if ( cvar.getBooleanValue( "transfer_executable" )) {
//explicitly check for whether the job is a staged compute job or not
// if( job.getJobType() == Job.STAGED_COMPUTE_JOB ){
if( job.userExecutablesStagedForJob() ){
//the executable is being staged to the remote site.
//all we need to do is unset transfer_executable
cvar.construct( "transfer_executable", "false" );
}
else if ( mWorkerPackageStagingEnabled &&
( job.getJobType() == Job.CREATE_DIR_JOB || job.getJobType() == Job.CLEANUP_JOB) ){
//we dont complain.
//JIRA PM-281
}
else{
mLogger.log( "Transfer of Executables in NoGridStart only works for staged computes jobs " + job.getName(),
LogManager.ERROR_MESSAGE_LEVEL );
}
}
else{
//the executable paths are correct and
//point to the executable on the remote pool
}
return path;
}
/**
* Indicates whether the enabling mechanism can set the X bit
* on the executable on the remote grid site, in addition to launching
* it on the remote grid stie
*
* @return false, as no wrapper executable is being used.
*/
public boolean canSetXBit(){
return false;
}
/**
* Returns the value of the vds profile with key as Pegasus.GRIDSTART_KEY,
* that would result in the loading of this particular implementation.
* It is usually the name of the implementing class without the
* package name.
*
* @return the value of the profile key.
* @see org.griphyn.cPlanner.namespace.Pegasus#GRIDSTART_KEY
*/
public String getVDSKeyValue(){
return this.CLASSNAME;
}
/**
* Returns a short textual description in the form of the name of the class.
*
* @return short textual description.
*/
public String shortDescribe(){
return this.SHORT_NAME;
}
/**
* Returns the SHORT_NAME for the POSTScript implementation that is used
* to be as default with this GridStart implementation.
*
* @return the identifier for the NoPOSTScript POSTScript implementation.
*
* @see POSTScript#shortDescribe()
*/
public String defaultPOSTScript(){
return NoPOSTScript.SHORT_NAME;
}
/**
* Returns a boolean indicating whether we need to set the directory for
* the job or not.
*
* @param job the job for which to set directory.
*
* @return
*/
protected boolean requiresToSetDirectory( Job job ) {
//the cleanup jobs should never have directory set as full path
//is specified
return ( job.getJobType() != Job.CLEANUP_JOB );
}
/**
* Returns the directory in which the job should run.
*
* @param job the job in which the directory has to run.
*
* @return
*/
protected String getDirectory( Job job ){
String execSiteWorkDir = mSiteStore.getInternalWorkDirectory(job);
String workdir = (String) job.globusRSL.removeKey("directory"); // returns old value
workdir = (workdir == null)?execSiteWorkDir:workdir;
return workdir;
}
/**
* Returns the directory that is associated with the job to specify
* the directory in which the job needs to run
*
* @param job the job
*
* @return the condor key . can be initialdir or remote_initialdir
*/
private String getDirectoryKey(Job job) {
/*
String style = (String)job.vdsNS.get( Pegasus.STYLE_KEY );
//remove the remote or initial dir's for the compute jobs
String key = ( style.equalsIgnoreCase( Pegasus.GLOBUS_STYLE ) )?
"remote_initialdir" :
"initialdir";
*/
String universe = (String) job.condorVariables.get( Condor.UNIVERSE_KEY );
return ( universe.equals( Condor.STANDARD_UNIVERSE ) ||
universe.equals( Condor.LOCAL_UNIVERSE) ||
universe.equals( Condor.SCHEDULER_UNIVERSE ) )?
"initialdir" :
"remote_initialdir";
}
/**
* Returns a boolean indicating whether to remove remote directory
* information or not from the job. This is determined on the basis of the
* style key that is associated with the job.
*
* @param job the job in question.
*
* @return boolean
*/
private boolean removeDirectoryKey(Job job){
String style = job.vdsNS.containsKey(Pegasus.STYLE_KEY) ?
null :
(String)job.vdsNS.get(Pegasus.STYLE_KEY);
//is being run. Remove remote_initialdir if there
//condor style associated with the job
//Karan Nov 15,2005
return (style == null)?
false:
style.equalsIgnoreCase(Pegasus.CONDOR_STYLE);
}
/**
* Constructs a condor variable in the condor profile namespace
* associated with the job. Overrides any preexisting key values.
*
* @param job contains the job description.
* @param key the key of the profile.
* @param value the associated value.
*/
private void construct(Job job, String key, String value){
job.condorVariables.construct(key,value);
}
/**
* Writes out the list of filenames file for the job.
*
* @param files the list of <code>PegasusFile</code> objects contains the files
* whose stat information is required.
* @param job the job
* @param suffix the suffix to be applied to files
*
* @return the full path to lof file created, else null if no file is written out.
*/
protected String generateListofFilenamesFile( Set files, Job job, String suffix ){
//sanity check
if ( files == null || files.isEmpty() ){
return null;
}
String result = null;
//writing the stdin file
try {
File f = new File( job.getFileFullPath(mSubmitDir, suffix) );
FileWriter input;
input = new FileWriter( f );
PegasusFile pf;
for( Iterator it = files.iterator(); it.hasNext(); ){
pf = ( PegasusFile ) it.next();
String lfn= pf.getLFN();
StringBuilder sb = new StringBuilder();
//to make sure that kickstart generates lfn attribute in statcall
//element
sb.append( lfn ).append( "=" ).
append( lfn ).append( "\n" );
input.write( sb.toString() );
}
//close the stream
input.close();
result = f.getAbsolutePath();
} catch ( IOException e) {
mLogger.log("Unable to write the lof file for job " + job.getID() + " with suffix " + suffix , e ,
LogManager.ERROR_MESSAGE_LEVEL);
}
return result;
}
/**
* Adds contents to an output stream.
* @param src
* @param out
* @throws java.io.IOException
*/
private void addToFile( File src, OutputStream out ) throws IOException{
InputStream in = new FileInputStream(src);
// Transfer bytes from in to out
byte[] buf = new byte[1024];
int len;
while ((len = in.read(buf)) > 0) {
out.write(buf, 0, len);
}
in.close();
}
/**
* Returns the directory in which the job executes on the worker node.
*
* @param job
*
* @return the full path to the directory where the job executes
*/
public String getWorkerNodeDirectory( Job job ){
StringBuffer workerNodeDir = new StringBuffer();
String destDir = mSiteStore.getEnvironmentVariable( job.getSiteHandle() , "wntmp" );
destDir = ( destDir == null ) ? "/tmp" : destDir;
String relativeDir = mPOptions.getRelativeDirectory();
workerNodeDir.append( destDir ).append( File.separator ).
append( relativeDir.replaceAll( "/" , "-" ) ).
//append( File.separator ).append( job.getCompleteTCName().replaceAll( ":[:]*", "-") );
append( "-" ).append( job.getID() );
return workerNodeDir.toString();
}
public void useFullPathToGridStarts(boolean fullPath) {
throw new UnsupportedOperationException("Not supported yet.");
}
}