/**
 * Copyright 2007-2008 University Of Southern California
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package edu.isi.pegasus.planner.transfer.implementation;

import edu.isi.pegasus.common.logging.LogManager;
import edu.isi.pegasus.common.util.Separator;
import edu.isi.pegasus.planner.catalog.site.classes.GridGateway;
import edu.isi.pegasus.planner.catalog.site.classes.SiteCatalogEntry;
import edu.isi.pegasus.planner.catalog.transformation.TransformationCatalogEntry;
import edu.isi.pegasus.planner.catalog.transformation.classes.TCType;
import edu.isi.pegasus.planner.classes.FileTransfer;
import edu.isi.pegasus.planner.classes.Job;
import edu.isi.pegasus.planner.classes.PegasusBag;
import edu.isi.pegasus.planner.classes.Profile;
import edu.isi.pegasus.planner.classes.TransferJob;
import edu.isi.pegasus.planner.transfer.MultipleFTPerXFERJob;

import java.io.File;
import java.io.FileWriter;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;

/**
 * An abstract implementation for implementations that can handle multiple
 * file transfers in a single file transfer job.
 *
 * @author Karan Vahi
 * @version $Revision$
 */
public abstract class AbstractMultipleFTPerXFERJob extends Abstract
                      implements MultipleFTPerXFERJob {

    /**
     * The overloaded constructor, that is called by the Factory to load the
     * class.
     *
     * @param bag  the bag of Pegasus initialization objects
     */
    public AbstractMultipleFTPerXFERJob( PegasusBag bag ) {
        super( bag );
    }

    /**
     * Constructs a general transfer job that handles multiple transfers per
     * transfer job. There are appropriate callouts to generate the
     * implementation specific details.
     *
     * @param job        the Job object for the job, in relation to which
     *                   the transfer node is being added. Either the transfer
     *                   node can be transferring this job's input files to
     *                   the execution pool, or transferring this job's output
     *                   files to the output pool.
     * @param site       the site where the transfer job should run.
     * @param files      collection of <code>FileTransfer</code> objects
     *                   representing the data files and staged executables to
     *                   be transferred.
     * @param execFiles  subset collection of the files parameter, that
     *                   identifies the executable files that are being
     *                   transferred.
     * @param txJobName  the name of the transfer node.
     * @param jobClass   the job Class for the newly added job. Can be one of
     *                   the following:
     *                      stage-in
     *                      stage-out
     *                      inter-pool transfer
     *                      stage-in worker transfer
     *
     * @return  the created TransferJob.
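     *
     * <p>A sketch of a typical invocation from a refinement module. This is
     * illustrative only; the variable names are assumptions, not part of
     * this API:
     *
     * <pre>{@code
     *   // 'implementation' is a concrete subclass instance loaded via the
     *   // transfer implementation factory
     *   TransferJob txJob = implementation.createTransferJob(
     *            computeJob,                        // job the transfer is for
     *            computeJob.getStagingSiteHandle(), // site to run the transfer
     *            stageInFiles,                      // FileTransfer collection
     *            execFiles,                         // subset needing x bit set
     *            "stage_in_" + computeJob.getName() + "_0",
     *            Job.STAGE_IN_JOB );
     * }</pre>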
     */
    public TransferJob createTransferJob( Job job,
                                          String site,
                                          Collection files,
                                          Collection execFiles,
                                          String txJobName,
                                          int jobClass ) {

        TransferJob txJob = new TransferJob();
        SiteCatalogEntry ePool;
        GridGateway jobmanager;
        String tPool = site;

        String ntptSite = this.getNonThirdPartySite( job, files, jobClass );
        if( ntptSite == null && jobClass != Job.STAGE_IN_WORKER_PACKAGE_JOB ){
            throw new RuntimeException( "Unable to determine the non third party site for transfer job " +
                                        txJobName + " of type " + jobClass );
        }
        mLogger.log( "Non Third Party Transfer site for transfer job " + txJobName +
                     " is " + ntptSite,
                     LogManager.DEBUG_MESSAGE_LEVEL );
        txJob.setNonThirdPartySite( ntptSite );

        //we first check if there is an entry for the transfer universe;
        //if not, then go for globus
        ePool = mSiteStore.lookup( tPool );

        txJob.jobName = txJobName;
        txJob.executionPool = tPool;
        txJob.setUniverse( GridGateway.JOB_TYPE.transfer.toString() );

        //PM-833 set the relative submit directory for the transfer
        //job based on the associated file factory
        txJob.setRelativeSubmitDirectory( this.mSubmitDirFactory.getRelativeDir( txJob ) );

        TransformationCatalogEntry tcEntry = this.getTransformationCatalogEntry( tPool, jobClass );
        if( tcEntry == null ){
            //should throw a TC specific exception
            StringBuffer error = new StringBuffer();
            error.append( "Could not find entry in TC for lfn " ).append( getCompleteTCName() ).
                  append( " at site " ).append( txJob.getSiteHandle() );
            error.append( " . " ).
                  append( "Either add an entry in the TC or make sure that PEGASUS_HOME is set as an env profile in the site catalog for site " ).
                  append( txJob.getSiteHandle() ).append( " . " );
            mLogger.log( error.toString(), LogManager.ERROR_MESSAGE_LEVEL );
            throw new RuntimeException( error.toString() );
        }

        txJob.namespace   = tcEntry.getLogicalNamespace();
        txJob.logicalName = tcEntry.getLogicalName();
        txJob.version     = tcEntry.getLogicalVersion();

        txJob.dvName      = this.getDerivationName();
        txJob.dvNamespace = this.getDerivationNamespace();
        txJob.dvVersion   = this.getDerivationVersion();

        //this should in fact only be set
        //for non third party pools
        /* JIRA PM-277
        jobmanager = ePool.selectGridGateway( GridGateway.JOB_TYPE.transfer );
        txJob.globusScheduler = (jobmanager == null) ?
                                 null :
                                 jobmanager.getContact();
        */

        txJob.jobClass = jobClass;
        txJob.jobID = job.jobName;

        txJob.stdErr = "";
        txJob.stdOut = "";

        txJob.executable = tcEntry.getPhysicalTransformation();

        //the i/p and o/p files remain empty
        //as we are just copying URLs
        txJob.inputFiles = new HashSet();

        //to get the file stat information we need to put
        //the files as output files of the transfer job
        txJob.outputFiles = new HashSet( files );

        try{
            //credentials are handled generically now when the stdin is
            //written out
            txJob.stdIn = prepareSTDINAndAssociateCredentials( txJob, files, job.getSiteHandle(), jobClass );
        }
        catch( Exception e ){
            StringBuffer error = new StringBuffer();
            error.append( "Unable to write the stdIn file for job " ).
                  append( txJob.getCompleteTCName() ).append( " " ).append( e.getMessage() );
            error.append( "Files that were being written out " ).append( files );
            throw new RuntimeException( error.toString(), e );
        }

        //the profile information from the pool catalog needs to be
        //assimilated into the job.
        txJob.updateProfiles( ePool.getProfiles() );

        //add any notifications specified in the transformation
        //catalog for the job. JIRA PM-391
        txJob.addNotifications( tcEntry );
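
        //the successive updateProfiles() calls below establish a precedence
        //order for profiles: the site catalog profiles applied above are
        //overridden by the transformation catalog, which in turn is
        //overridden by the properties file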

        //the profile information from the transformation
        //catalog needs to be assimilated into the job
        //overriding the one from pool catalog.
        txJob.updateProfiles( tcEntry );

        //the profile information from the properties file
        //is assimilated overriding the one from transformation
        //catalog.
        txJob.updateProfiles( mProps );

        //apply the priority to the transfer job
        this.applyPriority( txJob );

        //constructing the arguments to transfer script
        //they only have to be incorporated after the
        //profile incorporation
        txJob.strargs = this.generateArgumentString( txJob );

        //PM-810 worker node execution is per job level now
        boolean addNodesForSettingXBit = !mPegasusConfiguration.jobSetupForWorkerNodeExecution( job );

        if( execFiles != null && addNodesForSettingXBit ){
            //we need to add setup jobs to change the XBit
            super.addSetXBitJobs( job, txJob, execFiles );
        }

        //a callout that allows the derived transfer implementation
        //classes to do their own specific stuff on the job
        this.postProcess( txJob );

        return txJob;
    }

    /**
     * Determines the correct site to be associated with the transfer job, to
     * ensure cleanup algorithms work correctly.
     *
     * @param job       the associated compute job for which the transfer job
     *                  is being constructed
     * @param files     the files to be transferred
     * @param jobClass  type of transfer job
     *
     * @return  the non third party site for the transfer job.
     */
    protected String getNonThirdPartySite( Job job, Collection<FileTransfer> files, int jobClass ){
        String ntptSite = null;

        if( jobClass == Job.INTER_POOL_JOB ){
            //PM-936 the non third party site for inter site transfer should be
            //set to be the site handle of the parent job where the inputs are
            //coming from, to ensure inplace cleanup job is a child to
            //inter site transfer job
            for( Iterator it = files.iterator(); it.hasNext(); ){
                FileTransfer ft = (FileTransfer)it.next();
                ntptSite = ft.getSourceURL().getKey();
                break;
            }
        }
        else{
            //the non third party site for the transfer job is
            //always the job execution site for which the transfer
            //job is being created.
            ntptSite = job.getStagingSiteHandle();
        }

        return ntptSite;
    }

    /**
     * Returns a default TC entry to be used in case an entry is not found in
     * the transformation catalog.
     *
     * @param namespace           the namespace of the transfer transformation
     * @param name                the logical name of the transfer transformation
     * @param version             the version of the transfer transformation
     * @param executableBasename  the basename of the executable
     * @param site                the site for which the default entry is
     *                            required.
     *
     * @return  the default entry.
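     *
     * <p>For example, assuming PEGASUS_HOME for the site resolves to
     * <code>/opt/pegasus</code> and an executable basename of
     * <code>transfer</code> (both values illustrative), the entry created
     * would have the physical transformation:
     *
     * <pre>
     *    /opt/pegasus/bin/transfer
     * </pre>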
     */
    protected TransformationCatalogEntry defaultTCEntry( String namespace,
                                                         String name,
                                                         String version,
                                                         String executableBasename,
                                                         String site ){

        TransformationCatalogEntry defaultTCEntry = null;
        //check if PEGASUS_HOME is set
        String home = mSiteStore.getPegasusHome( site );

        mLogger.log( "Creating a default TC entry for " +
                     Separator.combine( namespace, name, version ) +
                     " at site " + site,
                     LogManager.DEBUG_MESSAGE_LEVEL );

        //if home is still null
        if( home == null ){
            //cannot create default TC
            mLogger.log( "Unable to create a default entry for " +
                         Separator.combine( namespace, name, version ) +
                         " as PEGASUS_HOME or VDS_HOME is not set in Site Catalog",
                         LogManager.DEBUG_MESSAGE_LEVEL );
            //set the flag back to true
            return defaultTCEntry;
        }

        //get the essential environment variables required to get
        //it to work correctly
        List envs = this.getEnvironmentVariables( site );
        if( envs == null ){
            //cannot create default TC
            mLogger.log( "Unable to create a default entry for " +
                         Separator.combine( namespace, name, version ) +
                         " as could not construct necessary environment",
                         LogManager.DEBUG_MESSAGE_LEVEL );
            //set the flag back to true
            return defaultTCEntry;
        }

        //remove trailing / if specified
        home = ( home.charAt( home.length() - 1 ) == File.separatorChar ) ?
               home.substring( 0, home.length() - 1 ) :
               home;

        //construct the path to the executable
        StringBuffer path = new StringBuffer();
        path.append( home ).append( File.separator ).
             append( "bin" ).append( File.separator ).
             append( executableBasename );

        defaultTCEntry = new TransformationCatalogEntry( namespace,
                                                         name,
                                                         version );

        defaultTCEntry.setPhysicalTransformation( path.toString() );
        defaultTCEntry.setResourceId( site );
        defaultTCEntry.setType( TCType.INSTALLED );
        defaultTCEntry.addProfiles( envs );
        defaultTCEntry.setSysInfo( this.mSiteStore.lookup( site ).getSysInfo() );

        //register back into the transformation catalog
        //so that we do not need to worry about creating it again
        try{
            mTCHandle.insert( defaultTCEntry, false );
        }
        catch( Exception e ){
            //just log as debug, as this is more of a performance improvement
            //than anything else
            mLogger.log( "Unable to register in the TC the default entry " +
                         defaultTCEntry.getLogicalTransformation() +
                         " for site " + site, e,
                         LogManager.DEBUG_MESSAGE_LEVEL );
        }

        mLogger.log( "Created entry with path " + defaultTCEntry.getPhysicalTransformation(),
                     LogManager.DEBUG_MESSAGE_LEVEL );
        return defaultTCEntry;
    }

    /**
     * Returns the environment profiles that are required for the default
     * entry to sensibly work.
     *
     * @param site  the site where the job is going to run.
     *
     * @return  List of environment variables, else null in case the
     *          required environment variables could not be found.
     */
    protected abstract List<Profile> getEnvironmentVariables( String site );

    /**
     * An optional method that allows the derived classes to do their own
     * post processing on the transfer job before it is returned to the
     * calling module.
     *
     * @param job  the <code>TransferJob</code> that has been created.
     */
    public void postProcess( TransferJob job ){
        //JIRA PM-538
        //change the type of stage worker job back to stage in job
        if( job.getJobType() == Job.STAGE_IN_WORKER_PACKAGE_JOB ){
            job.setJobType( Job.STAGE_IN_JOB );
        }
    }

    /**
     * Prepares the stdin for the transfer job. Usually involves writing out a
     * text file that Condor transfers to the remote end. Additionally, it
     * associates with the job the credentials that are required for the
     * transfers.
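     *
     * <p>For example, for a transfer job named <code>stage_in_j1</code>
     * (name illustrative) with relative submit directory <code>00/00</code>,
     * the file written out is
     * <code>&lt;submit directory&gt;/00/00/stage_in_j1.in</code>, while the
     * value returned is just the basename <code>stage_in_j1.in</code>.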
     *
     * @param job          the transfer job whose stdin is being prepared.
     * @param files        Collection of <code>FileTransfer</code> objects
     *                     containing the information about the source and
     *                     destination URLs.
     * @param stagingSite  the site where the data will be populated by first
     *                     level staging jobs.
     * @param jobClass     the job Class for the newly added job. Can be one
     *                     of the following:
     *                        stage-in
     *                        stage-out
     *                        inter-pool transfer
     *
     * @return  the path to the prepared stdin file.
     *
     * @throws Exception in case of error.
     */
    protected String prepareSTDINAndAssociateCredentials( TransferJob job,
                                                          Collection files,
                                                          String stagingSite,
                                                          int jobClass ) throws Exception {
        //writing the stdin file
        FileWriter stdIn;
        String basename = job.getName() + ".in";

        //PM-833 the .in file is written in the same directory
        //where the submit file for the job will be written out
        File dir = new File( mPOptions.getSubmitDirectory(), job.getRelativeSubmitDirectory() );
        stdIn = new FileWriter( new File( dir, basename ) );

        writeStdInAndAssociateCredentials( job, stdIn, files, stagingSite, jobClass );

        //close the stdin stream
        stdIn.close();

        return basename;
    }

    /**
     * Returns the namespace of the derivation that this implementation
     * refers to.
     *
     * @return  the namespace of the derivation.
     */
    protected abstract String getDerivationNamespace();

    /**
     * Returns the logical name of the derivation that this implementation
     * refers to.
     *
     * @return  the name of the derivation.
     */
    protected abstract String getDerivationName();

    /**
     * Returns the version of the derivation that this implementation
     * refers to.
     *
     * @return  the version of the derivation.
     */
    protected abstract String getDerivationVersion();

    /**
     * Constructs the arguments to the transfer executable that need to be
     * passed to the executable referred to in this transfer mode.
     *
     * @param job  the object containing the transfer node.
     *
     * @return  the argument string.
     */
    protected abstract String generateArgumentString( TransferJob job );

    /**
     * Writes to a FileWriter stream the stdin which goes into the magic
     * script via standard input.
     *
     * @param job          the transfer job.
     * @param stdIn        the writer to the stdin file.
     * @param files        Collection of <code>FileTransfer</code> objects
     *                     containing the information about the source and
     *                     destination URLs.
     * @param stagingSite  the site where the data will be populated by first
     *                     level staging jobs.
     * @param jobClass     the job Class for the newly added job. Can be one
     *                     of the following:
     *                        stage-in
     *                        stage-out
     *                        inter-pool transfer
     *
     * @throws Exception in case of error while writing out the stdin.
     */
    protected abstract void writeStdInAndAssociateCredentials( TransferJob job,
                                                               FileWriter stdIn,
                                                               Collection files,
                                                               String stagingSite,
                                                               int jobClass ) throws Exception;

    /**
     * Returns the complete name for the transformation that the
     * implementation is using.
     *
     * @return  the complete name.
     */
    protected abstract String getCompleteTCName();
}
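
/*
 * A minimal sketch (illustrative only, not an actual Pegasus class) of what
 * a concrete subclass must supply. All method bodies here are assumptions
 * for exposition, not the behavior of any real implementation:
 *
 *   public class MyTransferImpl extends AbstractMultipleFTPerXFERJob {
 *
 *       public MyTransferImpl( PegasusBag bag ){ super( bag ); }
 *
 *       protected String getDerivationNamespace(){ return "example"; }
 *       protected String getDerivationName(){ return "my-transfer"; }
 *       protected String getDerivationVersion(){ return "1.0"; }
 *
 *       //arguments handed to the transfer executable for this job
 *       protected String generateArgumentString( TransferJob job ){
 *           return "";
 *       }
 *
 *       //one line per source/destination URL pair, read on stdin
 *       protected void writeStdInAndAssociateCredentials( TransferJob job,
 *               FileWriter stdIn, Collection files, String stagingSite,
 *               int jobClass ) throws Exception {
 *           for( Iterator it = files.iterator(); it.hasNext(); ){
 *               FileTransfer ft = (FileTransfer) it.next();
 *               stdIn.write( ft.getSourceURL().getValue() + " " +
 *                            ft.getDestURL().getValue() + "\n" );
 *           }
 *       }
 *
 *       protected String getCompleteTCName(){
 *           return Separator.combine( "example", "my-transfer", "1.0" );
 *       }
 *
 *       protected List<Profile> getEnvironmentVariables( String site ){
 *           return new java.util.LinkedList<Profile>();
 *       }
 *   }
 */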