/** * Copyright 2007-2008 University Of Southern California * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package edu.isi.pegasus.planner.parser.pdax; import edu.isi.pegasus.common.logging.LogManagerFactory; import edu.isi.pegasus.planner.code.CodeGeneratorFactory; import edu.isi.pegasus.planner.code.GridStartFactory; import edu.isi.pegasus.planner.classes.ADag; import edu.isi.pegasus.planner.classes.PlannerOptions; import edu.isi.pegasus.planner.classes.Job; import edu.isi.pegasus.common.logging.LogManager; import edu.isi.pegasus.common.util.CondorVersion; import edu.isi.pegasus.planner.common.PegasusProperties; import edu.isi.pegasus.common.util.StreamGobbler; import edu.isi.pegasus.common.util.DefaultStreamGobblerCallback; import edu.isi.pegasus.common.util.StreamGobblerCallback; import edu.isi.pegasus.planner.namespace.Pegasus; import edu.isi.pegasus.planner.partitioner.Partition; import edu.isi.pegasus.planner.partitioner.DAXWriter; import edu.isi.pegasus.planner.catalog.TransformationCatalog; import edu.isi.pegasus.planner.catalog.transformation.TransformationCatalogEntry; import edu.isi.pegasus.planner.catalog.transformation.TransformationFactory; import org.griphyn.vdl.euryale.FileFactory; import org.griphyn.vdl.euryale.HashedFileFactory; import org.griphyn.vdl.euryale.FlatFileFactory; import edu.isi.pegasus.planner.catalog.transformation.classes.TCType; import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.PrintWriter; import java.io.BufferedWriter; import java.io.FilenameFilter; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.regex.Pattern; import java.text.NumberFormat; import java.text.DecimalFormat; import edu.isi.pegasus.planner.namespace.Condor; import edu.isi.pegasus.planner.namespace.ENV; /** * This callback ends up creating the megadag that contains the smaller dags * each corresponding to the one level as identified in the pdax file * generated by the partitioner. * * @author Karan Vahi * @version $Revision$ */ public class PDAX2MDAG implements Callback { /** * The SubmitWriter that has to be loaded for now. */ public static final String CODE_GENERATOR_CLASS = CodeGeneratorFactory.CONDOR_CODE_GENERATOR_CLASS; /** * The prefix for the submit directory. */ public static final String SUBMIT_DIRECTORY_PREFIX = "run"; /** * The number of jobs into which each job in the partition graph is * expanded to. */ public static final int NUM_OF_EXPANDED_JOBS = 2; /** * The index of the head job. */ public static final int HEAD_INDEX = 0; /** * The index of the tail job. */ public static final int TAIL_INDEX = 1; /** * The logical name with which to query the transformation catalog for * cPlanner executable. */ public static final String CPLANNER_LOGICAL_NAME = "pegasus-plan"; /** * The namespace to use for condor dagman. 
*/ public static final String CONDOR_DAGMAN_NAMESPACE = "condor"; /** * The logical name with which to query the transformation catalog for the * condor_dagman executable, that ends up running the mini dag as one * job. */ public static final String CONDOR_DAGMAN_LOGICAL_NAME = "dagman"; /** * The namespace to which the job in the MEGA DAG being created refer to. */ public static final String NAMESPACE = "pegasus"; /** * The planner utility that needs to be called as a prescript. */ public static final String RETRY_LOGICAL_NAME = "pegasus-plan"; /** * The dagman knobs controlled through property. They map the property name to * the corresponding dagman option. */ public static final String DAGMAN_KNOBS[][]={ { "pegasus.dagman.maxpre", " -MaxPre " }, { "pegasus.dagman.maxpost", " -MaxPost " }, { "pegasus.dagman.maxjobs", " -MaxJobs " }, { "pegasus.dagman.maxidle", " -MaxIdle " }, }; /** * The file Separator to be used on the submit host. */ protected static char mSeparator = File.separatorChar; /** * The directory in which the daxes corresponding to the partitions are * kept. This should be the same directory where the pdax containing the * partition graph resides. */ private String mPDAXDirectory; /** * The root of the submit directory where all the submit directories for * the various partitions reside. */ private String mSubmitDirectory; /** * The abstract dag object that ends up holding the megadag. */ private ADag mMegaDAG; /** * The internal map that maps the partition id to the job responsible * for executing the partition.. */ private Map mJobMap; /** * The internal map that contains maps the job id of the partition to the * head and tail jobs in the linear sequence of jobs to which the partion * job is expanded to. */ //private Map mSequenceMap; /** * The handle to the properties file. */ private PegasusProperties mProps; /** * The handle to the transformation catalog. */ private TransformationCatalog mTCHandle; /** * The handle to the logging object. */ private LogManager mLogger; /** * The object containing the options that were given to the concrete * planner at runtime. */ private PlannerOptions mPOptions; /** * The path to the properties file that is written out and shared by * all partitions in the mega DAG. */ private String mMDAGPropertiesFile; /** * The handle to the file factory, that is used to create the top level * directories for each of the partitions. */ private FileFactory mFactory; /** * An instance of the default stream gobbler callback implementation that * is used for creating symbolic links. */ private StreamGobblerCallback mDefaultCallback; /** * The number formatter to format the run submit dir entries. */ private NumberFormat mNumFormatter; /** * The user name of the user running Pegasus. */ private String mUser; /** * A flag to store whether the parsing is complete or not. */ private boolean mDone; /** * Any extra arguments that need to be passed to dagman, as determined * from the properties file. */ private String mDAGManKnobs; /** * The long value of condor version. */ private long mCondorVersion; /** * The cleanup scope for the workflows. */ private PegasusProperties.CLEANUP_SCOPE mCleanupScope; /** * Bag of initialization objects. */ //private PegasusBag mBag; /** * The overloaded constructor. * * @param directory the directory where the pdax and all the daxes * corresponding to the partitions reside. * @param properties the <code>PegasusProperties</code> to be used. * @param options the options passed to the planner. 
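     *
     * <p>Illustrative construction sketch (an editor's addition, not taken from
     * the original source; the <code>props</code> and <code>options</code>
     * objects are assumed to have been initialized by the planner driver):
     * <pre>
     *   Callback cb = new PDAX2MDAG( "/path/to/pdax/directory", props, options );
     *   // the PDAX parser then invokes cbDocument(), cbPartition(), cbParents()
     *   // and cbDone(); the assembled mega DAG is retrieved afterwards via
     *   ADag megaDAG = (ADag) cb.getConstructedObject();
     * </pre>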
*/ public PDAX2MDAG( String directory, PegasusProperties properties, PlannerOptions options) { mPDAXDirectory = directory; mProps = properties; mLogger = LogManagerFactory.loadSingletonInstance( properties ); mPOptions = options; mTCHandle = TransformationFactory.loadInstance( mProps, mLogger ); mMDAGPropertiesFile = null; mNumFormatter = new DecimalFormat( "0000" ); mDone = false; mUser = mProps.getProperty( "user.name" ) ; if ( mUser == null ){ mUser = "user"; } //the default gobbler callback always log to debug level mDefaultCallback = new DefaultStreamGobblerCallback(LogManager.DEBUG_MESSAGE_LEVEL); mDAGManKnobs = constructDAGManKnobs( properties ); mCleanupScope = mProps.getCleanupScope(); mCondorVersion = CondorVersion.getInstance( mLogger ).numericValue(); if( mCondorVersion == -1 ){ mLogger.log( "Unable to determine the version of condor " , LogManager.WARNING_MESSAGE_LEVEL ); } else{ mLogger.log( "Condor Version detected is " + mCondorVersion , LogManager.DEBUG_MESSAGE_LEVEL ); } } /** * Checks the destination location for existence, if it can * be created, if it is writable etc. * * @param dir is the new base directory to optionally create. * * @throws IOException in case of error while writing out files. */ protected static void sanityCheck( File dir ) throws IOException{ if ( dir.exists() ) { // location exists if ( dir.isDirectory() ) { // ok, isa directory if ( dir.canWrite() ) { // can write, all is well return; } else { // all is there, but I cannot write to dir throw new IOException( "Cannot write to existing directory " + dir.getPath() ); } } else { // exists but not a directory throw new IOException( "Destination " + dir.getPath() + " already " + "exists, but is not a directory." ); } } else { // does not exist, try to make it if ( ! dir.mkdirs() ) { throw new IOException( "Unable to create directory destination " + dir.getPath() ); } } } /** * Callback when the opening tag was parsed. This contains all * attributes and their raw values within a map. This callback can * also be used to initialize callback-specific resources. * * @param attributes is a map of attribute key to attribute value */ public void cbDocument(Map attributes) { mMegaDAG = new ADag(); mJobMap = new HashMap(); //mSequenceMap = new HashMap(); //the name of the mega dag is set to the name //attribute in the pdax mMegaDAG.setLabel( (String)attributes.get("name") ); mMegaDAG.setCount( (String)attributes.get("count") ); mMegaDAG.setIndex( (String)attributes.get("index")); // create files in the directory, unless anything else is known. try { //create a submit directory structure if required String relativeDir = ( mPOptions.getRelativeDirectory() == null ) ? this.createSubmitDirectory( mMegaDAG.getLabel(), mPOptions.getSubmitDirectory(), mUser, mPOptions.getVOGroup(), mProps.useTimestampForDirectoryStructure() ): mPOptions.getRelativeDirectory(); //set the directory structure mPOptions.setSubmitDirectory( mPOptions.getBaseSubmitDirectory(), relativeDir); mSubmitDirectory = mPOptions.getSubmitDirectory(); //we want to set the relative directory as the base working //directory for all the partition on the remote sites. 
mPOptions.setRandomDir( relativeDir ); mFactory = new FlatFileFactory(mSubmitDirectory); // minimum default } catch ( IOException ioe ) { throw new RuntimeException( "Unable to generate files in the submit directory " , ioe ); } // not in the PDAX format currently String s = (String) attributes.get("partitionCount"); // create hashed, and levelled directories try { HashedFileFactory temp = null; int partCount = ( s == null ) ? //determine at runtime the number of partitions getPartitionCount(mPOptions.getPDAX()) : Integer.parseInt(s) ; //if ( m_minlevel > 0 && m_minlevel > jobCount ) jobCount = m_minlevel; if ( partCount > 0 ) temp = new HashedFileFactory( mSubmitDirectory, partCount ); else temp = new HashedFileFactory( mPDAXDirectory ); //each job creates the following files // - submit file // - out file // - error file // - prescript log // - the partition directory temp.setMultiplicator(5); //we want a minimum of one level always for clarity temp.setLevels(1); //for the time being and test set files per directory to 50 /* temp.setFilesPerDirectory( 40 ); temp.setLevelsFromTotals(partCount); */ mFactory = temp; //write out all the properties into a temp file //in the root submit directory //mMDAGPropertiesFile = writeOutProperties( mSubmitDirectory ); mMDAGPropertiesFile = mProps.writeOutProperties( mSubmitDirectory ); } catch ( NumberFormatException nfe ) { String error = ( s == null ) ? "Unspecified number for partition count": "Illegal number \"" + s + "\" for partition count"; throw new RuntimeException( error ); } catch ( IOException e ) { //figure out where the error happened String message = (mMDAGPropertiesFile == null)? "Unable to write out properties file in base submit directory": "Base directory creation"; //wrap into runtime and throw throw new RuntimeException( message, e ); } } /** * Callback for the partition. These partitions are completely * assembled, but each is passed separately. * * @param partition is the PDAX-style partition. */ public void cbPartition(Partition partition) { String name = partition.getName(); int index = partition.getIndex(); ArrayList sequenceList = new ArrayList(NUM_OF_EXPANDED_JOBS); String tailJob; Job job; //get the filename of the dax file containing the partition String dax = DAXWriter.getPDAXFilename(name, index); //construct the path to the file dax = mPDAXDirectory + File.separator + dax; File partitionDirectory; try{ partitionDirectory = mFactory.createFile( getBaseName(partition) ); partitionDirectory.mkdirs(); //construct a symlink to the dax file in the partition directory if (!createSymlink( dax, partitionDirectory)){ mLogger.log("Unable to create symlinks of the dax file to submit dir", LogManager.WARNING_MESSAGE_LEVEL); } } catch(IOException e){ //wrap and throw throw new RuntimeException( "Unable to create partition submit directory ", e ); } //construct the appropriate vds-submit-dag job with the //prescript set as an invocation to gencdag etc. job = constructDAGJob( partition , partitionDirectory, dax); //add to the workflow mMegaDAG.add(job); //map the partition id to the job that is constructed.
mJobMap.put(partition.getID(),job); /** String jobName = getPegasusJobName(name,index); //populate the internal job map with jobname and id mJobMap.put(partition.getID(),getPegasusJobName(name,index)); //add the sub info for it job = constructPegasusJob(jobName, file); mMegaDAG.add(job); //generate the dagman job that ends up submitting //the mini dag corresponding to the partition //mMegaDAG.addNewJob(getJobName(name,index)); tailJob = "condor_submit_" + jobName ; job = constructCondorSubmitJob(tailJob,name,index); mMegaDAG.add(job); //put the sequence list sequenceList.add(HEAD_INDEX,jobName); sequenceList.add(TAIL_INDEX,tailJob); mSequenceMap.put(jobName,sequenceList); //add the relation between jobname and tail job mMegaDAG.addNewRelation(jobName,tailJob); */ } /** * Callback for child and parent relationships from section 3. This ties * in the relations between the partitions to the relations between the jobs * that are responsible for partitions. In addition, appropriate cache * file arguments are generated. * * @param child is the IDREF of the child element. * @param parents is a list of IDREFs of the included parents. */ public void cbParents(String child, List parents) { String cacheName; String cacheArgs = null; //get hold of the constructed job for the child. //the name of the jobs are treated as ID's Job cJob = getJob(child); String cID = cJob.getName(); //glue in the sequences for the expanded things together if(!parents.isEmpty()){ //the megadag should not be invoked with cache option for time being cacheArgs = " --cache "; } //traverse through the parents to put in the relations //and the cache file arguments. String pID; Job pJob; for(Iterator it = parents.iterator();it.hasNext();){ //get the parent job and name pJob = (Job)mJobMap.get(it.next()); pID = pJob.getName(); mLogger.log("Adding Relation " + pID + "->" + cID, LogManager.DEBUG_MESSAGE_LEVEL); mMegaDAG.addNewRelation(pID,cID); //we need to specify the cache files for those partitions //even if they are not constructed. there is a disconnect //as to how the names are being generated. There should be //a call to one function only. cacheName = getCacheFilePath(pJob); cacheArgs += cacheName + ","; } //stuff the arguments back into replanner prescript. //should be a callout to a different function for portability String args = cJob.getPreScriptArguments(); //System.out.println("Arguments are " + args); cJob.setPreScript( cJob.getPreScriptPath(), (cacheArgs == null)? //remains the same args: //remove the last instance of , from cache args args + cacheArgs.substring(0,cacheArgs.lastIndexOf(',')) ); } /** * Callback when the parsing of the document is done. This ends up * triggering the writing of the condor submit files corresponding to the * mega dag. */ public void cbDone() { mDone = true; //generate the classad's options //for the Mega DAG mMegaDAG.generateFlowName(); mMegaDAG.setFlowTimestamp( mPOptions.getDateTime( mProps.useExtendedTimeStamp() )); mMegaDAG.setDAXMTime( new File( mPOptions.getPDAX() ) ); mMegaDAG.generateFlowID(); mMegaDAG.setReleaseVersion(); /* Moved to CPlanner Class Karan Apr 1 2008 CodeGenerator codeGenerator = null; int state = 0; try{ //load the Condor Writer that understands HashedFile Factories. codeGenerator = CodeGeneratorFactory.loadInstance( mBag, CODE_GENERATOR_CLASS ); state = 1; codeGenerator.generateCode( mMegaDAG ); //generate only the braindump file that is required. 
//no spawning off the tailstatd for time being codeGenerator.startMonitoring(); } catch( FactoryException fe ){ throw new FactoryException("PDAX2MDAG",fe); } catch( Exception e){ throw new RuntimeException("Error while generating code for the workflow",e); } */ } /** * Returns the MEGADAG that is generated * * @return ADag object containing the mega daga */ public Object getConstructedObject(){ if(!mDone) throw new RuntimeException("Method called before the megadag " + " was fully generated"); return mMegaDAG; } /** * Constructs a job that plans and submits the partitioned workflow, * referred to by a Partition. The main job itself is a condor dagman job * that submits the concrete workflow. The concrete workflow is generated by * running the planner in the prescript for the job. * * @param partition the partition corresponding to which the job has to be * constructed. * @param directory the submit directory where the submit files for the * partition should reside. * @param dax the absolute path to the partitioned dax file that * corresponds to this partition. * * @return the constructed DAG job. */ protected Job constructDAGJob( Partition partition , File directory, String dax){ //for time being use the old functions. Job job = new Job(); //the parent directory where the submit file for condor dagman has to //reside. the submit files for the corresponding partition are one level //deeper. String parentDir = directory.getParent(); //set the logical transformation job.setTransformation(CONDOR_DAGMAN_NAMESPACE, CONDOR_DAGMAN_LOGICAL_NAME, null); //set the logical derivation attributes of the job. job.setDerivation(CONDOR_DAGMAN_NAMESPACE, CONDOR_DAGMAN_LOGICAL_NAME, null); //always runs on the submit host job.setSiteHandle("local"); //set the partition id only as the unique id //for the time being. // job.setName(partition.getID()); //set the logical id for the job same as the partition id. job.setLogicalID(partition.getID()); //figure out the relative submit directory where the dagman job should //reside. It should be one level up from the partition directory. String dir = ""; dir += (parentDir.equals(mSubmitDirectory))? //the directory is same as the root dir : //get the relative from root parentDir.substring(mSubmitDirectory.length() ); // job.setSubmitDirectory(dir); //construct the name of the job as a deep lfn with a directory path StringBuffer name = new StringBuffer(); //get the part from the first file separator onwards name.append( (dir.indexOf(File.separatorChar) == 0) ? dir.substring(1) : dir.substring(0)); //append a file separator in the end if dir was some name if( dir.length() > 1) {name.append(File.separatorChar);} //set the basename for the deep lfn name.append(partition.getID()); //System.out.println (" The name is " + name.toString()); job.setName(name.toString()); List entries; TransformationCatalogEntry entry = null; //get the path to condor dagman try{ //try to construct the path from the environment entry = constructTCEntryFromEnvironment( ); //try to construct from the TC if( entry == null ){ entries = mTCHandle.lookup(job.namespace, job.logicalName, job.version, job.getSiteHandle(), TCType.INSTALLED); entry = (entries == null) ? 
defaultTCEntry( "local") ://construct from site catalog //Gaurang assures that if no record is found then //TC Mechanism returns null (TransformationCatalogEntry) entries.get(0); } } catch(Exception e){ throw new RuntimeException( "ERROR: While accessing the Transformation Catalog",e); } if(entry == null){ //throw appropriate error throw new RuntimeException("ERROR: Entry not found in tc for job " + job.getCompleteTCName() + " on site " + job.getSiteHandle()); } //set the path to the executable and environment string job.executable = entry.getPhysicalTransformation(); //the environment variable are set later automatically from the tc //job.envVariables = entry.envString; //the job itself is the main job of the super node //construct the classad specific information job.jobID = job.getName(); job.jobClass = Job.COMPUTE_JOB; //directory where all the dagman related files for the nested dagman //reside. Same as the directory passed as an input parameter dir = directory.getAbsolutePath(); //make the initial dir point to the submit file dir for the partition //we can do this as we are running this job both on local host, and scheduler //universe. Hence, no issues of shared filesystem or anything. job.condorVariables.construct("initialdir", dir); //construct the argument string, with all the dagman files //being generated in the partition directory. Using basenames as //initialdir has been specified for the job. StringBuffer sb = new StringBuffer(); sb.append(" -f -l . -Debug 3"). append(" -Lockfile ").append( getBasename( partition, ".dag.lock") ). append(" -Dag ").append( getBasename( partition, ".dag")); //specify condor log for condor version less than 7.1.2 if( mCondorVersion < CondorVersion.v_7_1_2 ){ sb.append(" -Condorlog ").append(getBasename( partition, ".log")); } //allow for version mismatch as after 7.1.3 condor does tight //checking on dag.condor.sub file and the condor version used if( mCondorVersion >= CondorVersion.v_7_1_3 ){ sb.append( " -AllowVersionMismatch " ); } //we append the Rescue DAG option only if old version //of Condor is used < 7.1.0. To detect we check for a non //zero value of --rescue option to pegasus-plan //Karan June 27, 2007 mLogger.log( "Number of Resuce retries " + mPOptions.getNumberOfRescueTries() , LogManager.DEBUG_MESSAGE_LEVEL ); if( mCondorVersion >= CondorVersion.v_7_1_0 || mPOptions.getNumberOfRescueTries() > 0 ){ mLogger.log( "Constructing arguments to dagman in 7.1.0 and later style", LogManager.DEBUG_MESSAGE_LEVEL ); sb.append( " -AutoRescue 1 -DoRescueFrom 0 "); } else{ mLogger.log( "Constructing arguments to dagman in pre 7.1.0 style", LogManager.DEBUG_MESSAGE_LEVEL ); sb.append(" -Rescue ").append(getBasename( partition, ".dag.rescue")); } //pass any dagman knobs that were specified in properties file sb.append( this.mDAGManKnobs ); //put in the environment variables that are required job.envVariables.construct("_CONDOR_DAGMAN_LOG", getAbsolutePath( partition, dir,".dag.dagman.out")); job.envVariables.construct("_CONDOR_MAX_DAGMAN_LOG","0"); //set the arguments for the job job.setArguments(sb.toString()); //the environment need to be propogated for exitcode to be picked up job.condorVariables.construct("getenv","TRUE"); job.condorVariables.construct("remove_kill_sig","SIGUSR1"); //the log file for condor dagman for the dagman also needs to be created //it is different from the log file that is shared by jobs of //the partition. 
That is referred to by Condorlog // keep the log file common for all jobs and dagman albeit without // dag.dagman.log suffix // job.condorVariables.construct("log", getAbsolutePath( partition, dir,".dag.dagman.log")); // String dagName = mMegaDAG.dagInfo.nameOfADag; // String dagIndex= mMegaDAG.dagInfo.index; // job.condorVariables.construct("log", dir + mSeparator + // dagName + "_" + dagIndex + ".log"); //the job needs to be explicitly launched in //scheduler universe instead of local universe job.condorVariables.construct( Condor.UNIVERSE_KEY, Condor.SCHEDULER_UNIVERSE ); //add any notifications specified in the transformation //catalog for the job. JIRA PM-391 job.addNotifications( entry ); //incorporate profiles from the transformation catalog //and properties for the time being. Not from the site catalog. //the profile information from the transformation //catalog needs to be assimilated into the job //overriding the one from pool catalog. job.updateProfiles( entry ); //the profile information from the properties file //is assimilated overidding the one from transformation //catalog. job.updateProfiles(mProps); //constructed the main job. now construct the prescript //the log file resides in the directory where the condor_dagman //job resides i.e the parent directory. StringBuffer log = new StringBuffer(); log.append(parentDir).append(mSeparator).append(partition.getID()). append(".pre.log"); //set the prescript for the job in the dagman namespace setPrescript( job, dax, log.toString()); //construct the braindump file for tailstatd invocations //the dag should be same as the one passed in the arguments string! StringBuffer dag = new StringBuffer(); dag.append(dir).append(mSeparator).append( getBasename( partition, ".dag")); //we do not want the job to be launched via kickstart //Fix for Pegasus bug number 143 //http://bugzilla.globus.org/vds/show_bug.cgi?id=143 job.vdsNS.construct( Pegasus.GRIDSTART_KEY, GridStartFactory.GRIDSTART_SHORT_NAMES[GridStartFactory.NO_GRIDSTART_INDEX] ); return job; } /** * Returns a default TC entry to be used in case entry is not found in the * transformation catalog. * * @param site the site for which the default entry is required. * * * @return the default entry. */ private TransformationCatalogEntry defaultTCEntry( String site ){ //not implemented as we dont have handle to site catalog in this class return null; } /** * Returns a tranformation catalog entry object constructed from the environment * * An entry is constructed if either of the following environment variables * are defined * 1) CONDOR_HOME * 2) CONDOR_LOCATION * * CONDOR_HOME takes precedence over CONDOR_LOCATION * * * @return the constructed entry else null. */ private TransformationCatalogEntry constructTCEntryFromEnvironment( ){ //construct environment profiles Map<String,String> m = System.getenv(); ENV env = new ENV(); String key = "CONDOR_HOME"; if( m.containsKey( key ) ){ env.construct( key, m.get( key ) ); } key = "CONDOR_LOCATION"; if( m.containsKey( key ) ){ env.construct( key, m.get( key ) ); } return constructTCEntryFromEnvProfiles( env ); } /** * Returns a tranformation catalog entry object constructed from the environment * * An entry is constructed if either of the following environment variables * are defined * 1) CONDOR_HOME * 2) CONDOR_LOCATION * * CONDOR_HOME takes precedence over CONDOR_LOCATION * * @param env the environment profiles. * * * @return the entry constructed else null if environment variables not defined. 
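     *
     * <p>For example (an illustrative sketch added by the editor; the Condor
     * install location is an assumption):
     * <pre>
     *   ENV env = new ENV();
     *   env.construct( "CONDOR_HOME", "/opt/condor" );
     *   TransformationCatalogEntry entry = constructTCEntryFromEnvProfiles( env );
     *   // entry.getPhysicalTransformation() is "/opt/condor/bin/condor_dagman",
     *   // the resource id is set to "local" and the type to TCType.INSTALLED
     * </pre>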
*/ private TransformationCatalogEntry constructTCEntryFromEnvProfiles( ENV env ) { TransformationCatalogEntry entry = null; //check if either CONDOR_HOME or CONDOR_LOCATION is defined String key = null; if( env.containsKey( "CONDOR_HOME") ){ key = "CONDOR_HOME"; } else if( env.containsKey( "CONDOR_LOCATION") ){ key = "CONDOR_LOCATION"; } if( key == null ){ //environment variables are not defined. return entry; } mLogger.log( "Constructing path to dagman on basis of env variable " + key, LogManager.DEBUG_MESSAGE_LEVEL ); entry = new TransformationCatalogEntry(); entry.setLogicalTransformation( CONDOR_DAGMAN_NAMESPACE, CONDOR_DAGMAN_LOGICAL_NAME, null ); entry.setType( TCType.INSTALLED ); entry.setResourceId( "local" ); //construct path to condor dagman StringBuffer path = new StringBuffer(); path.append( env.get( key ) ).append( File.separator ). append( "bin" ).append( File.separator). append( "condor_dagman" ); entry.setPhysicalTransformation( path.toString() ); return entry; } /** * Writes out the braindump.txt file for a partition in the partition submit * directory. The braindump.txt file is used for passing to the tailstatd * daemon that monitors the state of execution of the workflow. * * @param directory the directory in which the braindump file needs to * be written to. * @param partition the partition for which the braindump is to be written out. * @param dax the dax file * @param dag the dag file * * @return the absolute path to the braindump file.txt written in the directory. * * @throws IOException in case of error while writing out file. */ protected String writeOutBraindump( File directory, Partition partition, String dax, String dag) throws IOException{ //sanity check on the directory sanityCheck( directory ); //create a writer to the braindump.txt in the directory. File f = new File( directory , "braindump.txt"); PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(f))); //store absolute path to dir just once String absPath = directory.getAbsolutePath(); //assemble all the contents in a buffer before writing out StringBuffer contents = new StringBuffer(); contents.append("dax ").append(dax).append("\n"). append("dag ").append(dag).append("\n"). append("run ").append(absPath).append("\n"). append("jsd ").append(absPath).append(mSeparator).append("jobstate.log").append("\n"). append("rundir ").append(directory.getName()).append("\n"). append("bindir ").append(mProps.getBinDir()).append("\n"). append("vogroup pegasus").append("\n").//for time being append("label " + partition.getName()); writer.write( contents.toString()); writer.close(); return f.getAbsolutePath(); } /** * Writes out the properties to a temporary file in the directory passed. * * @param directory the directory in which the properties file needs to * be written to. * * @return the absolute path to the properties file written in the directory. * * @throws IOException in case of error while writing out file. 
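     *
     * <p>Usage sketch (an editor's addition; the submit directory path is
     * assumed):
     * <pre>
     *   String path = writeOutProperties( "/scratch/vahi/run0001" );
     *   // path points to a freshly created pegasus.*.properties file inside
     *   // /scratch/vahi/run0001 that holds only the "pegasus" subset of the
     *   // known properties
     * </pre>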
*/ protected String writeOutProperties( String directory ) throws IOException{ File dir = new File(directory); //sanity check on the directory sanityCheck( dir ); //we only want to write out the Pegasus properties for time being Properties properties = mProps.matchingSubset( "pegasus", true ); //create a temporary file in directory File f = File.createTempFile( "pegasus.", ".properties", dir ); //the header of the file StringBuffer header = new StringBuffer(64); header.append("PEGASUS USER PROPERTIES AT RUNTIME \n") .append("#ESCAPES IN VALUES ARE INTRODUCED"); //create an output stream to this file and write out the properties OutputStream os = new FileOutputStream(f); properties.store( os, header.toString() ); os.close(); return f.getAbsolutePath(); } /** * Sets the prescript that ends up calling to the default wrapper that * introduces retry into Pegasus for a particular job. * * @param job the job whose prescript needs to be set. * @param daxURL the path to the dax file on the filesystem. * @param log the file where the output of the prescript needs to be * redirected to. * * @see #RETRY_LOGICAL_NAME */ protected void setPrescript(Job job, String daxURL, String log){ setPrescript( job, daxURL, log, this.NAMESPACE, RETRY_LOGICAL_NAME, null); } /** * Sets the prescript that ends up calling to the default wrapper that * introduces retry into Pegasus for a particular job. * * @param job the job whose prescript needs to be set. * @param daxURL the path to the dax file on the filesystem. * @param log the file where the output of the prescript needs to be * redirected to. * @param namespace the namespace of the replanner utility. * @param name the logical name of the replanner. * @param version the version of the replanner to be picked up. * */ protected void setPrescript(Job job, String daxURL, String log, String namespace, String name, String version){ String site = job.getSiteHandle(); TransformationCatalogEntry entry = null; //get the path to script wrapper from the try{ List entries = mTCHandle.lookup(namespace, name, version, site, TCType.INSTALLED); //get the first entry from the list returned entry = ( entries == null ) ? null : //Gaurang assures that if no record is found then //TC Mechanism returns null ((TransformationCatalogEntry) entries.get(0)); } catch(Exception e){ throw new RuntimeException( "ERROR: While accessing the Transformation Catalog",e); } PlannerOptions options = ( mPOptions == null)? null : (PlannerOptions)mPOptions.clone(); if( options == null ){ throw new RuntimeException( "ERROR: Planner Options passed to setPrescript are null" ); } //construct the prescript path StringBuffer script = new StringBuffer(); if(entry == null){ //log to debug mLogger.log("Constructing the default path to the replanner for prescript", LogManager.DEBUG_MESSAGE_LEVEL); //construct the default path to the executable script.append( mProps.getBinDir() ).append( mSeparator ). append( RETRY_LOGICAL_NAME ); } else{ script.append(entry.getPhysicalTransformation()); } //the output of the prescript i.e submit files should be created //in the directory where the job is being run. 
options.setSubmitDirectory( (String)job.condorVariables.get("initialdir")); //generate the remote working directory for the paritition String submit = options.getSubmitDirectory(); // like /tmp/vahi/pegasus/blackdiamond/run0001/00/PID1 String remoteBase = mPOptions.getRandomDir(); // like vahi/pegasus/blackdiamond/run0001 String remoteWorkDir = submit.substring( submit.indexOf( remoteBase) ); //gets us vahi/pegasus/blackdiamond/run0001/00/PID1 //trying to use the relative dir option now, Karan April 10, 2008 // options.setRandomDir( remoteWorkDir ); // mLogger.log( "Remote working directory set to " + remoteWorkDir + // " for partition " + job.getID() , // LogManager.DEBUG_MESSAGE_LEVEL ); //set the base and relative submit dir options.setBaseSubmitDirectory( mPOptions.getBaseSubmitDirectory() ); options.setRelativeDirectory( remoteWorkDir ); //set the basename for the nested dag as the ID of the job. //which is actually the basename of the deep lfn job name!! options.setBasenamePrefix( getBasenamePrefix(job)); //set the flag designating that the planning invocation is part //of a deferred planning run options.setPartOfDeferredRun( true ); //in case of deferred planning cleanup wont work //explicitly turn it off if the file cleanup scope if fullahead if( mCleanupScope.equals( PegasusProperties.CLEANUP_SCOPE.fullahead ) ){ options.setCleanup( PlannerOptions.CLEANUP_OPTIONS.none ); } //construct the argument string. //add the jvm options and the pegasus options if any StringBuffer arguments = new StringBuffer(); arguments./*append( mPOptions.toJVMOptions())*/ append( " -Dpegasus.log.*=").append(log). //add other jvm options that user may have specified append( options.toJVMOptions() ). append(" --conf ").append( mMDAGPropertiesFile ). //the dax argument is diff for each partition append(" --dax ").append( daxURL ). //put in all the other options. append( options.toOptions()); //set the path and the arguments to prescript job.setPreScript( script.toString(), arguments.toString()); } /** * Returns the base name of the submit directory in which the submit files * for a particular partition reside. * * @param partition the partition for which the base directory is to be * constructed. * * @return the base name of the partition. */ protected String getBaseName( Partition partition ){ String id = partition.getID(); StringBuffer sb = new StringBuffer( id.length() + 1 ); sb.append('P').append(id); return sb.toString(); } /** * Returns the absolute path to a dagman (usually) related file for a * particular partition in the submit directory that is passed as an input * parameter. This does not create the file, just returns an absolute path * to it. Useful for constructing argument string for condor_dagman. * * @param partition the partition for which the dagman is responsible for * execution. * @param directory the directory where the file should reside. * @param suffix the suffix for the file basename. * * @return the absolute path to a file in the submit directory. */ protected String getAbsolutePath( Partition partition, String directory, String suffix){ StringBuffer sb = new StringBuffer(); //add a prefix P to partition id sb.append( directory ).append(mSeparator). append( getBasename( partition, suffix) ); return sb.toString(); } /** * Returns the basename of a dagman (usually) related file for a particular * partition. * * @param partition the partition for which the dagman is responsible for * execution. * @param suffix the suffix for the file basename. * * @return the basename. 
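     *
     * <p>For example (an editor's sketch; the partition id is assumed):
     * <pre>
     *   // a partition with ID "ID2" and suffix ".dag" yields the basename "PID2.dag"
     *   String dagBasename = getBasename( partition, ".dag" );
     * </pre>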
*/ protected String getBasename( Partition partition, String suffix ){ StringBuffer sb = new StringBuffer( 16 ); //add a prefix P sb.append('P').append(partition.getID()).append( suffix ); return sb.toString(); } /** * Returns the basename prefix of a dagman (usually) related file for a * job that submits a nested dagman. * * @param job the job that submits a nested dagman. * * @return the basename. */ protected String getBasenamePrefix( Job job ){ StringBuffer sb = new StringBuffer( 8 ); //add a prefix P sb.append('P').append(job.getLogicalID()); return sb.toString(); } /** * Returns the full path to the cache file that corresponds to one partition. * The cache file resides in the submit directory of the partition for which * the job is responsible. * * @param job the job running on the submit host that submits the partition. * * @return the full path to the file. */ protected String getCacheFilePath(Job job){ StringBuffer sb = new StringBuffer(); //cache file is being generated in the initialdir set for the job. //initialdir is set correctly to the submit directory for the nested dag. sb.append(job.condorVariables.get("initialdir")). append(File.separatorChar).append(getBasenamePrefix(job)). append(".cache"); return sb.toString(); } /** * Creates a symbolic link to the source file in the destination directory. * * @param source the source file that has to be symlinked. * @param destDir the destination directory where the symlink has to be * placed. * * @return true if the symlink was created, false otherwise. */ protected boolean createSymlink( String source, File destDir ){ boolean result = false; //do some sanity checks on the source and the destination File f = new File( source ); if( !f.exists() || !f.canRead()){ mLogger.log("The source for symlink does not exist " + source, LogManager.ERROR_MESSAGE_LEVEL); return result; } if( !destDir.exists() || !destDir.isDirectory() || !destDir.canWrite()){ mLogger.log("The destination directory cannot be written to " + destDir, LogManager.ERROR_MESSAGE_LEVEL); return result; } try{ //run the ln command to create the symlink Runtime r = Runtime.getRuntime(); String command = "ln -s " + source + " " + destDir.getAbsolutePath(); mLogger.log("Creating symlink " + command, LogManager.DEBUG_MESSAGE_LEVEL); Process p = r.exec(command); //spawn off the gobblers with the already initialized default callback StreamGobbler ips = new StreamGobbler(p.getInputStream(), mDefaultCallback); StreamGobbler eps = new StreamGobbler(p.getErrorStream(), mDefaultCallback); ips.start(); eps.start(); //wait for the threads to finish off ips.join(); eps.join(); //get the status int status = p.waitFor(); if( status != 0){ mLogger.log("Command " + command + " exited with status " + status, LogManager.DEBUG_MESSAGE_LEVEL); return result; } result = true; } catch(IOException ioe){ mLogger.log("IOException while creating symbolic links ", ioe, LogManager.ERROR_MESSAGE_LEVEL); } catch( InterruptedException ie){ //ignore } return result; } /** * Returns the number of partitions referred to in the PDAX file. * * @param pdax the path to the pdax file. * * @return the number of partitions in the pdax file.
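     *
     * <p>The count is obtained by running <code>grep "&lt;partition" pdax</code>
     * and counting the matches on stdout via a <code>GrepCallback</code>. A
     * behaviour sketch (an editor's addition; the file name is assumed):
     * <pre>
     *   // a pdax file that declares ten partition elements
     *   int n = getPartitionCount( "/path/to/blackdiamond.pdax" );   // n == 10
     * </pre>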
*/ protected int getPartitionCount( String pdax ){ int result = 0; File f = new File( pdax ); if( !f.exists() || !f.canRead()){ throw new RuntimeException( "PDAX File is unreadable " + pdax); } try{ //set the callback and run the grep command String word = "<partition"; GrepCallback c = new GrepCallback(word); Runtime r = Runtime.getRuntime(); String env[] = {"PATH=/bin:/usr/bin"}; String command = "grep " + word + " " + pdax; Process p = r.exec(command, env); //spawn off the gobblers StreamGobbler ips = new StreamGobbler(p.getInputStream(), c); StreamGobbler eps = new StreamGobbler(p.getErrorStream(), new StreamGobblerCallback(){ //we cannot log to any of the default streams LogManager mLogger = LogManagerFactory.loadSingletonInstance(); public void work(String s){ mLogger.log("Output on stream gobbler error stream " + s,LogManager.DEBUG_MESSAGE_LEVEL); } }); ips.start(); eps.start(); //wait for the threads to finish off ips.join(); result = c.getCount(); eps.join(); //get the status int status = p.waitFor(); if( status != 0){ mLogger.log("Command " + command + " exited with status " + status, LogManager.WARNING_MESSAGE_LEVEL); } } catch(IOException ioe){ mLogger.log("IOException while determining partition count ", ioe, LogManager.ERROR_MESSAGE_LEVEL); } catch( InterruptedException ie){ //ignore } return result; } /** * Returns the job that has been constructed for a particular partition. * * @param id the partition id. * * @return the corresponding job, else null if not found. */ protected Job getJob(String id){ Object obj = mJobMap.get(id); return (obj == null)?null:(Job)obj; } /** * Creates the submit directory for the workflow. This is not thread safe. * * @param label the label of the workflow being worked upon. * @param dir the base directory specified by the user. * @param user the username of the user. * @param vogroup the vogroup to which the user belongs. * @param timestampBased boolean indicating whether to have a timestamp based dir or not * * @return the directory name created relative to the base directory passed * as input. * * @throws IOException in case the submit directory cannot be created.
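     *
     * <p>Behaviour sketch (an editor's illustration; the paths and the existing
     * run directories are assumptions): with base directory <code>/submit</code>,
     * user <code>vahi</code>, vogroup <code>pegasus</code>, label
     * <code>blackdiamond</code> and <code>run0001</code> already present,
     * <pre>
     *   String rel = createSubmitDirectory( "blackdiamond", "/submit",
     *                                       "vahi", "pegasus", false );
     * </pre>
     * creates <code>/submit/vahi/pegasus/blackdiamond/run0002</code> and returns
     * the relative path <code>vahi/pegasus/blackdiamond/run0002</code>.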
*/ protected String createSubmitDirectory( String label, String dir, String user, String vogroup, boolean timestampBased ) throws IOException { File base = new File( dir ); StringBuffer result = new StringBuffer(); //do a sanity check on the base sanityCheck( base ); //add the user name if possible base = new File( base, user ); result.append( user ).append( File.separator ); //add the vogroup base = new File( base, vogroup ); sanityCheck( base ); result.append( vogroup ).append( File.separator ); //add the label of the DAX base = new File( base, label ); sanityCheck( base ); result.append( label ).append( File.separator ); //create the directory name StringBuffer leaf = new StringBuffer(); if( timestampBased ){ leaf.append( mPOptions.getDateTime( mProps.useExtendedTimeStamp() ) ); } else{ //get all the files in this directory String[] files = base.list( new RunDirectoryFilenameFilter() ); //find the maximum run directory int num, max = 1; for( int i = 0; i < files.length ; i++ ){ num = Integer.parseInt( files[i].substring( SUBMIT_DIRECTORY_PREFIX.length() ) ); if ( num + 1 > max ){ max = num + 1; } } //create the directory name leaf.append( SUBMIT_DIRECTORY_PREFIX ).append( mNumFormatter.format( max ) ); } result.append( leaf.toString() ); base = new File( base, leaf.toString() ); mLogger.log( "Directory to be created is " + base.getAbsolutePath(), LogManager.DEBUG_MESSAGE_LEVEL ); sanityCheck( base ); return result.toString(); } /** * Constructs Any extra arguments that need to be passed to dagman, as determined * from the properties file. * * @param properties the <code>PegasusProperties</code> * * @return any arguments to be added, else empty string */ public static String constructDAGManKnobs( PegasusProperties properties ){ StringBuffer sb = new StringBuffer(); //get all the values for the dagman knows int value; for( int i = 0; i < PDAX2MDAG.DAGMAN_KNOBS.length; i++ ){ value = parseInt( properties.getProperty( PDAX2MDAG.DAGMAN_KNOBS[i][0] ) ); if ( value > 0 ){ //add the option sb.append( PDAX2MDAG.DAGMAN_KNOBS[i][1] ); sb.append( value ); } } return sb.toString(); } /** * Parses a string into an integer. Non valid values returned as -1 * * @param s the String to be parsed as integer * * @return the int value if valid, else -1 */ protected static int parseInt( String s ){ int value = -1; try{ value = Integer.parseInt( s ); } catch( Exception e ){ //ignore } return value; } /** * A small utility method that constructs the name of the Condor files * that are generated when a dag is submitted. The default separator _ is * used. * * @param name the name attribute in the partition element of the pdax. * @param index the partition number of the partition. * @param suffix the suffix that needs to be added to the filename. * * @return the name of the condor file. */ private String getCondorFileName(String name, int index, String suffix){ return getCondorFileName(name,index,suffix,"_"); } /** * A small utility method that constructs the name of the Condor files * that are generated when a dag is submitted. * * @param name the name attribute in the partition element of the pdax. * @param index the partition number of the partition. * @param suffix the suffix that needs to be added to the filename * @param separator the separator that is to be used while constructing * the filename. 
* * @return the name of the condor file */ private String getCondorFileName(String name, int index, String suffix, String separator){ StringBuffer sb = new StringBuffer(); //all the files reside in the submit file //directory specified by the user. //sb.append(mPOptions.submitFileDir).append(File.separator); sb.append(name).append(separator).append(index).append(suffix); return sb.toString(); } /** * An inner class that implements the StreamGobblerCallback interface to count * the occurrences of a word in a stream. * */ private class GrepCallback implements StreamGobblerCallback{ /** * The word that is to be searched for. */ private String mWord; /** * The length of the word to be searched for. */ private int mWordLength; /** * The number of times the word appears. */ private int mCount; /** * Overloaded Constructor. * * @param word the word to be searched for. */ public GrepCallback( String word ){ mWord = word; mWordLength = (word == null) ? 0 : word.length(); mCount = 0; } /** * Callback whenever a line is read from the stream by the StreamGobbler. * Counts the occurrences of the word in the line, and adds them to the * global counter. * * @param line the line that is read. */ public void work( String line ){ //sanity check to prevent infinite iterations if( mWordLength == 0 ) return; int start = 0; int index; while ( ( index = line.indexOf( mWord, start)) != -1){ mCount++; start = index + mWordLength; } } /** * Returns the number of occurrences counted so far. * * @return the number of occurrences */ public int getCount(){ return mCount; } /** * Resets the internal counters. */ public void reset(){ mCount = 0; } } } /** * A filename filter for identifying the run directories in the submit directory. * * @author Karan Vahi vahi@isi.edu */ class RunDirectoryFilenameFilter implements FilenameFilter { /** * Stores the regular expression necessary to match run directory names, e.g. run0001. */ private static final String mRegexExpression = "(" + PDAX2MDAG.SUBMIT_DIRECTORY_PREFIX + ")([0-9][0-9][0-9][0-9])"; /** * Stores compiled patterns at first use, quasi-Singleton. */ private static Pattern mPattern = null; /*** * Tests if a specified file should be included in a file list. * * @param dir the directory in which the file was found. * @param name the name of the file. * * @return true if and only if the name should be included in the file list, * false otherwise. * * */ public boolean accept( File dir, String name) { //compile the pattern only once. if( mPattern == null ){ mPattern = Pattern.compile( mRegexExpression ); } return mPattern.matcher( name ).matches(); } }
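/*
 * Editor's illustrative sketch -- not part of the original Pegasus source.
 * It mirrors the property-to-argument mapping performed by
 * PDAX2MDAG.constructDAGManKnobs() and parseInt() on a plain
 * java.util.Properties object, so the DAGMan knob handling can be tried in
 * isolation. The class name, the main() driver and the property values used
 * below are assumptions introduced for illustration only.
 */
class DAGManKnobsSketch {

    /** The same property-name to DAGMan-option mapping used by PDAX2MDAG. */
    private static final String[][] DAGMAN_KNOBS = {
        { "pegasus.dagman.maxpre",  " -MaxPre "  },
        { "pegasus.dagman.maxpost", " -MaxPost " },
        { "pegasus.dagman.maxjobs", " -MaxJobs " },
        { "pegasus.dagman.maxidle", " -MaxIdle " },
    };

    /** Returns -1 for missing or non-numeric values, mirroring parseInt(). */
    private static int parseInt( String s ) {
        try {
            return Integer.parseInt( s );
        }
        catch ( Exception e ) {
            return -1;
        }
    }

    /** Builds the extra DAGMan argument fragment from the properties. */
    static String constructDAGManKnobs( java.util.Properties properties ) {
        StringBuilder sb = new StringBuilder();
        for ( String[] knob : DAGMAN_KNOBS ) {
            int value = parseInt( properties.getProperty( knob[0] ) );
            if ( value > 0 ) {
                //only knobs with a positive value are passed through
                sb.append( knob[1] ).append( value );
            }
        }
        return sb.toString();
    }

    public static void main( String[] args ) {
        java.util.Properties props = new java.util.Properties();
        props.setProperty( "pegasus.dagman.maxpre",  "2"  );   // assumed value
        props.setProperty( "pegasus.dagman.maxjobs", "50" );   // assumed value
        //prints " -MaxPre 2 -MaxJobs 50"
        System.out.println( constructDAGManKnobs( props ) );
    }
}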