/**
* Copyright 2007-2008 University Of Southern California
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.isi.pegasus.planner.cluster;
import edu.isi.pegasus.common.logging.LogManager;
import edu.isi.pegasus.common.logging.LogManagerFactory;
import edu.isi.pegasus.planner.classes.ADag;
import edu.isi.pegasus.planner.classes.AggregatedJob;
import edu.isi.pegasus.planner.classes.Job;
import edu.isi.pegasus.planner.classes.PCRelation;
import edu.isi.pegasus.planner.classes.PegasusBag;
import edu.isi.pegasus.planner.cluster.aggregator.JobAggregatorInstanceFactory;
import edu.isi.pegasus.planner.common.PegasusProperties;
import edu.isi.pegasus.planner.namespace.Pegasus;
import edu.isi.pegasus.planner.partitioner.Partition;
import edu.isi.pegasus.planner.partitioner.graph.GraphNode;
import edu.isi.pegasus.planner.provenance.pasoa.PPS;
import edu.isi.pegasus.planner.provenance.pasoa.XMLProducer;
import edu.isi.pegasus.planner.provenance.pasoa.pps.PPSFactory;
import edu.isi.pegasus.planner.provenance.pasoa.producer.XMLProducerFactory;
import java.util.*;
/**
* The horizontal clusterer that clusters jobs on the same level of the workflow.
*
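* <p>
* A typical invocation sequence, as driven by the clustering module
* (an illustrative sketch; the actual driver lives elsewhere in the planner,
* and dag, bag and partition are assumed to be constructed by it):
* <pre>
* Clusterer clusterer = new Horizontal();
* clusterer.initialize( dag, bag );
* //for every partition (level) of the workflow
* clusterer.determineClusters( partition );
* ADag clustered = clusterer.getClusteredDAG();
* </pre>
*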
* @author Karan Vahi
* @version $Revision$
*/
public class Horizontal implements Clusterer,
edu.isi.pegasus.planner.refiner.Refiner { //required for PASOA integration
/**
* The default collapse factor for collapsing jobs with same logical name
* scheduled onto the same execution pool.
*/
public static final int DEFAULT_COLLAPSE_FACTOR = 1;
/**
* A short description of the clusterer.
*/
public static final String DESCRIPTION = "Horizontal Clustering";
/**
* A singleton access to the job comparator.
*/
private static Comparator mJobComparator = null;
/**
* The handle to the logger object.
*/
protected LogManager mLogger;
/**
* The handle to the properties object holding all the properties.
*/
protected PegasusProperties mProps;
/**
* The handle to the job aggregator factory.
*/
protected JobAggregatorInstanceFactory mJobAggregatorFactory;
/**
* ADag object containing the jobs that have been scheduled by the site
* selector.
*/
private ADag mScheduledDAG;
/**
* Map to hold the jobs sorted by the label of the jobs in the DAX.
* The key is the logical job name and the value is the list of jobs with
* that logical name.
*
* This is no longer used, and will be removed later.
*/
private Map mJobMap;
/**
* A Map to store all the Job objects indexed by their logical ID found in
* the DAX. This should actually be in the ADag structure.
*/
private Map mSubInfoMap;
/**
* Map to hold the collapse values for the various execution pools. The
* values are obtained from the properties file, or can be obtained from the
* resource information catalog (a.k.a. MDS).
*/
private Map mCollapseMap;
/**
* Replacement table that identifies the corresponding fat (clustered) job for a job.
*/
private Map mReplacementTable;
/**
* The XML Producer object that records the actions.
*/
private XMLProducer mXMLStore;
/**
* The handle to the provenance store implementation.
*/
private PPS mPPS;
/**
* Singleton access to the job comparator.
*
* @return the job comparator.
*/
private Comparator jobComparator(){
//lazily instantiate and cache the singleton instance
if( mJobComparator == null ){
mJobComparator = new JobComparator();
}
return mJobComparator;
}
/**
* The default constructor.
*/
public Horizontal(){
mLogger = LogManagerFactory.loadSingletonInstance();
mJobAggregatorFactory = new JobAggregatorInstanceFactory();
}
/**
* Returns a reference to the workflow that is being refined by the refiner.
*
*
* @return ADAG object.
*/
public ADag getWorkflow(){
return this.mScheduledDAG;
}
/**
* Returns a reference to the XMLProducer, that generates the XML fragment
* capturing the actions of the refiner. This is used for provenance
* purposes.
*
* @return XMLProducer
*/
public XMLProducer getXMLProducer(){
return this.mXMLStore;
}
/**
* Initializes the Clusterer implementation.
*
* @param dag the workflow that is being clustered.
* @param bag the bag of objects that is useful for initialization.
*
* @throws ClustererException in case of error.
*/
public void initialize( ADag dag , PegasusBag bag ) throws ClustererException{
mScheduledDAG = dag;
mProps = bag.getPegasusProperties();
mJobAggregatorFactory.initialize( dag, bag );
mJobMap = new HashMap();
mCollapseMap = this.constructMap(mProps.getCollapseFactors());
mReplacementTable = new HashMap();
mSubInfoMap = new HashMap();
for(Iterator<GraphNode> it = mScheduledDAG.jobIterator();it.hasNext();){
//pass the jobs to the callback
GraphNode node = it.next();
Job job = (Job)node.getContent();
mSubInfoMap.put(job.getLogicalID(), job );
}
//load the XML producer and the PPS implementation
mXMLStore = XMLProducerFactory.loadXMLProducer( mProps );
mPPS = PPSFactory.loadPPS( this.mProps );
mXMLStore.add( "<workflow url=\"" + null + "\">" );
//call the begin workflow method
try{
mPPS.beginWorkflowRefinementStep( this, PPS.REFINEMENT_CLUSTER, false );
}
catch( Exception e ){
throw new ClustererException( "PASOA Exception", e );
}
//clear the XML store
mXMLStore.clear();
}
/**
* Determines the clusters for a partition. The partition is assumed to
* contain independent jobs, and multiple clusters may be created for the
* partition. Internally the jobs are grouped according to transformation name
* and then according to the execution site. Each group
* (having the same transformation name and scheduled on the same site) is then
* clustered.
* The number of clustered jobs created for each group is dependent on the
* following Pegasus profiles that can be associated with the jobs.
* <pre>
* 1) bundle (dictates the number of clustered jobs that are created)
* 2) collapse (the number of jobs that make a single clustered job)
* </pre>
*
* If both profiles are associated with the jobs in a group, the
* bundle profile overrides the collapse profile, as illustrated below.
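*
* For example (an illustrative sketch of the chunking behavior): for a
* group of 10 jobs mapped to the same site
* <pre>
* collapse = 3 results in clustered jobs of sizes 3, 3, 3, 1
* bundle   = 3 results in 3 clustered jobs of sizes 4, 3, 3
* </pre>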
*
* @param partition the partition for which the clusters need to be
* determined.
*
* @throws ClustererException in case of error.
*
* @see Pegasus#BUNDLE_KEY
* @see Pegasus#COLLAPSE_KEY
*/
public void determineClusters( Partition partition ) throws ClustererException {
Set s = partition.getNodeIDs();
List l = new ArrayList(s.size());
mLogger.log("Clustering jobs in partition " + partition.getID() +
" " + s,
LogManager.DEBUG_MESSAGE_LEVEL);
for(Iterator it = s.iterator();it.hasNext();){
Job job = (Job)mSubInfoMap.get(it.next());
l.add(job);
}
//group the jobs by their transformation names
Collections.sort( l, jobComparator() );
//traverse through the list and collapse jobs
//referring to same logical transformation
Job previous = null;
List clusterList = new LinkedList();
Job job = null;
for(Iterator it = l.iterator();it.hasNext();){
job = (Job)it.next();
if(previous == null ||
job.getCompleteTCName().equals(previous.getCompleteTCName())){
clusterList.add(job);
}
else{
//at boundary collapse jobs
collapseJobs(previous.getStagedExecutableBaseName(),clusterList,partition.getID());
clusterList = new LinkedList();
clusterList.add(job);
}
previous = job;
}
//cluster the last clusterList
if(previous != null){
collapseJobs(previous.getStagedExecutableBaseName(), clusterList, partition.getID());
}
}
/**
* An empty implementation of the callout, as state is maintained
* internally to determine the relations between the jobs.
*
* @param partitionID the id of a partition.
* @param parents the list of <code>String</code> objects that contain
* the id's of the parents of the partition.
*
* @throws ClustererException in case of error.
*/
public void parents( String partitionID, List parents ) throws ClustererException{
}
/**
* Collapses the jobs having the same logical name according to the sites
* where they are scheduled.
*
* @param name the logical name of the jobs in the list passed to
* this function.
* @param jobs the list of <code>Job</code> objects corresponding
* to the jobs that have the same logical name.
* @param partitionID the ID of the partition to which the jobs belong.
*/
private void collapseJobs( String name, List jobs, String partitionID ){
String key = null;
Job job = null;
List l = null;
//internal map that keeps the jobs according to the execution pool
Map tempMap = new java.util.HashMap();
int[] cFactor = new int[] {0, 0, 0, 0}; //the collapse factor for collapsing the jobs
AggregatedJob fatJob = null;
mLogger.log("Clustering jobs of type " + name,
LogManager.DEBUG_MESSAGE_LEVEL);
//traverse through all the jobs and order them by the
//pool on which they are scheduled
for(Iterator it = jobs.iterator();it.hasNext();){
job = (Job)it.next();
key = job.executionPool;
//check if the execution pool is already in the map
if(tempMap.containsKey(key)){
//add the job to the corresponding list.
l = (List)tempMap.get(key);
l.add(job);
}
else{
//first job mapped to this execution pool
l = new java.util.LinkedList();
l.add(job);
tempMap.put(key,l);
}
}
//iterate through the built up temp map to get jobs per execution pool
int size = -1;
//the id for the fatjobs. we want ids
//unique across the execution pools for a
//particular type of job being merged.
int id = 1;
for( Iterator it = tempMap.entrySet().iterator();it.hasNext(); ){
Map.Entry entry = (Map.Entry)it.next();
l = (List)entry.getValue();
size= l.size();
//the pool name on which the job is to run is the key
key = (String)entry.getKey();
if( size <= 1 ){
//no need to cluster one job. go to the next iteration
mLogger.log("\t No clustering of jobs mapped to execution site " + key,
LogManager.DEBUG_MESSAGE_LEVEL);
continue;
}
JobAggregator aggregator = mJobAggregatorFactory.loadInstance( (Job)l.get(0) );
if(aggregator.entryNotInTC(key)){
//no aggregator entry in the transformation catalog. go to the next iteration
mLogger.log("\t No clustering for jobs mapped to execution site " + key + " as no job aggregator entry in tc ",
LogManager.WARNING_MESSAGE_LEVEL);
continue;
}
//checks made ensure that l is not empty at this point
cFactor = getCollapseFactor( key, (Job) l.get(0), size );
if( cFactor[0] == 1 && cFactor[1] == 0 ){
mLogger.log("\t Collapse factor of (" + cFactor[0] + "," + cFactor[1] +
") determined for pool. " + key +
". Skipping clustering", LogManager.DEBUG_MESSAGE_LEVEL);
continue;
}
// Does the user prefer runtime based clustering?
if (mProps.getHorizontalClusterPreference() != null
&& mProps.getHorizontalClusterPreference().equalsIgnoreCase("runtime")) {
List<List<Job>> bins = null;
String sMaxRunTime = (String) ((Job) l.get( 0 )).vdsNS.get(Pegasus.MAX_RUN_TIME);
// Does the user prefer to cluster jobs into bins of a fixed capacity?
// If not, cluster jobs evenly into a fixed number of bins.
// The number of bins should be specified through clusters.num property
if (sMaxRunTime != null) {
double maxRunTime = -1;
try {
maxRunTime = Double.parseDouble(sMaxRunTime);
} catch (NumberFormatException e) {
throw new RuntimeException( "Profile key "
+ Pegasus.MAX_RUN_TIME
+ " is not a valid number.",
e );
}
mLogger.log( "\t Clustering jobs mapped to execution site " + key
+ " having maximum run time " + cFactor[2],
LogManager.DEBUG_MESSAGE_LEVEL );
Collections.sort(l, getBinPackingComparator());
mLogger.log(
"Job Type: " + ((Job) l.get( 0 )).getCompleteTCName()
+ " max runtime " + maxRunTime,
LogManager.DEBUG_MESSAGE_LEVEL );
mLogger.log( "Clustering into fixed capacity bins " + maxRunTime,
LogManager.DEBUG_MESSAGE_LEVEL );
bins = bestFitBinPack( l, maxRunTime );
} else {
int clusterNum = 1;
String bundle = (String) ((Job) l.get( 0 )).vdsNS.get( Pegasus.BUNDLE_KEY );
if (bundle != null) {
clusterNum = Integer.parseInt(bundle);
} else {
mLogger.log( "Neither " + Pegasus.MAX_RUN_TIME + ", nor " + Pegasus.BUNDLE_KEY +
" specified. Merging all tasks into one job",
LogManager.WARNING_MESSAGE_LEVEL );
}
mLogger.log( "Clustering into fixed number of bins " + clusterNum,
LogManager.DEBUG_MESSAGE_LEVEL );
Collections.sort(l, getBinPackingComparator());
bins = bestFitBinPack( l, clusterNum );
}
mLogger.log( "Jobs are merged into " + bins.size()
+ " clustered jobs.", LogManager.DEBUG_MESSAGE_LEVEL );
for (List<Job> bin : bins) {
fatJob = aggregator.constructAbstractAggregatedJob( bin,
name, constructID( partitionID, id ) );
updateReplacementTable( bin, fatJob );
// increment the id
id++;
// add the fat job to the dag
// use the method to add, else add explicitly to DagInfo
mScheduledDAG.add( fatJob );
// log the refiner action capturing the creation of the job
this.logRefinerAction( fatJob, aggregator );
}
//proceed to the group mapped to the next execution site
continue;
}
else {
//we used to do collapsing in chunks of 3 instead of picking up
//from the properties file. ceiling is (x + y - 1)/y
//cFactor = (size + 2)/3;
mLogger.log( "\t Clustering jobs mapped to execution site " + key
+ " with collapse factor " + cFactor[0] + ","
+ cFactor[1], LogManager.DEBUG_MESSAGE_LEVEL );
if (cFactor[0] >= size) {
// means collapse all the jobs in the list as a fat node
// Note: Passing a link to iterator might be more efficient,
// as
// this would only require a single traversal through the
// list
fatJob = aggregator.constructAbstractAggregatedJob(
l.subList( 0, size ), name,
constructID( partitionID, id ) );
updateReplacementTable( l.subList( 0, size ), fatJob );
// increment the id
id++;
// add the fat job to the dag
// use the method to add, else add explicitly to DagInfo
mScheduledDAG.add( fatJob );
// log the refiner action capturing the creation of the job
this.logRefinerAction( fatJob, aggregator );
} else {
// do collapsing in chunks of cFactor
int increment = 0;
for (int i = 0; i < size; i = i + increment) {
// compute the increment and decrement cFactor[1]
increment = (cFactor[1] > 0) ? cFactor[0] + 1
: cFactor[0];
cFactor[1]--;
if (increment == 1) {
// we can exit out of the loop as we do not want
// any merging for single jobs
break;
} else if ((i + increment) < size) {
fatJob = aggregator.constructAbstractAggregatedJob(
l.subList( i, i + increment ), name,
constructID( partitionID, id ) );
updateReplacementTable(
l.subList( i, i + increment ), fatJob );
} else {
fatJob = aggregator.constructAbstractAggregatedJob(
l.subList( i, size ), name,
constructID( partitionID, id ) );
updateReplacementTable( l.subList( i, size ),
fatJob );
}
// increment the id
id++;
// add the fat job to the dag
// use the method to add, else add explicitly to DagInfo
mScheduledDAG.add( fatJob );
// log the refiner action capturing the creation of the
// job
this.logRefinerAction( fatJob, aggregator );
}
}
}
}
//explicitly free the map
tempMap = null;
}
/**
* Perform best fit bin packing.
*
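* <p>
* For example (an illustrative sketch): with maxTime = 10 and job runtimes
* of 8, 5, 4, 3 and 2 (sorted in decreasing order), the jobs are packed
* into bins with combined runtimes [8, 2], [5, 4] and [3].
*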
* @param jobs
* List of jobs sorted in decreasing order of the job runtime.
* @param maxTime
* The maximum time for which the clustered job should run.
* @return List of List of Jobs, where each inner list is the set of jobs
* which should be clustered together so as to run in under maxTime.
*/
private List<List<Job>> bestFitBinPack(List<Job> jobs, double maxTime) {
List<List<Job>> bins = new LinkedList<List<Job>>();
List<List<Job>> returnBins = new LinkedList<List<Job>>();
List<Double> binTime = new LinkedList<Double>();
double minJobRunTime = Double.MAX_VALUE;
if (jobs != null && jobs.size() > 0) {
minJobRunTime = Double.parseDouble( getRunTime( jobs.get( jobs
.size() - 1 ) ) );
}
for (Job j : jobs) {
List<Job> bin;
double currentBinTime;
boolean isBreak = false;
double jobRunTime = Double.parseDouble( getRunTime( j ) );
mLogger.log( "Job " + j.getID() + " runtime " + jobRunTime,
LogManager.DEBUG_MESSAGE_LEVEL );
// Create first bin.
if (bins.size() == 0) {
bins.add( new LinkedList<Job>() );
binTime.add( 0, 0d );
}
// Loop through each open bin.
for (int i = 0, k = bins.size(); i < k; ++i) {
currentBinTime = binTime.get( i );
// Is the job runtime greater than the max allowed runtime? Then
// do not cluster this job.
if (maxTime < jobRunTime) {
mLogger.log( "Job " + j.getID() + " runtime " + jobRunTime
+ " is greater than clusters max run time "
+ maxTime + " specified by the Pegasus profile "
+ Pegasus.MAX_RUN_TIME,
LogManager.DEBUG_MESSAGE_LEVEL );
break;
}
// Can we fit the job in an existing bin?
if (maxTime >= currentBinTime + jobRunTime) {
bin = bins.get( i );
bin.add( j );
binTime.set( i, currentBinTime + jobRunTime );
isBreak = true;
} else if (i == k - 1) {
// We cannot fit the job in any of the open bins, so create
// a new one.
bin = new LinkedList<Job>();
bin.add( j );
bins.add( bin );
binTime.add( binTime.size(), jobRunTime );
}
// Either this bin is full, or it does not even have space to
// fit the job with the smallest run time. So lets avoid trying
// to fit jobs in this bin.
if (binTime.get( i ) + minJobRunTime > maxTime) {
returnBins.add( bins.remove( i ) );
binTime.remove( i );
// adjust the loop index and bound, as a bin was removed
--i;
--k;
}
// Job has been assigned a bin, no need to check other bins for
// space.
if (isBreak)
break;
}
}
returnBins.addAll( bins );
return returnBins;
}
/**
* Packs jobs into a fixed number of bins, always adding the next job to
* the bin with the smallest combined runtime.
*
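* <p>
* For example (an illustrative sketch): with maxBins = 2 and job runtimes
* of 8, 5, 4, 3 and 2 (sorted in decreasing order), the jobs are packed
* into two bins with combined runtimes [8, 3] and [5, 4, 2].
*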
* @param jobs List of jobs sorted in decreasing order of the job runtime.
* @param maxBins The fixed number of bins that should be created.
* @return List of List of Jobs, where each inner list is the set of jobs
* which should be clustered together, distributed across at most maxBins bins.
*/
private List<List<Job>> bestFitBinPack(List<Job> jobs, int maxBins) {
class Bin {
private List<Job> bin = new LinkedList<Job>();
private double time = 0;
public void addJob(Job j) {
bin.add(j);
double jobRunTime = Double.parseDouble(getRunTime(j));
time += jobRunTime;
}
public List<Job> getJobs() {
return bin;
}
public double getTime() {
return time;
}
}
PriorityQueue<Bin> bins = new PriorityQueue<Bin>(maxBins, new Comparator<Bin>() {
@Override
public int compare(Bin bin1, Bin bin2) {
return Double.compare( bin1.getTime(), bin2.getTime() );
}
});
// Initialize the bins, to the specified number of bins.
// If the number of jobs n is less than @maxBins then create n bins
maxBins = Math.min(maxBins, jobs.size());
for (int i = 0; i < maxBins; ++i) {
bins.add(new Bin());
}
for (Job j : jobs) {
Bin bin;
mLogger.log("Job " + j.getID() + " runtime " + getRunTime(j),
LogManager.DEBUG_MESSAGE_LEVEL);
// Add the job to the bin with the shortest combined runtime
bin = bins.poll();
bin.addJob(j);
bins.offer(bin);
}
List<List<Job>> returnBins = new LinkedList<List<Job>>();
for (Bin b : bins) {
mLogger.log("Bin Size: " + b.getTime(), LogManager.DEBUG_MESSAGE_LEVEL);
returnBins.add(b.getJobs());
}
return returnBins;
}
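/**
* Returns the expected runtime of a job, as specified by the
* Pegasus.RUNTIME_KEY profile, falling back to the deprecated
* Pegasus.DEPRECATED_RUNTIME_KEY profile.
*
* @param job the job whose runtime is required.
*
* @return the runtime as a String.
*
* @throws RuntimeException if neither profile is set for the job.
*/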
private String getRunTime(Job job) {
String sTmp = (String) job.vdsNS.get( Pegasus.RUNTIME_KEY );
if (sTmp != null && sTmp.length() > 0) {
return sTmp;
}
sTmp = (String) job.vdsNS.get(Pegasus.DEPRECATED_RUNTIME_KEY );
if (sTmp != null && sTmp.length() > 0) {
mLogger.log( "The profile " + Pegasus.DEPRECATED_RUNTIME_KEY
+ " will be deprecated. It will be replaced with "
+ Pegasus.RUNTIME_KEY, LogManager.WARNING_MESSAGE_LEVEL );
return sTmp;
}
throw new RuntimeException( "Profile Key: " + Pegasus.RUNTIME_KEY
+ " is not set for the job " + job.getID() );
}
/**
* The comparator is used to sort a collection of jobs in decreasing order
* of their run times as specified by the Pegasus.RUNTIME_KEY profile,
* falling back to the deprecated Pegasus.DEPRECATED_RUNTIME_KEY profile.
*
* @return the comparator.
*/
private Comparator<Job> getBinPackingComparator() {
return new Comparator<Job>() {
@Override
public int compare(Job job1, Job job2) {
String s1 = getRunTime( job1 );
String s2 = getRunTime( job2 );
double jobTime1 = Double.parseDouble( s1 );
double jobTime2 = Double.parseDouble( s2 );
//sort in decreasing order of runtime
return Double.compare( jobTime2, jobTime1 );
}
private String getRunTime (Job job) {
String sTmp = (String) job.vdsNS.get( Pegasus.RUNTIME_KEY );
if (sTmp != null && sTmp.length() > 0) {
return sTmp;
}
sTmp = (String) job.vdsNS.get(Pegasus.DEPRECATED_RUNTIME_KEY );
if (sTmp != null && sTmp.length() > 0) {
return sTmp;
}
throw new RuntimeException( "Profile Key: "
+ Pegasus.RUNTIME_KEY + " is not set for the job "
+ job.getID() );
}
};
}
/**
* Returns the clustered workflow.
*
* @return the <code>ADag</code> object corresponding to the clustered workflow.
*
* @throws ClustererException in case of error.
*/
public ADag getClusteredDAG() throws ClustererException{
//do all the replacement of jobs in the main data structure
//that needs to be returned
replaceJobs();
//should be in the done method, which is currently not there in the
//Clusterer API
try{
mPPS.endWorkflowRefinementStep( this );
}
catch( Exception e ){
throw new ClustererException( "PASOA Exception while logging end of clustering refinement", e );
}
return mScheduledDAG;
}
/**
* Returns a textual description of the clustering implementation.
*
* @return a short textual description
*/
public String description(){
return this.DESCRIPTION;
}
/**
* Records the refiner action into the Provenance Store as an XML fragment.
*
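* The XML fragment recorded is of the form shown below (an illustrative
* sketch; the actual job and type values depend on the clustered job and
* the aggregator used):
* <pre>
* &lt;clustered job="P1_ID1" type="seqexec"&gt;
*    &lt;constituent job="job1"/&gt;
*    &lt;constituent job="job2"/&gt;
* &lt;/clustered&gt;
* </pre>
*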
* @param clusteredJob the clustered job
* @param aggregator the aggregator that was used to create this clustered job
*/
protected void logRefinerAction( AggregatedJob clusteredJob, JobAggregator aggregator ){
StringBuffer sb = new StringBuffer();
String indent = "\t";
sb.append( indent );
sb.append( "<clustered ");
appendAttribute( sb, "job", clusteredJob.getName() );
appendAttribute( sb, "type", aggregator.getClusterExecutableLFN() );
sb.append( ">" ).append( "\n" );
//traverse through all the constituent jobs
String newIndent = indent + "\t";
List jobs = new ArrayList();
for( Iterator it = clusteredJob.constituentJobsIterator(); it.hasNext(); ){
Job job = ( Job )it.next();
jobs.add( job.getName() );
sb.append( newIndent );
sb.append( "<constitutent " );
appendAttribute( sb, "job", job.getName() );
sb.append( "/>" );
sb.append( "\n" );
}
sb.append( indent );
sb.append( "</clustered>" );
sb.append( "\n" );
//log the action for creating the relationship assertions
try{
mPPS.clusteringOf( clusteredJob.getName(), jobs );
}
catch( Exception e ){
throw new RuntimeException( "PASOA Exception while logging relationship assertion for clustering ",
e );
}
mXMLStore.add( sb.toString() );
}
/**
* Appends an xml attribute to the xml feed.
*
* @param xmlFeed the xmlFeed to which xml is being written
* @param key the attribute key
* @param value the attribute value
*/
protected void appendAttribute( StringBuffer xmlFeed, String key, String value ){
xmlFeed.append( key ).append( "=" ).append( "\"" ).append( value ).
append( "\" " );
}
/**
* Returns the collapse factor that is used to chunk up the jobs of a
* particular type on a pool. The collapse factor is determined by
* getting the collapse key in the Pegasus namespace/profile associated with the
* job in the transformation catalog. Right now the value in the transformation
* catalog overrides the per-pool value specified in the properties file.
* There are two orthogonal notions of bundling and collapsing. In case the
* bundle key is specified, it ends up overriding the collapse key, and
* the bundle value is used to generate the collapse values.
*
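* For example (an illustrative sketch, assuming no runtime related profiles
* are set): for size = 10 jobs mapped to a site,
* <pre>
* collapse = 3 returns {3, 0, 0, 0} i.e. chunks of sizes 3, 3, 3, 1
* bundle   = 3 returns {3, 1, 0, 0} i.e. 3 chunks of sizes 4, 3, 3
* </pre>
*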
* @param pool the pool where the chunking up is occurring
* @param job the <code>Job</code> object containing the job that
* is to be chunked up together.
* @param size the number of jobs that refer to the same logical
* transformation and are scheduled on the same execution pool.
*
* @return int array of size 4, where int[0] is the collapse factor,
* int[1] is the number of chunks for which the chunk size is int[0] + 1,
* int[2] is the maximum time for which the clustered job should run, and
* int[3] is the time for which a single job would run.
*/
public int[] getCollapseFactor(String pool, Job job, int size) {
String factor = null;
int result[] = new int[] { 0, 0, 0, 0 };
// the job should have the collapse key from the TC if
// specified by the user
factor = (String) job.vdsNS.get( Pegasus.COLLAPSE_KEY );
// ceiling is (x + y -1)/y
String bundle = (String) job.vdsNS.get( Pegasus.BUNDLE_KEY );
if (bundle != null) {
int b = Integer.parseInt( bundle );
result[0] = size / b;
result[1] = size % b;
return result;
// doing no boundary condition checks
// return (size + b -1)/b;
}
String runTime = (String) job.vdsNS.get(Pegasus.DEPRECATED_RUNTIME_KEY );
String clusterTime = (String) job.vdsNS.get( Pegasus.MAX_RUN_TIME );
// compute the collapse factor: the value from the TC takes precedence
// over the per-pool value from the properties file, which in turn takes
// precedence over the default value
if (factor == null) {
factor = (String) mCollapseMap.get( pool );
result[0] = (factor == null) ? Horizontal.DEFAULT_COLLAPSE_FACTOR
: Integer.parseInt( factor );
} else {
result[0] = Integer.parseInt( factor );
}
result[2] = clusterTime == null || clusterTime.length() == 0 ? 0
: Integer.parseInt( clusterTime );
result[3] = runTime == null || runTime.length() == 0 ? 0 : Integer
.parseInt( runTime );
return result;
}
/**
* Given an integer id, returns a string id that is used for the clustered
* job.
*
* @param partitionID the id of the partition.
* @param id the integer id from which the string id has to be
* constructed. The id should be unique for all the
* clustered jobs that are formed for a particular
* partition.
*
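* For example, constructID( "1", 2 ) returns "P1_ID2".
*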
* @return the id of the clustered job
*/
public String constructID(String partitionID, int id){
StringBuffer sb = new StringBuffer(8);
sb.append("P").append(partitionID).append("_");
sb.append("ID").append(id);
return sb.toString();
}
/**
* Updates the replacement table.
*
* @param jobs the List of jobs that is being replaced.
* @param mergedJob the mergedJob that is replacing the jobs in the list.
*/
private void updateReplacementTable(List jobs, Job mergedJob){
if(jobs == null || jobs.isEmpty())
return;
String mergedJobName = mergedJob.jobName;
for(Iterator it = jobs.iterator();it.hasNext();){
Job job = (Job)it.next();
//put the entry in the replacement table
mReplacementTable.put(job.jobName,mergedJobName);
}
}
/**
* Puts the jobs in the abstract workflow into the job map that is indexed
* by the logical name of the jobs.
*/
private void assimilateJobs(){
List l = null;
String key = null;
for( Iterator<GraphNode> it = mScheduledDAG.jobIterator();it.hasNext(); ){
GraphNode node = it.next();
Job job = ( Job)node.getContent();
key = job.logicalName;
//check if the job logical name is already in the map
if(mJobMap.containsKey(key)){
//add the job to the corresponding list.
l = (List)mJobMap.get(key);
l.add(job);
}
else{
//first instance of this logical name
l = new java.util.LinkedList();
l.add(job);
mJobMap.put(key,l);
}
}
}
/**
* Constructs a map with the numbers/values for the collapsing factors to
* collapse the nodes of the same type. The user specifies these through
* the properties file. The value of the property is of the form
* poolname1=value,poolname2=value....
*
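* For example, the value "isi=5,ncsa=10" (illustrative site names) results
* in a map from isi to 5 and ncsa to 10.
*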
* @param propValue the value of the property obtained from the properties file.
*
* @return the constructed map.
*/
private Map constructMap(String propValue) {
Map map = new java.util.TreeMap();
if (propValue != null) {
StringTokenizer st = new StringTokenizer(propValue, ",");
while (st.hasMoreTokens()) {
String raw = st.nextToken();
int pos = raw.indexOf('=');
if (pos > 0) {
map.put(raw.substring(0, pos).trim(),
raw.substring(pos + 1).trim());
}
}
}
return map;
}
/**
* Replaces the clustered jobs in the workflow, updating the relations/edges
* in the local graph structure accordingly.
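* <p>
* For example (illustrative): if jobs j1 and j2 are merged into the
* clustered job merged_1, an edge j1 -&gt; j3 is rewritten as
* merged_1 -&gt; j3, duplicate edges are dropped, and j1 and j2 are
* removed from the workflow.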
*/
private void replaceJobs(){
boolean val = false;
List l = null;
List nl = null;
Job sub = new Job();
String msg;
//Set mergedEdges = new java.util.HashSet();
//this is a temporary measure till the hashing is sorted out correctly
List<PCRelation> mergedEdges = new java.util.ArrayList<PCRelation>(mScheduledDAG.size());
//traverse the edges and do appropriate replacements
for( Iterator<GraphNode> it = mScheduledDAG.jobIterator(); it.hasNext(); ){
GraphNode node = it.next();
Job childJob = (Job)node.getContent();
for( GraphNode parentNode: node.getParents() ){
Job parentJob = (Job)parentNode.getContent();
PCRelation rel = new PCRelation( parentJob.getID(), childJob.getID());
String parent = rel.getParent();
String child = rel.getChild();
msg = ("\n Replacing " + rel);
String value = (String)mReplacementTable.get(parent);
if(value != null){
rel.parent = value;
}
value = (String)mReplacementTable.get(child);
if(value != null){
rel.child = value;
}
msg += (" with " + rel);
//put in the merged edges set
if(!mergedEdges.contains(rel)){
val = mergedEdges.add(rel);
msg += "Add to set : " + val;
}
else{
msg += "\t Duplicate Entry for " + rel;
}
mLogger.log( msg, LogManager.DEBUG_MESSAGE_LEVEL );
}
}
//the final edges need to be updated
mScheduledDAG.resetEdges();
for( PCRelation pc: mergedEdges){
mScheduledDAG.addEdge( pc.getParent(), pc.getChild());
}
//PM-747 once new edges are added, then remove
//the original nodes that are now clustered
for( Iterator it = mReplacementTable.entrySet().iterator(); it.hasNext(); ){
Map.Entry entry = (Map.Entry)it.next();
String key = (String)entry.getKey();
mLogger.log("Replacing job " + key +" with " + entry.getValue(),
LogManager.DEBUG_MESSAGE_LEVEL);
//remove the old job
//remove by just creating a subinfo object with the same key
sub.jobName = key;
sub.setJobType( Job.COMPUTE_JOB );
val = mScheduledDAG.remove(sub);
if(val == false){
throw new RuntimeException("Removal of job " + key + " while clustering not successful");
}
}
mLogger.log("All clustered jobs removed from the workflow",
LogManager.DEBUG_MESSAGE_LEVEL);
}
/**
* A utility method to print short description of jobs in a list.
*
* @param l the list of <code>Job</code> objects
*/
private void printList(List l){
for(Iterator it = l.iterator();it.hasNext();){
Job job = (Job)it.next();
System.out.print( " "+ /*job.getCompleteTCName() +*/
"[" + job.logicalId + "]");
}
}
/**
* A job comparator that compares jobs according to their
* transformation names. It is applied to group jobs in a particular partition
* according to the underlying transformation that is referred to.
* <p>
* This comparator is not consistent with the Job.equals(Object) method.
* Hence, it should not be used in sorted sets or Maps.
*/
private static class JobComparator implements Comparator{
/**
* Compares this object with the specified object for order. Returns a
* negative integer, zero, or a positive integer if the first argument is
* less than, equal to, or greater than the specified object. The
* Jobs are compared by their transformation name.
*
* This implementation is not consistent with the
* Job.equals(Object) method. Hence, should not be used in sorted
* Sets or Maps.
*
* @param o1 is the first object to be compared.
* @param o2 is the second object to be compared.
*
* @return a negative number, zero, or a positive number, if the
* object compared against is less than, equals or greater than
* this object.
* @exception ClassCastException if the specified object's type
* prevents it from being compared to this Object.
*/
public int compare(Object o1, Object o2) {
if (o1 instanceof Job && o2 instanceof Job) {
return ( (Job) o1).getCompleteTCName().compareTo( ( (
Job) o2).getCompleteTCName());
}
else {
throw new ClassCastException("Objects being compared are not SubInfo");
}
}
}
}