/**
* Copyright 2007-2013 University Of Southern California
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.isi.pegasus.planner.refiner;
import edu.isi.pegasus.common.logging.LogManager;
import edu.isi.pegasus.planner.catalog.site.classes.SiteStore;
import edu.isi.pegasus.planner.classes.ADag;
import edu.isi.pegasus.planner.classes.Job;
import edu.isi.pegasus.planner.classes.PegasusBag;
import edu.isi.pegasus.planner.classes.PegasusFile;
import edu.isi.pegasus.planner.classes.PlannerOptions;
import edu.isi.pegasus.planner.common.PegasusProperties;
import edu.isi.pegasus.planner.parser.DAXParserFactory;
import edu.isi.pegasus.planner.parser.Parser;
import edu.isi.pegasus.planner.parser.dax.Callback;
import edu.isi.pegasus.planner.parser.dax.DAXParser;
import edu.isi.pegasus.planner.partitioner.graph.GraphNode;
import edu.isi.pegasus.planner.test.DefaultTestSetup;
import edu.isi.pegasus.planner.test.TestSetup;
import java.io.File;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Test;
import static org.junit.Assert.*;
import org.junit.Before;
import org.junit.BeforeClass;
/**
 * A JUnit Test to test the DataReuseEngine
 *
 * @author Karan Vahi
 */
public class DataReuseEngineTest {

    /**
     * The basename of the properties file (in the test input directory) used
     * to configure each test.
     */
    private static final String PROPERTIES_BASENAME = "properties";

    //per-test fixtures, populated in setUp() and released in tearDown()
    private PegasusBag mBag;

    private PegasusProperties mProps;

    private LogManager mLogger;

    private TestSetup mTestSetup;

    //monotonically increasing counter used to tag each test's log events
    private static int mTestNumber = 1;

    @BeforeClass
    public static void setUpClass() {
    }

    @AfterClass
    public static void tearDownClass() {
    }

    public DataReuseEngineTest() {
    }

    /**
     * Setup the logger and properties that all test functions require
     */
    @Before
    public final void setUp() {
        mTestSetup = new DataReuseEngineTestSetup();
        mBag = new PegasusBag();

        mTestSetup.setInputDirectory( this.getClass() );
        System.out.println( "Input Test Dir is " + mTestSetup.getInputDirectory() );

        mProps = mTestSetup.loadPropertiesFromFile( PROPERTIES_BASENAME, this.getPropertyKeysForSanitization() );
        mBag.add( PegasusBag.PEGASUS_PROPERTIES, mProps );

        mLogger = mTestSetup.loadLogger( mProps );
        mLogger.setLevel( LogManager.DEBUG_MESSAGE_LEVEL );
        mLogger.logEventStart( "test.refiner.datareuse", "setup", "0" );
        mBag.add( PegasusBag.PEGASUS_LOGMANAGER, mLogger );
        mBag.add( PegasusBag.PLANNER_OPTIONS, mTestSetup.loadPlannerOptions() );
        mLogger.logEventCompletion();
    }

    /**
     * Test for cascading of data reuse.
     */
    @Test
    public void testCascading() {
        mLogger.logEventStart( "test.refiner.datareuse", "set", Integer.toString(mTestNumber++) );

        ADag dax = ((DataReuseEngineTestSetup)mTestSetup).loadDAX( mBag, "pipeline.dax" );
        MyReplicaCatalogBridge rcb = new MyReplicaCatalogBridge( dax, mBag );
        rcb.addFilesInReplica( pipelineIntermediateFilesInRC() );

        DataReuseEngine engine = new DataReuseEngine( dax, mBag );
        engine.reduceWorkflow(dax, rcb);

        Job[] actualDeletedJobs = (Job[]) engine.getDeletedJobs().toArray( new Job[0] );
        String[] expectedDeletedJobs ={ "add_replace_ID0000005","alignment_to_reference_ID0000008", "dedup_ID0000006",
                                        "indel_realign_ID0000003", "realign_target_creator_ID0000004", "reduce_reads_ID0000002",
                                        "sort_sam_ID0000007", "unified_genotyper_indel_ID0000011", "unified_genotyper_snp_ID0000009",};

        assertArrayEquals( "Deleted Jobs don't match ", expectedDeletedJobs, toSortedStringArray(actualDeletedJobs) );

        mLogger.logEventCompletion();
        System.out.println("\n");
    }

    /**
     * Tests the cascading of deletion of jobs upwards, where the job to be deleted
     * because of cascading has an intermediate output file that exists in the RC
     * Normally, the job will not be deleted in the cascading phase as
     * the output file is required by the user, unless it was already identified
     * for deletion in the first pass.
     *
     */
    @Test
    public void testCascadingIntermediateOutputInRC() {
        mLogger.logEventStart( "test.refiner.datareuse", "set", Integer.toString(mTestNumber++) );

        ADag dax = ((DataReuseEngineTestSetup)mTestSetup).loadDAX( mBag, "blackdiamond.dax" );
        MyReplicaCatalogBridge rcb = new MyReplicaCatalogBridge( dax, mBag );

        //retrieve the right findrange job and make sure
        //one of the output files has transfer set to true
        //and other has it set to false.
        GraphNode n = dax.getNode( "findrange_ID0000003");
        Job findrange = (Job) n.getContent();
        for ( PegasusFile pf : findrange.getOutputFiles() ){
            System.out.println( pf );
            if ( pf.getLFN().equals( "f.c2") ){
                pf.setTransferFlag( "true");
            }
            else if ( pf.getLFN().equals( "f.c2'") ){
                pf.setTransferFlag( "false");
            }
            System.out.println( pf );
        }

        Set<String> filesInRC = new HashSet<>();
        filesInRC.add( "f.d");
        filesInRC.add( "f.c2" );//only the output file with transfer set to true is in RC
        rcb.addFilesInReplica(filesInRC);

        DataReuseEngine engine = new DataReuseEngine( dax, mBag );
        engine.reduceWorkflow(dax, rcb);

        Job[] actualDeletedJobs = (Job[]) engine.getDeletedJobs().toArray( new Job[0] );

        //findrange_ID0000003 is deleted in the cascading phase
        //because user only wants f.c2 staged to output site and
        //that exists in the RC somewhere
        String[] expectedDeletedJobs ={"analyze_ID0000004" ,"findrange_ID0000003", };

        assertArrayEquals( "Deleted Jobs don't match ", expectedDeletedJobs, toSortedStringArray(actualDeletedJobs) );

        mLogger.logEventCompletion();
        System.out.println("\n");
    }

    /**
     * Test for reducing the whole workflow.
     * In this test, some of intermediate jobs, have output files marked with
     * transfer set to true. Hence, those jobs are only removed, if the intermediate
     * files also exist in the Replica Catalog
     */
    @Test
    public void testFullReduction() {
        mLogger.logEventStart( "test.refiner.datareuse.fullreduction", "set", Integer.toString(mTestNumber++) );

        ADag dax = ((DataReuseEngineTestSetup)mTestSetup).loadDAX( mBag, "pipeline.dax" );
        MyReplicaCatalogBridge rcb = new MyReplicaCatalogBridge( dax, mBag );

        //in addition to the intermediate files, the final outputs are in the RC
        Set<String> filesInRC = pipelineIntermediateFilesInRC();
        filesInRC.add( "filtered_indel.vcf");
        filesInRC.add( "filtered_snp.vcf");
        rcb.addFilesInReplica(filesInRC);

        DataReuseEngine engine = new DataReuseEngine( dax, mBag );
        engine.reduceWorkflow(dax, rcb);

        Job[] actualDeletedJobs = (Job[]) engine.getDeletedJobs().toArray( new Job[0] );
        String[] expectedDeletedJobs ={ "add_replace_ID0000005", "alignment_to_reference_ID0000008", "dedup_ID0000006",
                                        "filtering_indel_ID0000012", "filtering_snp_ID0000010", "indel_realign_ID0000003",
                                        "realign_target_creator_ID0000004", "reduce_reads_ID0000002", "sort_sam_ID0000007",
                                        "unified_genotyper_indel_ID0000011","unified_genotyper_snp_ID0000009", };

        assertArrayEquals( "Deleted Jobs don't match ", expectedDeletedJobs, toSortedStringArray(actualDeletedJobs) );

        mLogger.logEventCompletion();
        System.out.println("\n");
    }

    /**
     * Test for reducing the whole workflow.
     *
     * In this test, only the leaf jobs, have output files marked with
     * transfer set to true. Hence for full reduction, only the outputs of
     * the leaf jobs need to be present in the Replica Catalog. All other
     * intermediate files in the workflow have transfer set to false
     */
    @Test
    public void testFullReductionLeafDAX() {
        mLogger.logEventStart( "test.refiner.datareuse.fullreduction-leaf", "set", Integer.toString(mTestNumber++) );

        //only the leaf jobs have the transfer set to true for output files
        ADag dax = ((DataReuseEngineTestSetup)mTestSetup).loadDAX( mBag, "pipeline-leaf.dax" );
        MyReplicaCatalogBridge rcb = new MyReplicaCatalogBridge( dax, mBag );

        Set<String> filesInRC = new HashSet<>();
        filesInRC.add( "filtered_indel.vcf");
        filesInRC.add( "filtered_snp.vcf");
        rcb.addFilesInReplica(filesInRC);

        DataReuseEngine engine = new DataReuseEngine( dax, mBag );
        engine.reduceWorkflow(dax, rcb);

        Job[] actualDeletedJobs = (Job[]) engine.getDeletedJobs().toArray( new Job[0] );
        String[] expectedDeletedJobs ={ "add_replace_ID0000005", "alignment_to_reference_ID0000008", "dedup_ID0000006",
                                        "filtering_indel_ID0000012", "filtering_snp_ID0000010", "indel_realign_ID0000003",
                                        "realign_target_creator_ID0000004", "reduce_reads_ID0000002", "sort_sam_ID0000007",
                                        "unified_genotyper_indel_ID0000011", "unified_genotyper_snp_ID0000009", };

        assertArrayEquals( "Deleted Jobs don't match ", expectedDeletedJobs, toSortedStringArray(actualDeletedJobs) );

        mLogger.logEventCompletion();
        System.out.println("\n");
    }

    /**
     * Test for partial data reuse.
     */
    @Test
    public void testPartialDataReuse() {
        mLogger.logEventStart( "test.refiner.datareuse", "set", Integer.toString(mTestNumber++) );

        ADag dax = ((DataReuseEngineTestSetup)mTestSetup).loadDAX( mBag, "blackdiamond.dax" );
        MyReplicaCatalogBridge rcb = new MyReplicaCatalogBridge( dax, mBag );

        //turn on partial data reuse
        mProps.setProperty( "pegasus.data.reuse.scope", "partial");
        try {
            //all output files are in the replica catalog
            //however findrange_ID0000002 output file needs to be checked for
            //in the RC for datareuse. PM-774
            Set<String> filesInRC = new HashSet<>();
            filesInRC.add( "f.a" );
            filesInRC.add( "f.b1");
            filesInRC.add( "f.b2" );
            filesInRC.add( "f.c1" );
            filesInRC.add( "f.c2");
            filesInRC.add( "f.d");
            rcb.addFilesInReplica(filesInRC);

            DataReuseEngine engine = new DataReuseEngine( dax, mBag );
            engine.reduceWorkflow(dax, rcb);

            Job[] actualDeletedJobs = (Job[]) engine.getDeletedJobs().toArray( new Job[0] );
            String[] expectedDeletedJobs ={ "findrange_ID0000002",};

            assertArrayEquals( "Deleted Jobs don't match ", expectedDeletedJobs, toSortedStringArray(actualDeletedJobs) );

            mLogger.logEventCompletion();
            System.out.println("\n");
        }
        finally {
            //always restore the default scope, even if an assertion failed,
            //so the property cannot leak into other code paths
            mProps.removeProperty( "pegasus.data.reuse.scope") ;
        }
    }

    @After
    public void tearDown() {
        mLogger = null;
        mProps = null;
        mBag = null;
        mTestSetup = null;
    }

    /**
     * Returns the set of intermediate files of the pipeline.dax workflow that
     * the tests register as existing in the Replica Catalog. Shared between
     * testCascading and testFullReduction.
     *
     * @return a mutable set of LFN's, so callers can add more entries
     */
    private Set<String> pipelineIntermediateFilesInRC() {
        return new HashSet<>( Arrays.asList(
                "HN001_addrepl.bai",
                "HN001_addrepl.bam",
                "HN001_indel_realigned.bai",
                "HN001_indel_realigned.bam",
                "HN001_aligned_reads.sam",
                "HN001_reduced_reads.bai",
                "HN001_reduced_reads.bam",
                "raw_indel.vcf",
                "raw_snp.vcf" ) );
    }

    /**
     * Convenience method that maps an array of jobs to a sorted array of
     * their ID's, so that test expectations are order independent.
     *
     * @param array  the jobs
     *
     * @return sorted array of job ID's
     */
    protected String[] toSortedStringArray( Job[] array ){
        String[] result = new String[array.length];
        int i = 0;
        for( Job job: array){
            result[i++] = job.getID();
        }
        Arrays.sort( result );
        return result;
    }

    /**
     * Returns the list of property keys that should be sanitized
     *
     * @return List<String>
     */
    protected List<String> getPropertyKeysForSanitization(){
        return new LinkedList<>();
    }

    /**
     * A ReplicaCatalogBridge stub that lets tests dictate exactly which
     * files the Replica Catalog claims to contain.
     */
    private static class MyReplicaCatalogBridge extends ReplicaCatalogBridge {

        //the LFN's that the stubbed catalog reports as existing
        private Set<String> mFiles;

        public MyReplicaCatalogBridge(ADag dax, PegasusBag bag) {
            super( dax, bag );
        }

        public void addFilesInReplica( Set<String> files ){
            this.mFiles = files;
        }

        //raw return type retained to match the superclass signature
        public Set getFilesInReplica(){
            return this.mFiles;
        }
    }
}
/**
 * A default test setup implementation for the junit tests. Delegates to
 * DefaultTestSetup, pins the input directory to the datareuse subdirectory,
 * and additionally knows how to parse a DAX file into an ADag.
 *
 * @author Karan Vahi
 */
class DataReuseEngineTestSetup implements TestSetup {

    /**
     * The Default TestSetup that this delegates to.
     */
    private DefaultTestSetup mDefaultTestSetup;

    /**
     * The default constructor.
     */
    public DataReuseEngineTestSetup(){
        mDefaultTestSetup = new DefaultTestSetup();
    }

    /**
     * Set the input directory for the test on the basis of the classname of test class
     *
     * @param testClass  the test class.
     */
    public void setInputDirectory( Class testClass ){
        mDefaultTestSetup.setInputDirectory(testClass);
        //append datareuse to the input directory
        mDefaultTestSetup.setInputDirectory( mDefaultTestSetup.getInputDirectory() + File.separator + "datareuse");
    }

    /**
     * Set the input directory for the test.
     *
     * @param directory  the directory
     */
    public void setInputDirectory( String directory ){
        mDefaultTestSetup.setInputDirectory(directory);
    }

    /**
     * Returns the input directory set by the test.
     *
     * @return
     */
    public String getInputDirectory(){
        return mDefaultTestSetup.getInputDirectory();
    }

    /**
     * Loads up PegasusProperties properties.
     *
     * @param sanitizeKeys  list of keys to be sanitized
     *
     * @return
     */
    public PegasusProperties loadProperties( List<String> sanitizeKeys ){
        return mDefaultTestSetup.loadProperties( sanitizeKeys );
    }

    /**
     * Loads up properties from the input directory for the test.
     *
     * @param propertiesBasename  basename of the properties file in the input directory.
     * @param sanitizeKeys        list of keys to be sanitized . relative paths replaced
     *                            with full path on basis of test input directory.
     *
     * @return
     */
    public PegasusProperties loadPropertiesFromFile( String propertiesBasename, List<String> sanitizeKeys){
        return mDefaultTestSetup.loadPropertiesFromFile(propertiesBasename, sanitizeKeys);
    }

    /**
     * Loads the logger from the properties and sets default level to INFO
     *
     * @param properties
     *
     * @return
     */
    public LogManager loadLogger( PegasusProperties properties ){
        return mDefaultTestSetup.loadLogger(properties);
    }

    /**
     * Loads the planner options for the test
     *
     * @return
     */
    public PlannerOptions loadPlannerOptions( ){
        PlannerOptions options = new PlannerOptions();
        options.setOutputSite( "local" );
        return options;
    }

    /**
     * Parses and loads the DAX
     *
     * @param bag  the bag of Pegasus initialization objects
     * @param dax  the dax file basename in the input directory
     *
     * @return
     */
    public ADag loadDAX( PegasusBag bag, String dax ){
        //resolve the basename against the test input directory, without
        //reassigning the parameter
        String daxFile = this.getInputDirectory() + File.separator + dax;

        //load the parser and parse the dax
        Parser p = (Parser)DAXParserFactory.loadDAXParser( bag, "DAX2CDAG", daxFile );
        Callback cb = ((DAXParser)p).getDAXCallback();
        p.startParser( daxFile );

        return (ADag)cb.getConstructedObject();
    }

    /**
     * Loads up the SiteStore with the sites passed in list of sites.
     *
     * @param props   the properties
     * @param logger  the logger
     * @param sites   the list of sites to load
     *
     * @return the SiteStore
     */
    public SiteStore loadSiteStore( PegasusProperties props , LogManager logger, List<String> sites ){
        return mDefaultTestSetup.loadSiteStore(props, logger, sites);
    }

    /**
     * Loads up the SiteStore with the sites passed in list of sites.
     *
     * @param props   the properties
     * @param logger  the logger
     * @param sites   the list of sites to load
     *
     * @return the SiteStore
     */
    public SiteStore loadSiteStoreFromFile( PegasusProperties props , LogManager logger, List<String> sites ){
        return mDefaultTestSetup.loadSiteStoreFromFile( props, logger, sites );
    }
}