package org.seqcode.deepseq.experiments;

import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import org.seqcode.data.seqdata.SeqDataLoader;
import org.seqcode.deepseq.hitloaders.HitLoader;
import org.seqcode.deepseq.hitloaders.HitLoaderFactory;
import org.seqcode.genome.Genome;
import org.seqcode.genome.GenomeConfig;
import org.seqcode.gseutils.Pair;

/**
 * ExperimentManager acts as an interface to all experiment conditions and replicates.
 * This class serves mainly to initialize the experiment tree.
 *
 * @author Shaun Mahony
 * @version	%I%, %G%
 */
public class ExperimentManager {

	protected ExptConfig econfig;
	protected SeqDataLoader sdloader = null;
	protected Genome gen;
	protected HitLoaderFactory hlfactory;

	//Experiment tree elements
	protected HashMap<String, HitLoader> loaders = new HashMap<String,HitLoader>();
	protected List<Sample> samples = new ArrayList<Sample>();
	protected List<ControlledExperiment> replicates = new ArrayList<ControlledExperiment>();
	protected List<ExperimentCondition> conditions = new ArrayList<ExperimentCondition>();
	protected List<ExperimentTarget> targets = new ArrayList<ExperimentTarget>();
	protected List<ExperimentType> expttypes = new ArrayList<ExperimentType>();

	//Lookups
	protected HashMap<ExperimentCondition, Integer> conditionIndex = new HashMap<ExperimentCondition, Integer>();
	protected HashMap<Integer, ExperimentCondition> indexedCondition = new HashMap<Integer, ExperimentCondition>();
	protected HashMap<String, ExperimentCondition> namedCondition = new HashMap<String, ExperimentCondition>();

	/**
	 * Constructor: equivalent to {@code ExperimentManager(c, true)}, i.e. sample caches are initialized.
	 * @param c : ExptConfig
	 */
	public ExperimentManager(ExptConfig c){this(c, true);}

	/**
	 * Constructor:
	 *    Using arguments loaded by the ExptConfig, initialize (in this order):
	 *    HitLoaders, Samples, Replicates, Conditions, Targets, ExptTypes.
	 * @param c : ExptConfig
	 * @param loadReads : boolean. for some applications, reads do not have to be loaded. Use with caution.
	 *                    When false, the sample caches are not initialized.
	 */
	public ExperimentManager(ExptConfig c, boolean loadReads){
		econfig = c;
		gen = econfig.getGenome();
		hlfactory = new HitLoaderFactory(econfig);
		HashMap<String, Sample> allSamples = new HashMap<String, Sample>();
		HashMap<String, ControlledExperiment> allReplicates = new HashMap<String, ControlledExperiment>();
		List<ExptDescriptor> descriptors = econfig.getExperimentDescriptors();
		int repCount=0, sampCount=0;

		//Pre-step; do we need a SeqDataLoader? Only if at least one source is of type READDB.
		boolean makeSeqDataLoader = false;
		for(ExptDescriptor e : descriptors){
			for(Pair<String,String> source : e.sources){
				String type = source.cdr();
				if(type.equals("READDB"))
					makeSeqDataLoader=true;
			}
		}
		if(makeSeqDataLoader){
			//NOTE(review): on failure the stack trace is printed and sdloader stays null;
			//the null loader is then handed to makeReadDBHitLoader below. Left as-is to keep existing behavior.
			try {
				sdloader = new SeqDataLoader();
			} catch (SQLException e1) {
				e1.printStackTrace();
			} catch (IOException e1) {
				e1.printStackTrace();
			}
		}

		//Firstly, initialize all hit loaders.
		//This is done in a separate first pass, because it is possible (albeit unlikely)
		//that multiple conditions share the same hit loader, and you don't want to load things twice.
		for(ExptDescriptor e : descriptors){
			if(econfig.getPrintLoadingProgress())
				System.err.println("Processing HitLoaders for:\t"+e.condition+"\t"+e.replicate);
			for(Pair<String,String> source : e.sources){
				String name = source.car();
				String type = source.cdr();
				if(loaders.containsKey(name))
					continue; //one loader per source name
				HitLoader hl;
				if(type.equals("READDB")){ //ReadDB HitLoader
					hl = hlfactory.makeReadDBHitLoader(sdloader, name);
				}else{ //Assume File HitLoader
					hl = hlfactory.makeFileHitLoader(name, type, econfig.getNonUnique());
				}
				//hit loader does not have to be sourced here -- that happens in the samples part below
				loaders.put(name, hl);
			}
		}

		//Secondly, load the samples (load each sample name once)
		for(ExptDescriptor e : descriptors){
			String sampleName = e.getName();
			if(econfig.getPrintLoadingProgress() && loadReads)
				System.err.print("Loading data from "+sampleName);

			Sample samp = allSamples.get(sampleName);
			if(samp == null){
				samp = new Sample(sampCount, econfig, sampleName, e.perBaseMaxReads, e.signal);
				allSamples.put(sampleName, samp);
				samples.add(samp);
				sampCount++;
			}
			for(Pair<String,String> source : e.sources){
				samp.addHitLoader(loaders.get(source.car()));
			}
			if(loadReads){
				samp.initializeCache(econfig.getCacheAllData(), econfig.getInitialCachedRegions());
				if(econfig.getPrintLoadingProgress())
					System.err.println("\tLoaded.");
			}
		}

		//Merge estimated genomes if necessary (only when the config supplied no genome)
		if(gen == null){
			List<Genome> estGenomes = new ArrayList<Genome>();
			for(Sample s : allSamples.values())
				estGenomes.add(s.getGenome());
			gen = econfig.mergeEstGenomes(estGenomes);
			for(Sample s : allSamples.values())
				s.setGenome(gen);
		}

		//Thirdly, initialize the replicates
		for(ExptDescriptor e : descriptors){
			if(!e.signal)
				continue;
			String repName = e.condition+":"+e.replicate;
			if(allReplicates.containsKey(repName))
				continue;
			//Require that there is a signal (in case of orphan/default controls)
			if(!allSamples.containsKey(repName+":signal"))
				continue;

			Sample sig = allSamples.get(repName+":signal");
			Sample ctrl = null;
			if(allSamples.containsKey(repName+":control"))                  //Ctrl1: if there is a control defined for this condition & replicate
				ctrl = allSamples.get(repName+":control");
			else if(allSamples.containsKey(e.condition+":DEFAULT:control")) //Ctrl2: if there is a default control for this condition
				ctrl = allSamples.get(e.condition+":DEFAULT:control");
			else if(allSamples.containsKey("DEFAULT:DEFAULT:control"))      //Ctrl3: if there is a global default control
				ctrl = allSamples.get("DEFAULT:DEFAULT:control");
			//If no control specified, ctrl is still null

			ControlledExperiment rep = new ControlledExperiment(econfig, repCount, e.condition, e.replicate, sig, ctrl);
			allReplicates.put(repName, rep);
			replicates.add(rep);
			repCount++;
		}

		//Fourthly, initialize the conditions (not using Hash any more so that ordering is maintained from design file)
		List<String> replicatesByConditionNames = new ArrayList<String>();
		List<List<ControlledExperiment>> replicatesByConditionReps = new ArrayList<List<ControlledExperiment>>();
		for(ExptDescriptor e : descriptors){
			ControlledExperiment rep = allReplicates.get(e.condition+":"+e.replicate);
			if(rep != null)
				addToGroup(replicatesByConditionNames, replicatesByConditionReps, e.condition, rep);
		}
		for(int i=0; i<replicatesByConditionNames.size(); i++){
			conditions.add(new ExperimentCondition(econfig, i, replicatesByConditionNames.get(i), replicatesByConditionReps.get(i), econfig.getEstimateScaling()));
		}

		//Fifthly, initialize the targets
		List<String> replicatesByTargetNames = new ArrayList<String>();
		List<List<ControlledExperiment>> replicatesByTargetReps = new ArrayList<List<ControlledExperiment>>();
		for(ExptDescriptor e : descriptors){
			ControlledExperiment rep = allReplicates.get(e.condition+":"+e.replicate);
			if(rep != null)
				addToGroup(replicatesByTargetNames, replicatesByTargetReps, e.target, rep);
		}
		for(int i=0; i<replicatesByTargetNames.size(); i++){
			//Use the replicates grouped by target (not the by-condition lists)
			targets.add(new ExperimentTarget(econfig, i, replicatesByTargetNames.get(i), replicatesByTargetReps.get(i)));
		}

		//Sixthly, initialize the types
		List<String> replicatesByExptTypeNames = new ArrayList<String>();
		List<List<ControlledExperiment>> replicatesByExptTypeReps = new ArrayList<List<ControlledExperiment>>();
		for(ExptDescriptor e : descriptors){
			ControlledExperiment rep = allReplicates.get(e.condition+":"+e.replicate);
			if(rep != null)
				addToGroup(replicatesByExptTypeNames, replicatesByExptTypeReps, e.expttype, rep);
		}
		for(int i=0; i<replicatesByExptTypeNames.size(); i++){
			//Use the replicates grouped by experiment type (not the by-condition lists)
			expttypes.add(new ExperimentType(econfig, i, replicatesByExptTypeNames.get(i), replicatesByExptTypeReps.get(i)));
		}

		//Finally, index everything
		for(int i=0; i<conditions.size(); i++){
			ExperimentCondition cond = conditions.get(i);
			conditionIndex.put(cond, i);
			indexedCondition.put(i, cond);
			namedCondition.put(cond.getName(), cond);
		}

		if(econfig.getPrintLoadingProgress())
			printLoadedExperiments();

		if(sdloader!=null)
			sdloader.close();
	}

	/**
	 * Add a replicate to the group with the given name, creating the group on first sight.
	 * Names and replicate lists are kept in parallel lists so that groups stay in
	 * first-encountered order; a replicate is added to a given group at most once.
	 * @param groupNames : names of the groups seen so far
	 * @param groupReps : replicate lists, parallel to groupNames
	 * @param name : name of the group the replicate belongs to
	 * @param rep : the replicate to add
	 */
	private static void addToGroup(List<String> groupNames, List<List<ControlledExperiment>> groupReps, String name, ControlledExperiment rep){
		int index = groupNames.indexOf(name);
		if(index < 0){
			groupNames.add(name);
			groupReps.add(new ArrayList<ControlledExperiment>());
			index = groupNames.size()-1;
		}
		List<ControlledExperiment> currReps = groupReps.get(index);
		if(!currReps.contains(rep))
			currReps.add(rep);
	}

	/**
	 * Print a per-condition summary (replicate hit counts and scaling factors) to System.err.
	 */
	private void printLoadedExperiments(){
		System.err.println("Loaded all experiments:");
		for(ExperimentCondition cond : conditions){
			System.err.println(" Condition "+cond.getName()+":\t#Replicates:\t"+cond.getReplicates().size());
			for(ControlledExperiment r : cond.getReplicates()){
				System.err.println("\tReplicate:\t"+r.getName());
				if(r.getControl()==null)
					System.err.println(String.format("\t\tSignal:\t%.1f", r.getSignal().getHitCount()));
				else
					System.err.println(String.format("\t\tSignal:\t%.1f\tControl:\t%.1f\tScalingFactor:\t%.3f", r.getSignal().getHitCount(), r.getControl().getHitCount(), r.getControlScaling()));
			}
			if(cond.getTotalControlCount()>0)
				System.err.println(String.format("\tPooled replicates for condition:\t%s\n\t\tSignal:\t%.1f\tControl:%.1f\tScalingFactor:%.3f",cond.getName(), cond.getTotalSignalCount(), cond.getTotalControlCount(), cond.getPooledSampleControlScaling()));
			else
				System.err.println(String.format("\tPooled replicates for condition:\t%s\n\t\tSignal:\t%.1f",cond.getName(), cond.getTotalSignalCount()));
		}
	}

	//Accessors
	public List<Sample> getSamples(){return samples;}
	public List<ExperimentCondition> getConditions(){return conditions;}
	public List<ControlledExperiment> getReplicates(){return replicates;}
	public List<ExperimentTarget> getTargets(){return targets;}
	public List<ExperimentType> getExptTypes(){return expttypes;}
	/** Index of a condition. Throws NullPointerException (null unboxing) if the condition is not indexed by this manager. */
	public int getConditionIndex(ExperimentCondition c){return conditionIndex.get(c);}
	/** Condition at the given index, or null if there is none. */
	public ExperimentCondition getIndexedCondition(int index){return indexedCondition.get(index);}
	/** Condition with the given name, or null if there is none. */
	public ExperimentCondition getNamedCondition(String name){return namedCondition.get(name);}
	public int getNumConditions(){return conditions.size();}

	/**
	 * Call any cleanup methods: cleanup() on every hit loader, then close() on every sample.
	 */
	public void close(){
		for(HitLoader hl : loaders.values()){
			hl.cleanup();
		}
		for(Sample s : samples){
			s.close();
		}
	}

	/**
	 * This main method is only for testing the ExperimentManager system
	 * @param args
	 */
	public static void main(String[] args){
		GenomeConfig gconfig = new GenomeConfig(args);
		ExptConfig econfig = new ExptConfig(gconfig.getGenome(), args);
		if(econfig.helpWanted()){
			System.err.println("ExperimentManager debugging:");
			System.err.println(econfig.getArgsList());
		}else{
			ExperimentManager manager = new ExperimentManager(econfig);

			System.err.println("ExptTypes:\t"+manager.getExptTypes().size());
			for(ExperimentType t : manager.getExptTypes()){
				System.err.println("ExptType "+t.getName()+":\t#Experiments:\t"+t.getExptTypeExperiments().size());
			}
			System.err.println("ExptTargets:\t"+manager.getTargets().size());
			for(ExperimentTarget t : manager.getTargets()){
				System.err.println("Target "+t.getName()+":\t#Experiments:\t"+t.getTargetExperiments().size());
			}

			manager.close();
		}
	}
}