package org.seqcode.deepseq.experiments;

import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.seqcode.data.seqdata.SeqDataLoader;
import org.seqcode.deepseq.hitloaders.HitLoader;
import org.seqcode.deepseq.hitloaders.HitLoaderFactory;
import org.seqcode.genome.Genome;
import org.seqcode.genome.GenomeConfig;
import org.seqcode.gseutils.Pair;

/**
 * SampleLoader allows ad hoc loading of a set of individual Samples and associated HitLoaders.
 * No organization/relationship is assumed or recorded between the loaded Samples.
 * ExperimentManager is the preferred way to load a set of experiments.
 * In rare cases, an application may just need to load one or two Samples without requiring the
 * overhead that comes with the Experiment tree. That's where this class comes in.
 *
 * @author mahony
 */
public class SampleLoader {

    protected ExptConfig econfig;
    protected HitLoaderFactory hlfactory;
    protected SeqDataLoader sdloader = null;
    protected Genome gen;
    protected boolean loadReads = true;
    //Experiment tree elements
    protected HashMap<String, HitLoader> loaders = new HashMap<String, HitLoader>();
    protected List<Sample> samples = new ArrayList<Sample>();
    protected Map<String, Sample> samplesByName = new HashMap<String, Sample>();

    /**
     * Constructor:
     * Using arguments loaded by the ExptConfig, initialize (in this order): HitLoaders, Samples.
     * @param c : ExptConfig
     * @param loadReads : boolean. For some applications, reads do not have to be loaded. Use with caution.
     */
    public SampleLoader(ExptConfig c){ this(c, true); }

    public SampleLoader(ExptConfig c, boolean loadReads){
        econfig = c;
        gen = econfig.getGenome();
        hlfactory = new HitLoaderFactory(econfig);
        this.loadReads = loadReads;

        List<ExptDescriptor> descriptors = econfig.getExperimentDescriptors();
        if(descriptors != null && descriptors.size() > 0)
            loadSamples(descriptors);
    }

    /**
     * Load Samples and their associated HitLoaders for a set of experiment descriptors.
     * @param descriptors : List of ExptDescriptors
     */
    public void loadSamples(List<ExptDescriptor> descriptors){
        HashMap<String, Sample> allSamples = new HashMap<String, Sample>();
        int sampCount = 0;

        //Pre-step: do we need a SeqDataLoader?
        boolean makeSeqDataLoader = false;
        for(ExptDescriptor e : descriptors){
            for(Pair<String, String> source : e.sources){
                String type = source.cdr();
                if(type.equals("READDB"))
                    makeSeqDataLoader = true;
            }
        }
        if(makeSeqDataLoader){
            try {
                sdloader = new SeqDataLoader();
            } catch (SQLException e1) {
                e1.printStackTrace();
            } catch (IOException e1) {
                e1.printStackTrace();
            }
        }

        //Firstly, initialize all hit loaders.
        //This is done in a separate first pass, because it is possible (albeit unlikely)
        //that multiple conditions share the same hit loader, and you don't want to load things twice.
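        //Note (added comment): the loaders map is keyed by the source name, so a source that
        //appears in multiple descriptors is wrapped in a single HitLoader (see the containsKey
        //checks below); the Samples loaded in the second pass then share that HitLoader.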
        for(ExptDescriptor e : descriptors){
            if(econfig.getPrintLoadingProgress())
                System.err.println("Processing HitLoaders for:\t"+e.condition+"\t"+e.replicate);
            for(Pair<String, String> source : e.sources){
                String name = source.car();
                String type = source.cdr();
                if(type.equals("READDB")){
                    //ReadDB HitLoader
                    if(!loaders.containsKey(name)){
                        HitLoader hl = hlfactory.makeReadDBHitLoader(sdloader, name);
                        //hit loader does not have to be sourced here -- that happens in the samples part below
                        loaders.put(name, hl);
                    }
                }else{
                    //Assume File HitLoader
                    if(!loaders.containsKey(name)){
                        HitLoader hl = hlfactory.makeFileHitLoader(name, type, econfig.getNonUnique());
                        //hit loader does not have to be sourced here -- that happens in the samples part below
                        loaders.put(name, hl);
                    }
                }
            }
        }

        //Secondly, load the samples (load each sample name once)
        for(ExptDescriptor e : descriptors){
            String sampleName = e.getName();
            if(econfig.getPrintLoadingProgress() && loadReads)
                System.err.print("Loading data from "+sampleName);

            if(!allSamples.containsKey(sampleName)){
                Sample samp = new Sample(sampCount, econfig, sampleName, e.perBaseMaxReads, e.signal);
                allSamples.put(sampleName, samp);
                samples.add(samp);
                samplesByName.put(sampleName, samp);
                sampCount++;
            }
            for(Pair<String, String> source : e.sources){
                String name = source.car();
                allSamples.get(sampleName).addHitLoader(loaders.get(name));
            }
            if(loadReads){
                allSamples.get(sampleName).initializeCache(econfig.getCacheAllData(), econfig.getInitialCachedRegions());
                if(econfig.getPrintLoadingProgress())
                    System.err.println(String.format("\tLoaded:\t%.1f", allSamples.get(sampleName).getHitCount()));
            }
        }

        //Merge estimated genomes if necessary (very messy if Samples are loaded during an analysis...)
        if(gen == null){
            List<Genome> estGenomes = new ArrayList<Genome>();
            for(String s : allSamples.keySet())
                estGenomes.add(allSamples.get(s).getGenome());
            gen = econfig.mergeEstGenomes(estGenomes);
            for(String s : allSamples.keySet())
                allSamples.get(s).setGenome(gen);
        }

        if(sdloader != null){
            sdloader.close();
            sdloader = null;
        }
    }

    //Accessors
    public List<Sample> getSamples(){ return samples; }
    public Sample getSample(String s){ return samplesByName.get(s); }

    /**
     * Call any cleanup methods
     */
    public void close(){
        for(String l : loaders.keySet()){
            loaders.get(l).cleanup();
        }
        for(Sample s : samples){
            s.close();
        }
    }

    /**
     * This main method is only for testing the ExperimentManager system
     * @param args
     */
    public static void main(String[] args){
        GenomeConfig gconfig = new GenomeConfig(args);
        ExptConfig econfig = new ExptConfig(gconfig.getGenome(), args);
        if(econfig.helpWanted()){
            System.err.println("ExperimentManager debugging:");
            System.err.println(econfig.getArgsList());
        }else{
            ExperimentManager manager = new ExperimentManager(econfig);

            System.err.println("ExptTypes:\t"+manager.getExptTypes().size());
            for(ExperimentType t : manager.getExptTypes()){
                System.err.println("ExptType "+t.getName()+":\t#Experiments:\t"+t.getExptTypeExperiments().size());
            }
            System.err.println("ExptTargets:\t"+manager.getTargets().size());
            for(ExperimentTarget t : manager.getTargets()){
                System.err.println("Target "+t.getName()+":\t#Experiments:\t"+t.getTargetExperiments().size());
            }

            manager.close();
        }
    }
}
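/*
 * Usage sketch (illustrative only; not part of the original class): how an application might
 * load one or two Samples directly via SampleLoader instead of building the full Experiment
 * tree with ExperimentManager. The argument array and the sample name "signal-rep1" are
 * hypothetical; only methods that appear in this class or its main method are used.
 *
 *   GenomeConfig gconfig = new GenomeConfig(args);
 *   ExptConfig econfig = new ExptConfig(gconfig.getGenome(), args);
 *   SampleLoader loader = new SampleLoader(econfig);               //loads reads by default
 *   Sample samp = loader.getSample("signal-rep1");                 //hypothetical sample name
 *   if(samp != null)
 *       System.err.println("Hit count: " + samp.getHitCount());    //getHitCount as used in loadSamples
 *   loader.close();                                                //cleanup HitLoaders and Samples
 */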