/* * The MIT License * * Copyright (c) 2012 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package picard.metrics; import htsjdk.samtools.SAMReadGroupRecord; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.metrics.MetricBase; import htsjdk.samtools.metrics.MetricsFile; import htsjdk.samtools.reference.ReferenceSequence; import picard.PicardException; import picard.analysis.MetricAccumulationLevel; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; /** * MultiLevelCollector handles accumulating Metrics at different MetricAccumulationLevels(ALL_READS, SAMPLE, LIBRARY, READ_GROUP). * Based on the accumulationLevels and readGroup records passed to its constructor, MultiLevelCollector * will instantiate the number of PerUnitMetricCollector's needed to generate metrics for each of the levels provided. * * To Use: * * Instantiate a MultiLevelCollector and call setup(see thoughts about extending MultiLevelCollector below) * setup will create the underlying classes that will handle the accumulation level logic. * Pass all reads you wish to collect data against to MultiLevelCollector via the acceptRecord method * Call finish and use addAllLevelsToFile to add all of the metrics at each accumulation level to the given file. * * Extend MultiLevelCollector and implement makeArg and makeChildCollector * You will most likely want to make a class that extends PerUnitMetricCollector. This class should do the work of keeping * track of values for one specific "accumulation unit" (e.g. for one library, or for one read group depending on what levels * you are accumulating at). * * If a record has any expensive calculations to be done (that don't need to be done differently depending * on what sample/library/read group the read is for) then create a container class for the results of these calculations and pass * this class as the ARGTYPE of both the PerUnitMetricCollector and MultiLevelCollector. You can then do these calculations in the makeArg * method and they will only be done once per record. * * @param <METRIC_TYPE> The type of metrics being collected * @param <Histogram_KEY> If there is are Histograms related to metrics of type <BEAN> then <HKEY> is the key value to these Histograms * @param <ARGTYPE> The type of argument passed to individual PerUnitMetricCollector (see SAMRecordMultilevelCollector and PerUnitMetricCollector) */ public abstract class MultiLevelCollector<METRIC_TYPE extends MetricBase, Histogram_KEY extends Comparable, ARGTYPE> { public static final String UNKNOWN = "unknown"; //The collector that will accept all records (allReads is NULL if !calculateAll) private PerUnitMetricCollector<METRIC_TYPE, Histogram_KEY, ARGTYPE> allReadCollector; //A list of Distributor that is at most length 3, 1 for each (SAMPLE, LIBRARY, READ_GROUP) accumulation levels //these will be listed in the order in which their children would be added to a metric file private List<Distributor> outputOrderedDistributors; //Convert the current SAMRecord and the ReferenceSequence for that record into an ARGTYPE object //see accept record for use protected abstract ARGTYPE makeArg(final SAMRecord samRec, final ReferenceSequence refSeq); /** * Construct a PerUnitMetricCollector with the given arguments. * @param sample If aggregating by ALL_READS this will be null, otherwise the sample that will be used to identify * this collector * @param library If aggregating by SAMPLE this will be null, otherwise the library that will be used to identify * this collector * @param readGroup If aggregating by LIBRARY this will be null, otherwise the readGroup that will be used to identify * this collector * @return A PerUnitMetricCollector parameterized by the given arguments */ protected abstract PerUnitMetricCollector<METRIC_TYPE, Histogram_KEY, ARGTYPE> makeChildCollector(final String sample, final String library, final String readGroup); //These are exposed here (rather than being encapsulated in the Distributor subclasses below in order //to provide subclasses with an explicit point to add initialization (specific to accumulation level) for //a PerUnitMetricCollector it is creating protected PerUnitMetricCollector<METRIC_TYPE, Histogram_KEY, ARGTYPE> makeAllReadCollector() { return makeChildCollector(null, null, null); } protected PerUnitMetricCollector<METRIC_TYPE, Histogram_KEY, ARGTYPE> makeSampleCollector(final SAMReadGroupRecord rg) { return makeChildCollector(rg.getSample(), null, null); } protected PerUnitMetricCollector<METRIC_TYPE, Histogram_KEY, ARGTYPE> makeLibraryCollector(final SAMReadGroupRecord rg) { return makeChildCollector(rg.getSample(), rg.getLibrary(), null); } protected PerUnitMetricCollector<METRIC_TYPE, Histogram_KEY, ARGTYPE> makeReadGroupCollector(final SAMReadGroupRecord rg) { return makeChildCollector(rg.getSample(), rg.getLibrary(), rg.getPlatformUnit()); } /** * Distributors group PerUnitMetricCollectors based on a AccumulationLevel. Their structure mimics * PerUnitMetricCollectors but instead of adding records to metrics they identify which * PerUnitMetricCollector should receive a specific record and distribute records on to the that collector * * There were will be 0 or 1 Distributors for each of the following MetriAcummulationLevels: * ALL_READS, SAMPLE, LIBRARY, READ_GROUP */ private abstract class Distributor { //A Map mapping the key for a specific record (as determined by getKey) to the appropriate collector private final Map<String, PerUnitMetricCollector<METRIC_TYPE, Histogram_KEY, ARGTYPE>> collectors; //Given a SAMReadGroupRecord, return the key that identifies the collector for the corresponding SAMRecord protected abstract String getKey(final SAMReadGroupRecord rg); //Make a PerUnitMetricCollector for this given Distributor protected abstract PerUnitMetricCollector<METRIC_TYPE, Histogram_KEY, ARGTYPE> makeCollector(final SAMReadGroupRecord rg); protected abstract PerUnitMetricCollector<METRIC_TYPE, Histogram_KEY, ARGTYPE> makeUnknownCollector(); public Distributor(final List<SAMReadGroupRecord> rgRecs) { collectors = new LinkedHashMap<String, PerUnitMetricCollector<METRIC_TYPE, Histogram_KEY, ARGTYPE>>(); for(final SAMReadGroupRecord rg : rgRecs) { final String key = getKey(rg); if(!collectors.containsKey(key)) { collectors.put(key, makeCollector(rg)); } } } /** Call finish on each PerUnitMetricCollector in this Aggregate Collector */ public void finish() { for(final PerUnitMetricCollector<METRIC_TYPE, Histogram_KEY, ARGTYPE> collector : collectors.values()) { collector.finish(); } } /** Call acceptRecord(args) on the record collector identified by getKey */ public void acceptRecord(final ARGTYPE args, final SAMReadGroupRecord rg) { String key = UNKNOWN; if(rg != null) { final String computedKey = getKey(rg); if(computedKey != null) { key = computedKey; } } PerUnitMetricCollector<METRIC_TYPE, Histogram_KEY, ARGTYPE> collector = collectors.get(key); if (collector == null) { if (!UNKNOWN.equals(key)) { throw new PicardException("Could not find collector for " + key); } collector = makeUnknownCollector(); collectors.put(key, collector); } collector.acceptRecord(args); } /** Add all records to the MetricsFile passed in, this will happen in the order they were * found in the input ReadGroup records */ public void addToFile(final MetricsFile<METRIC_TYPE, Histogram_KEY> file) { for(final PerUnitMetricCollector<METRIC_TYPE, Histogram_KEY, ARGTYPE> collector : collectors.values()) { collector.addMetricsToFile(file); } } } /** A dummy Distributor to handle the ALL_READS accumulation level. No distribution is required * since there should only ever be one PerUnitMetricCollector for ALL_READS. */ private class AllReadsDistributor extends Distributor { public AllReadsDistributor(final List<SAMReadGroupRecord> rgRecs) { super(new ArrayList<SAMReadGroupRecord>()); makeCollector(null); } @Override protected String getKey(SAMReadGroupRecord rg) { return null; } public void acceptRecord(final ARGTYPE args, final SAMReadGroupRecord rg) { allReadCollector.acceptRecord(args); } @Override protected PerUnitMetricCollector<METRIC_TYPE, Histogram_KEY, ARGTYPE> makeCollector(final SAMReadGroupRecord rg) { allReadCollector = makeAllReadCollector(); return allReadCollector; } @Override protected PerUnitMetricCollector<METRIC_TYPE, Histogram_KEY, ARGTYPE> makeUnknownCollector() { throw new UnsupportedOperationException("Should not happen"); } @Override public void finish() { allReadCollector.finish(); } @Override public void addToFile(final MetricsFile<METRIC_TYPE, Histogram_KEY> file) { allReadCollector.addMetricsToFile(file); } } //Discriminates between records based on sample name, and calls acceptRecord on the appropriate PerUnitMetricCollectors private class SampleDistributor extends Distributor { public SampleDistributor(final List<SAMReadGroupRecord> rgRecs) { super(rgRecs); } @Override protected String getKey(SAMReadGroupRecord rg) { return rg.getSample(); } @Override protected PerUnitMetricCollector<METRIC_TYPE, Histogram_KEY, ARGTYPE> makeCollector(SAMReadGroupRecord rg) { return makeSampleCollector(rg); } @Override protected PerUnitMetricCollector<METRIC_TYPE, Histogram_KEY, ARGTYPE> makeUnknownCollector() { return makeChildCollector(UNKNOWN, null, null); } } //Discriminates between records based on library name, and calls acceptRecord on the appropriate PerUnitMetricCollectors private class LibraryDistributor extends Distributor { public LibraryDistributor(final List<SAMReadGroupRecord> rgRecs) { super(rgRecs); } @Override protected String getKey(SAMReadGroupRecord rg) { return rg.getLibrary(); } @Override protected PerUnitMetricCollector<METRIC_TYPE, Histogram_KEY, ARGTYPE> makeCollector(SAMReadGroupRecord rg) { return makeLibraryCollector(rg); } @Override protected PerUnitMetricCollector<METRIC_TYPE, Histogram_KEY, ARGTYPE> makeUnknownCollector() { return makeChildCollector(UNKNOWN, UNKNOWN, null); } } //Discriminates between records based on read group name, and calls acceptRecord on the appropriate PerUnitMetricCollectors private class ReadGroupCollector extends Distributor { public ReadGroupCollector(final List<SAMReadGroupRecord> rgRecs) { super(rgRecs); } @Override protected String getKey(SAMReadGroupRecord rg) { return rg.getPlatformUnit(); } @Override protected PerUnitMetricCollector<METRIC_TYPE, Histogram_KEY, ARGTYPE> makeCollector(SAMReadGroupRecord rg) { return makeReadGroupCollector(rg); } @Override protected PerUnitMetricCollector<METRIC_TYPE, Histogram_KEY, ARGTYPE> makeUnknownCollector() { return makeChildCollector(UNKNOWN, UNKNOWN, UNKNOWN); } } /** * Use an init method so that overloaded methods in subclasses can pass use parameters that are initialized in their constructor * @param accumulationLevels PerUnitMetricCollectors will only be created for the levels identified by accumulationLevels * @param samRgRecords PerUnitMetricCollectors will be created for each of the different samples, libraries, and * readGroups found in the records depending on the accumulationLevels provided */ protected void setup(final Set<MetricAccumulationLevel> accumulationLevels, final List<SAMReadGroupRecord> samRgRecords) { outputOrderedDistributors = new ArrayList<Distributor>(4); if(accumulationLevels.contains(MetricAccumulationLevel.ALL_READS)) { outputOrderedDistributors.add(new AllReadsDistributor(samRgRecords)); } if (accumulationLevels.contains(MetricAccumulationLevel.SAMPLE)) { outputOrderedDistributors.add(new SampleDistributor(samRgRecords)); } if(accumulationLevels.contains(MetricAccumulationLevel.LIBRARY)) { outputOrderedDistributors.add(new LibraryDistributor(samRgRecords)); } if(accumulationLevels.contains(MetricAccumulationLevel.READ_GROUP)) { outputOrderedDistributors.add(new ReadGroupCollector(samRgRecords)); } } /** * Construct a argument of ARGTYPE using the given SAMRecord and ReferenceSequence then pass * this value to all collectors that should include this record */ public void acceptRecord(final SAMRecord record, final ReferenceSequence refSeq) { final ARGTYPE arg = makeArg(record, refSeq); for(final Distributor collector : outputOrderedDistributors) { collector.acceptRecord(arg, record.getReadGroup()); } } /** * Call finish on all PerUnitMetricCollectors */ public void finish() { for(final Distributor collector : outputOrderedDistributors) { collector.finish(); } } /** Get the PerUnitMetricCollector that collects reads for all levels */ public PerUnitMetricCollector<METRIC_TYPE, Histogram_KEY, ARGTYPE> getAllReadsCollector() { return allReadCollector; } /** Add all metrics to the given file in the following MetricAccumulationLevel order * ALL_READS, SAMPLE, LIBRARY, READ_GROUP. */ public void addAllLevelsToFile(final MetricsFile<METRIC_TYPE, Histogram_KEY> file) { for(final Distributor collector : outputOrderedDistributors) { collector.addToFile(file); } } }