/* * The MIT License * * Copyright (c) 2017 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package picard.vcf; import htsjdk.samtools.metrics.MetricsFile; import htsjdk.samtools.util.*; import picard.PicardException; import picard.cmdline.CommandLineProgram; import picard.cmdline.CommandLineProgramProperties; import picard.cmdline.Option; import picard.cmdline.StandardOptionDefinitions; import picard.cmdline.programgroups.Metrics; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.*; /** * Combines multiple Variant Calling Metrics files into a single file. * @author Eric Banks */ @CommandLineProgramProperties( usage = "Combines multiple Variant Calling Metrics files into a single file. This tool is used in cases where the metrics are calculated" + " separately for different (genomic) shards of the same callset and we want to combine them into a single result over the entire callset." + " The shards are expected to contain the same samples (although it will not fail if they do not) and to not have been run over overlapping genomic positions.", usageShort = "Combines multiple Variant Calling Metrics files into a single file", programGroup = Metrics.class ) public class AccumulateVariantCallingMetrics extends CommandLineProgram { @Option(shortName= StandardOptionDefinitions.INPUT_SHORT_NAME, doc="Paths (except for the file extensions) of Variant Calling Metrics files to read and merge.", minElements=1) public List<File> INPUT; @Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "Path (except for the file extension) of output metrics files to write.") public File OUTPUT; @Override protected int doWork() { final String outputPrefix = OUTPUT.getAbsolutePath() + "."; final File detailOutputFile = new File(outputPrefix + CollectVariantCallingMetrics.VariantCallingDetailMetrics.getFileExtension()); final File summaryOutputFile = new File(outputPrefix + CollectVariantCallingMetrics.VariantCallingSummaryMetrics.getFileExtension()); IOUtil.assertFileIsWritable(detailOutputFile); IOUtil.assertFileIsWritable(summaryOutputFile); // set up the collectors final Map<String, Collection<CollectVariantCallingMetrics.VariantCallingDetailMetrics>> sampleDetailsMap = new HashMap<>(); final Collection<CollectVariantCallingMetrics.VariantCallingSummaryMetrics> summaries = new ArrayList<>(); for (final File file : INPUT) { final String inputPrefix = file.getAbsolutePath() + "."; try { // read in the detailed metrics file final File detail = new File(inputPrefix + CollectVariantCallingMetrics.VariantCallingDetailMetrics.getFileExtension()); IOUtil.assertFileIsReadable(detail); MetricsFile<CollectVariantCallingMetrics.VariantCallingDetailMetrics, ?> detailedMetricsFile = getMetricsFile(); detailedMetricsFile.read(new FileReader(detail)); // for each sample in the detailed metrics... long totalHetDepth = 0L; for (final CollectVariantCallingMetrics.VariantCallingDetailMetrics detailedMetrics : detailedMetricsFile.getMetrics()) { // re-calculate internal fields from derived fields detailedMetrics.calculateFromDerivedFields(); totalHetDepth += detailedMetrics.TOTAL_HET_DEPTH; // add it to the list of metrics for that sample so that we can merge them later sampleDetailsMap.computeIfAbsent(detailedMetrics.SAMPLE_ALIAS, f -> new ArrayList<>()).add(detailedMetrics); } // next, read in the summary metrics final File summary = new File(inputPrefix + CollectVariantCallingMetrics.VariantCallingSummaryMetrics.getFileExtension()); IOUtil.assertFileIsReadable(summary); MetricsFile<CollectVariantCallingMetrics.VariantCallingSummaryMetrics, ?> summaryMetricsFile = getMetricsFile(); summaryMetricsFile.read(new FileReader(summary)); if (summaryMetricsFile.getMetrics().size() != 1) { throw new PicardException(String.format("Expected 1 row in the summary metrics file but saw %d", summaryMetricsFile.getMetrics().size())); } // re-calculate internal fields from derived fields and add it to the list of summary metrics final CollectVariantCallingMetrics.VariantCallingSummaryMetrics summaryMetrics = summaryMetricsFile.getMetrics().get(0); summaryMetrics.calculateFromDerivedFields(totalHetDepth); summaries.add(summaryMetrics); } catch (IOException e) { throw new PicardException(String.format("Cannot read from metrics files with prefix %s", inputPrefix)); } } // now merge all of the accumulated metrics final Collection<CollectVariantCallingMetrics.VariantCallingDetailMetrics> collapsedDetails = new ArrayList<>(); sampleDetailsMap.values().forEach(sampleDetails -> { final CollectVariantCallingMetrics.VariantCallingDetailMetrics collapsed = new CollectVariantCallingMetrics.VariantCallingDetailMetrics(); CollectVariantCallingMetrics.VariantCallingDetailMetrics.foldInto(collapsed, sampleDetails); collapsed.calculateDerivedFields(); collapsedDetails.add(collapsed); }); final CollectVariantCallingMetrics.VariantCallingSummaryMetrics collapsedSummary = new CollectVariantCallingMetrics.VariantCallingSummaryMetrics(); CollectVariantCallingMetrics.VariantCallingSummaryMetrics.foldInto(collapsedSummary, summaries); collapsedSummary.calculateDerivedFields(); // prepare and write the finalized merged metrics final MetricsFile<CollectVariantCallingMetrics.VariantCallingDetailMetrics, Integer> detail = getMetricsFile(); final MetricsFile<CollectVariantCallingMetrics.VariantCallingSummaryMetrics, Integer> summary = getMetricsFile(); summary.addMetric(collapsedSummary); collapsedDetails.forEach(detail::addMetric); detail.write(detailOutputFile); summary.write(summaryOutputFile); return 0; } }