/*
 * The MIT License
 *
 * Copyright (c) 2014 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
package picard.illumina;

import htsjdk.samtools.metrics.MetricBase;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.Illumina;
import picard.illumina.parser.*;

import java.io.File;
import java.io.FileNotFoundException;
import java.util.*;
import java.util.stream.Collectors;
import javax.xml.parsers.DocumentBuilderFactory;

/**
 * Command-line wrapper around {@link IlluminaLaneMetricsCollector}.
 *
 * <p>When no {@code READ_STRUCTURE} is supplied, the read structure is derived from the
 * {@code RunInfo.xml} file found in the run directory before the metrics are collected.
 *
 * @author mccowan
 */
@CommandLineProgramProperties(
        usage = CollectIlluminaLaneMetrics.USAGE_SUMMARY + CollectIlluminaLaneMetrics.USAGE_DETAILS,
        usageShort = CollectIlluminaLaneMetrics.USAGE_SUMMARY,
        programGroup = Illumina.class
)
public class CollectIlluminaLaneMetrics extends CommandLineProgram {
    static final String USAGE_SUMMARY = "Collects Illumina lane metrics for the given BaseCalling analysis directory. ";

    // The usage example names the option RUN_DIRECTORY, matching the @Option field declared below.
    static final String USAGE_DETAILS = "This tool produces quality control metrics on cluster density for each lane of an Illumina flowcell." +
            " This tool takes Illumina TileMetrics data and places them into directories containing lane- and phasing-level metrics. " +
            "In this context, phasing refers to the fraction of molecules that fall behind or jump ahead (prephasing) during a read cycle." +
            "" +
            "<h4>Usage example:</h4>" +
            "<pre>" +
            "java -jar picard.jar CollectIlluminaLaneMetrics \\<br />" +
            " RUN_DIRECTORY=test_run \\<br />" +
            " OUTPUT_DIRECTORY=Lane_output_metrics \\<br />" +
            " OUTPUT_PREFIX=experiment1 \\<br />" +
            " READ_STRUCTURE=25T8B25T " +
            "</pre>" +
            "<p>Please see the CollectIlluminaLaneMetrics " +
            "<a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectIlluminaLaneMetrics'>definitions</a> " +
            "for a complete description of the metrics produced by this tool.</p>" +
            "<hr />";

    @Option(doc = "The Illumina run directory of the run for which the lane metrics are to be generated")
    public File RUN_DIRECTORY;

    @Option(doc = "The directory to which the output file will be written")
    public File OUTPUT_DIRECTORY;

    @Option(doc = "The prefix to be prepended to the file name of the output file; an appropriate suffix will be applied",
            shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME)
    public String OUTPUT_PREFIX;

    @Option(doc = ReadStructure.PARAMETER_DOC + "\nIf not given, will use the RunInfo.xml in the run directory.",
            shortName = "RS", optional = true)
    public ReadStructure READ_STRUCTURE;

    @Option(shortName = "EXT",
            doc = "Append the given file extension to all metric file names (ex. OUTPUT.illumina_lane_metrics.EXT). None if null",
            optional = true)
    public String FILE_EXTENSION = null;

    /**
     * Collects the lane and phasing metrics for {@code RUN_DIRECTORY} and writes them to
     * {@code OUTPUT_DIRECTORY}. If {@code READ_STRUCTURE} was not supplied it is first derived
     * from the run's {@code RunInfo.xml}.
     *
     * @return 0 on completion
     * @throws PicardException if the read structure cannot be derived from {@code RunInfo.xml}
     */
    @Override
    protected int doWork() {
        final MetricsFile<MetricBase, Comparable<?>> laneMetricsFile = this.getMetricsFile();
        final MetricsFile<MetricBase, Comparable<?>> phasingMetricsFile = this.getMetricsFile();

        if (READ_STRUCTURE == null) {
            READ_STRUCTURE = readStructureFromRunInfo(RUN_DIRECTORY);
        }

        // A null extension means "no extra suffix" for the collector.
        final String fileExtension = (FILE_EXTENSION == null) ? "" : FILE_EXTENSION;
        IlluminaLaneMetricsCollector.collectLaneMetrics(RUN_DIRECTORY, OUTPUT_DIRECTORY, OUTPUT_PREFIX,
                laneMetricsFile, phasingMetricsFile, READ_STRUCTURE, fileExtension);
        return 0;
    }

    /**
     * Builds a {@link ReadStructure} from the {@code Read} elements of the {@code RunInfo.xml} file
     * located directly inside {@code runDirectory}. Reads flagged {@code IsIndexedRead="Y"}
     * (case-insensitive) become barcode reads; all others become template reads.
     *
     * @param runDirectory the Illumina run directory containing {@code RunInfo.xml}
     * @return the read structure described by the file
     * @throws PicardException if the file cannot be parsed, a required attribute is missing or
     *                         malformed, or the reads are not numbered consecutively starting at 1
     */
    private static ReadStructure readStructureFromRunInfo(final File runDirectory) {
        final File runInfo = new File(runDirectory, "RunInfo.xml");
        IOUtil.assertFileIsReadable(runInfo);

        try {
            final Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(runInfo);
            final NodeList reads = document.getElementsByTagName("Read");
            final List<ReadDescriptor> descriptors = new ArrayList<>(reads.getLength());

            for (int i = 0; i < reads.getLength(); i++) {
                final NamedNodeMap attributes = reads.item(i).getAttributes();
                final int readNumber = Integer.parseInt(requiredAttribute(attributes, "Number", i));
                final int numCycles = Integer.parseInt(requiredAttribute(attributes, "NumCycles", i));
                final boolean isIndexedRead = "Y".equalsIgnoreCase(requiredAttribute(attributes, "IsIndexedRead", i));

                if (readNumber != i + 1) {
                    throw new PicardException("Read number in RunInfo.xml was out of order: " + (i + 1) + " != " + readNumber);
                }
                descriptors.add(new ReadDescriptor(numCycles, isIndexedRead ? ReadType.Barcode : ReadType.Template));
            }
            return new ReadStructure(descriptors);
        } catch (final PicardException e) {
            // Already carries a specific message; do not bury it in another wrapper.
            throw e;
        } catch (final Exception e) {
            // Keep the cause so parser/IO/number-format failures remain diagnosable.
            throw new PicardException("Unable to derive the read structure from " + runInfo.getAbsolutePath()
                    + ": " + e.getMessage(), e);
        }
    }

    /**
     * Returns the value of the named attribute of a {@code Read} element.
     *
     * @param attributes the attributes of the {@code Read} element
     * @param name       the attribute to look up
     * @param readIndex  zero-based position of the element, used only for the error message
     * @throws PicardException if the attribute is absent
     */
    private static String requiredAttribute(final NamedNodeMap attributes, final String name, final int readIndex) {
        final Node attribute = (attributes == null) ? null : attributes.getNamedItem(name);
        if (attribute == null) {
            throw new PicardException("Read element " + (readIndex + 1)
                    + " in RunInfo.xml is missing the required attribute '" + name + "'");
        }
        return attribute.getNodeValue();
    }

    public static void main(final String[] args) {
        new CollectIlluminaLaneMetrics().instanceMainWithExit(args);
    }

    /**
     * Utility for collating Tile records from the Illumina TileMetrics file into lane-level and phasing-level metrics.
     */
    public static class IlluminaLaneMetricsCollector {

        private static final Log LOG = Log.getInstance(IlluminaLaneMetricsCollector.class);

        /**
         * Returns a partitioned collection of lane number to Tile objects from the provided basecall directory.
         * The returned map iterates in ascending lane order so that downstream output is deterministic.
         *
         * @throws PicardException if the tile metrics file cannot be found
         */
        public static Map<Integer, ? extends Collection<Tile>> readLaneTiles(final File illuminaRunDirectory,
                                                                             final ReadStructure readStructure) {
            final Collection<Tile> tiles;
            try {
                tiles = TileMetricsUtil.parseTileMetrics(
                        TileMetricsUtil.renderTileMetricsFileFromBasecallingDirectory(illuminaRunDirectory),
                        readStructure);
            } catch (final FileNotFoundException e) {
                throw new PicardException("Unable to open laneMetrics file.", e);
            }

            return tiles.stream().collect(Collectors.groupingBy(Tile::getLaneNumber, TreeMap::new, Collectors.toList()));
        }

        /** Parses the tile data from the basecall directory and writes to both the lane and phasing metrics files */
        public static void collectLaneMetrics(final File runDirectory,
                                              final File outputDirectory,
                                              final String outputPrefix,
                                              final MetricsFile<MetricBase, Comparable<?>> laneMetricsFile,
                                              final MetricsFile<MetricBase, Comparable<?>> phasingMetricsFile,
                                              final ReadStructure readStructure,
                                              final String fileExtension) {
            final Map<Integer, ? extends Collection<Tile>> laneTiles = readLaneTiles(runDirectory, readStructure);
            writeLaneMetrics(laneTiles, outputDirectory, outputPrefix, laneMetricsFile, fileExtension);
            writePhasingMetrics(laneTiles, outputDirectory, outputPrefix, phasingMetricsFile, fileExtension);
        }

        /**
         * Adds the phasing metrics of every lane in {@code laneTiles} to {@code phasingMetricsFile}
         * and writes that file to {@code outputDirectory}.
         *
         * @param fileExtension suffix appended directly after {@link IlluminaPhasingMetrics#getExtension()}
         * @return the file that was written
         */
        public static File writePhasingMetrics(final Map<Integer, ? extends Collection<Tile>> laneTiles,
                                               final File outputDirectory,
                                               final String outputPrefix,
                                               final MetricsFile<MetricBase, Comparable<?>> phasingMetricsFile,
                                               final String fileExtension) {
            for (final Map.Entry<Integer, ? extends Collection<Tile>> entry : laneTiles.entrySet()) {
                IlluminaPhasingMetrics.getPhasingMetricsForTiles(entry.getKey().longValue(), entry.getValue())
                        .forEach(phasingMetricsFile::addMetric);
            }

            return writeMetrics(phasingMetricsFile, outputDirectory, outputPrefix,
                    IlluminaPhasingMetrics.getExtension() + fileExtension);
        }

        /**
         * Adds one {@link IlluminaLaneMetrics} record per lane in {@code laneTiles} to
         * {@code laneMetricsFile} and writes that file to {@code outputDirectory}.
         *
         * @param fileExtension suffix appended directly after {@link IlluminaLaneMetrics#getExtension()}
         * @return the file that was written
         */
        public static File writeLaneMetrics(final Map<Integer, ? extends Collection<Tile>> laneTiles,
                                            final File outputDirectory,
                                            final String outputPrefix,
                                            final MetricsFile<MetricBase, Comparable<?>> laneMetricsFile,
                                            final String fileExtension) {
            for (final Map.Entry<Integer, ? extends Collection<Tile>> entry : laneTiles.entrySet()) {
                final IlluminaLaneMetrics laneMetric = new IlluminaLaneMetrics();
                laneMetric.LANE = entry.getKey().longValue();
                laneMetric.CLUSTER_DENSITY = calculateLaneDensityFromTiles(entry.getValue());
                laneMetricsFile.addMetric(laneMetric);
            }

            return writeMetrics(laneMetricsFile, outputDirectory, outputPrefix,
                    IlluminaLaneMetrics.getExtension() + fileExtension);
        }

        /**
         * Writes {@code metricsFile} to {@code <outputDirectory>/<outputPrefix>.<outputExtension>}.
         *
         * @return the file that was written
         */
        private static File writeMetrics(final MetricsFile<MetricBase, Comparable<?>> metricsFile,
                                         final File outputDirectory,
                                         final String outputPrefix,
                                         final String outputExtension) {
            final File outputFile = new File(outputDirectory, String.format("%s.%s", outputPrefix, outputExtension));
            LOG.info(String.format("Writing %s lane metrics to %s ...", metricsFile.getMetrics().size(), outputFile));
            metricsFile.write(outputFile);
            return outputFile;
        }

        /**
         * Computes the lane-wide cluster density as total clusters divided by total area, where each
         * tile's area is taken to be its cluster count divided by its cluster density.
         *
         * <p>Tiles reporting a non-positive density contribute their clusters but no area, so a single
         * empty tile cannot turn the result into NaN or infinity.
         *
         * @return the lane density, or 0.0 when no tile contributes a positive area
         */
        private static double calculateLaneDensityFromTiles(final Collection<Tile> tiles) {
            double area = 0;
            double clusters = 0;
            for (final Tile tile : tiles) {
                if (tile.getClusterDensity() > 0) {
                    area += (tile.getClusterCount() / tile.getClusterDensity());
                }
                clusters += tile.getClusterCount();
            }
            return (area > 0) ? clusters / area : 0.0;
        }
    }
}