/* * The MIT License * * Copyright (c) 2011 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package picard.illumina; import htsjdk.samtools.metrics.MetricsFile; import htsjdk.samtools.util.IOUtil; import org.testng.Assert; import org.testng.annotations.AfterTest; import org.testng.annotations.BeforeTest; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import picard.cmdline.CommandLineProgramTest; import picard.illumina.parser.ClusterData; import picard.illumina.parser.IlluminaDataProvider; import picard.illumina.parser.IlluminaDataProviderFactory; import picard.illumina.parser.IlluminaDataType; import picard.illumina.parser.ReadStructure; import picard.illumina.parser.readers.BclQualityEvaluationStrategy; import picard.util.BasicInputParser; import java.io.File; import java.io.FileReader; import java.util.ArrayList; import java.util.Arrays; import java.util.List; /** * @author alecw@broadinstitute.org */ public class ExtractIlluminaBarcodesTest extends CommandLineProgramTest { private static final File SINGLE_DATA_DIR = new File("testdata/picard/illumina/25T8B25T/Data/Intensities/BaseCalls"); private static final File DUAL_DATA_DIR = new File("testdata/picard/illumina/25T8B8B25T/Data/Intensities/BaseCalls"); private static final String[] BARCODES = { "CAACTCTC", "CAACTCTG", // This one is artificial -- one edit away from the first one "ACAGGTAT", "GACCGTTG", "ATTATCAA", "TGCTGCTG", "AACAATGG", "TGTAATCA", "GCCGTCGA", "GTCCACAG", "TTGTCTAT", "GTGGAGAC", "TTGCAAAT" }; private File basecallsDir; private File dual; private File qual; public String getCommandLineProgramName() { return ExtractIlluminaBarcodes.class.getSimpleName(); } @BeforeTest private void setUp() throws Exception { basecallsDir = File.createTempFile("eib.", ".tmp"); Assert.assertTrue(basecallsDir.delete()); Assert.assertTrue(basecallsDir.mkdir()); IOUtil.copyDirectoryTree(SINGLE_DATA_DIR, basecallsDir); dual = File.createTempFile("eib_dual", ".tmp"); Assert.assertTrue(dual.delete()); Assert.assertTrue(dual.mkdir()); IOUtil.copyDirectoryTree(DUAL_DATA_DIR, dual); qual = File.createTempFile("eib_qual", ".tmp"); Assert.assertTrue(qual.delete()); Assert.assertTrue(qual.mkdir()); IOUtil.copyDirectoryTree(DUAL_DATA_DIR, qual); } @AfterTest private void tearDown() { IOUtil.deleteDirectoryTree(basecallsDir); IOUtil.deleteDirectoryTree(dual); IOUtil.deleteDirectoryTree(qual); } @Test public void testSingleEndWithBarcodeAtStart() throws Exception { final MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric, Integer> metricsFile = runIt(1, "8B25T"); Assert.assertEquals(metricsFile.getMetrics().get(11).PERFECT_MATCHES, 1); } @Test public void testSingleEndWithBarcodeAtEnd() throws Exception { final MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric, Integer> metricsFile = runIt(1, "25T8B"); Assert.assertEquals(metricsFile.getMetrics().get(0).PERFECT_MATCHES, 5); } @Test public void testPairedEndWithBarcodeOnFirstEnd() throws Exception { final MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric, Integer> metricsFile = runIt(1, "25T8B25T"); Assert.assertEquals(metricsFile.getMetrics().get(0).PERFECT_MATCHES, 5); } @Test public void testPairedEndWithBarcodeOnSecondEnd() throws Exception { final MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric, Integer> metricsFile = runIt(1, "25T25T8B"); Assert.assertEquals(metricsFile.getMetrics().get(12).PERFECT_MATCHES, 1); } @Test public void testNonWritableOutputFile() throws Exception { final File existingFile = new File(basecallsDir, "s_1_1101_barcode.txt.gz"); try { existingFile.setReadOnly(); final String readStructure = "25T8B25T"; final int lane = 1; final File metricsFile = File.createTempFile("eib.", ".metrics"); metricsFile.deleteOnExit(); final List<String> args = new ArrayList<String>(Arrays.asList( "BASECALLS_DIR=" + basecallsDir.getPath(), "LANE=" + lane, "READ_STRUCTURE=" + readStructure, "METRICS_FILE=" + metricsFile.getPath(), "COMPRESS_OUTPUTS=true" )); for (final String barcode : BARCODES) { args.add("BARCODE=" + barcode); } Assert.assertEquals(runPicardCommandLine(args), 4); } finally { existingFile.setWritable(true); } } /** * 4 cases tested: * * exact match to ACAGTG * * inexact match within threshold to TGACCA * * inexact match not within threshold to TGACCA * * inexact match where the next match is too close to ACAGTG * @throws Exception */ @Test public void testBarcodeMatching() throws Exception { final int lane = 1; final int barcodePosition = 26; final MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric, Integer> metricsFile = runIt(lane, "25T8B25T"); ExtractIlluminaBarcodes.BarcodeMetric metricOne = null; ExtractIlluminaBarcodes.BarcodeMetric metricTwo = null; ExtractIlluminaBarcodes.BarcodeMetric metricNoMatch = null; for (final ExtractIlluminaBarcodes.BarcodeMetric metric : metricsFile.getMetrics()) { if (metric.BARCODE.equals(BARCODES[0])) { metricOne = metric; } else if (metric.BARCODE.equals(BARCODES[2])) { metricTwo = metric; } else if (metric.BARCODE.equals("NNNNNNNN")) { metricNoMatch = metric; } } Assert.assertEquals(metricOne.PERFECT_MATCHES, 5); Assert.assertEquals(metricOne.ONE_MISMATCH_MATCHES, 0); Assert.assertEquals(metricOne.PF_READS, 3); Assert.assertEquals(metricOne.READS, 5); // one inexact match Assert.assertEquals(metricTwo.READS, 4); Assert.assertEquals(metricTwo.ONE_MISMATCH_MATCHES, 0); Assert.assertEquals(metricNoMatch.READS, 140); Assert.assertEquals(metricNoMatch.PF_READS, 112); // Check the barcode files themselves final File[] barcodeFiles = IOUtil.getFilesMatchingRegexp(basecallsDir, "s_" + lane + "_\\d{4}_barcode.txt"); Arrays.sort(barcodeFiles); final BasicInputParser barcodeParser = new BasicInputParser(true, barcodeFiles); // Exact match String[] illuminaFields = barcodeParser.next(); Assert.assertEquals(illuminaFields[1], "Y"); Assert.assertEquals(illuminaFields[2], "CAACTCTC"); // Inexact match illuminaFields = barcodeParser.next(); Assert.assertEquals(illuminaFields[1], "Y"); Assert.assertEquals(illuminaFields[2], "ACAGGTAT"); // Too many mismatches illuminaFields = barcodeParser.next(); Assert.assertEquals(illuminaFields[1], "N"); barcodeParser.close(); // Tack on test of barcode-informed Illumina Basecall parsing final ReadStructure rs = new ReadStructure("25T8B25T"); final IlluminaDataProviderFactory factory = new IlluminaDataProviderFactory(basecallsDir, lane, rs, new BclQualityEvaluationStrategy(BclQualityEvaluationStrategy.ILLUMINA_ALLEGED_MINIMUM_QUALITY), IlluminaDataType.BaseCalls, IlluminaDataType.QualityScores, IlluminaDataType.Barcodes); testParsing(factory, rs, metricOne, barcodePosition); } @Test public void testDualBarcodes() throws Exception { final File metricsFile = File.createTempFile("dual.", ".metrics"); metricsFile.deleteOnExit(); final String[] args = new String[] { "BASECALLS_DIR=" + dual.getAbsolutePath(), "LANE=" + 1, "METRICS_FILE=" + metricsFile.getPath(), "READ_STRUCTURE=" + "25T8B8B25T", "BARCODE=" + "CAATAGTCCGACTCTC" }; Assert.assertEquals(runPicardCommandLine(args), 0); final MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric,Integer> result = new MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric,Integer>(); result.read(new FileReader(metricsFile)); Assert.assertEquals(result.getMetrics().get(0).PERFECT_MATCHES, 1, "Got wrong number of perfect matches"); Assert.assertEquals(result.getMetrics().get(0).ONE_MISMATCH_MATCHES, 0, "Got wrong number of one-mismatch matches"); } /** * Testing the quality thresholding. Looking at a single barcode (ACAGTG) with a min quality of 25 and no mismatches */ @Test(dataProvider = "qualityBarcodeData") public void testQualityBarcodes(final int quality, final int maxMismatches, final int perfectMatches, final int oneMismatch, final String testName) throws Exception { final File metricsFile = File.createTempFile("qual.", ".metrics"); metricsFile.deleteOnExit(); final String[] args = new String[] { "BASECALLS_DIR=" + qual.getPath(), "LANE=" + 1, "READ_STRUCTURE=25T8B25T", "METRICS_FILE=" + metricsFile.getPath(), "MINIMUM_BASE_QUALITY=" + quality, "MAX_MISMATCHES=" + maxMismatches, "BARCODE=CAATAGTC" }; Assert.assertEquals(runPicardCommandLine(args), 0); final MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric,Integer> result = new MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric,Integer>(); result.read(new FileReader(metricsFile)); Assert.assertEquals(result.getMetrics().get(0).PERFECT_MATCHES, perfectMatches, "Got wrong number of perfect matches for test: '" + testName + "'"); Assert.assertEquals(result.getMetrics().get(0).ONE_MISMATCH_MATCHES, oneMismatch, "Got wrong number of one-mismatch matches for test: '" + testName + "'"); } @DataProvider(name = "qualityBarcodeData") public Object[][] getQualityTestData() { return new Object[][] { {16, 0, 1, 0, "Barcode has good quality, 1 match"}, {25, 0, 0, 0, "Barcode has quality failures, no matches"} }; } private void testParsing(final IlluminaDataProviderFactory factory, final ReadStructure readStructure, final ExtractIlluminaBarcodes.BarcodeMetric metricACAGTG, final int barcodePosition) { int numReads = 0; final IlluminaDataProvider dataProvider = factory.makeDataProvider(); while (dataProvider.hasNext()) { final ClusterData cluster = dataProvider.next(); if(metricACAGTG.BARCODE.equals(cluster.getMatchedBarcode())) { ++numReads; } Assert.assertEquals(cluster.getRead(readStructure.templates.getIndices()[0]).getQualities().length, barcodePosition - 1); Assert.assertEquals(cluster.getRead(readStructure.templates.getIndices()[0]).getBases().length, barcodePosition - 1); } Assert.assertEquals(numReads, metricACAGTG.READS); dataProvider.close(); } private MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric, Integer> runIt(final int lane, final String readStructure) throws Exception { final File metricsFile = File.createTempFile("eib.", ".metrics"); metricsFile.deleteOnExit(); final List<String> args = new ArrayList<String>(Arrays.asList( "BASECALLS_DIR=" + basecallsDir.getPath(), "LANE=" + lane, "READ_STRUCTURE=" + readStructure, "METRICS_FILE=" + metricsFile.getPath() )); for (final String barcode : BARCODES) { args.add("BARCODE=" + barcode); } return runIt(args, metricsFile); } private MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric, Integer> runIt(final List<String> args, final File metricsFile) throws Exception { // Generate _barcode.txt files and metrics file. Assert.assertEquals(runPicardCommandLine(args), 0); final MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric,Integer> retval = new MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric,Integer>(); retval.read(new FileReader(metricsFile)); return retval; } }