/* * The MIT License * * Copyright (c) 2016 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*/
package picard.sam.markduplicates;

import htsjdk.samtools.util.Histogram;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import picard.sam.markduplicates.util.OpticalDuplicateFinder;

import java.util.ArrayList;

import static org.testng.Assert.assertTrue;
import static picard.sam.markduplicates.EstimateLibraryComplexity.PairedReadSequence;

/**
 * Exercises {@code fillHistogram} and {@code searchDuplicates} on an
 * {@link ElcIdenticalBasesDuplicatesFinder} built from the constants declared below.
 *
 * Each data-provider row carries the finder, two fresh histograms, the input reads and the
 * expected histogram positions/values that the test methods assert on.
 */
public class ElcIdenticalBasesDuplicatesFinderTest {

    static final double MAX_DIFF_RATE = 0.03;
    static final int MAX_READ_LENGTH = Integer.MAX_VALUE;
    static final int MIN_IDENTICAL_BASES = 5;
    static final boolean USE_BARCODES = false;
    static final OpticalDuplicateFinder OPTICAL_DUPLICATE_FINDER = new OpticalDuplicateFinder();

    private ElcDuplicatesFinder duplicatesFinder = new ElcIdenticalBasesDuplicatesFinder(
            MAX_DIFF_RATE,
            MAX_READ_LENGTH,
            MIN_IDENTICAL_BASES,
            USE_BARCODES,
            OPTICAL_DUPLICATE_FINDER
    );

    /**
     * Rows for {@link #testFillHistogram}: finder, duplication histogram, optical histogram,
     * the paired read under test, its candidate duplicates, the duplication bin expected to
     * hold 1, the optical bin to inspect (0 = skip the optical check) and its expected value.
     */
    @DataProvider(name = "fillHistogramDataProvider")
    public Object[][] fillHistogramDataProvider() {
        // No candidate duplicates: duplication bin 1 is expected to be incremented.
        final Object[] withoutDupes = {
                duplicatesFinder, new Histogram<>(), new Histogram<>(),
                generatePairedReadSequence(false), generatePairedReadSequences(0, false),
                1, 0, 0
        };

        // 10 candidates plus the identical paired read itself: duplication bin 11 is expected.
        final Object[] tenDupes = {
                duplicatesFinder, new Histogram<>(), new Histogram<>(),
                generatePairedReadSequence(false), generatePairedReadSequences(10, false),
                11, 0, 0
        };

        // Same as above, but with location data set on every read: duplication bin 11 is
        // expected, and optical bin 11 is expected to hold 10.
        final Object[] tenOpticalDupes = {
                duplicatesFinder, new Histogram<>(), new Histogram<>(),
                generatePairedReadSequence(true), generatePairedReadSequences(10, true),
                11, 11, 10
        };

        return new Object[][]{withoutDupes, tenDupes, tenOpticalDupes};
    }

    /**
     * Rows for {@link #testSearchDuplicates}: finder, duplication histogram, optical histogram,
     * the reads to search, the duplication bin expected to hold 1, the optical bin to inspect
     * (0 = skip the optical check) and its expected value.
     */
    @DataProvider(name = "searchDuplicatesDataProvider")
    public Object[][] searchDuplicatesDataProvider() {
        // A single read has nothing to pair with: duplication bin 1 is expected.
        final Object[] singleRead = {
                duplicatesFinder, new Histogram<>(), new Histogram<>(),
                generatePairedReadSequences(1, false),
                1, 0, 0
        };

        // 10 identical reads: duplication bin 10 is expected.
        final Object[] tenDupes = {
                duplicatesFinder, new Histogram<>(), new Histogram<>(),
                generatePairedReadSequences(10, false),
                10, 0, 0
        };

        // 10 identical reads with location data: duplication bin 10 is expected, and
        // optical bin 10 is expected to hold 9.
        final Object[] tenOpticalDupes = {
                duplicatesFinder, new Histogram<>(), new Histogram<>(),
                generatePairedReadSequences(10, true),
                10, 10, 9
        };

        // 10 reads of which the last one has altered content, leaving 9 duplicates:
        // duplication bin 9 is expected, and optical bin 9 is expected to hold 8.
        final Object[] nineOpticalDupesOfTen = {
                duplicatesFinder, new Histogram<>(), new Histogram<>(),
                generateSeqsWithNoDup(10, true),
                9, 9, 8
        };

        return new Object[][]{singleRead, tenDupes, tenOpticalDupes, nineOpticalDupesOfTen};
    }

    /**
     * Calls {@code fillHistogram} with the supplied read and candidates, then checks the
     * resulting histograms against the expected bin positions and values.
     */
    @Test(dataProvider = "fillHistogramDataProvider")
    public void testFillHistogram(ElcDuplicatesFinder duplicatesFinder,
                                  Histogram<Integer> duplicationHisto,
                                  Histogram<Integer> opticalHisto,
                                  PairedReadSequence prs,
                                  ArrayList<PairedReadSequence> dupes,
                                  int dupHistoIndex,
                                  int optHistoIndex,
                                  int optHistoValue) throws Exception {
        duplicatesFinder.fillHistogram(duplicationHisto, opticalHisto, prs, dupes);
        assertHistograms(duplicationHisto, opticalHisto, dupHistoIndex, optHistoIndex, optHistoValue);
    }

    /**
     * Calls {@code searchDuplicates} on the supplied reads, then checks the resulting
     * histograms against the expected bin positions and values.
     */
    @Test(dataProvider = "searchDuplicatesDataProvider")
    public void testSearchDuplicates(ElcDuplicatesFinder duplicatesFinder,
                                     Histogram<Integer> duplicationHisto,
                                     Histogram<Integer> opticalHisto,
                                     ArrayList<PairedReadSequence> dupes,
                                     int dupHistoIndex,
                                     int optHistoIndex,
                                     int optHistoValue) throws Exception {
        duplicatesFinder.searchDuplicates(dupes, duplicationHisto, opticalHisto);
        assertHistograms(duplicationHisto, opticalHisto, dupHistoIndex, optHistoIndex, optHistoValue);
    }

    /**
     * Shared verification for both tests: the duplication histogram must hold exactly 1 at
     * {@code dupHistoIndex}; the optical histogram is only inspected when
     * {@code optHistoIndex} is positive.
     */
    private static void assertHistograms(Histogram<Integer> duplicationHisto,
                                         Histogram<Integer> opticalHisto,
                                         int dupHistoIndex,
                                         int optHistoIndex,
                                         int optHistoValue) {
        assertTrue(duplicationHisto.get(dupHistoIndex).getValue() == 1);
        if (optHistoIndex <= 0) {
            // Rows with a non-positive optical index make no claim about the optical histogram.
            return;
        }
        assertTrue(opticalHisto.get(optHistoIndex).getValue() == optHistoValue);
    }

    /**
     * Builds a list of {@code seqsSize} paired reads, each produced by
     * {@link #generatePairedReadSequence(boolean)} with the same flag.
     */
    protected ArrayList<PairedReadSequence> generatePairedReadSequences(int seqsSize, boolean isOpticalDuplicates) {
        final ArrayList<PairedReadSequence> sequences = new ArrayList<>(seqsSize);
        while (sequences.size() < seqsSize) {
            sequences.add(generatePairedReadSequence(isOpticalDuplicates));
        }
        return sequences;
    }

    /**
     * Builds {@code seqsSize} paired reads and then alters both reads of the last element,
     * so that it differs from the others (the data provider expects one fewer duplicate).
     */
    protected ArrayList<PairedReadSequence> generateSeqsWithNoDup(int seqsSize, boolean isOpticalDuplicates) {
        final ArrayList<PairedReadSequence> sequences = generatePairedReadSequences(seqsSize, isOpticalDuplicates);
        final PairedReadSequence last = sequences.get(sequences.size() - 1);
        changeReadContent(last.read1);
        changeReadContent(last.read2);
        return sequences;
    }

    /**
     * Creates a paired read whose two reads are zero-filled 100-byte arrays. When
     * {@code isOpticalDuplicates} is set, read group, tile, x and y are also populated with
     * fixed values, so every read generated this way shares the same location data.
     */
    protected PairedReadSequence generatePairedReadSequence(boolean isOpticalDuplicates) {
        final PairedReadSequence sequence = new PairedReadSequence();
        sequence.read1 = new byte[100];
        sequence.read2 = new byte[100];

        if (!isOpticalDuplicates) {
            return sequence;
        }

        sequence.setReadGroup((short) 1);
        sequence.setTile(Short.MAX_VALUE);
        sequence.setX(127);
        sequence.setY(255);
        return sequence;
    }

    /**
     * Increments every byte from index {@code MIN_IDENTICAL_BASES} through
     * {@code MIN_IDENTICAL_BASES + read.length * MAX_DIFF_RATE} (inclusive) in place.
     */
    private void changeReadContent(byte[] read) {
        final double lastIndex = MIN_IDENTICAL_BASES + read.length * MAX_DIFF_RATE;
        for (int pos = MIN_IDENTICAL_BASES; pos <= lastIndex; pos++) {
            read[pos]++;
        }
    }
}