/**
 * The MIT License
 *
 * Copyright (c) 2009 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 **/
package htsjdk.samtools;

import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.samtools.util.StringUtil;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import static org.testng.Assert.assertEquals;

/**
 * @author aaron
 * @version 1.0
 * @date May 20, 2009
 * <p/>
 * Class SamFileHeaderMergerTest
 * <p/>
 * Tests the ability of the SamFileHeaderMerger class to merge sequence dictionaries.
 */
public class SamFileHeaderMergerTest {

    private static final File TEST_DATA_DIR = new File("testdata/htsjdk/samtools");

    private static final String sq1 = "@SQ\tSN:chr1\tLN:1000\n";
    private static final String sq2 = "@SQ\tSN:chr2\tLN:1000\n";
    private static final String sq3 = "@SQ\tSN:chr3\tLN:1000\n";
    private static final String sq4 = "@SQ\tSN:chr4\tLN:1000\n";
    private static final String sq5 = "@SQ\tSN:chr5\tLN:1000\n";

    /**
     * Tests that, with dictionary merging set to false, constructing a merger for BAMs with
     * different sequence dictionaries throws a SequenceListsDifferException.
     */
    @Test(expectedExceptions = SequenceUtil.SequenceListsDifferException.class)
    public void testMergedException() {
        final File[] inputs = {
                new File(TEST_DATA_DIR, "SamFileHeaderMergerTest/Chromosome1to10.bam"),
                new File(TEST_DATA_DIR, "SamFileHeaderMergerTest/Chromosome5to9.bam")
        };
        final List<SAMFileReader> readers = new ArrayList<SAMFileReader>();
        final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>();
        try {
            for (final File inFile : inputs) {
                IOUtil.assertFileIsReadable(inFile);
                final SAMFileReader in = new SAMFileReader(inFile);
                readers.add(in);
                headers.add(in.getFileHeader());
            }
            new SamFileHeaderMerger(SAMFileHeader.SortOrder.unsorted, headers, false);
        } finally {
            closeAll(readers);
        }
    }

    /**
     * Tests that we can successfully merge two files with different sequence dictionaries
     * when dictionary merging is enabled, and that every read ends up on the expected sequence.
     */
    @Test
    public void testMerging() {
        final File[] inputs = {
                new File(TEST_DATA_DIR, "SamFileHeaderMergerTest/Chromosome1to10.bam"),
                new File(TEST_DATA_DIR, "SamFileHeaderMergerTest/Chromosome5to9.bam")
        };
        final List<SAMFileReader> readers = new ArrayList<SAMFileReader>();
        final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>();
        try {
            for (final File inFile : inputs) {
                IOUtil.assertFileIsReadable(inFile);
                final SAMFileReader in = new SAMFileReader(inFile);
                // We are now checking for zero-length reads, so suppress complaint about that.
                in.setValidationStringency(ValidationStringency.SILENT);
                readers.add(in);
                headers.add(in.getFileHeader());
            }

            final SamFileHeaderMerger headerMerger =
                    new SamFileHeaderMerger(SAMFileHeader.SortOrder.unsorted, headers, true);
            final MergingSamRecordIterator iterator =
                    new MergingSamRecordIterator(headerMerger, readers, false);
            // The return value is not needed here; the call is retained from the original test.
            headerMerger.getMergedHeader();

            // count the total reads, and record read counts for each sequence
            final Map<Integer, Integer> seqCounts = new HashMap<Integer, Integer>();
            int totalCount = 0;
            while (iterator.hasNext()) {
                final Integer referenceIndex = iterator.next().getReferenceIndex();
                final Integer seenSoFar = seqCounts.get(referenceIndex);
                seqCounts.put(referenceIndex, seenSoFar == null ? 1 : seenSoFar + 1);
                ++totalCount;
            }

            assertEquals(totalCount, 1500);
            for (final Map.Entry<Integer, Integer> entry : seqCounts.entrySet()) {
                final int index = entry.getKey();
                // sequences 5 - 9 (indices 4 - 8) should have 200 reads; the others should have 100
                final int expectedReads = (index >= 4 && index <= 8) ? 200 : 100;
                assertEquals(entry.getValue().intValue(), expectedReads);
            }
        } finally {
            closeAll(readers);
        }
    }

    /**
     * Tests that two dictionaries sharing a sequence (chr2) can be merged, and that the merged
     * dictionary contains every input sequence while preserving each input's relative order.
     */
    @Test
    public void testSequenceDictionaryMerge() {
        final String sd1 = sq1 + sq2 + sq5;
        final String sd2 = sq2 + sq3 + sq4;
        final List<SAMFileHeader> inputHeaders = Arrays.asList(headerFromText(sd1), headerFromText(sd2));
        final SamFileHeaderMerger merger =
                new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, inputHeaders, true);
        final SAMFileHeader mergedHeader = merger.getMergedHeader();

        for (final SAMFileHeader inputHeader : inputHeaders) {
            int prevTargetIndex = -1;
            for (final SAMSequenceRecord sequenceRecord : inputHeader.getSequenceDictionary().getSequences()) {
                final String sequenceName = sequenceRecord.getSequenceName();
                final int targetIndex = mergedHeader.getSequenceIndex(sequenceName);
                // Compare by value: assertNotSame would compare the identity of boxed Integers.
                Assert.assertTrue(targetIndex != -1, "Sequence missing from merged header: " + sequenceName);
                Assert.assertTrue(prevTargetIndex < targetIndex,
                        "Sequence out of order in merged header: " + sequenceName);
                prevTargetIndex = targetIndex;
            }
        }
    }

    /**
     * Merges the given SAM files, writes the merged result as SAM text and compares it line by
     * line with the expected output file. Header lines must contain the same tab-separated
     * fields (in any order); record lines must contain the same set of tab-separated fields.
     *
     * @param inputFiles         the SAM files to merge
     * @param expectedOutputFile a SAM file holding the expected merged output
     * @throws IOException if the expected output file cannot be read
     */
    @Test(dataProvider = "data")
    public void testProgramGroupAndReadGroupMerge(final File[] inputFiles, final File expectedOutputFile)
            throws IOException {
        final String expectedOutput = readAllLines(expectedOutputFile);

        final List<SAMFileReader> readers = new ArrayList<SAMFileReader>();
        final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>();
        final String actualOutput;
        try {
            for (final File inFile : inputFiles) {
                IOUtil.assertFileIsReadable(inFile);
                final SAMFileReader in = new SAMFileReader(inFile);
                // We are now checking for zero-length reads, so suppress complaint about that.
                in.setValidationStringency(ValidationStringency.SILENT);
                readers.add(in);
                headers.add(in.getFileHeader());
            }

            final SamFileHeaderMerger headerMerger =
                    new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, headers, true);
            final MergingSamRecordIterator iterator =
                    new MergingSamRecordIterator(headerMerger, readers, false);

            final ByteArrayOutputStream baos = new ByteArrayOutputStream();
            final SAMFileWriter writer =
                    new SAMFileWriterFactory().makeSAMWriter(headerMerger.getMergedHeader(), true, baos);
            while (iterator.hasNext()) {
                writer.addAlignment(iterator.next());
            }
            writer.close();
            actualOutput = StringUtil.bytesToString(baos.toByteArray());
        } finally {
            closeAll(readers);
        }

        final String[] actual = actualOutput.split("\\n");
        final String[] expected = expectedOutput.split("\\n");
        // Without this check, extra output lines would go unnoticed and missing ones would
        // surface as an IndexOutOfBoundsException instead of an assertion failure.
        Assert.assertEquals(actual.length, expected.length, "Merged output has an unexpected number of lines");

        for (int i = 0; i < expected.length; i++) {
            final String context = "line " + (i + 1) + ": expected [" + expected[i] + "] but was [" + actual[i] + "]";
            if (expected[i].startsWith("@")) {
                Assert.assertTrue(headersEquivalent(actual[i], expected[i]), "Header mismatch at " + context);
            } else {
                // Split on tabs. The previous pattern "\\s*" also matches the empty string, which
                // broke every line into single characters and made this comparison nearly vacuous.
                final List<String> expectedSamParts = Arrays.asList(expected[i].split("\\t"));
                final List<String> actualSamParts = Arrays.asList(actual[i].split("\\t"));
                for (final String exp : expectedSamParts) {
                    Assert.assertTrue(actualSamParts.contains(exp), "Missing field [" + exp + "] at " + context);
                }
                for (final String act : actualSamParts) {
                    Assert.assertTrue(expectedSamParts.contains(act), "Unexpected field [" + act + "] at " + context);
                }
            }
        }
    }

    /**
     * Reports whether two header lines have the same length and consist of the same
     * tab-separated fields, ignoring field order.
     *
     * @param a first header line
     * @param b second header line
     * @return true if every field of {@code b} can be matched one-to-one with a field of {@code a}
     */
    private static boolean headersEquivalent(final String a, final String b) {
        if (a.length() != b.length()) {
            return false;
        }
        final List<String> remaining = new LinkedList<String>(Arrays.asList(a.split("\\t")));
        for (final String item : b.split("\\t")) {
            if (!remaining.remove(item)) {
                return false;
            }
        }
        return remaining.isEmpty();
    }

    /** Supplies (input SAM files, expected merged SAM file) pairs for the "data" provider. */
    @DataProvider(name = "data")
    private Object[][] getProgramGroupAndReadGroupMergeData() {
        return new Object[][] {
                {
                        new File[] {
                                new File(TEST_DATA_DIR, "SamFileHeaderMergerTest/case1/chr11sub_file1.sam"),
                                new File(TEST_DATA_DIR, "SamFileHeaderMergerTest/case1/chr11sub_file2.sam")
                        },
                        new File(TEST_DATA_DIR, "SamFileHeaderMergerTest/case1/expected_output.sam")
                },
                {
                        new File[] {
                                new File(TEST_DATA_DIR, "SamFileHeaderMergerTest/case2/chr11sub_file1.sam"),
                                new File(TEST_DATA_DIR, "SamFileHeaderMergerTest/case2/chr11sub_file2.sam"),
                                new File(TEST_DATA_DIR, "SamFileHeaderMergerTest/case2/chr11sub_file3.sam"),
                                new File(TEST_DATA_DIR, "SamFileHeaderMergerTest/case2/chr11sub_file4.sam")
                        },
                        new File(TEST_DATA_DIR, "SamFileHeaderMergerTest/case2/expected_output.sam")
                }
        };
    }

    /**
     * Tests that merging fails with a SAMException when the inputs order shared sequences
     * differently: the first dictionary lists chr1 before chr2, the second lists chr2 before chr1.
     */
    @Test(expectedExceptions = {SAMException.class})
    public void testUnmergeableSequenceDictionary() {
        final String sd1 = sq1 + sq2 + sq5;
        final String sd2 = sq2 + sq3 + sq4 + sq1;
        final List<SAMFileHeader> inputHeaders = Arrays.asList(headerFromText(sd1), headerFromText(sd2));
        new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, inputHeaders, true);
    }

    /** Supplies (integer, expected base-36 string) pairs, up to the largest four-digit value. */
    @DataProvider(name = "fourDigitBase36StrPositiveData")
    public Object[][] positiveFourDigitBase36StrData() {
        return new Object[][] {
                {0, "0"},
                {15, "F"},
                {36, "10"},
                {1200000, "PPXC"},
                {36 * 36 * 36 * 36 - 2, "ZZZY"},
                {36 * 36 * 36 * 36 - 1, "ZZZZ"},
        };
    }

    /**
     * Tests that positiveFourDigitBase36Str renders the given integer as the expected string.
     *
     * @param toConvert     the value to convert
     * @param expectedValue the expected base-36 representation
     */
    @Test(dataProvider = "fourDigitBase36StrPositiveData")
    public void fourDigitBase36StrPositiveTest(final int toConvert, final String expectedValue) {
        final SamFileHeaderMerger headerMerger =
                new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, new ArrayList<SAMFileHeader>(), true);
        // TestNG's argument order is (actual, expected).
        Assert.assertEquals(headerMerger.positiveFourDigitBase36Str(toConvert), expectedValue);
    }

    /** Parses the given SAM header text from memory and returns the resulting file header. */
    private static SAMFileHeader headerFromText(final String samText) {
        final SAMFileReader reader = new SAMFileReader(new ByteArrayInputStream(StringUtil.stringToBytes(samText)));
        return reader.getFileHeader();
    }

    /** Reads the whole file, returning its lines each terminated by a single "\n". */
    private static String readAllLines(final File file) throws IOException {
        final StringBuilder contents = new StringBuilder();
        final BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                contents.append(line).append("\n");
            }
        } finally {
            reader.close();
        }
        return contents.toString();
    }

    /** Closes every reader in the list so the tests do not leak file handles. */
    private static void closeAll(final List<SAMFileReader> readers) {
        for (final SAMFileReader reader : readers) {
            reader.close();
        }
    }
}