package picard.sam;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordSetBuilder;
import htsjdk.samtools.SAMTextHeaderCodec;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.util.BufferedLineReader;
import htsjdk.samtools.util.IOUtil;
import org.testng.Assert;
import org.testng.annotations.AfterTest;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import picard.PicardException;
import picard.cmdline.CommandLineProgramTest;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import static org.testng.Assert.assertEquals;
public class PositionBasedDownsampleSamTest extends CommandLineProgramTest {
final static String sample = "TestSample";
final static String readGroupId = "TestReadGroup";
final static String platform = "ILLUMINA";
final static String library = "TestLibrary";
final static File TEST_DIR = new File("testdata/picard/sam/PositionalDownsampleSam/");
final File dict = new File(TEST_DIR, "header.dict");
File tempDir;
File tempSamFile;
SAMRecordSetBuilder setBuilder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate);
SAMReadGroupRecord readGroupRecord = new SAMReadGroupRecord(readGroupId);
@Override
public String getCommandLineProgramName() {
return PositionBasedDownsampleSam.class.getSimpleName();
}
//create a samfile with one tile and randomly placed reads within it.
@BeforeTest
void setupBuilder() throws IOException {
final int numReads = 10000;
final String flowCellBarcode = "TESTBARCODE";
final int maxX = 10000;
final int maxY = 20000;
final int minX = 1000;
final int minY = 2000;
final String separator = ":";
final int lane = 1;
final int tile = 2203;
//use fixed random seed to eliminate possibility of intermittent failures
final Random rg = new Random(31);
setBuilder.setReadGroup(readGroupRecord);
setBuilder.setUseNmFlag(true);
for (int i = 0; i < numReads; i++) {
final int x = rg.nextInt(maxX) + minX;
final int y = rg.nextInt(maxY) + minY;
final String readName = flowCellBarcode + separator + lane + separator + tile + separator + x + separator + y;
setBuilder.addPair(readName, 1, 1, 100);
}
tempDir = IOUtil.createTempDir("pds_test", "PositionalDownsampling");
tempSamFile = File.createTempFile("PositionalDownsampleSam", ".bam", tempDir);
BufferedLineReader bufferedLineReader = null;
try {
bufferedLineReader = new BufferedLineReader(new FileInputStream(dict));
} catch (FileNotFoundException e) {
e.printStackTrace();
}
final SAMTextHeaderCodec codec = new SAMTextHeaderCodec();
final SAMFileHeader header = codec.decode(bufferedLineReader, dict.toString());
readGroupRecord.setSample(sample);
readGroupRecord.setPlatform(platform);
readGroupRecord.setLibrary(library);
header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
header.addReadGroup(readGroupRecord);
setBuilder.setHeader(header);
final SAMFileWriter writer = new SAMFileWriterFactory()
.setCreateIndex(true).makeBAMWriter(header, false, tempSamFile);
for (final SAMRecord record : setBuilder) {
writer.addAlignment(record);
}
writer.close();
}
@AfterTest
private void tearDown() {
IOUtil.deleteDirectoryTree(tempDir);
}
@Test //this is actually a test of SAMRecordSetBuilder
public void TestBuilder() {
final ValidateSamFile validateSamFile = new ValidateSamFile();
validateSamFile.INPUT = tempSamFile;
assertEquals(validateSamFile.doWork(), 0);
}
@DataProvider(name = "ValidArgumentsTestProvider")
public Object[][] ValidArgumentsTestProvider() {
final List<Object[]> objects = new ArrayList<Object[]>();
for (double i = 0.3; i <= 1; i += .1) {
final Object[] array = {i};
objects.add(array);
}
return objects.toArray(new Object[1][]);
}
// test removing some reads from a sparse, single tile bam
@Test(dataProvider = "ValidArgumentsTestProvider")
public void testDownsampleSingleTile(final double fraction) throws IOException {
testDownsampleWorker(tempSamFile, fraction);
}
public void testDownsampleWorker(final File samFile, final double fraction) throws IOException {
final File downsampled = File.createTempFile("PositionalDownsampleSam", ".bam", tempDir);
final String[] args = new String[]{
"INPUT=" + samFile.getAbsolutePath(),
"OUTPUT=" + downsampled.getAbsolutePath(),
"FRACTION=" + fraction,
"CREATE_INDEX=true"
};
// make sure results is successful
assertEquals(runPicardCommandLine(args), 0);
// make sure that the resulting BAM is valid.
final ValidateSamFile validateSamFile = new ValidateSamFile();
validateSamFile.INPUT = downsampled;
assertEquals(validateSamFile.doWork(), 0);
//make sure that the total number of record in the resulting file in in the ballpark:
assertGreaterThan(countSamTotalRecord(downsampled), fraction * .8 * countSamTotalRecord(samFile));
assertLessThan(countSamTotalRecord(downsampled), fraction * 1.2 * countSamTotalRecord(samFile));
}
private long countSamTotalRecord(final File samFile) {
final SamReader reader = SamReaderFactory.make().open(samFile);
assert reader.hasIndex();
long total = 0;
for (int i = 0; i < reader.getFileHeader().getSequenceDictionary().size(); i++) {
total += reader.indexing().getIndex().getMetaData(i).getAlignedRecordCount();
total += reader.indexing().getIndex().getMetaData(i).getUnalignedRecordCount();
}
return total;
}
@DataProvider(name="allowTwiceData")
public Object[][] allowTwiceData(){
return new Object[][]{{true},{false}};
}
// test that downsampling twice yields an error.
@Test(dataProvider = "allowTwiceData")
public void TestInvalidTwice(final boolean allowMultiple) throws IOException {
final File samFile = tempSamFile;
final File downsampled = File.createTempFile("PositionalDownsampleSam", ".bam", tempDir);
downsampled.deleteOnExit();
final double fraction = .1;
downsampled.deleteOnExit();
final String[] args1 = new String[]{
"INPUT=" + samFile.getAbsolutePath(),
"OUTPUT=" + downsampled.getAbsolutePath(),
"FRACTION=" + fraction
};
runPicardCommandLine(args1);
final File downsampledTwice = File.createTempFile("PositionalDownsampleSam", ".bam", tempDir);
downsampledTwice.deleteOnExit();
final List<String> args2 = new ArrayList<String>();
args2.add("INPUT=" + downsampled.getAbsolutePath());
args2.add("OUTPUT=" + downsampledTwice.getAbsolutePath());
args2.add("FRACTION=" + fraction);
if(allowMultiple) args2.add("ALLOW_MULTIPLE_DOWNSAMPLING_DESPITE_WARNINGS=true");
//should blow up due to bad inputs
if(allowMultiple)
runPicardCommandLine(args2);
else
try {
runPicardCommandLine(args2);
throw new PicardException("Should have thrown an error!!");
} catch (final PicardException ignored){
}
}
// test that program fails on p<0 or p>1
@DataProvider(name = "InvalidArgumentsTestProvider")
public Object[][] InvalidArgumentsTestProvider() {
return new Object[][]{{-1.0}, {-.00001}, {-5.0}, {1.00001}, {5.0}, {50.0}, {Double.MAX_VALUE}, {Double.POSITIVE_INFINITY}, {Double.NEGATIVE_INFINITY}};
}
@Test(dataProvider = "InvalidArgumentsTestProvider")
public void TestInvalidArguments(final double fraction) throws IOException {
final File samFile = tempSamFile;
final File downsampled = File.createTempFile("PositionalDownsampleSam", ".bam", tempDir);
downsampled.deleteOnExit();
final String[] args = new String[]{
"INPUT=" + samFile.getAbsolutePath(),
"OUTPUT=" + downsampled.getAbsolutePath(),
"FRACTION=" + fraction
};
//should blow up due to bad inputs
assert runPicardCommandLine(args) != 0;
}
// these fit in a TestNG.Utils class, but there isn't one in picard-public...
static public void assertGreaterThan(final double lhs, final double rhs) {
Assert.assertTrue(lhs > rhs, String.format("Expected inequality is not true: %g > %g", lhs, rhs));
}
static public void assertLessThan(final double lhs, final double rhs) {
Assert.assertTrue(lhs < rhs, String.format("Expected inequality is not true: %g < %g", lhs, rhs));
}
}