package picard.vcf; import htsjdk.samtools.util.CloseableIterator; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContextComparator; import htsjdk.variant.vcf.VCFFileReader; import org.testng.Assert; import org.testng.annotations.Test; import picard.cmdline.CommandLineProgram; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.LinkedList; import java.util.List; import java.util.Queue; public abstract class AbstractVcfMergingClpTester { protected static final String TEST_DATA_PATH = "testdata/picard/vcf/"; protected abstract CommandLineProgram getProgram(); protected void runClp(final List<File> inputs, final File output, final int expectedReturnCode) { runClp(inputs, output, Collections.<String>emptyList(), expectedReturnCode); } protected void runClp(final List<File> inputs, final File output, final List<String> otherArguments, final int expectedReturnCode) { final ArrayList<String> arguments = new ArrayList<String>(); for (final File input : inputs) { arguments.add("INPUT=" + input); } arguments.add("OUTPUT=" + output); for (final String argument : otherArguments) { arguments.add(argument); } Assert.assertEquals(getProgram().instanceMain(arguments.toArray(new String[arguments.size()])), expectedReturnCode); } @Test (expectedExceptions = IllegalArgumentException.class) public void testFailsOnDissimilarContigLists() { final File dissimilarContigs = new File(TEST_DATA_PATH, "CEUTrio-indels-dissimilar-contigs.vcf"); final File snpInputFile = new File(TEST_DATA_PATH, "CEUTrio-snps.vcf"); final File output = new File("/dev/null/blah"); final List<String> indexing = Arrays.asList("CREATE_INDEX=false"); runClp(Arrays.asList(dissimilarContigs, snpInputFile), output, indexing, 0); } @Test (expectedExceptions = IllegalArgumentException.class) public void testFailsOnNoContigList() { final File contiglessIndelFile = new File(TEST_DATA_PATH, "CEUTrio-indels-no-contigs.vcf"); final File snpInputFile = new File(TEST_DATA_PATH, "CEUTrio-snps.vcf"); final File output = new File("/dev/null"); runClp(Arrays.asList(contiglessIndelFile, snpInputFile), output, 1); } @Test (expectedExceptions = IllegalArgumentException.class) public void testFailsOnDissimilarSampleLists() { final File badSampleIndelFile = new File(TEST_DATA_PATH, "CEUTrio-indels-bad-samples.vcf"); final File snpInputFile = new File(TEST_DATA_PATH, "CEUTrio-snps.vcf"); final File output = new File("/dev/null"); runClp(Arrays.asList(badSampleIndelFile, snpInputFile), output, 1); } @Test public void testMergeIndelsSnps() throws IOException { final File indelInputFile = new File(TEST_DATA_PATH, "CEUTrio-indels.vcf"); final File snpInputFile = new File(TEST_DATA_PATH, "CEUTrio-snps.vcf"); final File output = File.createTempFile("merge-indels-snps-test-output.", ".vcf"); final List<String> indexing = Arrays.asList("CREATE_INDEX=false"); output.deleteOnExit(); final Queue<String> indelContigPositions = loadContigPositions(indelInputFile); final Queue<String> snpContigPositions = loadContigPositions(snpInputFile); runClp(Arrays.asList(indelInputFile, snpInputFile), output, indexing, 0); validateSnpAndIndelResults(output, indelContigPositions, snpContigPositions); } /** * Make sure that the order of the output file is identical to the order * of the input files by iterating through the output, making sure that, * if the context is an indel (snp), the next genomic position in the indel * (snp) queue is the same. Also make sure that the context is in the order * specified by the input files. */ private void validateSnpAndIndelResults(final File output, final Queue<String> indelContigPositions, final Queue<String> snpContigPositions) { final VCFFileReader outputReader = new VCFFileReader(output, false); final VariantContextComparator outputComparator = outputReader.getFileHeader().getVCFRecordComparator(); VariantContext last = null; final CloseableIterator<VariantContext> iterator = outputReader.iterator(); while (iterator.hasNext()) { final VariantContext outputContext = iterator.next(); if (outputContext.isIndel()) Assert.assertEquals(getContigPosition(outputContext), indelContigPositions.poll()); if (outputContext.isSNP()) Assert.assertEquals(getContigPosition(outputContext), snpContigPositions.poll()); if (last != null) Assert.assertTrue(outputComparator.compare(last, outputContext) <= 0); last = outputContext; } iterator.close(); // We should have polled everything off the indel (snp) queues Assert.assertEquals(indelContigPositions.size(), 0); Assert.assertEquals(snpContigPositions.size(), 0); } @Test public void testMergeRandomScatter() throws IOException { final File zero = new File(TEST_DATA_PATH, "CEUTrio-random-scatter-0.vcf"); final File one = new File(TEST_DATA_PATH, "CEUTrio-random-scatter-1.vcf"); final File two = new File(TEST_DATA_PATH, "CEUTrio-random-scatter-2.vcf"); final File three = new File(TEST_DATA_PATH, "CEUTrio-random-scatter-3.vcf"); final File four = new File(TEST_DATA_PATH, "CEUTrio-random-scatter-4.vcf"); final File five = new File(TEST_DATA_PATH, "CEUTrio-random-scatter-5.vcf"); final List<File> inputs = Arrays.asList(zero, one, two, three, four, five); final List<Queue<String>> positionQueues = new ArrayList<Queue<String>>(6); positionQueues.add(0, loadContigPositions(zero)); positionQueues.add(1, loadContigPositions(one)); positionQueues.add(2, loadContigPositions(two)); positionQueues.add(3, loadContigPositions(three)); positionQueues.add(4, loadContigPositions(four)); positionQueues.add(5, loadContigPositions(five)); final List<String> indexing = Arrays.asList("CREATE_INDEX=false"); final File output = File.createTempFile("random-scatter-test-output.", ".vcf"); output.deleteOnExit(); runClp(inputs, output, indexing, 0); validateResultsForMultipleInputs(output, positionQueues); } private void validateResultsForMultipleInputs(final File output, final List<Queue<String>> positionQueues) { final VCFFileReader outputReader = new VCFFileReader(output, false); final VariantContextComparator outputComparator = outputReader.getFileHeader().getVCFRecordComparator(); VariantContext last = null; final CloseableIterator<VariantContext> iterator = outputReader.iterator(); while (iterator.hasNext()) { final VariantContext outputContext = iterator.next(); final String position = getContigPosition(outputContext); for (final Queue<String> positionQueue : positionQueues) { if (position.equals(positionQueue.peek())) { positionQueue.poll(); break; } } if (last != null) Assert.assertTrue(outputComparator.compare(last, outputContext) <= 0); last = outputContext; } iterator.close(); for (final Queue<String> positionQueue : positionQueues) { Assert.assertEquals(positionQueue.size(), 0); } } static Queue<String> loadContigPositions(final File inputFile) { final VCFFileReader reader = new VCFFileReader(inputFile, false); final Queue<String> contigPositions = new LinkedList<String>(); final CloseableIterator<VariantContext> iterator = reader.iterator(); while (iterator.hasNext()) contigPositions.add(getContigPosition(iterator.next())); iterator.close(); reader.close(); return contigPositions; } static String getContigPosition(final VariantContext context) { return context.getChr() + "-" + Integer.toString(context.getStart()); } }