// Copyright (C) 2011-2012 CRS4. // // This file is part of Seal. // // Seal is free software: you can redistribute it and/or modify it // under the terms of the GNU General Public License as published by the Free // Software Foundation, either version 3 of the License, or (at your option) // any later version. // // Seal is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License // for more details. // // You should have received a copy of the GNU General Public License along // with Seal. If not, see <http://www.gnu.org/licenses/>. package tests.it.crs4.seal.demux; import it.crs4.seal.common.IMRContext; import it.crs4.seal.common.TestContext; import it.crs4.seal.demux.Demux; import it.crs4.seal.demux.DemuxReducer; import it.crs4.seal.common.SequenceId; import org.seqdoop.hadoop_bam.SequencedFragment; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; import java.util.ArrayList; import java.util.Collections; import java.util.Set; import java.util.List; import java.io.StringReader; import java.io.File; import java.io.PrintWriter; import java.io.BufferedWriter; import java.io.FileWriter; import java.io.IOException; import org.junit.*; import static org.junit.Assert.*; public class TestDemuxReducer { private DemuxReducer reducer; private TestContext<Text, SequencedFragment> context; private List<SequenceId> keys; private List<SequencedFragment> fragments; private File tempSampleSheet; private static final String sampleSheet = "\"FCID\",\"Lane\",\"SampleID\",\"SampleRef\",\"Index\",\"Description\",\"Control\",\"Recipe\",\"Operator\"\n" + "\"b0396abxx\",1,\"csct_007083\",\"Human\",\"ATCACGA\",\"Whole-genome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"MANU\"\n" + "\"b0396abxx\",1,\"csct_007084\",\"Human\",\"CGATGTA\",\"Whole-genome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"MANU\"\n" + "\"b0396abxx\",1,\"csct_007085\",\"Human\",\"TTAGGCA\",\"Whole-genome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"MANU\"\n" + "\"b0396abxx\",1,\"csct_007090\",\"Human\",\"TGACCAA\",\"Whole-genome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"MANU\"\n"; private static final String sampleSheetNoIndex = "\"FCID\",\"Lane\",\"SampleID\",\"SampleRef\",\"Index\",\"Description\",\"Control\",\"Recipe\",\"Operator\"\n" + "\"b0396abxx\",1,\"csct_007083\",\"Human\",,\"Whole-genome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"MANU\"\n" + "\"b0396abxx\",2,\"csct_007084\",\"Human\",,\"Whole-genome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"MANU\"\n"; private static final String sampleSheetLongIndex = "\"FCID\",\"Lane\",\"SampleID\",\"SampleRef\",\"Index\",\"Description\",\"Control\",\"Recipe\",\"Operator\"\n" + "\"b0396abxx\",1,\"csct_007083\",\"Human\",\"GCATCACG\",\"Whole-genome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"MANU\"\n"; @Before public void setup() throws IOException { context = new TestContext<Text, SequencedFragment>(); keys = new ArrayList<SequenceId>(); fragments = new ArrayList<SequencedFragment>(); } @After public void tearDown() throws IOException { if (tempSampleSheet != null) tempSampleSheet.delete(); } private void writeSampleSheet(String contents) throws IOException { tempSampleSheet = File.createTempFile("test_sample_sheet", "csv"); PrintWriter out = new PrintWriter( new BufferedWriter( new FileWriter(tempSampleSheet) ) ); out.write(contents); out.close(); } private void setupReducer(String sampleSheet, Configuration conf) throws IOException { reducer = new DemuxReducer(); writeSampleSheet(sampleSheet); reducer.setup(tempSampleSheet.getAbsolutePath(), conf == null ? new Configuration() : conf); } private void setupReadsPairedMultiplexed(int lane) { SequenceId key; SequencedFragment fragment; key = new SequenceId("machine:240:" + lane + ":1111:2222:3333", 1); fragment = new SequencedFragment(); fragment.setSequence(new Text(".CCAGTACAAGCACCATGCTTAACAAAAGACTGTCCAAAATAAACATGCAA")); fragment.setQuality(new Text("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb")); fragment.setInstrument("machine"); fragment.setRunNumber(240); fragment.setLane(lane); fragment.setTile(1111); fragment.setXpos(2222); fragment.setYpos(3333); fragment.setIndexSequence("0"); fragment.setRead(1); fragment.setFilterPassed(true); keys.add(key); fragments.add(fragment); key = new SequenceId("machine:240:" + lane + ":1111:2222:3333", 2); fragment = new SequencedFragment(); fragment.setSequence(new Text("ATCACGA")); fragment.setQuality(new Text("bbb")); fragment.setInstrument("machine"); fragment.setRunNumber(240); fragment.setLane(lane); fragment.setTile(1111); fragment.setXpos(2222); fragment.setYpos(3333); fragment.setIndexSequence("0"); fragment.setRead(2); fragment.setFilterPassed(true); keys.add(key); fragments.add(fragment); key = new SequenceId("machine:240:" + lane + ":1111:2222:3333", 3); fragment = new SequencedFragment(); fragment.setSequence(new Text(".CCAGTACAAGCACCATGCTTAACAAAAGACTGTCCAAAATAAACATGCAA")); fragment.setQuality(new Text("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb")); fragment.setInstrument("machine"); fragment.setRunNumber(240); fragment.setLane(lane); fragment.setTile(1111); fragment.setXpos(2222); fragment.setYpos(3333); fragment.setIndexSequence("0"); fragment.setRead(3); fragment.setFilterPassed(true); keys.add(key); fragments.add(fragment); } @Test public void testReduce() throws IOException, InterruptedException { setupReducer(sampleSheet, null); setupReadsPairedMultiplexed(1); List<SequencedFragment> list = new ArrayList<SequencedFragment>(3); // the values should be in the order 2, 1, 3 list.add(fragments.get(1)); list.add(fragments.get(0)); list.add(fragments.get(2)); reducer.reduce(keys.get(1), list, context); assertEquals(2, context.getCounterValue("Sample reads", "DefaultProject/csct_007083")); assertEquals(2, context.getNumWrites()); Set<Text> keySet = context.getKeys(); assertEquals(1, keySet.size()); Text key = keySet.iterator().next(); assertEquals("DefaultProject/csct_007083", key.toString()); List<SequencedFragment> values = context.getValuesForKey(key); String indexSeq = fragments.get(1).getSequence().toString(); fragments.get(0).setIndexSequence(indexSeq); fragments.get(2).setIndexSequence(indexSeq); assertEquals(fragments.get(0), values.get(0)); assertEquals(fragments.get(2), values.get(1)); } @Test public void testReduceIndexNotSpecifiedL1() throws IOException, InterruptedException { setupReducer(sampleSheetNoIndex, null); setupReadsPairedMultiplexed(1); List<SequencedFragment> list = new ArrayList<SequencedFragment>(3); // the values should be in the order 2, 1, 3 list.add(fragments.get(1)); list.add(fragments.get(0)); list.add(fragments.get(2)); reducer.reduce(keys.get(1), list, context); assertEquals(2, context.getCounterValue("Sample reads", "DefaultProject/csct_007083")); assertEquals(0, context.getCounterValue("Sample reads", "DefaultProject/csct_007084")); assertEquals(2, context.getNumWrites()); } @Test public void testReduceIndexNotSpecifiedL2() throws IOException, InterruptedException { setupReducer(sampleSheetNoIndex, null); setupReadsPairedMultiplexed(2); List<SequencedFragment> list = new ArrayList<SequencedFragment>(3); // the values should be in the order 2, 1, 3 list.add(fragments.get(1)); list.add(fragments.get(0)); list.add(fragments.get(2)); reducer.reduce(keys.get(1), list, context); // sample csct_007084 is in lane 2 assertEquals(0, context.getCounterValue("Sample reads", "DefaultProject/csct_007083")); assertEquals(2, context.getCounterValue("Sample reads", "DefaultProject/csct_007084")); assertEquals(2, context.getNumWrites()); } @Test public void testReduceNotMultiplexed() throws IOException, InterruptedException { Configuration conf = new Configuration(); conf.setBoolean(Demux.CONF_NO_INDEX_READS, true); setupReducer(sampleSheetNoIndex, conf); setupReadsPairedMultiplexed(2); List<SequencedFragment> list = new ArrayList<SequencedFragment>(3); // We'll only insert reads 1 and 2 (real two, not barcode) into the list to be reduced. // They should be identified as sample csct_007084 only by their lane number. SequencedFragment read2 = fragments.get(2); assertEquals("BUG! Got read " + read2.getRead() + " instead of read 3", new Integer(3), read2.getRead()); read2.setRead(2); list.add(fragments.get(0)); list.add(read2); reducer.reduce(keys.get(1), list, context); // sample csct_007084 is in lane 2 assertEquals(0, context.getCounterValue("Sample reads", "DefaultProject/csct_007083")); assertEquals(2, context.getCounterValue("Sample reads", "DefaultProject/csct_007084")); assertEquals(2, context.getNumWrites()); } @Test public void testReduceSingleRead() throws IOException, InterruptedException { setupReducer(sampleSheet, null); setupReadsPairedMultiplexed(1); List<SequencedFragment> list = new ArrayList<SequencedFragment>(3); // We'll only insert the barcode read and read 1. // They should be identified as sample csct_007083 only by their lane number. list.add(fragments.get(1)); list.add(fragments.get(0)); reducer.reduce(keys.get(1), list, context); // sample csct_007084 is in lane 2 assertEquals(1, context.getCounterValue("Sample reads", "DefaultProject/csct_007083")); assertEquals(0, context.getCounterValue("Sample reads", "DefaultProject/csct_007084")); assertEquals(1, context.getNumWrites()); } @Test public void testReduceSingleReadNotMultiplexed() throws IOException, InterruptedException { Configuration conf = new Configuration(); conf.setBoolean(Demux.CONF_NO_INDEX_READS, true); setupReducer(sampleSheetNoIndex, conf); setupReadsPairedMultiplexed(1); List<SequencedFragment> list = new ArrayList<SequencedFragment>(3); // We'll only insert read 1. // They should be identified as sample csct_007083 only by their lane number. list.add(fragments.get(0)); reducer.reduce(keys.get(0), list, context); // sample csct_007084 is in lane 2 assertEquals(1, context.getCounterValue("Sample reads", "DefaultProject/csct_007083")); assertEquals(0, context.getCounterValue("Sample reads", "DefaultProject/csct_007084")); assertEquals(1, context.getNumWrites()); } @Test public void testUnknownBarcode() throws IOException, InterruptedException { setupReducer(sampleSheet, null); setupReadsPairedMultiplexed(1); String barcode = "ATCANN"; fragments.get(1).setSequence(new Text(barcode + "N")); List<SequencedFragment> list = new ArrayList<SequencedFragment>(3); // the values should be in the order 2, 1, 3 list.add(fragments.get(1)); list.add(fragments.get(0)); list.add(fragments.get(2)); reducer.reduce(keys.get(1), list, context); Set<Text> keySet = context.getKeys(); assertEquals(1, keySet.size()); assertEquals("./unknown", keySet.iterator().next().toString()); } @Test public void testSeparateReads() throws IOException, InterruptedException { Configuration conf = new Configuration(); conf.setBoolean(Demux.CONF_SEPARATE_READS, true); setupReducer(sampleSheetNoIndex, conf); setupReadsPairedMultiplexed(1); List<SequencedFragment> list = new ArrayList<SequencedFragment>(3); // the values should be in the order 2, 1, 3 list.add(fragments.get(1)); list.add(fragments.get(0)); list.add(fragments.get(2)); reducer.reduce(keys.get(1), list, context); Set<Text> keySet = context.getKeys(); assertEquals(2, keySet.size()); ArrayList<Text> array = new ArrayList<Text>(keySet.size()); array.addAll(keySet); Collections.sort(array); assertEquals("DefaultProject/csct_007083/1", array.get(0).toString()); assertEquals("DefaultProject/csct_007083/2", array.get(1).toString()); } @Test(expected=RuntimeException.class) public void testReduceIndexTooShort() throws IOException, InterruptedException { setupReducer(sampleSheetLongIndex, null); setupReadsPairedMultiplexed(1); List<SequencedFragment> list = new ArrayList<SequencedFragment>(3); // the values should be in the order 2, 1, 3 SequencedFragment index = fragments.get(1); index.setSequence(new Text("TCACGA")); // length: 5 + A list.add(index); list.add(fragments.get(0)); list.add(fragments.get(2)); reducer.reduce(keys.get(1), list, context); } @Test(expected=RuntimeException.class) public void testReduceIndexTooLong() throws IOException, InterruptedException { setupReducer(sampleSheetLongIndex, null); setupReadsPairedMultiplexed(1); List<SequencedFragment> list = new ArrayList<SequencedFragment>(3); // the values should be in the order 2, 1, 3 SequencedFragment index = fragments.get(1); index.setSequence(new Text("CATCATCATCATCA")); // length: 13 + A list.add(index); list.add(fragments.get(0)); list.add(fragments.get(2)); reducer.reduce(keys.get(1), list, context); } public static void main(String args[]) { org.junit.runner.JUnitCore.main(TestDemuxReducer.class.getName()); } }