// Copyright (C) 2011-2012 CRS4. // // This file is part of Seal. // // Seal is free software: you can redistribute it and/or modify it // under the terms of the GNU General Public License as published by the Free // Software Foundation, either version 3 of the License, or (at your option) // any later version. // // Seal is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License // for more details. // // You should have received a copy of the GNU General Public License along // with Seal. If not, see <http://www.gnu.org/licenses/>. package tests.it.crs4.seal.demux; import it.crs4.seal.demux.SampleSheet; import java.io.StringReader; import java.util.Collection; import java.util.Iterator; import java.util.NoSuchElementException; import java.util.HashMap; import java.util.HashSet; import java.util.Set; import org.junit.*; import static org.junit.Assert.*; public class TestSampleSheet { private SampleSheet sheet; private StringReader sampleReader; private String sampleSheet = "\"FCID\",\"Lane\",\"SampleID\",\"SampleRef\",\"Index\",\"Description\",\"Control\",\"Recipe\",\"Operator\"\n" + "\"81DJ0ABXX\",1,\"snia_000268\",\"Human\",\"ATCACG\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n" + "\"81DJ0ABXX\",1,\"snia_000269\",\"Human\",\"CGATGT\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n" + "\"81DJ0ABXX\",2,\"snia_001611\",\"Human\",\"TTAGGC\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n" + "\"81DJ0ABXX\",2,\"snia_025487\",\"Human\",\"TGACCA\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n" + "\"81DJ0ABXX\",3,\"snia_041910\",\"Human\",\"ACAGTG\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n" + "\"81DJ0ABXX\",3,\"snia_001612\",\"Human\",\"GCCAAT\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\""; private String smallSampleSheet = "\"FCID\",\"Lane\",\"SampleID\",\"SampleRef\",\"Index\",\"Description\",\"Control\",\"Recipe\",\"Operator\"\n" + "\"81DJ0ABXX\",1,\"snia_000268\",\"Human\",\"ATCACG\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n" + "\"81DJ0ABXX\",1,\"snia_000269\",\"Human\",\"CGATGT\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n" + "\"81DJ0ABXX\",2,\"snia_000268\",\"Human\",\"TTAGGC\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n" + "\"81DJ0ABXX\",2,\"snia_025487\",\"Human\",\"TGACCA\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n" ; private String sampleSheetWithProject = "\"FCID\",\"Lane\",\"SampleID\",\"SampleRef\",\"Index\",\"Description\",\"Control\",\"Recipe\",\"Operator\",\"SampleProject\"\n" + "\"81DJ0ABXX\",1,\"snia_000268\",\"Human\",\"ATCACG\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\",\"Proj1\"\n" + "\"81DJ0ABXX\",1,\"snia_000269\",\"Human\",\"CGATGT\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\",\"Proj1\"\n" + "\"81DJ0ABXX\",2,\"snia_025487\",\"Human\",\"TGACCA\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\",\"Proj2\"\n" ; private String dupSampleSheet = "\"FCID\",\"Lane\",\"SampleID\",\"SampleRef\",\"Index\",\"Description\",\"Control\",\"Recipe\",\"Operator\"\n" + "\"81DJ0ABXX\",1,\"snia_000268\",\"Human\",\"ATCACG\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n" + "\"81DJ0ABXX\",1,\"snia_000269\",\"Human\",\"ATCACG\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\""; private String badLaneSheet = "\"FCID\",\"Lane\",\"SampleID\",\"SampleRef\",\"Index\",\"Description\",\"Control\",\"Recipe\",\"Operator\"\n" + "\"81DJ0ABXX\",0,\"snia_000269\",\"Human\",\"ATCACG\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\""; private String invalidIndexLength = "\"FCID\",\"Lane\",\"SampleID\",\"SampleRef\",\"Index\",\"Description\",\"Control\",\"Recipe\",\"Operator\"\n" + "\"81DJ0ABXX\",0,\"snia_000269\",\"Human\",\"ATCACGG\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\""; private String oneEntrySheet = "\"FCID\",\"Lane\",\"SampleID\",\"SampleRef\",\"Index\",\"Description\",\"Control\",\"Recipe\",\"Operator\"\n" + "\"81DJ0ABXX\",1,\"snia_000268\",\"Human\",\"ATCACG\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\""; private String reorderedOneEntrySheet = "Lane,FCID,SampleID,Index,SampleRef,Description,Control,Recipe,Operator\n" + "1,81DJ0ABXX,snia_000268,ATCACG,Human,Whole-Transcriptome Sequencing Project,N,tru-seq multiplex,ROBERTO"; private String withoutQuotes = "FCID,Lane,SampleID,SampleRef,Index,Description,Control,Recipe,Operator\n" + "81DJ0ABXX,1,snia_000268,Human,ATCACG,Whole-Transcriptome Sequencing Project,N,tru-seq multiplex,ROBERTO"; private String extraneousWhitespace = "FCID,Lane,SampleID,SampleRef,Index,Description,Control,Recipe,Operator\n" + "81DJ0ABXX,1,snia_000268,Human,ATCACG ,Whole-Transcriptome Sequencing Project,N,tru-seq multiplex,ROBERTO"; private String blankIndexSampleSheet = "FCID,Lane,SampleID,SampleRef,Index,Description,Control,Recipe,Operator\n" + "81DJ0ABXX,1,snia_000268,Human,,Whole-Transcriptome Sequencing Project,N,tru-seq multiplex,ROBERTO"; private String missingLaneColumn = "\"FCID\",\"SampleID\",\"SampleRef\",\"Index\",\"Description\",\"Control\",\"Recipe\",\"Operator\"\n" + "\"81DJ0ABXX\",\"snia_000269\",\"Human\",\"ATCACG\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\""; private String missingSampleIdColumn = "FCID,Lane,SampleRef,Index,Description,Control,Recipe,Operator\n" + "81DJ0ABXX,1,Human,ATCACG,Whole-Transcriptome Sequencing Project,N,tru-seq multiplex,ROBERTO"; private String missingIndexColumn = "FCID,Lane,SampleID,SampleRef,Description,Control,Recipe,Operator\n" + "81DJ0ABXX,1,snia_000269,Human,Whole-Transcriptome Sequencing Project,N,tru-seq multiplex,ROBERTO"; private String missingFcidColumn = "Lane,SampleID,SampleRef,Index,Description,Control,Recipe,Operator\n" + "1,snia_000269,Human,ATCACG,Whole-Transcriptome Sequencing Project,N,tru-seq multiplex,ROBERTO"; private String missingRecipeColumn = "FCID,Lane,SampleID,SampleRef,Index,Description,Control,Operator\n" + "81DJ0ABXX,1,snia_000269,Human,ATCACG,Whole-Transcriptome Sequencing Project,N,ROBERTO"; private String eightBaseIndex = "FCID,Lane,SampleID,SampleRef,Index,Description,Control,Operator\n" + "81DJ0ABXX,1,snia_000269,Human,ATCACGTC,Whole-Transcriptome Sequencing Project,N,ROBERTO"; private String thirteenBaseIndex = "FCID,Lane,SampleID,SampleRef,Index,Description,Control,Operator\n" + "81DJ0ABXX,1,snia_000269,Human,ATCACGTCAGATA,Whole-Transcriptome Sequencing Project,N,ROBERTO"; @Before public void setup() { sheet = new SampleSheet(); sampleReader = new StringReader(sampleSheet); } @Test public void testDontCrashOnEmpty() { assertTrue(sheet.getSamples().isEmpty()); } @Test(expected=SampleSheet.FormatException.class) public void testDetectDuplicateIndex() throws java.io.IOException, SampleSheet.FormatException { sheet.loadTable(new StringReader(dupSampleSheet)); } @Test(expected=SampleSheet.FormatException.class) public void testInvalidLaneNo() throws java.io.IOException, SampleSheet.FormatException { sheet.loadTable(new StringReader(badLaneSheet)); } @Test(expected=SampleSheet.FormatException.class) public void testInvalidIndexLength() throws java.io.IOException, SampleSheet.FormatException { sheet.loadTable(new StringReader(invalidIndexLength)); } @Test public void testEightBaseIndex() throws java.io.IOException, SampleSheet.FormatException { sheet.loadTable(new StringReader(eightBaseIndex)); assertFalse(sheet.isEmpty()); } @Test(expected=SampleSheet.FormatException.class) public void testThirteenBaseIndex() throws java.io.IOException, SampleSheet.FormatException { sheet.loadTable(new StringReader(thirteenBaseIndex)); assertFalse(sheet.isEmpty()); } @Test public void testIsEmpty() throws java.io.IOException, SampleSheet.FormatException { assertTrue(sheet.isEmpty()); sheet.loadTable(sampleReader); assertFalse(sheet.isEmpty()); } @Test public void testNSamples() throws java.io.IOException, SampleSheet.FormatException { assertEquals(0, sheet.size()); sheet.loadTable(sampleReader); assertEquals(6, sheet.size()); } @Test public void testGetSamples() throws java.io.IOException, SampleSheet.FormatException { sheet.loadTable(new StringReader(smallSampleSheet)); Collection<String> samples = sheet.getSamples(); assertEquals(3, samples.size()); for (String s: new String[]{"snia_000268", "snia_000269", "snia_025487" }) assertTrue("Sample " + s + " is missing", samples.contains(s)); } @Test public void testGetSamplesInLane() throws java.io.IOException, SampleSheet.FormatException { sheet.loadTable(new StringReader(smallSampleSheet)); Set<String> samples = sheet.getSamplesInLane(1); assertEquals(2, samples.size()); for (String s: new String[]{"snia_000268", "snia_000269" }) assertTrue("Sample " + s + " is missing", samples.contains(s)); samples = sheet.getSamplesInLane(2); assertEquals(2, samples.size()); for (String s: new String[]{"snia_000268", "snia_025487" }) assertTrue("Sample " + s + " is missing", samples.contains(s)); samples = sheet.getSamplesInLane(3); assertEquals(0, samples.size()); } @Test(expected=IllegalArgumentException.class) public void testGetSamplesInLaneInvalidLane() throws java.io.IOException, SampleSheet.FormatException { sheet.loadTable(new StringReader(smallSampleSheet)); Set<String> samples = sheet.getSamplesInLane(0); } @Test public void testSimpleIterator() throws java.io.IOException, SampleSheet.FormatException { sheet.loadTable(new StringReader(oneEntrySheet)); Iterator<SampleSheet.Entry> it = sheet.iterator(); assertTrue(it.hasNext()); SampleSheet.Entry e = it.next(); assertNotNull(e); assertOneEntrySheet(e); assertFalse(it.hasNext()); } @Test(expected=NoSuchElementException.class) public void testIteratorNextOverEnd() throws java.io.IOException, SampleSheet.FormatException { sheet.loadTable(new StringReader(oneEntrySheet)); Iterator<SampleSheet.Entry> it = sheet.iterator(); it.next(); // good it.next(); // should raise } @Test public void testMultiLaneIteration() throws java.io.IOException, SampleSheet.FormatException { HashSet<String> returnedSamples = new HashSet<String>(); sheet.loadTable(sampleReader); for (SampleSheet.Entry e: sheet) returnedSamples.add(e.getSampleId()); // expect to get all 6 samples assertEquals(6, returnedSamples.size()); } @Test(expected=UnsupportedOperationException.class) public void testUnsupportedIteratorRemove() throws java.io.IOException, SampleSheet.FormatException { sheet.loadTable(sampleReader); Iterator<SampleSheet.Entry> it = sheet.iterator(); it.next(); it.remove(); } @Test public void testWithoutQuotes() throws java.io.IOException, SampleSheet.FormatException { sheet.loadTable(new StringReader(withoutQuotes)); Iterator<SampleSheet.Entry> it = sheet.iterator(); assertTrue(it.hasNext()); SampleSheet.Entry e = it.next(); assertNotNull(e); assertEquals("81DJ0ABXX", e.getFlowcellId()); assertEquals(1, e.getLane()); assertEquals("snia_000268", e.getSampleId()); assertEquals("Human", e.getSampleRef()); assertEquals("ATCACG", e.getIndex()); assertEquals("Whole-Transcriptome Sequencing Project", e.getDescription()); assertEquals("N", e.getControl()); assertEquals("tru-seq multiplex", e.getRecipe()); assertEquals("ROBERTO", e.getOperator()); assertFalse(it.hasNext()); } @Test public void testExtraneousWhitespace() throws java.io.IOException, SampleSheet.FormatException { sheet.loadTable(new StringReader(extraneousWhitespace)); Iterator<SampleSheet.Entry> it = sheet.iterator(); assertTrue(it.hasNext()); SampleSheet.Entry e = it.next(); assertNotNull(e); assertEquals("81DJ0ABXX", e.getFlowcellId()); assertEquals(1, e.getLane()); assertEquals("snia_000268", e.getSampleId()); assertEquals("Human", e.getSampleRef()); assertEquals("ATCACG", e.getIndex()); assertEquals("Whole-Transcriptome Sequencing Project", e.getDescription()); assertEquals("N", e.getControl()); assertEquals("tru-seq multiplex", e.getRecipe()); assertEquals("ROBERTO", e.getOperator()); assertFalse(it.hasNext()); } @Test public void testBlankBarcode() throws java.io.IOException, SampleSheet.FormatException { sheet.loadTable(new StringReader(blankIndexSampleSheet)); Iterator<SampleSheet.Entry> it = sheet.iterator(); assertTrue(it.hasNext()); SampleSheet.Entry e = it.next(); assertNotNull(e); assertEquals("snia_000268", e.getSampleId()); assertEquals("", e.getIndex()); } @Test(expected=SampleSheet.FormatException.class) public void testMissingLaneColumn() throws java.io.IOException, SampleSheet.FormatException { sheet.loadTable(new StringReader(missingLaneColumn)); } @Test(expected=SampleSheet.FormatException.class) public void testMissingSampleIdColumn() throws java.io.IOException, SampleSheet.FormatException { sheet.loadTable(new StringReader(missingSampleIdColumn)); } @Test(expected=SampleSheet.FormatException.class) public void testMissingIndexColumn() throws java.io.IOException, SampleSheet.FormatException { sheet.loadTable(new StringReader(missingIndexColumn)); } @Test public void testMissingFcidColumn() throws java.io.IOException, SampleSheet.FormatException { sheet.loadTable(new StringReader(missingFcidColumn)); } @Test public void testMissingRecipeColumn() throws java.io.IOException, SampleSheet.FormatException { sheet.loadTable(new StringReader(missingRecipeColumn)); // no failure expected. Recipe isn't a required column } @Test public void testProject() throws java.io.IOException, SampleSheet.FormatException { sheet.loadTable(new StringReader(sampleSheetWithProject)); HashMap<String, String> sample_project = new HashMap<String, String>(); for (SampleSheet.Entry e: sheet) sample_project.put(e.getSampleId(), e.getProject()); assertEquals("Proj1", sample_project.get("snia_000268")); assertEquals("Proj1", sample_project.get("snia_000269")); assertEquals("Proj2", sample_project.get("snia_025487")); } @Test public void testProjectBackwardsCompatible() throws java.io.IOException, SampleSheet.FormatException { // when the project is missing, getProject should simply return null. sheet.loadTable(sampleReader); for (SampleSheet.Entry e: sheet) assertNull(e.getProject()); } @Test public void testReorderedColumns() throws java.io.IOException, SampleSheet.FormatException { sheet.loadTable(new StringReader(reorderedOneEntrySheet)); Iterator<SampleSheet.Entry> it = sheet.iterator(); SampleSheet.Entry e = it.next(); assertOneEntrySheet(e); } // assert that the values in Entry e match the values in oneEntrySheet private static void assertOneEntrySheet(SampleSheet.Entry e) { assertEquals("81DJ0ABXX", e.getFlowcellId()); assertEquals(1, e.getLane()); assertEquals("snia_000268", e.getSampleId()); assertEquals("Human", e.getSampleRef()); assertEquals("ATCACG", e.getIndex()); assertEquals("Whole-Transcriptome Sequencing Project", e.getDescription()); assertEquals("N", e.getControl()); assertEquals("tru-seq multiplex", e.getRecipe()); assertEquals("ROBERTO", e.getOperator()); } public static void main(String args[]) { org.junit.runner.JUnitCore.main(TestSampleSheet.class.getName()); } }