// Copyright (C) 2011-2012 CRS4.
//
// This file is part of Seal.
//
// Seal is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// Seal is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with Seal. If not, see <http://www.gnu.org/licenses/>.
package tests.it.crs4.seal.demux;
import it.crs4.seal.demux.BarcodeLookup;
import it.crs4.seal.demux.SampleSheet;
import java.io.StringReader;
import java.util.Collection;
import java.util.Set;
import org.junit.*;
import static org.junit.Assert.*;
/**
 * JUnit 4 tests for {@link BarcodeLookup}.
 *
 * Each test starts from an empty {@link SampleSheet} and a lookup built on it
 * with a mismatch limit of 0 (see {@link #setup()}).  Tests then load one of the
 * CSV fixtures below and query the lookup by (lane, barcode).
 */
public class TestBarcodeLookup
{
    // Header line shared by all fixtures that quote their column names.
    private static final String QUOTED_HEADER =
        "\"FCID\",\"Lane\",\"SampleID\",\"SampleRef\",\"Index\",\"Description\",\"Control\",\"Recipe\",\"Operator\"\n";

    // Six samples, two per lane, lanes 1-3 listed in ascending order.
    private static final String SAMPLE_SHEET =
        QUOTED_HEADER +
        "\"81DJ0ABXX\",1,\"snia_000268\",\"Human\",\"ATCACG\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n" +
        "\"81DJ0ABXX\",1,\"snia_000269\",\"Human\",\"CGATGT\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n" +
        "\"81DJ0ABXX\",2,\"snia_001611\",\"Human\",\"TTAGGC\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n" +
        "\"81DJ0ABXX\",2,\"snia_025487\",\"Human\",\"TGACCA\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n" +
        "\"81DJ0ABXX\",3,\"snia_041910\",\"Human\",\"ACAGTG\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n" +
        "\"81DJ0ABXX\",3,\"snia_001612\",\"Human\",\"GCCAAT\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"";

    // A single sample in lane 1.
    private static final String SAMPLE_SHEET_ONE =
        QUOTED_HEADER +
        "\"81DJ0ABXX\",1,\"one\",\"Human\",\"ATCACG\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"";

    // Two records in lane 1, both with sample id "one"; their barcodes differ in every position.
    private static final String SAMPLE_SHEET_TWO =
        QUOTED_HEADER +
        "\"81DJ0ABXX\",1,\"one\",\"Human\",\"ATCACG\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n" +
        "\"81DJ0ABXX\",1,\"one\",\"Human\",\"GCACTA\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"";

    // In this sheet the second record's barcode is only one substitution away from the first one.
    // Note that, unlike the other fixtures, this one ends with a newline.
    private static final String SAMPLE_SHEET_TOO_CLOSE =
        QUOTED_HEADER +
        "\"81DJ0ABXX\",1,\"first\",\"Human\",\"ATCACG\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n" +
        "\"81DJ0ABXX\",1,\"too_close\",\"Human\",\"ATCACA\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n";

    // Two samples in the same lane, both with a blank index.
    private static final String SAMPLE_SHEET_AMBIGUOUS =
        QUOTED_HEADER +
        "81DJ0ABXX,1,snia_000268,Human,,Whole-Transcriptome Sequencing Project,N,tru-seq multiplex,ROBERTO\n" +
        "81DJ0ABXX,1,snia_001612,Human,,Whole-Transcriptome Sequencing Project,N,tru-seq multiplex,ROBERTO";

    // Same six samples as SAMPLE_SHEET, but with the lanes listed out of order.
    private static final String LANES_OUT_OF_ORDER_SHEET =
        QUOTED_HEADER +
        "\"81DJ0ABXX\",3,\"snia_041910\",\"Human\",\"ACAGTG\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n" +
        "\"81DJ0ABXX\",1,\"snia_000268\",\"Human\",\"ATCACG\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n" +
        "\"81DJ0ABXX\",1,\"snia_000269\",\"Human\",\"CGATGT\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n" +
        "\"81DJ0ABXX\",2,\"snia_001611\",\"Human\",\"TTAGGC\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n" +
        "\"81DJ0ABXX\",3,\"snia_001612\",\"Human\",\"GCCAAT\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n" +
        "\"81DJ0ABXX\",2,\"snia_025487\",\"Human\",\"TGACCA\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"";

    // Samples in lanes 1 and 3 only; lane 2 has no entries.
    private static final String SAMPLE_SHEET_SKIPPING_LANES =
        QUOTED_HEADER +
        "\"81DJ0ABXX\",1,\"snia_000268\",\"Human\",\"ATCACG\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n" +
        "\"81DJ0ABXX\",1,\"snia_000269\",\"Human\",\"CGATGT\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n" +
        "\"81DJ0ABXX\",3,\"snia_041910\",\"Human\",\"ACAGTG\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"\n" +
        "\"81DJ0ABXX\",3,\"snia_001612\",\"Human\",\"GCCAAT\",\"Whole-Transcriptome Sequencing Project\",\"N\",\"tru-seq multiplex\",\"ROBERTO\"";

    // Unquoted header; one sample with a blank index in each of lanes 1 and 3.
    private static final String SAMPLE_SHEET_BLANK_INDEX =
        "FCID,Lane,SampleID,SampleRef,Index,Description,Control,Recipe,Operator\n" +
        "81DJ0ABXX,1,snia_000268,Human,,Whole-Transcriptome Sequencing Project,N,tru-seq multiplex,ROBERTO\n" +
        "81DJ0ABXX,3,snia_001612,Human,,Whole-Transcriptome Sequencing Project,N,tru-seq multiplex,ROBERTO";

    private SampleSheet sheet;
    private BarcodeLookup lookup;

    /** Creates a fresh, empty sample sheet and a lookup over it with a mismatch limit of 0. */
    @Before
    public void setup()
    {
        sheet = new SampleSheet();
        lookup = new BarcodeLookup(sheet, 0);
    }

    /**
     * Parses {@code sheetText} into the shared sample sheet and (re)loads the lookup from it.
     *
     * @param sheetText     CSV text of the sample sheet fixture
     * @param maxMismatches mismatch limit handed to {@code BarcodeLookup.load}
     */
    private void loadSheet(String sheetText, int maxMismatches) throws java.io.IOException, SampleSheet.FormatException
    {
        sheet.loadTable(new StringReader(sheetText));
        lookup.load(sheet, maxMismatches);
    }

    /**
     * Queries the lookup and asserts that it resolves to the expected sample.
     * Fails with a descriptive message, rather than a NullPointerException, when no match is returned.
     *
     * @return the match, so callers can make further assertions on it
     */
    private BarcodeLookup.Match assertSample(int lane, String barcode, String expectedSampleId)
    {
        BarcodeLookup.Match match = lookup.getSampleId(lane, barcode);
        assertNotNull("no match for barcode '" + barcode + "' in lane " + lane, match);
        assertEquals(expectedSampleId, match.getEntry().getSampleId());
        return match;
    }

    /** Like {@link #assertSample}, and additionally checks the reported number of mismatches. */
    private void assertMatch(int lane, String barcode, String expectedSampleId, int expectedMismatches)
    {
        BarcodeLookup.Match match = assertSample(lane, barcode, expectedSampleId);
        assertEquals(expectedMismatches, match.getMismatches());
    }

    @Test
    public void testIsEmpty() throws java.io.IOException, SampleSheet.FormatException
    {
        assertTrue(lookup.isEmpty());
        loadSheet(SAMPLE_SHEET, 0);
        assertFalse(lookup.isEmpty());
    }

    @Test
    public void testNSamples() throws java.io.IOException, SampleSheet.FormatException
    {
        assertEquals(0, lookup.getNumSamples());

        loadSheet(SAMPLE_SHEET_ONE, 0);
        assertEquals(1, lookup.getNumSamples());

        // loading a second sheet is expected to replace, not extend, the previous contents
        loadSheet(SAMPLE_SHEET, 0);
        assertEquals(6, sheet.size());
        assertEquals(6, lookup.getNumSamples());
    }

    @Test
    public void testDontCrashOnEmpty()
    {
        assertNull(lookup.getSampleId(1, "aaaaaa"));
    }

    @Test(expected=IllegalArgumentException.class)
    public void testInvalidLaneNo()
    {
        lookup.getSampleId(0, "aaaaaa");
    }

    @Test(expected=IllegalArgumentException.class)
    public void testInvalidIndexLength()
    {
        lookup.getSampleId(1, "A");
    }

    @Test
    public void testLanesOutOfOrder() throws java.io.IOException, SampleSheet.FormatException
    {
        loadSheet(LANES_OUT_OF_ORDER_SHEET, 0);
        assertMatch(3, "ACAGTG", "snia_041910", 0);
        assertMatch(2, "TTAGGC", "snia_001611", 0);
    }

    @Test
    public void testLanesInOrder() throws java.io.IOException, SampleSheet.FormatException
    {
        loadSheet(SAMPLE_SHEET, 0);
        assertSample(3, "ACAGTG", "snia_041910");
        assertSample(2, "TTAGGC", "snia_001611");
    }

    @Test
    public void testSkippingLanes() throws java.io.IOException, SampleSheet.FormatException
    {
        loadSheet(SAMPLE_SHEET_SKIPPING_LANES, 0);
        assertSample(3, "ACAGTG", "snia_041910");
        assertSample(1, "ATCACG", "snia_000268");
        assertNull(lookup.getSampleId(2, "ATCACG")); // lane 2 has no samples in this sheet
    }

    @Test(expected=IllegalArgumentException.class)
    public void testNegativeMismatchLimit() throws java.io.IOException, SampleSheet.FormatException
    {
        lookup.load(sheet, -1);
    }

    @Test(expected=IllegalArgumentException.class)
    public void testUnreasonableMismatchLimit() throws java.io.IOException, SampleSheet.FormatException
    {
        lookup.load(sheet, 100);
    }

    // NOTE(review): RuntimeException is a broad expectation; it would also be satisfied by an
    // unrelated unchecked exception.  Narrow it once the exact type thrown by load() is confirmed.
    @Test(expected=RuntimeException.class)
    public void testMismatchLimitTooHigh() throws java.io.IOException, SampleSheet.FormatException
    {
        // the two barcodes in this sheet differ by a single substitution, while we ask for 2 mismatches
        loadSheet(SAMPLE_SHEET_TOO_CLOSE, 2);
    }

    @Test
    public void testQueryWithOneSampleInLane() throws java.io.IOException, SampleSheet.FormatException
    {
        loadSheet(SAMPLE_SHEET_ONE, 0);
        int lane = 1; // lane where the sample in SAMPLE_SHEET_ONE resides

        // "RANDOM" does not match the sheet's barcode, yet with a single sample in the lane
        // the lookup is expected to return that sample and report 0 mismatches
        assertMatch(lane, "RANDOM", "one", 0);

        assertNull(lookup.getSampleId(lane + 1, "ATCANN")); // empty lane
    }

    @Test
    public void testQueryWithOneMismatch() throws java.io.IOException, SampleSheet.FormatException
    {
        loadSheet(SAMPLE_SHEET_TWO, 1); // parameter: support 1 mismatch
        int lane = 1; // lane where the samples in SAMPLE_SHEET_TWO reside

        // exact query
        assertMatch(lane, "ATCACG", "one", 0);

        // various queries with 1 mismatch, one per barcode position
        String[] queries = new String[] { "GTCACG", "AGCACG", "ATGACG", "ATCTCG", "ATCANG", "ATCACN" };
        for (String q: queries)
            assertMatch(lane, q, "one", 1);

        assertNull(lookup.getSampleId(lane, "ATCANN")); // 2 mismatches
    }

    @Test
    public void testQueryWithTwoMismatches() throws java.io.IOException, SampleSheet.FormatException
    {
        loadSheet(SAMPLE_SHEET_TWO, 2); // parameter: support 2 mismatches
        int lane = 1; // lane where the samples in SAMPLE_SHEET_TWO reside

        // exact query
        assertMatch(lane, "ATCACG", "one", 0);

        // one mismatch
        assertMatch(lane, "ATCNCG", "one", 1);

        // various queries with 2 mismatches
        String[] queries = new String[] { "GGCACG", "AGTACG", "GTGACG", "CTCTCG", "CTCANG", "NTCACN" };
        for (String q: queries)
            assertMatch(lane, q, "one", 2);
    }

    @Test
    public void testQueryBlankIndex() throws java.io.IOException, SampleSheet.FormatException
    {
        loadSheet(SAMPLE_SHEET_BLANK_INDEX, 0);

        // with a blank index in the sheet, any query barcode (even an empty one)
        // is expected to resolve to the lane's sample
        assertSample(1, "RANDOM", "snia_000268");
        assertSample(1, "", "snia_000268");
        assertSample(3, "RANDOM", "snia_001612");
        assertSample(3, "", "snia_001612");

        assertNull(lookup.getSampleId(2, "ATCANN")); // empty lane
    }

    @Test(expected=SampleSheet.FormatException.class)
    public void testAmbiguousSampleSheet() throws java.io.IOException, SampleSheet.FormatException
    {
        // two samples with a blank index share lane 1, so reads could not be told apart
        loadSheet(SAMPLE_SHEET_AMBIGUOUS, 0);
    }

    /** Allows running this test class directly from the command line. */
    public static void main(String args[]) {
        org.junit.runner.JUnitCore.main(TestBarcodeLookup.class.getName());
    }
}