// Copyright (C) 2011-2012 CRS4.
//
// This file is part of Seal.
//
// Seal is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// Seal is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with Seal. If not, see <http://www.gnu.org/licenses/>.
package tests.it.crs4.seal.recab;
import it.crs4.seal.common.IMRContext;
import it.crs4.seal.common.TestContext;
import it.crs4.seal.common.FormatException;
import it.crs4.seal.common.ReadPair;
import it.crs4.seal.common.SamInputFormat;
import it.crs4.seal.common.Utils;
import it.crs4.seal.recab.RecabTable;
import it.crs4.seal.recab.RecabTableMapper;
import it.crs4.seal.recab.ObservationCount;
import it.crs4.seal.recab.VariantRegion;
import it.crs4.seal.recab.VariantReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.HashSet;
import org.junit.*;
import static org.junit.Assert.*;
public class TestRecabTableMapper
{
private static class DaVariantReader implements VariantReader {
public ArrayList<VariantRegion> snpList = new ArrayList<VariantRegion>();
public Iterator<VariantRegion> iterator = null;
public boolean nextEntry(VariantRegion dest) throws FormatException, IOException
{
if (iterator == null)
iterator = snpList.iterator();
if (iterator.hasNext())
{
VariantRegion result = iterator.next();
dest.set(result);
return true;
}
else
return false;
}
}
private static final String littleSam = "LITTLE 67 chr6 1 37 3M = 6 9 AGC BCD 5:C XT:A:U NM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:3 RG:Z:test";
private static final String littleReversedSam = "LITTLE 115 chr6 1 37 3M = 6 9 AGC BCD 5:C XT:A:U NM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:3 RG:Z:test";
private static final String littleSamRead2 = "LITTLE 131 chr6 6 37 3M = 1 9 AGC BCD 5:C XT:A:U NM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:3 RG:Z:test";
private static final String littleReversedSamRead2 = "LITTLE 179 chr6 6 37 3M = 1 9 AGC BCD 5:C XT:A:U NM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:3 RG:Z:test";
private static final String littleSamWithN = "LITTLE 67 chr6 1 37 3M = 6 9 ANC B#D 5:C XT:A:U NM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:3 RG:Z:test";
private static final String littleSamBaseQ0 = "LITTLE 67 chr6 1 37 3M = 6 9 AGC B!D 5:C XT:A:U NM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:3 RG:Z:test";
private static final String littleSamUnmapped = "LITTLE 71 * * 0 * = 6 9 AGC BCD 5:C XT:A:U NM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:3 RG:Z:test";
private static final String bigSam = "ERR020229.100000/1 89 chr6 3558357 37 91M = 3558678 400 AGCTTCTTTGACTCTCGAATTTTAGCACTAGAAGAAATAGTGAGGATTATATATTTCAGAAGTTCTCACCCAGGATATCAGAACACATTCA 5:CB:CCBCCB>:C@;BBBB??B;?>1@@=C=4ACCAB3A8=CC=C?CBC=CBCCCCCCCCCCCCC@5>?=?CAAB=3=>====5>=AC?C XT:A:U NM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:91 RG:Z:test";
private static final String readAdapterLeftSam = "LEFT 145 1 3104333 37 21M = 3104336 18 TCTGGATATAGGGAGACTCAG GGGGGGGGGGGGGGGGGGGGG MD:Z:0C0A0A18 RG:Z:DCW97JN1_264:1";
private static final String readAdapterRightSam = "RIGHT 97 1 3104336 37 21M = 3104333 -18 GGATATAGGGAGACTCAGAGA GGGGGGGGGGGGGGGGGGGGG MD:Z:18G1C0 RG:Z:DCW97JN1_264:1";
private static final String deletion = "DELETE 107 chr12 1 60 3M2D2M * * * AAGTT ABCDE MD:Z:3^CA2 RG:Z:test";
private static final String insertion = "INSERT 107 chr12 1 60 3M4I2M * * * AAGCTATTT ABCDEFGHI MD:Z:5 RG:Z:test";
private RecabTableMapper mapper;
private TestContext<Text, ObservationCount> context;
private DaVariantReader reader;
private Configuration conf;
private SamInputFormat.SamRecordReader samReader;
private File tempFile;
@Before
public void setup() throws IOException
{
mapper = new RecabTableMapper();
context = new TestContext<Text, ObservationCount>();
reader = new DaVariantReader();
conf = new Configuration();
samReader = null;
tempFile = File.createTempFile("test_recab_table", ".sam");
}
@After
public void teadDown() throws IOException
{
tempFile.delete();
}
private void setupReader(String sam) throws IOException
{
// write a temporary SAM file
PrintWriter out = new PrintWriter( new BufferedWriter( new FileWriter(tempFile) ) );
out.write(sam);
out.close();
FileSplit split = new FileSplit(new Path(tempFile.toURI().toString()), 0, sam.length(), null);
samReader = new SamInputFormat.SamRecordReader();
samReader.initialize(split, Utils.getTaskAttemptContext(conf));
}
private List<ReadPair> makeReadPairs(String sam) throws IOException
{
ArrayList<ReadPair> retval = new ArrayList<ReadPair>();
setupReader(sam);
while (samReader.nextKeyValue())
retval.add(samReader.getCurrentValue());
return retval;
}
@Test
public void testSetup() throws IOException
{
mapper.setup(reader, context, conf);
// check the counters
assertEquals(0, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$ReadCounters", "Processed"));
assertEquals(0, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$ReadCounters", "FilteredUnmapped"));
assertEquals(0, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$BaseCounters", "Used"));
assertEquals(0, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$BaseCounters", "VariantMismatches"));
assertEquals(0, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$BaseCounters", "VariantBases"));
assertEquals(0, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$BaseCounters", "NonVariantMismatches"));
}
@Test
public void testSimpleMap() throws IOException, InterruptedException
{
List<ReadPair> pairs = makeReadPairs(littleSam);
mapper.setup(reader, context, conf);
mapper.map(new LongWritable(0), pairs.get(0), context);
// check counters
assertEquals(1, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$ReadCounters", "Processed"));
assertEquals(0, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$ReadCounters", "FilteredUnmapped"));
assertEquals(3, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$BaseCounters", "Used"));
assertEquals(0, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$BaseCounters", "BadBases"));
assertEquals(0, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$BaseCounters", "VariantMismatches"));
assertEquals(0, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$BaseCounters", "VariantBases"));
assertEquals(0, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$BaseCounters", "NonVariantMismatches"));
// how many pairs emitted?
assertEquals(3, context.getNumWrites());
// check keys
Set<Text> keys = context.getKeys();
assertTrue( keys.contains(prepKey("test", "33", "1", "NN")) );
assertTrue( keys.contains(prepKey("test", "34", "2", "AG")) );
assertTrue( keys.contains(prepKey("test", "35", "3", "GC")) );
// check values (they should all be (1,0) )
List<ObservationCount> counts = context.getAllValues();
for (ObservationCount c: counts)
{
assertEquals(1, c.getObservations());
assertEquals(0, c.getMismatches());
}
}
@Test
public void testReverseSimpleMap() throws IOException, InterruptedException
{
List<ReadPair> pairs = makeReadPairs(littleReversedSam);
mapper.setup(reader, context, conf);
mapper.map(new LongWritable(0), pairs.get(0), context);
assertEquals(3, context.getNumWrites());
List<ObservationCount> counts = context.getAllValues();
for (ObservationCount c: counts)
{
assertEquals(1, c.getObservations());
assertEquals(0, c.getMismatches());
}
Set<Text> keys = context.getKeys();
assertTrue( keys.contains(prepKey("test", "35", "1", "NN")) );
assertTrue( keys.contains(prepKey("test", "34", "2", "GC")) );
assertTrue( keys.contains(prepKey("test", "33", "3", "CT")) );
}
@Test
public void testMapRead2() throws IOException, InterruptedException
{
List<ReadPair> pairs = makeReadPairs(littleSamRead2);
mapper.setup(reader, context, conf);
mapper.map(new LongWritable(0), pairs.get(0), context);
assertEquals(3, context.getNumWrites());
List<ObservationCount> counts = context.getAllValues();
for (ObservationCount c: counts)
{
assertEquals(1, c.getObservations());
assertEquals(0, c.getMismatches());
}
Set<Text> keys = context.getKeys();
assertTrue( keys.contains(prepKey("test", "33", "-1", "NN")) );
assertTrue( keys.contains(prepKey("test", "34", "-2", "AG")) );
assertTrue( keys.contains(prepKey("test", "35", "-3", "GC")) );
}
@Test
public void testReverseMapRead2() throws IOException, InterruptedException
{
List<ReadPair> pairs = makeReadPairs(littleReversedSamRead2);
mapper.setup(reader, context, conf);
mapper.map(new LongWritable(0), pairs.get(0), context);
assertEquals(3, context.getNumWrites());
List<ObservationCount> counts = context.getAllValues();
for (ObservationCount c: counts)
{
assertEquals(1, c.getObservations());
assertEquals(0, c.getMismatches());
}
Set<Text> keys = context.getKeys();
assertTrue( keys.contains(prepKey("test", "35", "-1", "NN")) );
assertTrue( keys.contains(prepKey("test", "34", "-2", "GC")) );
assertTrue( keys.contains(prepKey("test", "33", "-3", "CT")) );
}
@Test
public void testSamWithN() throws IOException, InterruptedException
{
List<ReadPair> pairs = makeReadPairs(littleSamWithN);
mapper.setup(reader, context, conf);
mapper.map(new LongWritable(0), pairs.get(0), context);
assertEquals(2, context.getNumWrites());
assertEquals(1, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$BaseCounters", "BadBases"));
assertEquals(2, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$BaseCounters", "Used"));
}
@Test
public void testSamWithBaseQ0() throws IOException, InterruptedException
{
List<ReadPair> pairs = makeReadPairs(littleSamBaseQ0);
mapper.setup(reader, context, conf);
mapper.map(new LongWritable(0), pairs.get(0), context);
assertEquals(2, context.getNumWrites());
assertEquals(1, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$BaseCounters", "BadBases"));
assertEquals(2, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$BaseCounters", "Used"));
}
@Test
public void testSamWithUnmapped() throws IOException, InterruptedException
{
List<ReadPair> pairs = makeReadPairs(littleSamUnmapped);
mapper.setup(reader, context, conf);
mapper.map(new LongWritable(0), pairs.get(0), context);
assertEquals(0, context.getNumWrites());
assertEquals(1, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$ReadCounters", "FilteredUnmapped"));
assertEquals(1, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$ReadCounters", "Processed"));
}
@Test
public void testSamWithMapQ0() throws IOException, InterruptedException
{
List<ReadPair> pairs = makeReadPairs(littleSam.replaceFirst("37", "0"));
mapper.setup(reader, context, conf);
mapper.map(new LongWritable(0), pairs.get(0), context);
assertEquals(0, context.getNumWrites());
assertEquals(1, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$ReadCounters", "FilteredMapQ"));
assertEquals(1, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$ReadCounters", "Processed"));
}
@Test
public void testWithVariant() throws IOException, InterruptedException
{
reader.snpList.add(new VariantRegion("chr6", 2)); // falls right in the middle of the read
List<ReadPair> pairs = makeReadPairs(littleSam);
mapper.setup(reader, context, conf);
mapper.map(new LongWritable(0), pairs.get(0), context);
// how many pairs emitted?
assertEquals(2, context.getNumWrites());
assertEquals(2, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$BaseCounters", "Used"));
assertEquals(0, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$BaseCounters", "VariantMismatches"));
assertEquals(1, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$BaseCounters", "VariantBases"));
assertEquals(0, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$BaseCounters", "NonVariantMismatches"));
// check keys
Set<Text> keys = context.getKeys();
assertTrue( keys.contains(prepKey("test", "33", "1", "NN")) );
assertTrue( keys.contains(prepKey("test", "35", "3", "GC")) );
// check values (they should all be (1,0) )
List<ObservationCount> counts = context.getAllValues();
for (ObservationCount c: counts)
{
assertEquals(1, c.getObservations());
assertEquals(0, c.getMismatches());
}
}
@Test
public void testWithVariantWithMismatch() throws IOException, InterruptedException
{
reader.snpList.add(new VariantRegion("chr6", 2)); // falls right in the middle of the read
List<ReadPair> pairs = makeReadPairs(littleSam.replace("MD:Z:3", "MD:Z:1A1"));
mapper.setup(reader, context, conf);
mapper.map(new LongWritable(0), pairs.get(0), context);
assertEquals(1, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$BaseCounters", "VariantMismatches"));
assertEquals(1, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$BaseCounters", "VariantBases"));
assertEquals(0, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$BaseCounters", "NonVariantMismatches"));
}
@Test
public void testWithNonVariantMismatch() throws IOException, InterruptedException
{
List<ReadPair> pairs = makeReadPairs(littleSam.replace("MD:Z:3", "MD:Z:1A1"));
mapper.setup(reader, context, conf);
mapper.map(new LongWritable(0), pairs.get(0), context);
assertEquals(1, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$BaseCounters", "NonVariantMismatches"));
}
@Test
public void testDeletion() throws IOException, InterruptedException
{
List<ReadPair> pairs = makeReadPairs(deletion);
mapper.setup(reader, context, conf);
mapper.map(new LongWritable(0), pairs.get(0), context);
assertEquals(5, context.getNumWrites());
assertEquals(5, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$BaseCounters", "Used"));
Set<Text> keys = context.getKeys();
assertEquals(5, keys.size());
assertTrue( keys.contains(prepKey("test", "32", "1", "NN")) );
assertTrue( keys.contains(prepKey("test", "33", "2", "AA")) );
assertTrue( keys.contains(prepKey("test", "34", "3", "AG")) );
assertTrue( keys.contains(prepKey("test", "35", "4", "GT")) );
assertTrue( keys.contains(prepKey("test", "36", "5", "TT")) );
}
@Test
public void testInsertion() throws IOException, InterruptedException
{
List<ReadPair> pairs = makeReadPairs(insertion);
mapper.setup(reader, context, conf);
mapper.map(new LongWritable(0), pairs.get(0), context);
assertEquals(5, context.getNumWrites());
assertEquals(5, context.getCounterValue("it.crs4.seal.recab.RecabTableMapper$BaseCounters", "Used"));
Set<Text> keys = context.getKeys();
assertEquals(5, keys.size());
assertTrue( keys.contains(prepKey("test", "32", "1", "NN")) );
assertTrue( keys.contains(prepKey("test", "33", "2", "AA")) );
assertTrue( keys.contains(prepKey("test", "34", "3", "AG")) );
assertTrue( keys.contains(prepKey("test", "39", "8", "TT")) );
assertTrue( keys.contains(prepKey("test", "40", "9", "TT")) );
}
@Test(expected=RuntimeException.class)
public void testNoReadGroup() throws IOException, InterruptedException
{
List<ReadPair> pairs = makeReadPairs(littleSam.replaceFirst("\tRG:Z.*", ""));
mapper.setup(reader, context, conf);
mapper.map(new LongWritable(0), pairs.get(0), context);
}
@Test
public void testSkipReadAdapterOnRight() throws IOException, InterruptedException
{
List<ReadPair> pairs = makeReadPairs(readAdapterRightSam);
mapper.setup(reader, context, conf);
mapper.map(new LongWritable(0), pairs.get(0), context);
assertEquals(18, context.getNumWrites());
// ensure cycles 19, 20, and 21 have been filtered
Set<Text> keys = context.getKeys();
Set<String> cycles = new HashSet<String>();
for (Text k: keys)
cycles.add( k.toString().split(RecabTable.TableDelim)[2] );
assertFalse( cycles.contains("19") );
assertFalse( cycles.contains("20") );
assertFalse( cycles.contains("21") );
}
@Test
public void testSkipReadAdapterOnLeft() throws IOException, InterruptedException
{
List<ReadPair> pairs = makeReadPairs(readAdapterLeftSam);
mapper.setup(reader, context, conf);
mapper.map(new LongWritable(0), pairs.get(0), context);
assertEquals(18, context.getNumWrites());
// ensure cycles 1, 2, and 3 have been filtered
Set<Text> keys = context.getKeys();
Set<String> cycles = new HashSet<String>();
for (Text k: keys)
cycles.add( k.toString().split(RecabTable.TableDelim)[2] );
assertFalse( cycles.contains("1") );
assertFalse( cycles.contains("2") );
assertFalse( cycles.contains("3") );
}
/**
* Public for re-use in other tests.
*/
public static Text prepKey(String rg, String quality, String cycle, String dinuc)
{
return new Text(
rg + RecabTable.TableDelim +
quality + RecabTable.TableDelim +
cycle + RecabTable.TableDelim +
dinuc + RecabTable.TableDelim);
}
public static void main(String args[]) {
org.junit.runner.JUnitCore.main(TestRecabTableMapper.class.getName());
}
}