package picard.sam.markduplicates.util;
import htsjdk.samtools.util.Log;
import org.testng.annotations.Test;
import org.testng.Assert;
import picard.sam.util.PhysicalLocation;
import picard.sam.util.PhysicalLocationInt;
import picard.sam.util.ReadNameParser;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Random;
/**
* Tests for OpticalDuplicateFinder
*
* @author Nils Homer
*/
public class OpticalDuplicateFinderTest {

    /**
     * Checks a pair of five-field read names: both must be parseable and the two resulting
     * locations must not be flagged as optical duplicates of each other.
     */
    @Test
    public void testDefaultRegex() {
        assertParsedAndNotOpticalDuplicates(
                "000000000-ZZZZZ:1:1105:17981:23325",
                "000000000-ZZZZZ:1:1109:22981:17995",
                5);
    }

    /**
     * Same check as {@link #testDefaultRegex()}, but with read names that carry two extra
     * leading colon-separated fields (seven fields in total).
     */
    @Test
    public void testVeryLongReadNames() {
        assertParsedAndNotOpticalDuplicates(
                "M01234:123:000000000-ZZZZZ:1:1105:17981:23325",
                "M01234:123:000000000-ZZZZZ:1:1109:22981:17995",
                7);
    }

    /**
     * Checks which locations are flagged for different choices of "keeper", first with the list in a
     * fixed order and then with the list shuffled.
     */
    @Test
    public void testKeeper() {
        final Log log = Log.getInstance(OpticalDuplicateFinderTest.class);
        final OpticalDuplicateFinder finder = new OpticalDuplicateFinder(OpticalDuplicateFinder.DEFAULT_READ_NAME_REGEX, 100, log);
        final List<PhysicalLocation> locs = Arrays.asList(
                loc(7, 1500, 1500),
                loc(7, 1501, 1501),
                loc(5, 1500, 1500),
                loc(7, 1490, 1502),
                loc(7, 2500, 2500),
                loc(7, 10, 10)
        );

        // Elements 0, 1 and 3 are the only ones ever flagged: whichever of them is the keeper is left
        // unflagged and the other two are flagged. With no keeper the expectation matches keeper == 0.
        assertEquals(finder.findOpticalDuplicates(locs, null), new boolean[] {false, true, false, true, false, false});
        assertEquals(finder.findOpticalDuplicates(locs, locs.get(0)), new boolean[] {false, true, false, true, false, false});
        assertEquals(finder.findOpticalDuplicates(locs, locs.get(1)), new boolean[] {true, false, false, true, false, false});
        assertEquals(finder.findOpticalDuplicates(locs, locs.get(3)), new boolean[] {true, true, false, false, false, false});

        // The candidate keepers do not change between iterations, so build the list once.
        final List<PhysicalLocation> keepers = Arrays.asList(locs.get(0), locs.get(1), locs.get(3));

        for (int i = 0; i < 100; ++i) {
            // One seeded generator drives both the keeper choice and the shuffle so that every
            // iteration is reproducible from its seed alone.
            final Random random = new Random(i);
            final PhysicalLocation keeper = keepers.get(random.nextInt(keepers.size()));

            final List<PhysicalLocation> shuffled = new ArrayList<>(locs);
            Collections.shuffle(shuffled, random);

            final int opticalDupeCount = countTrue(finder.findOpticalDuplicates(shuffled, keeper));
            Assert.assertEquals(opticalDupeCount, 2, "Unexpected optical duplicate count for seed " + i);
        }
    }

    /**
     * Tests the case where the "keeper" record is not in the list that is passed to the OpticalDuplicateFinder. This can happen
     * when there are, e.g. FR and RF reads, which can all be molecular duplicates of one another, but cannot be optical duplicates
     * of one another and are thus partitioned into two sets for optical duplicate checking.
     */
    @Test
    public void testKeeperNotInList() {
        final Log log = Log.getInstance(OpticalDuplicateFinderTest.class);
        final OpticalDuplicateFinder finder = new OpticalDuplicateFinder(OpticalDuplicateFinder.DEFAULT_READ_NAME_REGEX, 100, log);
        final List<PhysicalLocation> locs = Arrays.asList(
                loc(1, 100, 100),
                loc(1, 101, 101),
                loc(1, 99, 99),
                loc(1, 99, 102)
        );

        // The keeper is a separate location that is not an element of locs; three of the four
        // list entries are still expected to be flagged.
        Assert.assertEquals(countTrue(finder.findOpticalDuplicates(locs, loc(7, 5000, 5000))), 3);
    }

    /**
     * Checks that when the keeper is the last element of the list, it is left unflagged while the
     * two elements before it are flagged.
     */
    @Test
    public void testKeeperAtEndWithinCliqueOfAllOpticalDuplicates() {
        final Log log = Log.getInstance(OpticalDuplicateFinderTest.class);
        final OpticalDuplicateFinder finder = new OpticalDuplicateFinder(OpticalDuplicateFinder.DEFAULT_READ_NAME_REGEX, 15, log);
        final List<PhysicalLocation> locs = Arrays.asList(
                loc(1, 10, 0),
                loc(1, 20, 0),
                loc(1, 30, 0)
        );
        assertEquals(finder.findOpticalDuplicates(locs, locs.get(2)), new boolean[] {true, true, false});
    }

    /**
     * Shared body of the read-name parsing tests: parses both names, loads them into fresh locations
     * using a default-constructed finder, and asserts that no location is flagged as an optical duplicate.
     *
     * @param readName1          first read name
     * @param readName2          second read name
     * @param expectedParseValue value that {@code ReadNameParser.getLastThreeFields} must return for each
     *                           name (presumably the number of ':'-separated fields -- confirm against
     *                           ReadNameParser)
     */
    private void assertParsedAndNotOpticalDuplicates(final String readName1, final String readName2, final int expectedParseValue) {
        final int[] tokens = new int[3];
        Assert.assertEquals(ReadNameParser.getLastThreeFields(readName1, ':', tokens), expectedParseValue);
        Assert.assertEquals(ReadNameParser.getLastThreeFields(readName2, ':', tokens), expectedParseValue);

        final OpticalDuplicateFinder opticalDuplicateFinder = new OpticalDuplicateFinder();
        final PhysicalLocation loc1 = new ReadEndsForMarkDuplicates();
        final PhysicalLocation loc2 = new ReadEndsForMarkDuplicates();
        Assert.assertTrue(opticalDuplicateFinder.addLocationInformation(readName1, loc1));
        Assert.assertTrue(opticalDuplicateFinder.addLocationInformation(readName2, loc2));

        final boolean[] opticalDuplicateFlags = opticalDuplicateFinder.findOpticalDuplicates(Arrays.asList(loc1, loc2), null);
        for (final boolean opticalDuplicateFlag : opticalDuplicateFlags) {
            Assert.assertFalse(opticalDuplicateFlag);
        }
    }

    /**
     * Helper method to create a physical location.
     *
     * @param tile tile value; narrowed to {@code short} before being stored
     * @param x    x coordinate
     * @param y    y coordinate
     * @return a new location whose {@code getReadGroup()} always returns 1
     */
    private PhysicalLocation loc(final int tile, final int x, final int y) {
        final PhysicalLocation l = new PhysicalLocationInt() {
            @Override
            public short getReadGroup() { return 1; }
        };
        l.setTile((short) tile);
        l.setX(x);
        l.setY(y);
        return l;
    }

    /**
     * Asserts that two boolean arrays are equal element by element.
     *
     * @param actual   flags produced by the code under test
     * @param expected flags the test expects
     * @throws AssertionError if the arrays differ; the message prints both arrays
     */
    void assertEquals(final boolean[] actual, final boolean[] expected) {
        if (!Arrays.equals(actual, expected)) {
            throw new AssertionError("expected: " + Arrays.toString(expected) + " but was: " + Arrays.toString(actual));
        }
    }

    /**
     * Simply counts the true values in a boolean array.
     *
     * @param bs array to scan
     * @return number of elements that are {@code true}
     */
    int countTrue(final boolean[] bs) {
        int count = 0;
        for (final boolean b : bs) {
            if (b) {
                ++count;
            }
        }
        return count;
    }
}