package picard.illumina.parser; import htsjdk.samtools.util.IOUtil; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import picard.PicardException; import picard.illumina.parser.readers.BclQualityEvaluationStrategy; import java.io.File; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; import static picard.illumina.parser.BinTdUtil.*; public class BclParserTest { public static final File TEST_DATA_DIR = new File("testdata/picard/illumina/25T8B25T/Data/Intensities/BaseCalls/L001"); public static final File MULTI_TILE_DATA_DIR = new File("/seq/tng/jcarey/testdata/NextSeq/Data/Intensities/BaseCalls/L001"); public static final String READ_STRUCTURE = "25T8B25T"; public static final String READ_STRUCTURE_WSKIPS = "25S8B25S"; public static final String READ_STRUCTURE_WSKIPS_PARTIAL = "10T5S10T8B5T20S"; public static final String READ_STRUCTURE_WSKIPS_BAD = "10T5S10T2B4S2B5T20S"; public static final int[] READ_LENGTHS = new int[]{25, 8, 25}; public static final int LANE = 1; public static final IlluminaDataType DATA_TYPES[] = {IlluminaDataType.BaseCalls, IlluminaDataType.QualityScores}; public static final int TILE_SIZES = 60; public static Integer[] boxArr(final int[] ints) { final Integer[] boxArr = new Integer[ints.length]; for (int i = 0; i < boxArr.length; i++) { boxArr[i] = ints[i]; } return boxArr; } @DataProvider(name = "tileMaps") public Object[][] getTileMaps() { return new Object[][]{ //TILES, NUM_CLUSTER TO BE READ, SEEK AT THIS READ, INDEX TO TILETOSEEK, ORDERED TILE INDEX (FOR OUT OF ORDER TILES) {new int[]{1101, 1201, 2101}, 180, -1, -1, -1}, {new int[]{1101, 2101, 1201}, 180, -1, -1, -1}, {new int[]{2101, 1201}, 120, -1, -1, -1}, {new int[]{1101, 2101}, 120, -1, -1, -1}, {new int[]{1101}, 60, -1, -1, -1}, //Cases with seeking {new int[]{1101, 1201, 2101}, 206, 25, 0, 0}, {new int[]{1101, 2101, 1201}, 206, 25, 0, 0}, {new int[]{2101, 1201}, 80, 19, 0, 1}, {new int[]{1101, 2101}, 156, 35, 0, 0}, {new int[]{1101}, 66, 5, 0, 0} }; } @DataProvider(name = "multiTileMaps") public Object[][] getMultiTileMaps() { return new Object[][]{ //TILES, NUM_CLUSTER TO BE READ, SEEK AT THIS READ, INDEX TO TILETOSEEK, ORDERED TILE INDEX (FOR OUT OF ORDER TILES) {new int[]{11101, 11102, 11103}, 341292, -1, -1, -1} }; } public void compareClusterToBclData(final ClusterData cluster, final BclData bclData, final int clusterNum, final int countNum) { final byte[][] bases = bclData.getBases(); final byte[][] qualities = bclData.getQualities(); Assert.assertEquals(bases.length, cluster.getNumReads(), "At cluster num " + clusterNum); Assert.assertEquals(qualities.length, cluster.getNumReads(), "At cluster num " + clusterNum); final StringBuilder baseBuilder = new StringBuilder(); final StringBuilder qualBuilder = new StringBuilder(); final StringBuilder barcode = new StringBuilder(); baseBuilder.append("new byte[]{"); for (int i = 0; i < bases.length; i++) { final byte[] subBase = bases[i]; final byte[] subQual = qualities[i]; for (int j = 0; j < subBase.length; j++) { if ((char) subBase[j] == '.') { baseBuilder.append('P'); } else { baseBuilder.append((char) subBase[j]); } baseBuilder.append(","); qualBuilder.append(subQual[j]); qualBuilder.append(","); if (i == 1) { if ((char) subBase[j] == '.') { barcode.append('P'); } else { barcode.append((char) subBase[j]); } } } } baseBuilder.deleteCharAt(baseBuilder.length() - 1); qualBuilder.deleteCharAt(qualBuilder.length() - 1); baseBuilder.append("},\n new byte[]{"); baseBuilder.append(qualBuilder.toString()); baseBuilder.append("},\n \""); baseBuilder.append(barcode.toString()); baseBuilder.append("\""); for (int i = 0; i < cluster.getNumReads(); i++) { if (!Arrays.equals(bases[i], cluster.getRead(i).getBases())) { System.out.println(cluster.getLane() + " : " + cluster.getTile() + " : " + clusterNum); System.out.println(baseBuilder.toString()); } Assert.assertEquals(bases[i], cluster.getRead(i).getBases(), " Bases differ for read " + i + " at cluster num " + clusterNum + " at cluster count " + countNum); Assert.assertEquals(qualities[i], cluster.getRead(i).getQualities(), " Qualities differ for read " + i + " at cluster num " + clusterNum + " at cluster count " + countNum); } } public void fullBclParserTestImpl(final File dir, final String readStructure, final int[] tiles, final int size, final int seekAfter, final int newTileIndex, final int orderedTileIndex, final boolean multiTile) { final ReadStructure rs = new ReadStructure(readStructure); final OutputMapping outputMapping = new OutputMapping(rs); final IlluminaFileUtil util = new IlluminaFileUtil(dir.getParentFile(), LANE); final PerTilePerCycleFileUtil bclFileUtil = (PerTilePerCycleFileUtil) util.getUtil(IlluminaFileUtil.SupportedIlluminaFormat.Bcl); final MultiTileBclFileUtil multiTileBclFileUtil = (MultiTileBclFileUtil) util.getUtil(IlluminaFileUtil.SupportedIlluminaFormat.MultiTileBcl); final List<Integer> tileIntegers = new ArrayList<Integer>(); for (final int tile : tiles) { tileIntegers.add(tile); } final BclParser bclParser; if(multiTile){ final File bci = new File(MULTI_TILE_DATA_DIR, "s_" + LANE + ".bci"); bclParser = new MultiTileBclParser(dir, LANE, multiTileBclFileUtil.getFiles(tileIntegers, outputMapping.getOutputCycles()), outputMapping, true, new BclQualityEvaluationStrategy(BclQualityEvaluationStrategy.ILLUMINA_ALLEGED_MINIMUM_QUALITY), new TileIndex(bci)); } else{ bclParser = new BclParser(dir, LANE, bclFileUtil.getFiles(tileIntegers, outputMapping.getOutputCycles()), outputMapping, new BclQualityEvaluationStrategy(BclQualityEvaluationStrategy.ILLUMINA_ALLEGED_MINIMUM_QUALITY)); } final Map<Integer, ClusterData> testData = BinTdUtil.clusterData(LANE, Arrays.asList(boxArr(tiles)), readStructure, DATA_TYPES); int count = 0; int readNum = 0; while (bclParser.hasNext()) { final BclData bclData = bclParser.next(); if (testData.containsKey(readNum)) { compareClusterToBclData(testData.get(readNum), bclData, readNum, count); } if (count == seekAfter) { bclParser.seekToTile(tiles[newTileIndex]); readNum = (orderedTileIndex * TILE_SIZES); } else { readNum++; } count++; } Assert.assertEquals(count, size); bclParser.close(); } public static void deleteBclFiles(final File laneDirectory, final String readStructure) { final ReadStructure rs = new ReadStructure(readStructure); int index = 1; for (final ReadDescriptor rd : rs.descriptors) { if (rd.type == ReadType.S) { for (int i = index; i < index + rd.length; i++) { final File cycleDir = new File(laneDirectory, "C" + i + ".1"); final File[] cycleFiles = cycleDir.listFiles(); for (final File toDelete : cycleFiles) { if (!toDelete.delete()) { throw new RuntimeException("Couldn't delete file " + toDelete.getAbsolutePath()); } } } } index += rd.length; } } @Test(dataProvider = "tileMaps") public void fullBclParserTest(final int[] tiles, final int size, final int seekAfter, final int newTileIndex, final int orderedTileIndex) { fullBclParserTestImpl(TEST_DATA_DIR, READ_STRUCTURE, tiles, size, seekAfter, newTileIndex, orderedTileIndex, false); } //@Test(dataProvider = "multiTileMaps") public void fullMTBclParserTest(final int[] tiles, final int size, final int seekAfter, final int newTileIndex, final int orderedTileIndex) { fullBclParserTestImpl(MULTI_TILE_DATA_DIR, READ_STRUCTURE, tiles, size, seekAfter, newTileIndex, orderedTileIndex, true); } @Test(dataProvider = "tileMaps") public void fullBclParserTestWSkips(final int[] tiles, final int size, final int seekAfter, final int newTileIndex, final int orderedTileIndex) { fullBclParserTestImpl(TEST_DATA_DIR, READ_STRUCTURE_WSKIPS, tiles, size, seekAfter, newTileIndex, orderedTileIndex, false); } @Test(dataProvider = "tileMaps") public void fullBclParserTestWDeletedSkips(final int[] tiles, final int size, final int seekAfter, final int newTileIndex, final int orderedTileIndex) { fullBclParserTestWDeletedSkipsImpl(tiles, size, seekAfter, newTileIndex, orderedTileIndex, READ_STRUCTURE_WSKIPS); } @Test(dataProvider = "tileMaps") public void fullBclParserTestWPartiallyDeletedSkips(final int[] tiles, final int size, final int seekAfter, final int newTileIndex, final int orderedTileIndex) { fullBclParserTestWDeletedSkipsImpl(tiles, size, seekAfter, newTileIndex, orderedTileIndex, READ_STRUCTURE_WSKIPS_PARTIAL); } @Test(dataProvider = "tileMaps", expectedExceptions = RuntimeException.class) public void fullBclParserTestWBadDeletedSkips(final int[] tiles, final int size, final int seekAfter, final int newTileIndex, final int orderedTileIndex) { fullBclParserTestWDeletedSkipsImpl(tiles, size, seekAfter, newTileIndex, orderedTileIndex, READ_STRUCTURE_WSKIPS_BAD); } public void fullBclParserTestWDeletedSkipsImpl(final int[] tiles, final int size, final int seekAfter, final int newTileIndex, final int orderedTileIndex, final String readStructure) { final File basecallDir = IOUtil.createTempDir("bclParserTest", "BaseCalls"); Exception exc = null; try { final File l001 = new File(basecallDir, "L001"); if (!l001.mkdir()) { throw new RuntimeException("Couldn't make lane dir " + l001.getAbsolutePath()); } copyBcls(TEST_DATA_DIR, l001); deleteBclFiles(l001, readStructure); fullBclParserTestImpl(l001, READ_STRUCTURE_WSKIPS, tiles, size, seekAfter, newTileIndex, orderedTileIndex, false); } catch (final Exception thrExc) { exc = thrExc; } finally { IOUtil.deleteDirectoryTree(basecallDir); } if (exc != null) { if (exc.getClass() == PicardException.class) { throw new PicardException(exc.getMessage()); } throw new RuntimeException(exc); } } //Custom copy function to avoid copying .svn files etc... public static void copyBcls(final File srcLaneDir, final File dstDir) { final File[] listFiles = srcLaneDir.listFiles(); for (final File dir : listFiles) { if (dir.isDirectory()) { File cycleDir = null; for (final File file : dir.listFiles()) { if (file.getName().endsWith(".bcl")) { if (cycleDir == null) { cycleDir = new File(dstDir, dir.getName()); if (!cycleDir.mkdir()) { throw new RuntimeException("Couldn't make directory (" + cycleDir.getAbsolutePath() + ")"); } } IOUtil.copyFile(file, new File(cycleDir, file.getName())); } } } } } //Helper byte [] tuple for EAMSS testing class BasesAndQuals { public final byte[] bases; public final byte[] quals; public final byte[] maskedQuals; public BasesAndQuals(final byte[] bases, final byte[] quals, final Integer maskStart) { this.bases = bases; this.quals = quals; this.maskedQuals = qualsMaskedFrom(maskStart); } private byte[] qualsMaskedFrom(final Integer maskStart) { final byte[] maskedQuals = Arrays.copyOf(quals, quals.length); if (maskStart != null) { for (int i = maskStart; i < maskedQuals.length; i++) { maskedQuals[i] = BclParser.MASKING_QUALITY; } } return maskedQuals; } public String toString() { return "BasesAndQuals( " + basesToString() + ", " + qualsToString(quals) + ", " + qualsToString(maskedQuals) + ")"; } public String basesToString() { final StringBuilder sb = new StringBuilder(bases.length); for (final byte base : bases) { switch (base) { case A: sb.append("A "); break; case C: sb.append("C "); break; case G: sb.append("G "); break; case T: sb.append("T "); break; case P: sb.append(". "); break; default: throw new RuntimeException("THIS SHOULD NOT HAPPEN! Bad byte in bases!"); } } return sb.toString(); } public String qualsToString(final byte[] qualsToConvert) { final StringBuilder sb = new StringBuilder(bases.length); for (final byte qual : qualsToConvert) { sb.append(String.valueOf((int) qual)); sb.append(","); } return sb.toString(); } } @DataProvider(name = "eamssDataNo10GSeries") public Object[][] eamssDataNo10GSeries() { return new Object[][]{ //Non-masking cases //tally very negative, 9G's {new BasesAndQuals(new byte[]{G, G, G, G, G, G, G, G, G}, new byte[]{13, 7, 35, 32, 31, 33, 31, 26, 29}, null)}, //tally barely negative {new BasesAndQuals(new byte[]{G, G, G, G, G, G, G, G, G}, new byte[]{13, 7, 35, 26, 18, 19, 35, 8, 33}, null)}, //Reaches 0, A stretch of more than 10 other types of bases {new BasesAndQuals(new byte[]{A, C, C, C, C, C, C, C, C, C, C, C, T, G, G, C, T, A, A}, new byte[]{7, 8, 33, 7, 2, 33, 16, 17, 19, 7, 6, 5, 35, 2, 33, 22, 18, 16, 25}, null)}, //Stays at 0, Stretches of G's Separated {new BasesAndQuals(new byte[]{T, G, G, G, G, G, G, G, P, P, G, G, G, G, G, G, G, T, A, A, G, G, G}, new byte[]{7, 8, 33, 7, 2, 33, 16, 17, 2, 2, 6, 5, 35, 2, 33, 22, 18, 16, 25, 33, 32, 16, 18}, null)}, //shorter {new BasesAndQuals(new byte[]{T, A, C}, new byte[]{25, 16, 16}, null)}, //Longer {new BasesAndQuals(new byte[]{T, A, C, G, G, P, P, T, C, C, C, C, T, T, T, G, G, G, A, T, G, C, A, T, A, C, G, G, P, P, T, C, C, C, C, T, T, T, G, G, G, A, T, G, C, A}, new byte[]{25, 16, 16, 33, 22, 2, 2, 33, 35, 3, 31, 38, 22, 19, 25, 16, 16, 31, 30, 2, 2, 33, 26, 3, 31, 38, 22, 19, 2, 2, 30, 27, 28, 16, 2, 2, 30, 16, 19, 21, 22, 17, 19, 16, 16, 16}, null)}, //Masking-Cases //tally very positive, 9Gs X - Mask From here {new BasesAndQuals(new byte[]{G, G, G, G, G, G, G, G, G}, new byte[]{13, 7, 35, 32, 2, 16, 33, 14, 19}, 7)}, //tally barely negative X - Mask from here {new BasesAndQuals(new byte[]{G, G, G, G, G, G, G, G, G}, new byte[]{13, 7, 35, 33, 18, 2, 6, 8, 33}, 4)}, //Reaches 0, A stretch of more than 10 other types of bases X - Mask From here {new BasesAndQuals(new byte[]{A, C, C, C, C, C, C, C, C, C, C, C, T, G, G, C, T, A, A}, new byte[]{7, 8, 33, 7, 2, 33, 16, 17, 19, 32, 33, 5, 8, 2, 2, 2, 33, 16, 25}, 11)}, //Stays at 0, Stretches of G's Separated X- Mask from here {new BasesAndQuals(new byte[]{T, G, G, G, G, G, G, G, P, P, G, G, G, G, G, G, G, T, A, A, G, G, G}, new byte[]{7, 8, 33, 7, 2, 33, 16, 17, 2, 2, 6, 5, 35, 2, 33, 30, 13, 16, 7, 2, 2, 16, 18}, 16)}, //shorter X - Mask from here {new BasesAndQuals(new byte[]{T, A, C}, new byte[]{2, 11, 13}, 0)}, //Longer X- Mask from here {new BasesAndQuals(new byte[]{T, A, C, G, G, P, P, T, C, C, C, C, T, T, T, G, G, G, A, T, G, C, A, T, A, C, G, G, P, P, T, C, C, C, C, T, T, T, G, G, G, A, T, G, C, A}, new byte[]{25, 16, 16, 33, 22, 2, 2, 33, 35, 3, 31, 38, 22, 19, 25, 16, 16, 31, 30, 2, 2, 33, 26, 3, 31, 38, 22, 19, 2, 2, 30, 27, 28, 16, 2, 2, 30, 16, 19, 21, 22, 2, 19, 16, 2, 2}, 26)} }; } /** For more information on EAMSS check BclParser and the large comment above runEamssForReadInPlace * */ @DataProvider(name = "eamssDataWithGSeries") public Object[][] eamssTestDat() { return new Object[][]{ //9 G's followed by tally max X - Mask from here {new BasesAndQuals(new byte[]{A, C, G, G, T, G, G, G, G, G, G, G, G, G, A, C, T}, new byte[]{7, 8, 33, 7, 12, 33, 16, 17, 2, 2, 32, 35, 35, 35, 2, 15, 9}, 5)}, //9 G's surpassed by tally max X - Mask from here {new BasesAndQuals(new byte[]{A, C, G, G, T, G, G, G, G, G, G, G, G, G, A, C, T}, new byte[]{7, 8, 2, 7, 2, 2, 16, 17, 2, 2, 32, 35, 35, 35, 2, 15, 9}, 0)}, //10 G's ending before tally max X - Mask from here {new BasesAndQuals(new byte[]{A, C, G, G, T, G, G, G, G, G, G, G, G, G, A, C, T}, new byte[]{7, 8, 2, 7, 2, 2, 16, 17, 2, 2, 32, 35, 35, 35, 33, 15, 9}, 15)}, //10 G's ending on tally max X - Mask from here Is this wrong? {new BasesAndQuals(new byte[]{A, C, C, G, C, C, G, G, G, G, G, G, G, G, G, G, T, T, A}, new byte[]{33, 31, 29, 32, 28, 27, 30, 18, 18, 18, 18, 19, 19, 19, 33, 18, 9, 9, 19}, 6)}, //10 G's no masking {new BasesAndQuals(new byte[]{A, C, C, G, C, C, G, G, G, G, G, G, G, G, G, G, T, T, A}, new byte[]{33, 31, 29, 32, 28, 27, 30, 18, 18, 18, 18, 19, 19, 19, 33, 18, 33, 32, 34}, null)}, //10 G' with an exception X - Mask from here {new BasesAndQuals(new byte[]{A, C, C, G, C, C, G, G, G, G, A, G, G, G, G, G, T, T, A}, new byte[]{33, 31, 29, 32, 28, 27, 30, 18, 18, 18, 18, 19, 19, 19, 33, 18, 9, 9, 19}, 6)}, //longer than 10 G's X - Mask from here {new BasesAndQuals(new byte[]{A, C, G, G, G, C, G, G, G, G, G, G, G, G, G, G, T, T, A}, new byte[]{33, 31, 29, 32, 28, 16, 33, 18, 18, 18, 18, 19, 19, 19, 33, 18, 3, 9, 19}, 2)}, //longer than 10 G's X - Mask from here {new BasesAndQuals(new byte[]{A, C, G, G, C, C, G, G, G, G, G, G, G, G, G, G, G, G, G}, new byte[]{33, 31, 29, 32, 28, 16, 33, 18, 18, 18, 18, 19, 19, 19, 33, 34, 33, 33, 3}, 6)}, //longer than 10 G's {new BasesAndQuals(new byte[]{A, C, G, G, C, C, G, G, G, G, G, G, G, G, G, G, G, G, G, T, A, C, T, T, G, G, G, G, G, G, G, G, G, G, G, G, G}, new byte[]{33, 31, 29, 32, 28, 16, 33, 18, 18, 18, 18, 19, 19, 19, 33, 34, 33, 33, 3, 33, 34, 2, 4, 8, 33, 7, 35, 15, 16, 31, 30, 38, 16, 15, 22, 29, 25}, null)} }; } public void testEamss(final BasesAndQuals bq) { final byte[] bases = Arrays.copyOf(bq.bases, bq.bases.length); final byte[] quals = Arrays.copyOf(bq.quals, bq.quals.length); BclParser.runEamssForReadInPlace(bases, quals); Assert.assertEquals(bases, bq.bases); Assert.assertEquals(quals, bq.maskedQuals); } @Test(dataProvider = "eamssDataNo10GSeries") public void eamssParsingTestNoGSeries(final BasesAndQuals bq) { testEamss(bq); } @Test(dataProvider = "eamssDataWithGSeries") public void eamssParsingTestWithGSeries(final BasesAndQuals bq) { testEamss(bq); } }