package abra.utils; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.Iterator; import htsjdk.samtools.CigarElement; import htsjdk.samtools.CigarOperator; import htsjdk.samtools.SAMFileReader; import htsjdk.samtools.ValidationStringency; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.util.CloseableIterator; public class CalcMismatchDensity { private String ref; private String baseDir; private String inputFile; private String currPid = ""; private String origBam; private String abraBam; private CompareToReference3 c2r; private int minBaseQuality = 20; private int readLength; public CalcMismatchDensity(String ref, String baseDir, String inputFile, String origBam, String abraBam, int readLength) { this.ref = ref; this.baseDir = baseDir; this.inputFile = inputFile; this.origBam = origBam; this.abraBam = abraBam; this.readLength = readLength; } public void run() throws IOException { System.err.println("Loading reference"); c2r = new CompareToReference3(); c2r.init(ref); c2r.setMinBaseQuality(20); BufferedReader reader = new BufferedReader(new FileReader(inputFile)); SAMFileReader abraReader = null; SAMFileReader origReader = null; long s = System.currentTimeMillis(); String line = reader.readLine(); while (line != null) { String[] fields = line.split("\t"); String pid = fields[1]; if (!pid.equals(currPid)) { currPid = pid; System.err.println(currPid); abraReader = initReader(abraReader, baseDir + "/" + pid + "/" + abraBam); origReader = initReader(origReader, baseDir + "/" + pid + "/" + origBam); } String chromosome = fields[2]; int pos = Integer.parseInt(fields[3]); int refLen = Integer.parseInt(fields[5]); int altLen = Integer.parseInt(fields[6]); int length = 0; CigarOperator indelType; if (refLen > 1) { indelType = CigarOperator.D; length = refLen - 1; } else if (altLen > 1) { indelType = CigarOperator.I; length = altLen - 1; } else { throw new IllegalArgumentException("Not an indel: " + line); } double abraMd = calcMismatchDensity(abraReader, indelType, chromosome, pos, length); double origMd = calcMismatchDensity(origReader, indelType, chromosome, pos, length); System.out.println(line + "\t" + abraMd + "\t" + origMd); line = reader.readLine(); } reader.close(); long e = System.currentTimeMillis(); // System.out.println("Elapsed: " + (e-s)); } private double calcMismatchDensity(SAMFileReader reader, CigarOperator indelType, String chromosome, int pos, int length) { int numMismatches = 0; int numBases = 0; CloseableIterator<SAMRecord> iter = reader.queryOverlapping(chromosome, pos-readLength, pos+length+readLength); while (iter.hasNext()) { SAMRecord read = (SAMRecord) iter.next(); // String yo = (String) read.getAttribute("YO"); // if ((yo == null) || (!yo.equals("N/A"))) { numMismatches += c2r.noiseAroundIndel(read, indelType, pos, length); for (int qual : read.getBaseQualities()) { // qual = qual - '!'; if (qual >= minBaseQuality) { numBases += 1; } } // } } iter.close(); return numBases == 0 ? 0.0 : (double) numMismatches / (double) numBases; } private SAMFileReader initReader(SAMFileReader old, String input) { if (old != null) { old.close(); } SAMFileReader reader = new SAMFileReader(new File(input)); reader.setValidationStringency(ValidationStringency.SILENT); return reader; } public static void main(String[] args) throws Exception { String ref = args[0]; String baseDir = args[1]; String inputFile = args[2]; String origBam = args[3]; String abraBam = args[4]; int readLength = Integer.parseInt(args[5]); // String ref = "/home/lmose/reference/chr1/1.fa"; // String baseDir = "/home/lmose/dev/ayc/mismatch_density"; // String inputFile = "/home/lmose/dev/ayc/mismatch_density/gl.txt"; // String origBam = "n1.bam"; // String abraBam = "n.abra.1.bam"; // int readLength = 100; CalcMismatchDensity cmd = new CalcMismatchDensity(ref, baseDir, inputFile, origBam, abraBam, readLength); cmd.run(); System.err.println("Done."); } }