package abra;
import java.util.HashMap;
import java.util.Map;
import htsjdk.samtools.DefaultSAMRecordFactory;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMLineParser;
import htsjdk.samtools.SamInputResource;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.SAMRecord;
public class SVReadCounter {
private Map<String, Integer> breakpointCounts = new HashMap<String, Integer>();
private static final int MAX_EDIT_DISTANCE = 5;
private SAMLineParser parser;
private Map<String, Integer> counts;
public Map<String, Integer> countReadsSupportingBreakpoints(SamReader reader, int readLength, SAMFileHeader samHeader) {
parser = new SAMLineParser(new DefaultSAMRecordFactory(),
ValidationStringency.SILENT, samHeader,
null, null);
String fullMatch = readLength + "M";
// Require 90% of the read to overlap the breakpoint
int minStart = (int) (readLength * .10);
int maxStart = (int) (readLength *.9) + 1;
// TODO: Need way to query mapped reads only
for (SAMRecord read : reader) {
if (!read.getReadUnmappedFlag() && read.getCigarString().equals(fullMatch)) {
if (read.getAlignmentStart() >= minStart && read.getAlignmentStart() <= maxStart) {
int editDistance = SAMRecordUtils.getIntAttribute(read, "NM");
if (editDistance <= MAX_EDIT_DISTANCE) {
SAMRecord orig = getOrigRecord(read, samHeader);
int origEditDistance = SAMRecordUtils.getIntAttribute(orig, "YX");
if (editDistance < origEditDistance) {
//TODO: Inspect alternate alignments
/*
String[] regions = read.getReferenceName().split("__");
if (regions.length == 2) {
String[] region1 = regions[0].split("_");
String[] region2 = regions[1].split("_");
if (region1.length >= 3 && region2.length >= 5) {
int region1IdxPad = region1.length - 3; // Adjustment for underscore in chromosome name
int region2IdxPad = region2.length - 5; // Adjustment for underscore in chromosome name
String chr1 = region1[0];
for (int i=1; i<=region1IdxPad; i++) {
chr1 += "_";
chr1 += region1[i];
}
int start1 = Integer.parseInt(region1[1 + region1IdxPad]);
int stop1 = Integer.parseInt(region1[2 + region1IdxPad]);
String chr2 = region2[0];
for (int i=1; i<=region2IdxPad; i++) {
chr1 += "_";
chr2 += region2[i];
}
int start2 = Integer.parseInt(region2[1 + region2IdxPad]);
int stop2 = Integer.parseInt(region2[2 + region2IdxPad]);
}
}
*/
String[] refFields = read.getReferenceName().split("_");
if (refFields.length >= 8) {
// String breakpointGroupId = refFields[0] + "_" + refFields[1] + "\t" + refFields[2] + ":" + refFields[3] + "\t" +
// refFields[4] + ":" + refFields[5];
String breakpointGroupId = refFields[0] + "_" + refFields[1] + "\t" + refFields[2] + ":" + refFields[3] + "\t" +
refFields[4] + ":" + refFields[5] + "\t" + refFields[refFields.length-2] + "\t" + refFields[refFields.length-1];
Integer count = breakpointCounts.get(breakpointGroupId);
if (count == null) {
breakpointCounts.put(breakpointGroupId, 1);
} else {
breakpointCounts.put(breakpointGroupId, count + 1);
}
} else {
System.err.println("Error analyzing breakpoint for: " + read.getSAMString());
}
}
}
}
}
}
this.counts = breakpointCounts;
return breakpointCounts;
}
private SAMRecord getOrigRecord(SAMRecord read, SAMFileHeader samHeader) {
String origSamStr = read.getReadName();
origSamStr = origSamStr.replace(Sam2Fastq.FIELD_DELIMITER, "\t");
SAMRecord orig;
try {
orig = parser.parseLine(origSamStr);
} catch (RuntimeException e) {
System.err.println("Error processing: [" + origSamStr + "]");
System.err.println("Contig read: [" + read.getSAMString() + "]");
e.printStackTrace();
throw e;
}
orig.setHeader(samHeader);
return orig;
}
public Map<String, Integer> getCounts() {
return counts;
}
public static void main(String[] args) throws Exception {
String file = "/home/lmose/dev/abra/sv/virus_test2/t.sv.bam";
final SamReader reader =
SamReaderFactory.make()
.validationStringency(ValidationStringency.SILENT)
.samRecordFactory(DefaultSAMRecordFactory.getInstance())
.open(SamInputResource.of(file));
SAMFileHeader header = reader.getFileHeader();
System.err.println("header: " + header);
SVReadCounter counter = new SVReadCounter();
Map<String, Integer> counts = counter.countReadsSupportingBreakpoints(reader, 100, header);
reader.close();
System.err.println(counts);
}
}