package abra.utils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.ValidationStringency;
public class CompareToReference3 {
private String refFileName;
private String currSeqName = "";
private String cachedRefLine = null;
private BufferedReader refReader;
private Map<String, byte[]> refMap;
private int minBaseQuality = Integer.MIN_VALUE;
/*
public void compare(String sam, String refFileName, int maxDiff) throws IOException, FileNotFoundException {
loadRefMap();
SAMFileReader reader = new SAMFileReader(new File(sam));
reader.setValidationStringency(ValidationStringency.SILENT);
int count = 0;
int totalMapped = 0;
for (SAMRecord read : reader) {
if (!read.getReadUnmappedFlag()) {
if (numDifferences(read) > maxDiff) {
System.out.println("------------");
System.out.println("read: " + read.getSAMString());
count += 1;
}
totalMapped += 1;
}
}
System.out.println("count: " + count + " out of: " + totalMapped);
reader.close();
}
*/
public void init(String reference) throws FileNotFoundException, IOException {
this.refFileName = reference;
loadRefMap();
}
public void cleanup() throws IOException {
refReader.close();
}
public List<Integer> mismatchPositions(SAMRecord read) {
return mismatchPositions(read, -1);
}
public String getAlternateReference(SAMRecord read, Cigar cigar) {
String alt = null;
byte[] reference = refMap.get(read.getReferenceName().trim());
if (read.getAlignmentEnd() < reference.length) {
StringBuffer altBuf = new StringBuffer(read.getReadLength());
int readIdx = 0;
int refIdx = read.getAlignmentStart()-1;
for (CigarElement element : cigar.getCigarElements()) {
if (element.getOperator() == CigarOperator.M) {
for (int i=0; i<element.getLength(); i++) {
if (refIdx >= reference.length) {
// You're off the edge of the map matey. Monsters be here!
// This read has aligned across chromosomes. Do not proceed.
return null;
}
char refBase = Character.toUpperCase((char) reference[refIdx]);
altBuf.append(refBase);
readIdx++;
refIdx++;
}
} else if (element.getOperator() == CigarOperator.I) {
altBuf.append(read.getReadString().substring(readIdx, readIdx + element.getLength()));
readIdx += element.getLength();
} else if (element.getOperator() == CigarOperator.D) {
refIdx += element.getLength();
} else if (element.getOperator() == CigarOperator.S) {
readIdx += element.getLength();
}
}
alt = altBuf.toString();
}
return alt;
}
public List<Integer> mismatchPositions(SAMRecord read, int maxMismatches) {
if (read.getReadUnmappedFlag()) {
return Collections.emptyList();
}
List<Integer> mismatches = new ArrayList<Integer>();
byte[] reference = refMap.get(read.getReferenceName().trim());
int readIdx = 0;
int refIdx = read.getAlignmentStart()-1;
for (CigarElement element : read.getCigar().getCigarElements()) {
if (element.getOperator() == CigarOperator.M) {
for (int i=0; i<element.getLength(); i++) {
//char readBase = Character.toUpperCase(read.getReadString().charAt(readIdx));
char readBase = getReadBase(read, readIdx);
//char readBase = (char) read.getReadBases()[readIdx];
char refBase = Character.toUpperCase((char) reference[refIdx]);
if ((readBase != refBase) && (readBase != 'N') && (refBase != 'N')) {
mismatches.add(readIdx);
}
readIdx++;
refIdx++;
}
} else if (element.getOperator() == CigarOperator.I) {
readIdx += element.getLength();
} else if (element.getOperator() == CigarOperator.D) {
refIdx += element.getLength();
} else if (element.getOperator() == CigarOperator.S) {
readIdx += element.getLength();
}
if ((maxMismatches > 0) && (mismatches.size() > maxMismatches)) {
break;
}
}
return mismatches;
}
private char getReadBase(SAMRecord read, int index) {
//return Character.toUpperCase(read.getReadString().charAt(index));
return (char) read.getReadBases()[index];
}
private int getReadBaseQuality(SAMRecord read, int index) {
return read.getBaseQualities()[index];
}
public int noiseAroundIndel(SAMRecord read, CigarOperator indelType, int indelPos, int indelLength) {
int diffs = 0;
byte[] reference = refMap.get(read.getReferenceName().trim());
if (reference != null) {
int readIdx = 0;
int refIdx = read.getAlignmentStart()-1;
int elementIdx = 0;
for (CigarElement element : read.getCigar().getCigarElements()) {
if (element.getOperator() == CigarOperator.M) {
for (int i=0; i<element.getLength(); i++) {
int readBaseQuality = getReadBaseQuality(read, readIdx);
if (readBaseQuality >= minBaseQuality) {
char readBase = getReadBase(read, readIdx);
char refBase = Character.toUpperCase((char) reference[refIdx]);
if ((readBase != refBase) && (readBase != 'N') && (refBase != 'N')) {
diffs++;
}
}
readIdx++;
refIdx++;
}
} else if (element.getOperator() == CigarOperator.I) {
if ((indelType != CigarOperator.I) || (indelPos != refIdx) || (element.getLength() != indelLength)) {
diffs++;
}
readIdx += element.getLength();
} else if (element.getOperator() == CigarOperator.D) {
if ((indelType != CigarOperator.D) || (indelPos != refIdx) || (element.getLength() != indelLength)) {
diffs++;
}
refIdx += element.getLength();
} else if (element.getOperator() == CigarOperator.S) {
// readIdx += element.getLength();
if (elementIdx == 0) {
refIdx -= element.getLength();
}
//TODO: Should this always be included?
for (int i=0; i<element.getLength(); i++) {
int readBaseQuality = getReadBaseQuality(read, readIdx);
if (readBaseQuality >= minBaseQuality) {
if ((refIdx >= 0) && (refIdx < reference.length-1)) {
// char readBase = Character.toUpperCase(read.getReadString().charAt(readIdx));
char readBase = getReadBase(read, readIdx);
char refBase = Character.toUpperCase((char) reference[refIdx]);
if ((readBase != refBase) && (readBase != 'N') && (refBase != 'N')) {
diffs++;
}
} else {
diffs++;
}
}
readIdx++;
refIdx++;
}
}
elementIdx++;
}
}
return diffs;
}
private byte[] getBytes(StringBuffer buf) {
byte[] bytes = new byte[buf.length()];
for (int i=0; i<buf.length(); i++) {
bytes[i] = (byte) buf.charAt(i);
}
return bytes;
}
private void loadRefMap() throws IOException {
System.err.println("Loading reference map: " + this.refFileName);
long s = System.currentTimeMillis();
this.refMap = new HashMap<String, byte[]>();
BufferedReader reader = new BufferedReader(new FileReader(refFileName));
String line = reader.readLine();
StringBuffer sequence = new StringBuffer();
String currSeqName = null;
while (line != null) {
if (line.startsWith(">")) {
if (currSeqName != null) {
refMap.put(currSeqName, getBytes(sequence));
}
sequence = new StringBuffer();
currSeqName = line.substring(1, line.length()).trim();
int spaceIdx = currSeqName.indexOf(' ');
if (spaceIdx > 0) {
currSeqName = currSeqName.substring(0, spaceIdx);
}
int tabIdx = currSeqName.indexOf('\t');
if (tabIdx > 0) {
currSeqName = currSeqName.substring(0, tabIdx);
}
} else {
line.toUpperCase();
sequence.append(line);
}
line = reader.readLine();
}
refMap.put(currSeqName, getBytes(sequence));
sequence = null;
reader.close();
long e = System.currentTimeMillis();
System.err.println("Done loading ref map. Elapsed secs: " + (e-s)/1000);
}
public void setMinBaseQuality(int minBaseQuality) {
this.minBaseQuality = minBaseQuality;
}
private String getRefLine() throws IOException {
String line = null;
if (cachedRefLine != null) {
line = cachedRefLine;
cachedRefLine = null;
} else {
line = refReader.readLine();
}
return line;
}
/*
public static void main(String[] args) {
String foo = "ATCGNatcgn";
System.out.println(foo);
byte[] bytes = foo.getBytes();
System.out.println("num bytes: " + bytes.length);
for (int i=0; i<bytes.length; i++) {
System.out.println("b: " + bytes[i] + "-" + ((char) bytes[i]));
}
}
*/
/*
public static void main(String[] args) throws Exception {
// String ref = args[0];
// String sam = args[1];
// String ref = "/home/lmose/reference/chr7/chr7.fa";
// String sam = "/home/lmose/dev/ayc/24/26/test";
String ref = "/home/lmose/reference/chr1/1.fa";
String sam = "/home/lmose/dev/abra_wxs/4/ttest1.bam";
CompareToReference3 c2r = new CompareToReference3();
c2r.init(ref);
// Thread.sleep(100000);
SAMFileReader rdr = new SAMFileReader(new File(sam));
for (SAMRecord read : rdr) {
int mismatches = c2r.numMismatches(read);
System.out.println("mismatches: " + mismatches);
}
rdr.close();
}
*/
}