package org.genedb.crawl.bam;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import net.sf.samtools.AlignmentBlock;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMFileReader.ValidationStringency;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMRecordIterator;
import org.apache.log4j.Logger;
import org.genedb.crawl.model.Alignment;
import org.genedb.crawl.model.MappedCoverage;
import org.genedb.crawl.model.MappedQuery;
import org.genedb.crawl.model.MappedSAMHeader;
import org.genedb.crawl.model.MappedSAMSequence;
import org.genedb.crawl.model.MappedSAMRecords;
import org.genedb.crawl.model.adapter.AlignmentBlockAdapter;
public class Sam {
public BioDataFileStore<Alignment> alignmentStore;
public void setAlignmentStore (BioDataFileStore<Alignment> alignmentStore) {
this.alignmentStore = alignmentStore;
}
private static Logger logger = Logger.getLogger(Sam.class);
/*
* If no properties are supplied for the query method, this is the default set.
*/
private final static String[] defaultProperties = {"alignmentStart", "alignmentEnd", "flags", "readName"};
/*
* A map of field names in the MappedSAMRecords class
*/
private static Map<String, Field> recordsBeanFields = new HashMap<String, Field>();;
/*
* a map of getters in the SAMRecord class whose names equate to (via bean camel case convention) to fields in the Records class.
*/
private static Map<String,Method> samRecordMethodMap = new HashMap<String,Method>();
// no point in doing this more than once
static {
// generate a map of fields in the MappedSAMRecords bean
for (Field f : MappedSAMRecords.class.getDeclaredFields()) {
recordsBeanFields.put(f.getName(), f);
logger.debug(String.format("field %s %s", f.getName(), f));
}
// find equivalent methods in the SAMRecord class
for (Method method : SAMRecord.class.getDeclaredMethods()) {
String methodName = method.getName();
if (methodName.startsWith("get")) {
String propertyName = methodName.substring(3);
propertyName = propertyName.substring(0, 1).toLowerCase() + propertyName.substring(1);
if (! recordsBeanFields.containsKey(propertyName)) {
continue;
}
samRecordMethodMap.put(propertyName, method);
logger.debug(String.format("method %s %s", propertyName, method));
}
}
}
private SAMFileReader getSamOrBam(int fileID) throws Exception {
final SAMFileReader inputSam = alignmentStore.getFile(fileID).getReader();
if (inputSam == null) {
throw new Exception ("Could not find the file " + fileID);
}
inputSam.setValidationStringency(ValidationStringency.SILENT);
return inputSam;
}
public MappedSAMHeader header(int fileID) throws Exception {
return this.header(getSamOrBam(fileID));
}
public MappedSAMHeader header(SAMFileReader file) throws Exception {
MappedSAMHeader model = new MappedSAMHeader();
for (Map.Entry<String, String> entry : file.getFileHeader().getAttributes()) {
model.attributes.put(entry.getKey(), entry.getValue().toString());
}
return model;
}
public List<MappedSAMSequence> sequence(int fileID) throws Exception {
return alignmentStore.getSequences(fileID);
}
public List<Alignment> listforsequence(String sequence) throws Exception {
return alignmentStore.listforsequence(sequence);
}
public List<Alignment> list() {
return alignmentStore.getFiles();
}
public List<Alignment> listfororganism(String organism) {
return alignmentStore.listfororganism(organism);
}
public synchronized MappedQuery query(int fileID, String sequence, int start, int end, boolean contained, String[] properties, int filter ) throws Exception {
logger.debug("FileID : " + fileID);
sequence = alignmentStore.getActualSequenceName(fileID, sequence);
return this.query(getSamOrBam(fileID), sequence, start, end, contained, properties, filter);
}
private synchronized MappedQuery query(SAMFileReader file, String sequence, int start, int end, boolean contained, String[] properties, int filter ) throws Exception {
if (sequence == null) {
throw new Exception ("Supplied sequence does not exist.");
}
if (properties == null) {
properties = defaultProperties;
}
logger.debug(String.format("file: %s\tlocation: '%s:%d-%d'\tcontained?%s\tfilter: %d(%s)", file, sequence, start, end, contained, filter, padLeft(Integer.toBinaryString(filter), 8)));
long startTime = System.currentTimeMillis();
MappedQuery model = new MappedQuery();
model.records = new MappedSAMRecords();
// we certainly don't want duplicates here, as this was cause unnecessary method calls and data to be written out multiple times
// so we use a set
Set<String> props = new HashSet<String>();
// filter the properties, and initialise the relevant property fields in the bean
for (String propertyName : properties) {
// only add fields that we have bean properties for
if (! recordsBeanFields.containsKey(propertyName)) {
continue;
}
Field f = recordsBeanFields.get(propertyName);
// currently the only records field that is not a list
if (! f.getName().equals("alignmentBlocks")) {
f.set(model.records, new ArrayList());
}
props.add(propertyName);
}
logger.info(props);
model.count = 0;
if (props.contains("alignmentBlocks")) {
model.records.alignmentBlocks = new ArrayList<AlignmentBlockAdapter[]>();
}
SAMRecordIterator i = null;
try {
/**
*
* According to the BAMFileReader2 doc:
*
* "Prepare to iterate through the SAMRecords in file order.
* Only a single iterator on a BAM file can be extant at a time. If getIterator() or a query method has been called once,
* that iterator must be closed before getIterator() can be called again.
* A somewhat peculiar aspect of this method is that if the file is not seekable, a second call to
* getIterator() begins its iteration where the last one left off. That is the best that can be
* done in that situation."
*
* For this reason, we must make sure that we close the iterator at the end of the loop AND make sure that the methods
* that use this iterator are iterates is synchronized.
*
*/
i = file.query(sequence, start, end, contained);
while ( i.hasNext() ) {
SAMRecord record = i.next();
/*
* int toFilter = record.getFlags() & filter;
logger.debug(String.format("Read: %s, Filter: %s, Flags: %s, Result: %s", record.getReadName(), filter, record.getFlags(), toFilter ));
logger.debug(padLeft(Integer.toBinaryString(filter), 8));
logger.debug(padLeft(Integer.toBinaryString(record.getFlags()), 8));
logger.debug(padLeft(Integer.toBinaryString(toFilter), 8));
*/
if ((record.getFlags() & filter) > 0) {
//logger.debug("some matches ... skipping");
continue;
}
for (String propertyName : props) {
if (propertyName.endsWith("alignmentBlocks")) {
List<AlignmentBlock> result = record.getAlignmentBlocks();
@SuppressWarnings("unchecked")
List<AlignmentBlock> blocks = (List<AlignmentBlock>) result;
List<AlignmentBlockAdapter> blockAdapters = new ArrayList<AlignmentBlockAdapter>();
for (AlignmentBlock block : blocks) {
blockAdapters.add(new AlignmentBlockAdapter(block));
}
AlignmentBlockAdapter[] blockArray = new AlignmentBlockAdapter[blockAdapters.size()];
int b = 0;
for (AlignmentBlockAdapter block : blockAdapters) {
blockArray[b] = block;
b++;
}
model.records.alignmentBlocks.add(blockArray);
} else {
Method method = samRecordMethodMap.get(propertyName);
Object result = method.invoke(record);
Field f = recordsBeanFields.get(propertyName);
ArrayList list = (ArrayList) f.get(model.records);
list.add(result);
}
}
model.count++;
}
} catch (Exception e) {
logger.error(e.getMessage());
e.printStackTrace();
} finally {
if (i != null) {
i.close();
}
}
long endTime = System.currentTimeMillis() ;
float time = (endTime - startTime) / (float) 1000 ;
model.contained = contained;
model.start = start;
model.end = end;
model.sequence = sequence;
model.time = Float.toString(time);
model.filter = filter;
return model;
}
public synchronized MappedCoverage coverage(int fileID, String sequence, int start, int end, int window, int filter) throws Exception {
sequence = alignmentStore.getActualSequenceName(fileID, sequence);
return this.coverage(getSamOrBam(fileID), sequence, start, end, window, filter);
}
public synchronized MappedCoverage coverage(SAMFileReader file, String sequence, int start, int end, int window, int filter) throws Exception {
if (sequence == null) {
throw new Exception ("Supplied sequence does not exist.");
}
long startTime = System.currentTimeMillis();
int max = 0;
final int nBins = Math.round((end-start+1.f)/window);
int coverage[] = new int[nBins];
for(int i=0; i<coverage.length; i++) {
coverage[i] = 0;
}
logger.debug("starting iterations, filter: " + filter);
logger.debug(start + "," + end + "," + window + "," + nBins);
SAMRecordIterator iter = null;
logger.debug(startTime);
try {
iter = file.query(sequence, start, end, false);
while (iter.hasNext()) {
SAMRecord record = iter.next();
if ((record.getFlags() & filter) > 0) {
continue;
}
List<AlignmentBlock> blocks = record.getAlignmentBlocks();
for (AlignmentBlock block : blocks) {
for (int k = 0; k < block.getLength(); k++) {
final int pos = block.getReferenceStart() + k - start;
final float fbin = pos / (float) window;
if ((fbin < 0) || (fbin > nBins-1)) {
continue;
}
final int ibin = (int) fbin;
coverage[ibin] += 1;
if(coverage[ibin] > max) {
max = coverage[ibin];
}
}
}
}
} catch (Exception e) {
logger.error(e.getMessage());
e.printStackTrace();
} finally {
if (iter != null) {
iter.close();
}
}
long endTime = System.currentTimeMillis() ;
float time = (endTime - startTime) / (float) 1000 ;
logger.debug("ending iterations");
MappedCoverage mc = new MappedCoverage();
mc.data = coverage;
mc.start = start;
mc.end = end;
mc.window = window;
mc.max = max;
mc.time = Float.toString(time);
mc.bins = nBins;
return mc;
}
private String padLeft(String s, int n) {
return String.format("%1$#" + n + "s", s);
}
}