package org.seqcode.deepseq.hitloaders; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import org.seqcode.deepseq.HitPair; import org.seqcode.deepseq.Read; import org.seqcode.deepseq.ReadHit; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SamReader; import htsjdk.samtools.ValidationStringency; import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.util.CloseableIterator; /** * SAMFileHitLoader: A FileHitLoader for SAM and BAM files. * Accounts for uniqueness of hits according to user-specified option. * Ignores secondary & supplementary (i.e. chimeric) alignments. * @author mahony * */ public class SAMFileHitLoader extends FileHitLoader{ private boolean useChimericReads=false; //Ignore chimeric mappings for now. public SAMFileHitLoader(File f, boolean nonUnique, boolean loadT1Reads, boolean loadT2Reads, boolean loadRead2, boolean loadPairs) { super(f, nonUnique, true, false, loadRead2, loadPairs); if(!loadT1Reads || loadT2Reads) System.err.println("SAMFileHitLoader: You asked to load only Type1 or Type2 reads, we do not yet load this information from SAM format."); } /** * Get the reads from the appropriate source (implementation-specific). * Loads data to the fivePrimesList and hitsCountList * Loads pairs to hitPairsList */ public void sourceAllHits() { this.initialize(); SamReaderFactory factory = SamReaderFactory.makeDefault() .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS) .validationStringency(ValidationStringency.SILENT); SamReader reader = factory.open(file); CloseableIterator<SAMRecord> iter = reader.iterator(); Collection<SAMRecord> byRead = new ArrayList<SAMRecord>(); String lastread = null; while (iter.hasNext()) { SAMRecord record = iter.next(); if(record.getReadUnmappedFlag()) {continue; } if(record.isSecondaryOrSupplementary() && !useChimericReads){continue;} if(record.getReadPairedFlag() && record.getSecondOfPairFlag() && !loadRead2){continue;} if (lastread == null || !lastread.equals(record.getReadName())) { processRead(byRead); byRead.clear(); } lastread = record.getReadName(); byRead.add(record); //Filter by first or second of pair here if loading by type1/2? //load pair if this is a first mate, congruent, proper pair if(loadPairs && record.getFirstOfPairFlag() && record.getProperPairFlag()){ boolean neg = record.getReadNegativeStrandFlag(); boolean mateneg = record.getMateNegativeStrandFlag(); HitPair hp = new HitPair((neg ? record.getAlignmentEnd() : record.getAlignmentStart()), record.getMateReferenceName().replaceFirst("^chromosome", "").replaceFirst("^chrom", "").replaceFirst("^chr", ""), (mateneg ? record.getMateAlignmentStart()+record.getReadLength()-1 : record.getMateAlignmentStart()), mateneg ? 1 : 0, 1); addPair(record.getReferenceName().replaceFirst("^chromosome", "").replaceFirst("^chrom", "").replaceFirst("^chr", ""), neg ? '-':'+', hp); } } processRead(byRead); iter.close(); try { reader.close(); } catch (IOException e) { e.printStackTrace(); } }//end of sourceAllHits method protected void processRead(Collection<SAMRecord> records) { int mapcount = records.size(); if(mapcount == 0) return; if(!useNonUnique && mapcount > 1) return; float weight = 1 / ((float)mapcount); Read currRead = new Read(); for (SAMRecord record : records) { int start = record.getAlignmentStart(); int end = record.getAlignmentEnd(); ReadHit currHit = new ReadHit( record.getReferenceName().replaceFirst("^chromosome", "").replaceFirst("^chrom", "").replaceFirst("^chr", ""), start, end, record.getReadNegativeStrandFlag() ? '-' : '+', weight); currRead.addHit(currHit); }currRead.setNumHits(mapcount); addHits(currRead); }//end of processRead }