/* Copyright 2013 University of North Carolina at Chapel Hill. All rights reserved. */ package abra.utils; import java.io.File; import java.io.IOException; import java.util.List; import abra.Feature; import abra.RegionLoader; import abra.ReAligner; import htsjdk.samtools.SAMFileReader; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.util.CloseableIterator; /** * Utility class for exploring base quality by region * * @author Lisle E. Mose (lmose at unc dot edu) */ public class BaseQualityByRegion { //TODO: Move to utils package public void run(String input, String regionsGtf) throws IOException { SAMFileReader reader = new SAMFileReader(new File(input)); RegionLoader loader = new RegionLoader(); List<Feature> regions = loader.load(regionsGtf, false); regions = ReAligner.splitRegions(regions); for (Feature region : regions) { long numReads = 0; long numBases = 0; long totalQuality = 0; long numBasesLt5 = 0; long numBasesLt10 = 0; long numBasesLt20 = 0; long numReadsLt5 = 0; long numReadsLt10 = 0; long numReadsLt20 = 0; long numReadsLt30 = 0; long numReads5X10 = 0; long numInternalReadsLt20 = 0; long numReadsWithAmbiguousBases = 0; long numReadsIntersectLt20Ambiguous = 0; long minMapq = 1000; long totalMapq = 0; CloseableIterator<SAMRecord> iter = reader.queryOverlapping(region.getSeqname(), (int) region.getStart(), (int) region.getEnd()); while (iter.hasNext()) { SAMRecord read = iter.next(); if (read.getMappingQuality() >= 10) { String qualStr = read.getBaseQualityString(); boolean readLt5 = false; boolean readLt10 = false; boolean readLt20 = false; boolean readLt30 = false; boolean internalReadLt20 = false; numReads++; numBases += qualStr.length(); int readBasesLt5 = 0; for (int i=0; i<qualStr.length(); i++) { // Assuming phred33 int qual = qualStr.charAt(i) - '!'; totalQuality += qual; if (qual < 5) { numBasesLt5++; readBasesLt5++; readLt5 = true; } if (qual < 10) { numBasesLt10++; readLt10 = true; } if (qual < 20) { numBasesLt20++; readLt20 = true; if ((i>=10) && (i<90)) { internalReadLt20 = true; } } if (qual < 30) { readLt30 = true; } } if (readLt5) { numReadsLt5++; } if (readLt10) { numReadsLt10++; } if (readLt20) { numReadsLt20++; } if (readLt30) { numReadsLt30++; } if (readBasesLt5 >= 10) { numReads5X10++; } if (internalReadLt20 == true) { numInternalReadsLt20++; } if (read.getReadString().contains("N")) { numReadsWithAmbiguousBases++; } if (readLt20 || read.getReadString().contains("N")) { numReadsIntersectLt20Ambiguous++; } if (read.getMappingQuality() < minMapq) { minMapq = read.getMappingQuality(); } totalMapq += read.getMappingQuality(); } } iter.close(); System.out.println(region.getDescriptor() + "\t" + numBases + "\t" + numReads + "\t" + avg(numBasesLt5, numBases) + "\t" + avg(numBasesLt10, numBases) + "\t" + avg(numBasesLt20, numBases) + "\t" + avg(totalQuality, numBases) + "\t" + avg(numReadsLt5, numReads) + "\t" + avg(numReadsLt10, numReads) + "\t" + avg(numReadsLt20, numReads) + "\t" + avg(numReadsLt30, numReads) + "\t" + avg(numReads5X10, numReads) + "\t" + avg(numInternalReadsLt20, numReads) + "\t" + avg(numReadsWithAmbiguousBases, numReads) + "\t" + avg(numReadsIntersectLt20Ambiguous, numReads) + "\t" + avg(totalMapq, numReads) + "\t" + minMapq); } reader.close(); } private double avg(long num, long dem) { return (double) num / (double) dem; } public static void main(String[] args) throws Exception { BaseQualityByRegion bq = new BaseQualityByRegion(); bq.run(args[0], args[1]); } }