/* The MIT License (MIT) Copyright (c) 2015 Pierre Lindenbaum Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. History: * 2015 creation */ package com.github.lindenb.jvarkit.tools.pcr; import java.io.BufferedReader; import java.io.File; import java.util.ArrayList; import java.util.List; import java.util.Optional; import java.util.regex.Pattern; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMFileHeader.SortOrder; import htsjdk.samtools.SAMFileWriter; import htsjdk.samtools.SAMProgramRecord; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecordIterator; import htsjdk.samtools.SamReader; import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.Interval; import htsjdk.samtools.util.IntervalTreeMap; import com.github.lindenb.jvarkit.io.IOUtils; import com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress; /** BEGIN_DOC Soft clip BAM files based on PCR target regions https://www.biostars.org/p/147136/ * mapping quality is set to zero if a read on strand - overlap the 5' side of the PCR fragment * mapping quality is set to zero if a read on strand + overlap the 3' side of the PCR fragment * mapping quality is set to zero if no PCR fragment is found after processing the BAM file should be sorted, processed with samtools calmd and picard fixmate ### Example ``` echo "seq2\t1100\t1200" > test.bed java -jar dist-1.133/pcrclipreads.jar -B test.bed -b samtools-0.1.19/examples/ex1.bam |\ samtools view -q 1 -F 4 -bu - |\ samtools sort - clipped && samtools index clipped.bam samtools tview -p seq2:1100 clipped.bam samtools-0.1.19/examples/ex1.fa ``` ### output ![img](http://i.imgur.com/bjDEnMW.jpg) ``` 1091 1101 1111 1121 1131 1141 1151 1161 1171 1181 1191 AAACAAAGGAGGTCATCATACAATGATAAAAAGATCAATTCAGCAAGAAGATATAACCATCCTACTAAATACATATGCACCTAACACAAGACTACCCAGATTCATAAAACAAATNNNNN ................................... .................................. ,,, .................................. ,,,,, ................................. ,,,,,,,,,,, .............................N... ,,,,,,,, ............................... ,,g,,,,,,,,,,,,, ............................ ,,,,,,,,,,,,,,,,,,,, ............................ ,,,,,,,,,,,,,,,,,,, .......................... ,,,,,,,,,,,,,,,,,,,,,, .......................... ,,,,,,,,,,,,,,,,,,,,,,, ...................... ,,,,,,,,,,,,,,,,,,,,,,,,,, .................. ,,,,,,,,,,,,,,,,,,,,,,,,,, .................. ,,,,,,,,,,,,,,,,,,,,,,,,,,,, ................. ,,,,,,,,,,,,,,,,,,,,,,,,,,,,, ................ ,,,,,,,,,,,,,,,,,,,,,,,,,,,,, ............... ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, ............ ,,,,,,,,,,,,a,,,,,,,,,,,,,,,,,,, ....... ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, ...... ,,a,,,a,,,,,,,,,,,,,,,,,,,,,,,,,,, .... ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, .... ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, . . ``` ### See also * https://www.biostars.org/p/147136/ * https://www.biostars.org/p/178308 END_DOC */ import com.beust.jcommander.Parameter; import com.beust.jcommander.ParametersDelegate; import com.github.lindenb.jvarkit.util.jcommander.Launcher; import com.github.lindenb.jvarkit.util.jcommander.Program; import com.github.lindenb.jvarkit.util.log.Logger; @Program(name="pcrclipreads",description="Soft clip bam files based on PCR target regions https://www.biostars.org/p/147136/") public class PcrClipReads extends Launcher { private static final Logger LOG = Logger.build(PcrClipReads.class).make(); @Parameter(names={"-o","--output"},description="Output file. Optional . Default: stdout") private File outputFile = null; @Parameter(names={"-B","--bed"},description="Bed file containing non-overlapping PCR fragments") private File bedFile = null; @Parameter(names={"-flag","--flag"},description="Only run on reads having sam flag (flag). -1 = all reads. (as https://github.com/lindenb/jvarkit/issues/43)") private int onlyFlag = -1 ; @Parameter(names={"-largest","--largest"},description="see if a read overlaps two bed intervals use the bed region sharing the longest sequence with a read. see https://github.com/lindenb/jvarkit/issues/44") private boolean chooseLargestOverlap = false; @Parameter(names={"-pr","--programId"},description="add a program ID to the clipped SAM records") private boolean addProgramId = false; @ParametersDelegate private WritingBamArgs writingBamArgs = new WritingBamArgs(); private final IntervalTreeMap<Interval> bedIntervals=new IntervalTreeMap<Interval>(); private Interval findInterval(final SAMRecord rec) { if(rec.getReadUnmappedFlag()) return null; return findInterval(rec.getContig(), rec.getAlignmentStart(), rec.getAlignmentEnd()); } private Interval findInterval(final String chrom,final int start,final int end) { final Interval i= new Interval(chrom,start,end); final List<Interval> L= new ArrayList<>(this.bedIntervals.getOverlapping(i)); if(L.isEmpty()) return null; if(L.size()==1) return L.get(0); if(this.chooseLargestOverlap) { Interval best = null; Integer dist = null; for(final Interval j: L) { if(!i.intersects(j)) continue;//???? final int commonDist = i.intersect(j).length(); if(dist==null || dist < commonDist) { best = j; dist = commonDist; } } return best; } else { throw new IllegalStateException( "Option chooseLargestOverlap not used. Overlapping PCR intervals samRecord "+i+": "+L); } } private int run(final SamReader reader) { final SAMFileHeader header1= reader.getFileHeader(); final SAMFileHeader header2 = header1.clone(); Optional<SAMProgramRecord> samProgramRecord = Optional.empty(); if(this.addProgramId) { final SAMProgramRecord spr = header2.createProgramRecord(); samProgramRecord = Optional.of(spr); spr.setProgramName(this.getProgramName()); spr.setProgramVersion(this.getGitHash()); } header2.addComment(getProgramName()+" "+getVersion()+": Processed with "+getProgramCommandLine()); header2.setSortOrder(SortOrder.unsorted); SAMFileWriter sw=null; SAMRecordIterator iter = null; try { sw = this.writingBamArgs.openSAMFileWriter(outputFile,header2, false); final SAMSequenceDictionaryProgress progress =new SAMSequenceDictionaryProgress(header1); iter = reader.iterator(); while(iter.hasNext()) { SAMRecord rec= progress.watch(iter.next()); if(this.onlyFlag!=-1 && (rec.getFlags() & this.onlyFlag) != 0) { sw.addAlignment(rec); continue; } if(rec.getReadUnmappedFlag()) { sw.addAlignment(rec); continue; } final Interval fragment = findInterval(rec); if(fragment==null) { rec.setMappingQuality(0); sw.addAlignment(rec); continue; } // strand is '-' and overap in 5' of PCR fragment if( rec.getReadNegativeStrandFlag() && fragment.getStart()< rec.getAlignmentStart() && rec.getAlignmentStart()< fragment.getEnd()) { rec.setMappingQuality(0); sw.addAlignment(rec); continue; } // strand is '+' and overap in 3' of PCR fragment if( !rec.getReadNegativeStrandFlag() && fragment.getStart()< rec.getAlignmentEnd() && rec.getAlignmentEnd()< fragment.getEnd()) { rec.setMappingQuality(0); sw.addAlignment(rec); continue; } // contained int PCR fragment if(rec.getAlignmentStart()>= fragment.getStart() && rec.getAlignmentEnd()<=fragment.getEnd()) { sw.addAlignment(rec); continue; } final ReadClipper readClipper = new ReadClipper(); if(samProgramRecord.isPresent()) { readClipper.setProgramGroup(samProgramRecord.get().getId()); } rec = readClipper.clip(rec, fragment); sw.addAlignment(rec); } progress.finish(); return RETURN_OK; } catch(Exception err) { LOG.error(err); return -1; } finally { CloserUtil.close(iter); CloserUtil.close(sw); } } @Override public int doWork(List<String> args) { if(this.bedFile==null) { LOG.error("undefined bed file "); return -1; } BufferedReader r=null; SamReader samReader=null; try { final String inputName = oneFileOrNull(args); samReader = openSamReader(inputName); LOG.info("reading bed File "+this.bedFile); final Pattern tab= Pattern.compile("[\t]"); r= IOUtils.openFileForBufferedReading(this.bedFile); String line; while((line=r.readLine())!=null) { String tokens[]=tab.split(line); if(tokens.length<3) { LOG.error("Bad bed line "+line); return -1; } String chrom = tokens[0]; int chromStart1 = Integer.parseInt(tokens[1])+1; int chromEnd1 = Integer.parseInt(tokens[2])+0; if(chromStart1<1 || chromStart1>chromEnd1) { LOG.error("Bad bed line "+line); return -1; } Interval i =new Interval(chrom, chromStart1, chromEnd1); this.bedIntervals.put(i, i); } CloserUtil.close(r);r=null; return run(samReader); } catch (Exception err) { LOG.error(err); return -1; } finally { CloserUtil.close(r); CloserUtil.close(samReader); this.bedIntervals.clear();; } } public static void main(String[] args) { new PcrClipReads().instanceMain(args); } }