package com.github.lindenb.jvarkit.tools.cmpbams; import java.io.File; import java.io.PrintWriter; import java.util.Comparator; import java.util.List; import java.util.Optional; import java.util.regex.Pattern; import com.beust.jcommander.Parameter; import com.github.lindenb.jvarkit.util.jcommander.Launcher; import com.github.lindenb.jvarkit.util.jcommander.Program; import com.github.lindenb.jvarkit.util.log.Logger; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SamReader; import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.PeekableIterator; /** BEGIN_DOC ## Example ### Example 1 ``` $ java -jar dist/commbams.jar --samtools \ -f but_metadata -delim '\n' \ B00GWFP_std.hg19.qname.bam B00GWFP_S1.hg19.qname.bam PANORAMIX:1:HJY2CCCXX:7:1101:1133:5388/1 83 chr5 21564864 40 151M = 21564540 -475 CTCCCAGAGAGAAGCATCAACAGCTTAGGGTGTAGTCTAAACAGAAATCTTGCACTCCTCCTGCAGTAGCGTCTCTATTTTTTATGCTGAACATTATTTGCTAATTCCAACTGGCTCTAAGCTAATGTGTTTCCCAGGTTTTCTCAATGAN AFAA<,,,<,,,,,,,,7,,,,7,,,,A7KKF<,F,,7,7,A,A7F7,K<A,,,,7,,,7KKKFAFA,7,A7F7,7,,,KFF,,AKKFFFF<<K<<KAFAKA,A,,A7,AAAFKFA,A,FKKAA,AKKKKFFFKF<KKKKKKKFFAAAA<# PANORAMIX:1:HJY2CCCXX:7:1101:1133:5388/1 77 * 0 0 * * 0 0 NTCATTGAGAAAACCTGGGAAACACATTAGCTTAGAGCCAGTTGGAATTAGCAAATAATGTTCAGCATAAAAAATAGAGACGCTACTGCAGGAGGAGTGCAAGATTTCTGTTTAGACTACACCCTAAGCTGTTGATGCTTCTCTCTGGGA !<AAAAFFKKKKKKK<FKFFFKKKKA,AAKKF,A,AFKFAAA,7A,,A,AKAFAK<<K<<FFFFKKA,,FFK,,,7,7F7A,7,AFAFKKK7,,,7,,,,A<K,7F7A,A,7,7,,F,<FKK7A,,,,7,,,,7,,,,,,,,<,,,<AAF PANORAMIX:1:HJY2CCCXX:7:1101:1133:5388/2 163 chr5 21564540 60 8S106M37S = 21564864 475 NTAAGAATATTTCACACTTAAAACAAAATCTGATTAGACAAACACTTTGATTGTTATTATTCGCGTATATCATCTACCAGAAGCAAATAGACATCTACTACATCTTTCAAGAAAGTTTACCTATCAATATTACTCAACTGGACCCAATAAT #<A,<,,A,,K<7FKFF,7,,AF,,7,7AAF,,7<<,,7,AF,,7,7A<,7FA,,7,,7F,,A7FKK7,7,,,,,7,,,,,<,<,,,,7,<,,,,,,7FF7AF<7,,<,,,,7,7,,,,,,,<,,,,,,,,,,,,,,,,,,<,,,,,,,,, PANORAMIX:1:HJY2CCCXX:7:1101:1133:5388/2 141 * 0 0 * * 0 0 NTAAGAATATTTCACACTTAAAACAAAATCTGATTAGACAAACACTTTGATTGTTATTATTCGCGTATATCATCTACCAGAAGCAAATAGACATCTACTACATCTTTCAAGAAAGTTTACCTATCAATATTACTCAACTGGACCCAATAA !<A,<,,A,,K<7FKFF,7,,AF,,7,7AAF,,7<<,,7,AF,,7,7A<,7FA,,7,,7F,,A7FKK7,7,,,,,7,,,,,<,<,,,,7,<,,,,,,7FF7AF<7,,<,,,,7,7,,,,,,,<,,,,,,,,,,,,,,,,,,<,,,,,,,, ``` ### Example 2 ``` $ java -jar dist/commbams.jar --samtools \ B00GWFP_std.hg19.qname.bam B00GWFP_S1.hg19.qname.bam . . PANORAMIX:1:HJY2CCCXX:7:1101:1133:5388/1 . . PANORAMIX:1:HJY2CCCXX:7:1101:1133:5388/2 . . PANORAMIX:1:HJY2CCCXX:7:1101:1133:6513/1 . . PANORAMIX:1:HJY2CCCXX:7:1101:1133:6513/2 . . PANORAMIX:1:HJY2CCCXX:7:1101:1133:7181/1 . . PANORAMIX:1:HJY2CCCXX:7:1101:1133:7181/2 . . PANORAMIX:1:HJY2CCCXX:7:1101:1133:10205/1 . . PANORAMIX:1:HJY2CCCXX:7:1101:1133:10205/2 . PANORAMIX:1:HJY2CCCXX:7:1101:1133:10380/1 . . PANORAMIX:1:HJY2CCCXX:7:1101:1133:10380/2 . END_DOC */ @Program(name="commbams",description="Equivalent of unix 'comm' for bams sorted on queryname") public class CommBams extends Launcher { private enum WhatToPrint { name,but_metadata,all} private static final Logger LOG=Logger.build(CommBams.class).make(); @Parameter(names={"-o","--out"},description="output file . Default:stdout") private File outputFile = null; @Parameter(names={"-1","--hide1"},description="suppress read unique to file 1") private boolean hide1=false; @Parameter(names={"-2","--hide2"},description="suppress read unique to file 2") private boolean hide2=false; @Parameter(names={"-3","--hide3"},description="suppress reads present in both files") private boolean hide3=false; @Parameter(names={"-st","--samtools"},description="Data was sorted using samtools sort -n algorithm (!= picard) see https://github.com/samtools/hts-specs/issues/5") private boolean samtoolsquerysort = false; @Parameter(names={"-delim","--delimiter"},description="Output delimiter") private String delim = "\t"; @Parameter(names={"-empty","--empty"},description="Empty content symbol") private String emptySymbol = "."; @Parameter(names={"-f","--format"},description="What should I print ? (only the read name ? etc...)") private WhatToPrint whatPrint=WhatToPrint.name; private final Pattern tab=Pattern.compile("[\t]"); private static int side(final SAMRecord rec) { return CompareBams4.side(rec); } private static String readName(final SAMRecord rec) { if(rec.getReadPairedFlag()) { if(rec.getFirstOfPairFlag()) return rec.getReadName()+"/1"; if(rec.getSecondOfPairFlag()) return rec.getReadName()+"/2"; throw new RuntimeException("Side for flag "+rec.getReadName()+":"+rec.getFlags()+"?"); } else { return rec.getReadName(); } } private void dump(final PrintWriter out, final String name, int col ) { boolean first=true; for(int i=1;i<=3;++i) { if(i==1 && hide1) continue; if(i==2 && hide2) continue; if(i==3 && hide3) continue; if(!first) out.print(this.delim); if(i==col) { out.print(name); } else { out.print(this.emptySymbol); } first=false; } out.println(); } private void dump(final PrintWriter out, final SAMRecord rec0, final SAMRecord rec1 ) { switch(this.whatPrint) { case name: if(rec0==null && rec1!=null) { if(!hide2) dump(out,readName(rec1),2); } else if(rec0!=null && rec1==null) { if(!hide1) dump(out,readName(rec0),1); } else { if(!hide3) dump(out,readName(rec0),3); } break; case but_metadata: case all: { for(int side=0;side<2;++side) { final SAMRecord r= (side==0?rec0:rec1); if(r==null) { for(int j=0;j< 11;++j) { if(j>0) out.print('\t'); out.print(this.emptySymbol); } continue; } String samStr = r.getSAMString(); if(samStr.charAt(samStr.length()-1)=='\n') { samStr= samStr.substring(0,samStr.length()-1); } if(this.whatPrint.equals(WhatToPrint.all)) { out.print(samStr); } else { final String tokens[] = this.tab.split(samStr); tokens[0]=readName(r); for(int j=0;j< 11;++j) { if(j>0) out.print('\t'); out.print(j<tokens.length?tokens[j]:this.emptySymbol); } } if(side==0) out.print(this.delim); } out.println(); break; } } } @Override public int doWork(final List<String> args) { final Comparator<SAMRecord> comparator; if(args.size() !=2) { LOG.info("Expected two and only two bams please, but got "+args.size()); return -1; } if( this.samtoolsquerysort) { LOG.info("using the samtools sort -n comparator"); comparator = new CompareBams4.SamToolsReadNameComparator(); } else { comparator = new CompareBams4.SimpleReadNameComparator(); } final SamReader samFileReaders[]={null,null}; @SuppressWarnings("unchecked") final PeekableIterator<SAMRecord> iters[]=new PeekableIterator[]{null,null}; PrintWriter out=null; Optional<SAMRecord> rec0 = Optional.empty(); Optional<SAMRecord> rec1 = Optional.empty(); if(whatPrint.equals(WhatToPrint.name) && hide1 && hide2 && hide3) { LOG.error("all flags hide** are on"); return -1; } try { if( this.delim.equals("\\n")) this.delim = "\n"; for(int i=0;i< args.size() && i< samFileReaders.length;++i) { final String samFile=args.get(i); LOG.info("opening "+samFile); samFileReaders[i]=super.openSamReader(samFile); final SAMFileHeader header = samFileReaders[i].getFileHeader(); if(header.getSortOrder()!=SAMFileHeader.SortOrder.queryname) { LOG.error("Expected "+samFile+" to be sorted on "+SAMFileHeader.SortOrder.queryname+" but got "+header.getSortOrder()); return -1; } iters[i] = new PeekableIterator<>(samFileReaders[i].iterator()); } out= super.openFileOrStdoutAsPrintWriter(outputFile); for(;;) { for(int i=0;i< 2;++i) { Optional<SAMRecord> optRec = (i==0?rec0:rec1); if(!optRec.isPresent()) { while(iters[i].hasNext()) { final SAMRecord rec = iters[i].peek(); if(optRec.isPresent() && comparator.compare(optRec.get(),rec)>0) { LOG.error("Something is wrong in sort order of "+args.get(i)+" : got\n\t" +rec+" "+side(rec)+"\nafter\n\t"+ optRec.get()+" "+side(optRec.get())+"\nSee also option (samtools querysort)" ); return -1; } // equals else if( optRec.isPresent()&& comparator.compare(optRec.get(),rec)==0) { iters[i].next();//consumme } //it's a new else if(!optRec.isPresent()){ optRec = Optional.of(iters[i].next());//consumme if(i==0) { rec0=optRec; } else { rec1=optRec; } } // compare <0 else { break; } } } } if(!rec0.isPresent() && !rec1.isPresent()) break; if((!rec0.isPresent() && rec1.isPresent()) || (rec0.isPresent() && rec1.isPresent() && comparator.compare(rec0.get(),rec1.get())>0) ) { dump(out,null,rec1.get()); rec1 =Optional.empty(); } else if((rec0.isPresent() && !rec1.isPresent()) || (rec0.isPresent() && rec1.isPresent() && comparator.compare(rec0.get(), rec1.get())<0)) { dump(out,rec0.get(),null); rec0 =Optional.empty(); } else { dump(out,rec0.get(),rec1.get()); rec0 =Optional.empty(); rec1 =Optional.empty(); } } out.flush(); out.close();out=null; return 0; } catch(Exception err) { LOG.error(err); return -1; } finally { for(int i=0;i< samFileReaders.length;++i){ CloserUtil.close(iters[i]); CloserUtil.close(samFileReaders[i]); } CloserUtil.close(out); } } public static void main(final String[] args) { new CommBams().instanceMainWithExit(args); } }