package com.github.lindenb.jvarkit.tools.jaspar; import java.io.IOException; import java.io.InputStreamReader; import java.io.Reader; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import htsjdk.tribble.readers.LineIterator; import htsjdk.tribble.readers.LineIteratorImpl; import htsjdk.tribble.readers.LineReader; import htsjdk.tribble.readers.SynchronousLineReader; import com.beust.jcommander.Parameter; import com.github.lindenb.jvarkit.io.IOUtils; import com.github.lindenb.jvarkit.lang.SubSequence; import com.github.lindenb.jvarkit.util.bio.RevCompCharSequence; import com.github.lindenb.jvarkit.util.jcommander.Launcher; import com.github.lindenb.jvarkit.util.jcommander.Program; import com.github.lindenb.jvarkit.util.log.Logger; @Program(name="genomicjaspar",description="Find jaspar patterns in FASTA sequences. Reports a BED file.") public class GenomicJaspar extends Launcher { private static final Logger LOG = Logger.build(GenomicJaspar.class).make(); @Parameter(names="-J",description=" jaspar PFM uri. required. example: http://jaspar.genereg.net/html/DOWNLOAD/JASPAR_CORE/pfm/nonredundant/pfm_vertebrates.txt") private String jasparUri=null; @Parameter(names="-f",description="(0<ratio<1) fraction of best score") private double fraction_of_max=0.95; private List<Matrix> jasparDb=new ArrayList<Matrix>(); private GenomicJaspar() { } private void digest( String seqName, int position0, final StringBuilder sequence ) { for(final Matrix matrix:this.jasparDb) { if(matrix.length()>sequence.length()) continue; CharSequence forward=new SubSequence(sequence,0,matrix.length()); CharSequence revcomp=new RevCompCharSequence(forward); for(int strand=0;strand<2;++strand) { double score= matrix.score(strand==0?forward:revcomp); if(score<=0) continue; if(score>= matrix.max()*this.fraction_of_max) { System.out.print(seqName); System.out.print('\t'); System.out.print(position0); System.out.print('\t'); System.out.print(position0+matrix.length()); System.out.print('\t'); System.out.print(matrix.getName()); System.out.print('\t'); System.out.print((int)(1000.0*(score/matrix.max()))); System.out.print('\t'); System.out.print(strand==1?'-':'+'); System.out.print('\t'); System.out.print(matrix.length()); System.out.print('\t'); System.out.print(matrix.getArchetype()); System.out.print('\t'); System.out.print(strand==0?forward:revcomp); System.out.println(); break; } } } } private void run(Reader in) throws IOException { int longest=0; for(Matrix m:this.jasparDb) { longest=Math.max(m.length(), longest); } LOG.info("longest:"+longest); String seqName=""; int position0=0; StringBuilder sequences=new StringBuilder(longest); for(;;) { int c=in.read(); if(c==-1 || c=='>') { while(sequences.length()!=0) { digest(seqName,position0,sequences); ++position0; sequences.deleteCharAt(0); } if(c==-1) break; StringBuilder b=new StringBuilder(); while((c=in.read())!=-1 && c!='\n') { b.append((char)c); } seqName=b.toString(); position0=0; } else if(!Character.isWhitespace(c)) { sequences.append((char)Character.toUpperCase(c)); if(sequences.length()==longest) { digest(seqName,position0,sequences); if(System.out.checkError()) return ; ++position0; sequences.deleteCharAt(0); if(position0%1000000==0) { LOG.info(seqName+" "+position0); } } } } } @Override public int doWork(List<String> args) { if(jasparUri==null) { LOG.error("Undefined jaspar-uri"); return -1; } try { LOG.info("Reading "+jasparUri); LineReader lr=new SynchronousLineReader(IOUtils.openURIForReading(jasparUri)); LineIterator liter=new LineIteratorImpl(lr); Iterator<Matrix> miter=Matrix.iterator(liter); while(miter.hasNext()) { Matrix matrix = miter.next(); this.jasparDb.add(matrix.convertToPWM()); } lr.close(); LOG.info("JASPAR size: "+this.jasparDb.size()); if(args.isEmpty()) { LOG.info("Reading from stdin"); run(new InputStreamReader(stdin())); } else { for(final String fname:args) { LOG.info("Opening "+fname); Reader in=IOUtils.openURIForBufferedReading(fname); run(in); in.close(); } } return 0; } catch(Throwable err) { LOG.error(err); return -1; } } /** * @param args */ public static void main(String[] args) { new GenomicJaspar().instanceMainWithExit(args); } }