package com.github.lindenb.jvarkit.tools.genscan; import java.awt.Color; import java.awt.Graphics2D; import java.awt.Shape; import java.awt.geom.Line2D; import java.awt.geom.Point2D; import java.awt.geom.Rectangle2D; import java.awt.image.BufferedImage; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.util.Arrays; import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.regex.Pattern; import javax.imageio.ImageIO; import htsjdk.tribble.readers.LineIterator; import htsjdk.tribble.readers.LineIteratorImpl; import htsjdk.tribble.readers.SynchronousLineReader; import htsjdk.variant.utils.SAMSequenceDictionaryExtractor; import com.beust.jcommander.Parameter; import com.github.lindenb.jvarkit.io.IOUtils; import com.github.lindenb.jvarkit.util.jcommander.Program; import com.github.lindenb.jvarkit.util.log.Logger; import com.github.lindenb.jvarkit.util.picard.AbstractDataCodec; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SAMSequenceRecord; import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.SortingCollection; /** * GenScan * */ @Program(name="genscan",description="Paint a Genome Scan picture from a Tab delimited file (CHROM/POS/VALUE1/VALUE2/....).") public class GenScan extends AbstractGeneScan { private static final Logger LOG = Logger.build(GenScan.class).make(); @Parameter(names={"-o","--output"},description="Output file. Optional . Default: stdout") private File outputFile = null; @Parameter(names={"-R","--reference"},description="Fasta indexed reference.") private File faidxFile = null; // @Option(shortName="IIS",doc="Input is a BAM/SAM file",optional=false) private Map<String,ChromInfo> chrom2chromInfo=new HashMap<String,ChromInfo>(); private SortingCollection<DataPoint> dataPoints=null; private boolean drawLines=true; private boolean firstLineIsHeader=true; private class DataPoint implements Comparable<DataPoint> { int tid; int pos; double values[]=new double[samples.size()]; @Override public int compareTo(DataPoint other) { int i=this.tid-other.tid; if(i!=0) return i; i=this.pos-other.pos; if(i!=0) return i; return 0; } } private class DataPointCodec extends AbstractDataCodec<DataPoint> { @Override public DataPoint decode(DataInputStream dis) throws IOException { int tid; try { tid=dis.readInt();} catch(IOException err) { return null;} DataPoint dpt=new DataPoint(); dpt.tid=tid; dpt.pos=dis.readInt(); for(int i=0;i< dpt.values.length;++i) { dpt.values[i]=dis.readDouble(); } return dpt; } @Override public void encode(DataOutputStream out, DataPoint o) throws IOException { out.writeInt(o.tid); out.writeInt(o.pos); for(int i=0;i< o.values.length;++i) { out.writeDouble(o.values[i]); } } @Override public DataPointCodec clone() { return new DataPointCodec(); } } /* protected int doWork() { CloseableIterator<DataPoint> iter=null; IndexedFastaSequenceFile faidx=null; try { this.chromInfos=new ArrayList<GenScan.ChromInfo>(dict.size()); this.dataPoints= SortingCollection.newInstance(DataPoint.class, new DataPointCodec() , new Comparator<DataPoint>() { @Override public int compare(DataPoint p0, DataPoint p1) { return p0.compareTo(p1); } }, super.MAX_RECORDS_IN_RAM); if(IN.isEmpty()) { LOG.info("reading from stdin"); read(System.in); } else { for(File f: IN) { LOG.info("reading "+f); InputStream in=IoUtil.openFileForReading(f); read(in); in.close(); } } dataPoints.setDestructiveIteration(true); dataPoints.doneAdding(); if(DICARD_UNSEEN_CHROM) { int n=0; while(n<this.chromInfos.size()) { if(this.chromInfos.get(n).max_pos < this.chromInfos.get(n).min_pos) { this.chromInfos.remove(n); } else { n++; } } } long genomeSize=0L; for(ChromInfo ci:this.chromInfos) { genomeSize+=ci.getSequenceLength(); } double x=this.insets.left; for(ChromInfo ci:this.chromInfos) { ci.x=x; ci.width=(ci.getSequenceLength()/(double)genomeSize)*(WIDTH-(insets.left+insets.right)); x=ci.x+ci.width; } double log10=Math.pow(10,Math.floor(Math.log10(this.max_value))); //max_value=(this.max_value/log10); this.max_value=Math.max(this.max_value,Math.ceil(this.max_value/log10)*log10); log10=Math.pow(10,Math.floor(Math.log10(this.min_value))); this.min_value=Math.min(this.min_value,Math.floor(this.min_value/log10)*log10); } catch (Exception e) { e.printStackTrace(); return -1; } finally { CloserUtil.close(iter); CloserUtil.close(faidx); if(dataPoints!=null) dataPoints.cleanup(); } return 0; }*/ protected void drawPoints(Graphics2D g) { Point2D.Double[] previous=null; int prev_tid=-1; if(drawLines) { previous=new Point2D.Double[samples.size()]; } CloseableIterator<DataPoint> iter=dataPoints.iterator(); while(iter.hasNext()) { DataPoint dpt=iter.next(); ChromInfo ci=this.chromInfos.get(dpt.tid); if(!ci.visible) continue; double x=ci.x; if(drawLines && prev_tid!=ci.tid) { Arrays.fill(previous, null); prev_tid=ci.tid; } if(!super.DISCARD_BOUNDS) { x+=(((double)dpt.pos/ci.minmaxBase.getMax())*ci.width); } else { x+=((((double)dpt.pos-ci.minmaxBase.getMin())/ci.minmaxBase.getAmplitude())*ci.width); } for(int i=0;i< getSamples().size();++i) { Sample sample=getSamples().get(i); if(!sample.visible) continue; double value=dpt.values[i]; if(Double.isNaN(value)) continue; if(!drawLines && !this.minMaxY.contains(value)) { continue; } double y= sample.y + sample.height -sample.height*this.minMaxY.getFraction(value); g.setColor(i%2==0?Color.YELLOW:Color.GREEN); Shape clip=g.getClip(); g.setClip(new Rectangle2D.Double( ci.x,sample.y, ci.width,sample.height )); if(drawLines) { if(previous[i]!=null) { g.draw(new Line2D.Double( previous[i].x, previous[i].y, x, y )); } previous[i]=new Point2D.Double(x,y); } else { g.draw(new Line2D.Double(x-1,y,x+1, y)); g.draw(new Line2D.Double(x, y-1,x,y+1)); } g.setClip(clip); g.setColor(Color.BLACK); } } iter.close(); iter=null; } private void readTsv(InputStream in) throws IOException { LOG.info("reading tsv"); int nLines=0; Pattern tab=Pattern.compile("[\t]"); @SuppressWarnings("resource") LineIterator r=new LineIteratorImpl(new SynchronousLineReader(in)); boolean foundHeader=false; if(firstLineIsHeader) { if(!r.hasNext()) throw new IOException("First line/header is Missing"); String line=r.next(); String tokens[]=tab.split(line); if(tokens.length<4) throw new IOException("Expected at least 4 columns in "+line); for(int c=3;c<tokens.length;++c) { Sample sample=new Sample(); sample.sample_id=this.samples.size(); sample.name=tokens[c]; this.samples.add(sample); LOG.info("Adding sample "+sample.name+" "+samples.size()); } foundHeader=true; ++nLines; } while(r.hasNext()) { String line=r.next(); ++nLines; if(line.isEmpty() ) continue; String tokens[]=tab.split(line); if(tokens.length<4) throw new IOException("Expected at least 4 columns in "+line); if(!foundHeader && !firstLineIsHeader) { for(int c=3;c<tokens.length;++c) { Sample sample=new Sample(); sample.sample_id=this.samples.size(); sample.name="$"+(c-2); this.samples.add(sample); LOG.info("Adding sample "+sample.name+" "+samples.size()); } foundHeader=true; } else { if(tokens.length!=(3+samples.size())) { throw new IOException( "Expected at least "+(3+samples.size())+" columns in "+line+" but got "+tokens.length); } } DataPoint pt=new DataPoint(); ChromInfo ci=this.chrom2chromInfo.get(tokens[0]); if(ci==null) { if(this.faidxFile!=null) //dict provided by user { LOG.warning("chromosome "+tokens[0]+" was not defined in dictionary."); continue; } ci=new ChromInfo(); ci.sequenceName=tokens[0]; ci.tid=this.chromInfos.size(); this.chromInfos.add(ci); this.chrom2chromInfo.put(tokens[0], ci); } pt.tid=ci.tid; try { pt.pos=Integer.parseInt(tokens[1]); if(pt.pos<0) { LOG.warning("Position <0 in "+line); continue; } if(this.faidxFile!=null && (pt.pos< 0 || pt.pos > ci.dictSequenceLength)) { LOG.warning("Position 0<"+pt.pos+"<"+ci.dictSequenceLength+" out of range in "+line); continue; } } catch (Exception e) { LOG.warning("bad pos in "+line); continue; } for(int i=0;i< samples.size();++i) { try { pt.values[i]=Double.parseDouble(tokens[3+i]); if(Double.isNaN(pt.values[i])) { LOG.warning("bad value in "+tokens[0]+":"+tokens[1]+":"+tokens[2]+"="+tokens[3+i]); } } catch (Exception e) { LOG.warning("bad value in "+tokens[0]+":"+tokens[1]+":"+tokens[2]+"="+tokens[3+i]); pt.values[i]=Double.NaN; } samples.get(i).minmax.visit(pt.values[i]); } this.dataPoints.add(pt); if(this.faidxFile==null) { ci.minmaxBase.visit(pt.pos); } } CloserUtil.close(r); LOG.info("num lines:"+nLines); } @Override protected List<ChromInfo> getChromInfos() { return this.chromInfos; } @Override public int doWork(List<String> args) { InputStream in=null; try { this.dataPoints = SortingCollection.newInstance(DataPoint.class, new DataPointCodec(), new Comparator<GenScan.DataPoint>() { @Override public int compare(DataPoint o1, DataPoint o2) { return o1.compareTo(o2); } }, 50000); if(args.isEmpty()) { LOG.info("Reading from stdin."); in=stdin(); } else if(args.size()==1) { String filename=args.get(0); LOG.info("Reading from "+filename); in=IOUtils.openURIForReading(filename); } else { LOG.error("Illegal number of arguments"); return -1; } if(faidxFile!=null) { LOG.info("Reading "+faidxFile); final SAMSequenceDictionary dict= SAMSequenceDictionaryExtractor.extractDictionary(faidxFile); for(final SAMSequenceRecord rec: dict.getSequences()) { ChromInfo ci=new ChromInfo(); ci.dictSequenceLength=rec.getSequenceLength(); ci.sequenceName=rec.getSequenceName(); ci.tid=chromInfos.size(); this.chromInfos.add(ci); this.chrom2chromInfo.put(ci.sequenceName, ci); } } readTsv(in); this.dataPoints.doneAdding(); this.dataPoints.setDestructiveIteration(true); BufferedImage img=makeImage(); this.dataPoints.cleanup(); this.dataPoints=null; if(outputFile==null) { showGui(img); } else { ImageIO.write(img, "JPG", outputFile); } return 0; } catch(Exception err) { LOG.error(err); return -1; } finally { CloserUtil.close(in); try { if(this.dataPoints!=null) this.dataPoints.cleanup();} catch(Exception er){} } } public static void main(String[] args) { new GenScan().instanceMainWithExit(args); } }