package plugins.rplot; import java.io.File; import java.util.ArrayList; import java.util.TreeMap; import matrix.DataMatrixInstance; import org.molgenis.data.Data; import org.molgenis.util.RScript; import org.molgenis.util.RScriptException; import org.molgenis.xgap.Chromosome; import plugins.qtlfinder.QtlPlotDataPoint; public class MakeRPlot { public static File plot(Data data, DataMatrixInstance instance, String rowName, String colName, String action, String type, int width, int height) throws Exception { String rowType = data.getTargetType(); // shorthand String colType = data.getFeatureType(); // shorthand // Data data = model.getSelectedData(); //shorthand Object[] plotThis = null; PlotParameters params = new PlotParameters(); if (action.endsWith("row")) { if (data.getValueType().equals("Text")) { params.setTitle(rowType + " " + rowName); params.setxLabel("Type of " + colType); params.setyLabel("# of " + colType); } else if (data.getValueType().equals("Decimal")) { params.setTitle(rowType + " " + rowName); params.setxLabel(colType); params.setyLabel(rowType + " value"); } plotThis = instance.getRow(rowName); } else if (action.endsWith("col")) { if (data.getValueType().equals("Text")) { params.setTitle(colType + " " + colName); params.setxLabel("Type of " + rowType); params.setyLabel("# of " + rowType); } else if (data.getValueType().equals("Decimal")) { params.setTitle(colType + " " + colName); params.setxLabel(rowType); params.setyLabel(colType + " value"); } plotThis = instance.getCol(colName); } else if (action.endsWith("heatmap")) { params.setTitle(instance.getData().getName()); params.setxLabel(""); params.setyLabel(""); } else { throw new Exception("unrecognized action: " + action); } File tmpImg = new File(System.getProperty("java.io.tmpdir") + File.separator + "rplot" + System.nanoTime() + ".png"); params.setType(type); params.setWidth(width); params.setHeight(height); if (action.endsWith("col") || action.endsWith("row")) { if (type.equals("boxplot")) { params.setFunction("boxplot"); } else { params.setFunction("plot"); } new ScriptInstance(plotThis, tmpImg, params); } else if (action.endsWith("heatmap")) { new HeatmapScriptInstance(instance, tmpImg, params); } return tmpImg; } public File qtlPlot(String plotName, TreeMap<Long, QtlPlotDataPoint> data, long genePos, int width, int height, String ylab, String filePrefix) throws RScriptException { double[] lodscores = new double[data.size()]; long[] bplocs = new long[data.size()]; String[] chromosomes = new String[data.size()]; int index = 0; for (Long key : data.keySet()) { lodscores[index] = data.get(key).getLodScore(); bplocs[index] = data.get(key).getBpLoc(); chromosomes[index] = data.get(key).getChromosome(); index++; } return qtlPlot(plotName, lodscores, bplocs, chromosomes, genePos, width, height, ylab, filePrefix); } /** * Bad: specific for WormQTL... * * @param dataPoints * @param width * @param height * @param title * @param chromosomes * @return */ public File wormqtl_ProfilePlot(File dataPoints, int width, int height, String title, ArrayList<Chromosome> chromosomes) { String chrNamesAppend = ""; for (int i = 0; i < chromosomes.size(); i++) { chrNamesAppend += "\"" + chromosomes.get(i).getName() + "\""; if (i != chromosomes.size() - 1) { chrNamesAppend += ","; } } String chrLengthsAppend = ""; for (int i = 0; i < chromosomes.size(); i++) { chrLengthsAppend += chromosomes.get(i).getBpLength(); if (i != chromosomes.size() - 1) { chrLengthsAppend += ","; } } long time = System.nanoTime(); File tmpImg = new File(System.getProperty("java.io.tmpdir") + File.separator + "qtl_multiplot_" + time + ".png"); RScript script = new RScript(); RScript.R_COMMAND = "R CMD BATCH --vanilla --slave"; script.append("drawChrRect<-function()"); script.append("{"); script.append(" par(new=TRUE)"); script.append(" chr.lengths<-c(" + chrLengthsAppend + ")/10^6"); script.append(" x.text <- diffinv(chr.lengths)[-length(diffinv(chr.lengths))]+chr.lengths/2"); script.append(" y.text <- max(as.numeric(plotMe[,7]))"); script.append(" "); script.append(" for(i in 1:length(chr.lengths))"); script.append(" {"); script.append(" if ( i %in% c(1,3,5,7) ){ rect(sum(chr.lengths[1:i])-sum(chr.lengths[i]),0,sum(chr.lengths[1:i]),2*y.text,col=\"grey\",border=F)}"); script.append(" if( i %in% 7){ lines(c(sum(chr.lengths),sum(chr.lengths)),c(0,2*y.text),lwd=3,col=\"grey\") }"); script.append(" }"); script.append(" text( x.text,y.text,c(" + chrNamesAppend + "),cex=2,font=2) "); script.append(" par(new=TRUE)"); script.append("}"); script.append(" "); script.append("imagefile <- \"" + tmpImg.getAbsolutePath().replace("\\", "/") + "\";"); script.append("png(imagefile, width = 2024, height = 1024)"); script.append("plotMe <- read.table(\"" + dataPoints.getAbsolutePath().replace("\\", "/") + "\")"); script.append(" "); script.append("probenames <- paste(plotMe[,5],plotMe[,8],sep=\":\")"); script.append("plotMe[,5] <- probenames"); script.append(" par(fig=c(0,0.85,0,1),mgp=c(4,2,0),mai=c(1,1.5,1,0.05))"); script.append("plot(0,0,type=\"n\",xlim=c(0,102),ylim=c(0,max(as.numeric(plotMe[,7]))),cex.lab=3,cex.axis=3,main=\"Profile plot for " + title + "\","); script.append(" xlab=\"Genome (MB)\", ylab=\"LOD\",cex.main=4)"); script.append("drawChrRect()"); script.append("nr.of.lines <- length(unique(probenames))"); script.append("for( i in 1:nr.of.lines){"); script.append("x <- plotMe[plotMe[,5] == unique(probenames)[i],4]/1e6"); script.append("y <- plotMe[plotMe[,5] == unique(probenames)[i],7]"); script.append("lines(x,y,lwd=3,col=rainbow(nr.of.lines)[i])"); script.append("}"); script.append("box()"); script.append(" "); script.append("par(fig=c(0.85,1,0.2,1),new=T,mar=c(0,0,2,0))"); script.append("matrix.id <- c(7,10,11,14,17,18,21,36,37,38)"); script.append("matrix.name <- c(\"7: Age group 1\","); script.append(" \"10: Interaction Age 1&2\","); script.append(" \"11: Constitutive Age 1&2\","); script.append(" \"14: Age group 2\","); script.append(" \"17: Interaction Age 2&3\","); script.append(" \"18: Constitutive Age 2&3\","); script.append(" \"21: Age group 3\","); script.append(" \"36: Rockman etal 2010\","); script.append(" \"37: Grown at 16oC\","); script.append(" \"38: Grown at 24oC\")"); script.append("plot(0,0,type=\"n\",xlim=c(0,10),ylim=c(0,nr.of.lines),axes=F,xlab=\"\",ylab=\"\",main=\"Legend\",cex.main=2)"); script.append("for( i in 1:nr.of.lines){"); script.append("lines(c(1,9),c(i,i),lwd=3,col=rainbow(nr.of.lines)[i])"); script.append("txt.nr <- plotMe[plotMe[,5] == unique(probenames)[i],8][1]"); script.append("text(5,i-0.1,unique(probenames)[i],font=2,cex=1.3)"); script.append("text(5,i-0.4,matrix.name[matrix.id == txt.nr],cex=1.3)"); script.append("}"); script.append("box()"); script.append("par(fig=c(0.85,1,0,0.2),new=T,mar=c(0,0,0,0))"); script.append("plot(0,0,type=\"n\",xlim=c(0,10),ylim=c(0,4),axes=F,xlab=\"\",ylab=\"\")"); script.append("text(0,3.75,\"7, 10, 11, 14, 17, 18, 21:\",adj=0,cex=2)"); script.append("text(0,3.25,\"Vinuela & snoek etal 2010\",adj=0,cex=2)"); script.append("text(0,2.5,\"36:\",adj=0,cex=2)"); script.append("text(0,2,\"Rockman etal 2010\",adj=0,cex=2)"); script.append("text(0,1.25,\"37, 38:\",adj=0,cex=2)"); script.append("text(0,0.75,\"Li etal 2006\",adj=0,cex=2)"); script.append("box()"); script.append(" "); // print to file script.append("dev.off()"); // may not fail and crash the other plots try { script.execute(System.getProperty("java.io.tmpdir") + File.separator + "qtl_multiplot_" + time + ".R"); } catch (RScriptException e) { e.printStackTrace(); } return tmpImg; } public File wormqtl_MultiPlot(File dataPoints, int width, int height, String title, ArrayList<Chromosome> chromosomes) { // TODO: make dynamic using the chromosomes.. String chrNamesAppend = ""; for (int i = 0; i < chromosomes.size(); i++) { chrNamesAppend += "\"" + chromosomes.get(i).getName() + "\""; if (i != chromosomes.size() - 1) { chrNamesAppend += ","; } } long cumulative = 0; String cumuChrLengthsAppend = ""; for (int i = 0; i < chromosomes.size(); i++) { if (i == 0) { cumulative = 0; } else if (i == chromosomes.size() - 1) { cumulative += chromosomes.get(i - 1).getBpLength(); cumuChrLengthsAppend += cumulative + ""; } else { cumulative += chromosomes.get(i - 1).getBpLength(); cumuChrLengthsAppend += cumulative + ","; } // for(int cumu = 0; cumu < i; cumu ++) // { // // } // chrLengthsAppend += chromosomes.get(i).getBpLength(); // if(i != chromosomes.size()-1){ chrLengthsAppend += ","; } } long time = System.nanoTime(); File tmpImg = new File(System.getProperty("java.io.tmpdir") + File.separator + "qtl_multiplot_" + time + ".png"); RScript script = new RScript(); RScript.R_COMMAND = "R CMD BATCH --vanilla --slave"; // lines input example: // plotMe <- rbind(plotMe, c(807, "C5M5_2", 66810758, "A_12_P172557", // 15184683, 0.0127419530093572)) script.append("imagefile <- \"" + tmpImg.getAbsolutePath().replace("\\", "/") + "\";"); script.append("png(imagefile, width = " + width + ", height = " + height + ")"); script.append("plotMe <- read.table(\"" + dataPoints.getAbsolutePath().replace("\\", "/") + "\")"); // FIXME: worm specific!!! bad!! // script.append("chr_startpos <- c(15072423,15072423+15279345,15072423+15279345+13783700,15072423+15279345+13783700+17493793,15072423+15279345+13783700+17493793+20924149)"); script.append("chr_startpos <- c(" + cumuChrLengthsAppend + ")"); script.append("lodscores <- as.numeric(plotMe[,7])"); script.append("lodscores[which(lodscores > 10)] <- 10"); script.append("lodscores[which(lodscores < 0)] <- 0"); script.append("probenames <- paste(plotMe[,5],plotMe[,8],sep=\":\")"); script.append("plotMe[,5] <- probenames"); script.append("ramp <- colorRamp(c(\"lightgray\",\"blue\",\"black\"))"); script.append("use.col <- rgb( ramp(seq(0, 1, length = 100)), max = 255)"); script.append("datasetids <- unlist(lapply(strsplit(unique(as.character(plotMe[,5])),\":\"),\"[\",2))"); script.append("p <- datasetids[1]"); script.append("cl <- 0"); script.append("cnt <- 1"); script.append("for(x in datasetids){"); script.append(" if(x != p) cl <- c(cl,cnt)"); script.append(" p <- x"); script.append(" cnt <- cnt+1"); script.append("}"); script.append("cl <- c(cl,length(datasetids))"); script.append("op <- par(mai=c(1,2,1,1))"); script.append("plot(c(0, max(as.numeric(plotMe[,4])/1000000)),c(1,length(unique(probenames))),type='n',main=\"Heat plot for " + title + "\", yaxt='n',ylab=\"\", xlab=\"Location (cumulative Mbp)\")"); script.append("cnt <- 1"); script.append("for(x in unique(probenames)){"); script.append(" toplot <- which(plotMe[,5]==x)"); script.append(" points(as.numeric(plotMe[toplot,4])/1000000, rep(cnt,length(toplot)),cex=1, col=use.col[round(lodscores[toplot]*9)+1],pch=15)"); script.append(" points(as.numeric(plotMe[toplot[1],6])/1000000, cnt, pch=18, cex=1.5, col=\"red\")"); script.append(" cnt <- cnt + 1"); script.append("}"); // script.append("axis(2,at=1:length(unique(probenames)),unlist(lapply(strsplit(unique(probenames),\":\"),\"[\",1)),las=1,cex.axis=0.7)"); // //removes dataset id script.append("axis(2,at=1:length(unique(probenames)),unique(probenames),las=1,cex.axis=1)"); script.append("abline(v=(chr_startpos/1000000)+.5,col='red')"); script.append("dl <- cl-1"); script.append("dl[1] <- cl[1]"); script.append("dl[length(cl)] <- cl[length(cl)]"); script.append("abline(h=dl+0.5,col=\"black\")"); script.append("cnt <- 1"); script.append("p <- 1"); script.append("cc <- 1"); script.append("for(x in cl[-1]){"); script.append(" text(x=-1.5, cc + (x-p)/2, labels=unique(datasetids)[cnt],cex=1.3)"); script.append(" cnt <- cnt + 1"); script.append(" cc <- cc+((x-p))"); script.append(" p <- x"); script.append("}"); // print to file script.append("dev.off()"); // may not fail and crash the other plots try { script.execute(System.getProperty("java.io.tmpdir") + File.separator + "qtl_multiplot_" + time + ".R"); } catch (RScriptException e) { e.printStackTrace(); } return tmpImg; } public File wormqtl_ProfilePlot_prev(File dataPoints, int width, int height, String title, ArrayList<Chromosome> chromosomes) { long time = System.nanoTime(); File tmpImg = new File(System.getProperty("java.io.tmpdir") + File.separator + "qtl_regular_" + time + ".png"); RScript script = new RScript(); RScript.R_COMMAND = "R CMD BATCH --vanilla --slave"; appendDrawChromosomes(script, chromosomes); script.append("plotMe <- read.table(\"" + dataPoints.getAbsolutePath().replace("\\", "/") + "\")"); script.append("imagefile <- \"" + tmpImg.getAbsolutePath().replace("\\", "/") + "\";"); script.append("lodscores <- as.numeric(plotMe[,7])"); script.append("lodscores[which(lodscores < 0)] <- 0"); script.append("probenames <- paste(plotMe[,5],plotMe[,8],sep=\":\")"); script.append("plotMe[,5] <- probenames"); script.append("if(length(unique(probenames))<=7){"); script.append(" png(imagefile, width = 2024, height = 2024)"); script.append(" op <- par(mai=c(1,1,1,1))"); script.append(" par(mfrow=c(length(unique(probenames)),1)) "); script.append(" for(x in unique(probenames)){"); script.append(" toplot <- which(plotMe[,5]==x)"); script.append(" plot( as.numeric(plotMe[toplot,4])/1000000, lodscores[toplot],type=\"n\",xlab=\"\",ylab=\"\",cex.axis=2.5)"); script.append(" drawChrStrips()"); script.append(" plot( as.numeric(plotMe[toplot,4])/1000000, lodscores[toplot],type=\"l\",col=\"blue\",lwd=2,ylab=\"LOD\",xlab=\"\",main=x,cex.main=2.5,cex.lab=2.5,cex.axis=2.5) "); script.append(" points(as.numeric(plotMe[toplot[1],6])/1000000, 0, pch=25, cex=3, col=\"red\") "); script.append(" } "); script.append("}else{"); script.append(" png(imagefile, width = 2024, height = 1024)"); script.append(" op <- par(mai=c(1,1,1,1))"); script.append(" qtl.all <- NULL"); script.append(" for(x in unique(probenames)){"); script.append(" toplot <- which(plotMe[,5]==x)"); script.append(" qtl.all<- rbind(qtl.all,lodscores[toplot])"); script.append(" }"); script.append(" matplot( as.numeric(plotMe[toplot,4])/1000000, t(qtl.all),type=\"n\",xlab=\"\",ylab=\"\",cex.axis=2.5)"); script.append(" drawChrStrips() "); script.append(" matplot( as.numeric(plotMe[toplot,4])/1000000, t(qtl.all),type=\"l\", lty=rep(1,length(unique(probenames))),col=1:length(unique(probenames)),lwd=2,ylab=\"LOD\",xlab=\"Genome (Mb)\",main=\"QTLs for " + title + "\",cex.main=2.5,cex.lab=2.5,cex.axis=2.5) "); script.append("}"); // print to file script.append("dev.off()"); // may not fail and crash the other plots try { script.execute(System.getProperty("java.io.tmpdir") + File.separator + "qtl_regular_" + time + ".R"); } catch (RScriptException e) { e.printStackTrace(); } return tmpImg; } public void appendDrawChromosomes(RScript script, ArrayList<Chromosome> chromosomes) { String chrNamesAppend = ""; for (int i = 0; i < chromosomes.size(); i++) { chrNamesAppend += "\"" + chromosomes.get(i).getName() + "\""; if (i != chromosomes.size() - 1) { chrNamesAppend += ","; } } String chrLengthsAppend = ""; for (int i = 0; i < chromosomes.size(); i++) { chrLengthsAppend += chromosomes.get(i).getBpLength(); if (i != chromosomes.size() - 1) { chrLengthsAppend += ","; } } script.append("drawChrStrips<-function()"); script.append("{"); script.append(" par(new=TRUE)"); script.append(" chr.lengths<-c(" + chrLengthsAppend + ")/10^6"); script.append(" nn <- 100"); script.append(" chrStrips<-seq(0,0,length=100)"); script.append(" x.text <- diffinv(chr.lengths)[-length(diffinv(chr.lengths))]+chr.lengths/2"); script.append(" y.text <- 102"); script.append(" text( x.text,y.text,c(" + chrNamesAppend + "),cex=1.5)"); script.append(" for(i in 1:length(chr.lengths))"); script.append(" {"); script.append(" if(i%%2==0){ next; }"); script.append(" lim <- c( diffinv(chr.lengths)[i], diffinv(chr.lengths)[i+1])"); script.append(" for (j in 1: nn)"); script.append(" {"); script.append(" lines(c(lim[1]+(j*(lim[2]-lim[1])/nn), lim[1]+(j*(lim[2]-lim[1])/nn)), c(0, 100), col=grey(0.95),lwd=2)"); script.append(" }"); script.append(" }"); script.append(" par(new=TRUE)"); script.append("}"); } public File wormqtl_CisTransPlot(File dataPoints, int width, int height, String title, ArrayList<Chromosome> chromosomes) { long time = System.nanoTime(); File tmpImg = new File(System.getProperty("java.io.tmpdir") + File.separator + "qtl_cistrans_" + time + ".png"); RScript script = new RScript(); RScript.R_COMMAND = "R CMD BATCH --vanilla --slave"; script.append("plotMe <- read.table(\"" + dataPoints.getAbsolutePath().replace("\\", "/") + "\")"); script.append("imagefile <- \"" + tmpImg.getAbsolutePath().replace("\\", "/") + "\";"); script.append("png(imagefile, width = " + width + ", height = " + height + ")"); script.append("op <- par(mai=c(1,2,1,1))"); script.append("min.qtl <- 2"); script.append("my.scale <- 4"); appendDrawChromosomes(script, chromosomes); script.append("my.offset <- 1000000/10^6"); script.append("plot(c(0,max(as.numeric(plotMe[,4]))),c(0,max(as.numeric(plotMe[,6]))),type='n',main=\"Cis-trans plot for " + title + "\",ylab=\"Probe position (Mb)\", xlab=\"Marker position (Mb)\",xlim=c(0,101),ylim=c(0,101),cex.lab=1.5,cex.main=1.5,cex.axis=1.5)"); script.append("drawChrStrips()"); script.append("points(as.numeric(plotMe[,4])/10^6,as.numeric(plotMe[,6])/10^6,cex=(plotMe[,7]/my.scale) * as.numeric(plotMe[,7] >= min.qtl), col=as.numeric(plotMe[,3]),pch=20)"); script.append("abline(-my.offset,1,col=\"gray\",lty=2)"); script.append("abline(my.offset,1,col=\"gray\",lty=2)"); script.append("abline(0,1,col=\"black\",lty=1)"); // print to file script.append("dev.off()"); // may not fail and crash the other plots try { script.execute(System.getProperty("java.io.tmpdir") + File.separator + "qtl_cistrans_" + time + ".R"); } catch (RScriptException e) { e.printStackTrace(); } return tmpImg; } // all inputs must be sorted and of equal length!! // create QTL plot scaling by incrementing basepair position // give markers colours based on their chromosome // no missing values allowed! public File qtlPlot(String plotName, double[] lodscores, long[] bplocs, String[] chromosomes, long genePos, int width, int height, String ylab, String filePrefix) throws RScriptException { File tmpImg = new File(System.getProperty("java.io.tmpdir") + File.separator + filePrefix + "_rplot" + System.nanoTime() + ".png"); RScript script = new RScript(); RScript.R_COMMAND = "R CMD BATCH --vanilla --slave"; script.append("imagefile <- \"" + tmpImg.getAbsolutePath().replace("\\", "/") + "\";"); script.append("dataVector <- rep(0," + lodscores.length + ");"); script.append("locs <- rep(0," + lodscores.length + ");"); script.append("chrs <- rep(0," + lodscores.length + ");"); for (int i = 0; i < lodscores.length; i++) { script.append("dataVector[" + (i + 1) + "] <- " + lodscores[i]); script.append("locs[" + (i + 1) + "] <- " + bplocs[i]); script.append("chrs[" + (i + 1) + "] <- \"" + chromosomes[i] + "\""); } script.append("chrs <- as.numeric(as.factor(chrs))+1"); script.append("pos <- " + genePos); script.append("png(imagefile, width = " + width + ", height = " + height + ")"); // start plotting: black line script.append("plot(y=dataVector,x=locs,col=\"black\",main=\"" + plotName + "\",xlab=\"" + "Basepair position" + "\",ylab=\"" + ylab + "\",type=\"" + "l" + "\",pch=20,cex=2,lwd=2)"); // now add coloured balls script.append("points(y=dataVector,x=locs,col=chrs,type=\"" + "p" + "\",pch=20,cex=2)"); // now add vertical coloured lines script.append("points(y=dataVector,x=locs,col=chrs,type=\"" + "h" + "\",lwd=2)"); // add transcript positions script.append("axis(1,pos,\"Transcript\",line=1)"); script.append("abline(v=pos,lty=2,col='black')"); // print to file script.append("dev.off()"); script.execute(); return tmpImg; } public static void main(String[] args) throws RScriptException { File res = new MakeRPlot().qtlPlot("henkie", new double[] { 3, 4, 3, 6, 7, 2, 4, 6 }, new long[] { 1, 2, 4, 30, 8, 15, 20, 5 }, new String[] { "I", "I", "I", "IV", "II", "III", "IV", "II" }, 25, 800, 600, "LOD score", "qtl"); System.out.println("RES @ " + res); } }