package org.molgenis.hemodb.plugins;
import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Scanner;
import matrix.DataMatrixInstance;
import matrix.general.DataMatrixHandler;
import matrix.implementations.binary.BinaryDataMatrixInstance;
import org.molgenis.data.Data;
import org.molgenis.framework.db.Database;
import org.molgenis.framework.db.DatabaseException;
import org.molgenis.framework.db.QueryRule;
import org.molgenis.framework.db.QueryRule.Operator;
import org.molgenis.hemodb.HemoProbe;
import org.molgenis.hemodb.plugins.QuestionsModel.QuestionState;
import org.molgenis.util.EmailService;
import org.molgenis.util.RScript;
import app.DatabaseFactory;
public class CalculateMeanR implements Runnable
{
private String geneExp;
private List<String> sampleNamesGroup1;
private List<String> sampleNamesGroup2;
private double signifCutoff;
private List<String> allProbes;
private EmailService es;
private String emailAddress;
private QuestionsModel questionsModel;
private List<String> groups;
public CalculateMeanR(QuestionsModel questionsModel, String geneExp, List<String> sampleNamesGroup1,
List<String> sampleNamesGroup2, List<String> groups, double signifCutoff, List<String> allProbes,
EmailService es, String emailAdress)
{
super();
this.geneExp = geneExp;
this.sampleNamesGroup1 = sampleNamesGroup1;
this.sampleNamesGroup2 = sampleNamesGroup2;
this.signifCutoff = signifCutoff;
this.allProbes = allProbes;
this.es = es;
this.emailAddress = emailAdress;
this.questionsModel = questionsModel;
this.groups = groups;
Thread t = new Thread(this);
t.start();
}
public void calculateMean() throws Exception
{
// db connection OUTSIDE frontcontroller: evil but needed because we are
// in a thread.... :(
Database db = DatabaseFactory.create();
List<String> allSampleNames = new ArrayList<String>();
allSampleNames.addAll(sampleNamesGroup1);
allSampleNames.addAll(sampleNamesGroup2);
// get data set
Data dataSet = db.query(Data.class).eq(Data.NAME, geneExp).find().get(0);
// load matrix
DataMatrixHandler dmh = new DataMatrixHandler(db);
DataMatrixInstance instance = (BinaryDataMatrixInstance) dmh.createInstance(dataSet, db);
// slice part out of dataset
DataMatrixInstance group1Selection = instance.getSubMatrix(allProbes, sampleNamesGroup1);
DataMatrixInstance group2Selection = instance.getSubMatrix(allProbes, sampleNamesGroup2);
File group1SelectionFile = new File(System.getProperty("java.io.tmpdir") + File.separator
+ "group1Selection_data_" + System.nanoTime() + ".txt");
File group2SelectionFile = new File(System.getProperty("java.io.tmpdir") + File.separator
+ "group2Selection_data_" + System.nanoTime() + ".txt");
File proberesults = new File(System.getProperty("java.io.tmpdir") + File.separator + "proberesults_"
+ System.nanoTime() + ".txt");
PrintStream p1 = new PrintStream(new BufferedOutputStream(new FileOutputStream(group1SelectionFile)), false,
"UTF8");
group1Selection.toPrintStream(p1);
PrintStream p2 = new PrintStream(new BufferedOutputStream(new FileOutputStream(group2SelectionFile)), false,
"UTF8");
group2Selection.toPrintStream(p2);
// create RScript
RScript script = new RScript();
// execute
script.append("compareExpression <- function(data, col1, col2){compare <- data[,col1]-data[,col2]");
script.append("return(compare)}");
script.append("geneExpressionDataSetGroupOne <- read.table(\"" + group1SelectionFile.getAbsolutePath()
+ "\",sep=\"\\t\",header=T,row.names=1,colClasses=c(\"character\"),check.names=FALSE)");
script.append("geneExpressionDataSetGroupOne <- as.matrix(geneExpressionDataSetGroupOne)");
script.append("colnames <- colnames(geneExpressionDataSetGroupOne)");
script.append("rownames <- rownames(geneExpressionDataSetGroupOne)");
script.append("geneExpressionDataSetGroupOne <- matrix(as.numeric(as.matrix(geneExpressionDataSetGroupOne)),c(dim(geneExpressionDataSetGroupOne)[1],dim(geneExpressionDataSetGroupOne)[2]))");
script.append("colnames(geneExpressionDataSetGroupOne) <- colnames");
script.append("rownames(geneExpressionDataSetGroupOne) <- rownames");
script.append("geneExpressionDataSetGroupTwo <- read.table(\"" + group2SelectionFile.getAbsolutePath()
+ "\",sep=\"\\t\",header=T,row.names=1,colClasses=c(\"character\"),check.names=FALSE)");
script.append("geneExpressionDataSetGroupTwo <- as.matrix(geneExpressionDataSetGroupTwo)");
script.append("colnames <- colnames(geneExpressionDataSetGroupTwo)");
script.append("rownames <- rownames(geneExpressionDataSetGroupTwo)");
script.append("geneExpressionDataSetGroupTwo <- matrix(as.numeric(as.matrix(geneExpressionDataSetGroupTwo)),c(dim(geneExpressionDataSetGroupTwo)[1],dim(geneExpressionDataSetGroupTwo)[2]))");
script.append("colnames(geneExpressionDataSetGroupTwo) <- colnames");
script.append("rownames(geneExpressionDataSetGroupTwo) <- rownames");
script.append("rowMeansGroupOne <- as.matrix(rowMeans(geneExpressionDataSetGroupOne))");
script.append("rowMeansGroupTwo <- as.matrix(rowMeans(geneExpressionDataSetGroupTwo))");
script.append("meanDataSet <- cbind(rowMeansGroupOne,rowMeansGroupTwo)");
script.append("rownames(meanDataSet) <- row.names(geneExpressionDataSetGroupOne)");
script.append("geneExpressionTest <- as.matrix(compareExpression(meanDataSet,1,2))");
script.append("rownames(geneExpressionTest) <- row.names(geneExpressionDataSetGroupOne)");
script.append("significance <- geneExpressionTest >=" + signifCutoff + " | geneExpressionTest <= -"
+ signifCutoff);
script.append("significantProbes <- as.matrix(geneExpressionTest[significance])");
script.append("index <- which(significance[,1]==TRUE)");
script.append("result <- significance[index,]");
script.append("probeNames <- names(result)");
script.append("write.table(probeNames, file=\"" + proberesults.getAbsolutePath()
+ "\", sep=\"\\t\", quote=FALSE, row.names=FALSE, col.names=FALSE)");
script.execute();
System.out.println("script executed");
Scanner scanner = new Scanner(proberesults);
ArrayList<String> probes = new ArrayList<String>();
while (scanner.hasNextLine())
{
String line = scanner.nextLine();
probes.add(line);
}
List<String> genes = selectGenesWithProbes(db, probes);
System.out.println("genes are: " + genes);
StringBuilder sb = new StringBuilder();
sb.append("This is the data you selected for the analysis: " + "\n");
sb.append("The data matrix selected: " + geneExp + "\n");
sb.append("Group one: " + groups.get(0) + "\n");
sb.append("Samples in this group: " + sampleNamesGroup1 + "\n");
sb.append("Group two: " + groups.get(1) + "\n");
sb.append("Samples in this group: " + sampleNamesGroup2 + "\n");
sb.append("Significance cutoff: " + signifCutoff + "\n");
String results = sb.toString();
// System.out.println("Testje!!!! : " + results);
StringBuilder geneList = new StringBuilder();
geneList.append(genes);
String genesToString = geneList.toString();
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
Date date = new Date();
System.out.println(dateFormat.format(date));
es.email("Analysis results: " + dateFormat.format(date), results, emailAddress, genesToString, outputStream,
true, "Human leukemia database mail");
}
@Override
public void run()
{
try
{
calculateMean();
questionsModel.setState(QuestionState.QUESTION2_RESULT);
}
catch (Exception e)
{
e.printStackTrace();
}
}
public List<String> selectGenesWithProbes(Database db, ArrayList<String> probes) throws DatabaseException
{
/**
* selectGenesWithProbes gets a String with (Hemo)probe names, converts
* those to (Hemo)genes (by querying the database) and returns the genes
*/
try
{
List<String> chompedProbes = new ArrayList<String>();
for (String probe : probes)
{
chompedProbes.add(probe.trim());
}
List<HemoProbe> genesForProbe = db.find(HemoProbe.class, new QueryRule(HemoProbe.NAME, Operator.IN,
chompedProbes));
List<String> genes = new ArrayList<String>();
for (HemoProbe gfp : genesForProbe)
{
if (!genes.contains(gfp.getReportsFor_Name()))
{
genes.add(gfp.getReportsFor_Name());
}
}
if (!genes.isEmpty())
{
System.out.println("gene found with this probe: " + genes);
return genes;
}
else if (genes.isEmpty())
{
System.out.println("This probe cannot be found in this database.");
}
}
catch (Exception e)
{
System.out.println("This gene cannot be found in this database.");
e.printStackTrace();
}
return null;
}
}