package matrix.test.implementations.general;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import loaders.XgapLoadPanelTypes;
import matrix.implementations.binary.BinaryDataMatrixInstance;
import matrix.implementations.memory.MemoryDataMatrixInstance;
import org.apache.commons.io.FileUtils;
import org.apache.log4j.Logger;
import org.molgenis.core.MolgenisFile;
import org.molgenis.core.OntologyTerm;
import org.molgenis.data.Data;
import org.molgenis.data.DecimalDataElement;
import org.molgenis.data.TextDataElement;
import org.molgenis.framework.db.Database;
import org.molgenis.framework.db.DatabaseException;
import org.molgenis.framework.db.QueryRule;
import org.molgenis.framework.db.QueryRule.Operator;
import org.molgenis.organization.Investigation;
import org.molgenis.pheno.Individual;
import org.molgenis.pheno.Panel;
import org.molgenis.util.Entity;
import org.molgenis.util.TarGz;
import org.molgenis.xgap.DerivedTrait;
import org.molgenis.xgap.Marker;
import regressiontest.util.Util;
import decorators.NameConvention;
public class Helper
{
/** Database handle supplied to the constructor; every find/add/remove in this class goes through it. */
private Database db = null;
/** Random names already handed out by getRandomIndividuals/getRandomMarkers, used to skip duplicates. */
private List<String> uniqueNames = new ArrayList<String>();
/** Log4j logger named after the simple name of the runtime class. */
private Logger logger = Logger.getLogger(getClass().getSimpleName());
/** Data objects created by prepareDatabaseAndFiles; stays null until that method has run. */
private List<Data> dataList;
/** Directory holding the generated matrix files; assigned at the end of prepareDatabaseAndFiles. */
private File inputFilesDir;
/**
 * Exposes the Data objects that prepareDatabaseAndFiles stored on this helper.
 *
 * @return the stored list, or null if prepareDatabaseAndFiles has not assigned it yet
 */
public List<Data> getDataList()
{
	return this.dataList;
}
/**
 * Exposes the directory that prepareDatabaseAndFiles filled with matrix files.
 *
 * @return the stored directory, or null if prepareDatabaseAndFiles has not assigned it yet
 */
public File getInputFilesDir()
{
	return this.inputFilesDir;
}
/**
 * Creates a helper bound to the given database.
 *
 * @param db
 *            database used by all later lookups and modifications; stored as-is, no validation is done here
 */
public Helper(Database db)
{
this.db = db;
}
/**
 * Builds the in-memory description of the text matrix named "testTextData". Nothing is written to the database
 * here.
 *
 * @param inv
 *            investigation the data set is attached to; its name is copied onto the data set too
 * @param feature
 *            not used by the current implementation
 * @param target
 *            not used by the current implementation
 * @param rowType
 *            stored as the target type
 * @param colType
 *            stored as the feature type
 * @param source
 *            stored as the storage setting
 * @return the populated Data object
 */
private Data getTestTextData(Investigation inv, DerivedTrait feature, Panel target, String rowType, String colType,
		String source)
{
	final Data textData = new Data();
	textData.setName("testTextData");
	textData.setValueType("Text");
	textData.setInvestigation(inv);
	textData.setInvestigation_Name(inv.getName());
	textData.setTargetType(rowType);
	textData.setFeatureType(colType);
	textData.setStorage(source);
	// The feature and target arguments are deliberately left unattached.
	return textData;
}
/**
 * Wipes the test-related tables, repopulates them with fresh random annotations and writes one random matrix file
 * per created Data object into a temporary input directory.
 * <p>
 * After a successful run, {@link #getDataList()} returns the two created Data objects (text and decimal) and
 * {@link #getInputFilesDir()} returns the directory containing their files. The data list is assigned as soon as
 * the Data objects are stored; the directory is only assigned after every file has been written.
 *
 * @param storage
 *            storage setting recorded on both created Data objects
 * @param params
 *            supplies matrixDimension1 (number of individuals), matrixDimension2 (number of markers),
 *            maxTextLength, sparse and fixedTextLength
 * @throws DatabaseException
 *             propagated from the database calls
 * @throws IOException
 *             if the input directory cannot be created or emptied, or a matrix file cannot be written
 * @throws InterruptedException
 *             declared in the signature; no statement visible in this method raises it directly
 * @throws IllegalStateException
 *             if no ontology term with definition "other" is found after loading the panel types
 */
public void prepareDatabaseAndFiles(String storage, Params params) throws DatabaseException, IOException,
		InterruptedException
{
	// Clear existing content; the order below is the original one and is kept as-is.
	db.remove(db.find(TextDataElement.class));
	db.remove(db.find(DecimalDataElement.class));
	db.remove(db.find(Marker.class));
	db.remove(db.find(Individual.class));
	db.remove(db.find(MolgenisFile.class));
	for (Data d : db.find(Data.class))
	{
		db.remove(d);
	}
	db.remove(db.find(DerivedTrait.class));
	db.remove(db.find(Panel.class));
	db.remove(db.find(Investigation.class));

	logger.info("Creating new investigation and adding it to database..");
	Investigation inv = getTestInvestigation();
	db.add(inv);

	logger.info("Creating randomized individuals and markers, and adding them to database..");
	List<Individual> indList = getRandomIndividuals(inv, params.matrixDimension1);
	List<Marker> marList = getRandomMarkers(inv, params.matrixDimension2);
	db.add(indList);
	db.add(marList);

	logger.info("Creating feature annotations for matrices");
	DerivedTrait textFeature = new DerivedTrait();
	textFeature.setName("test_text_data_feature");
	db.add(textFeature);
	DerivedTrait decimalFeature = new DerivedTrait();
	decimalFeature.setName("test_decimal_data_feature");
	db.add(decimalFeature);

	logger.info("Loading panel ontologies");
	new XgapLoadPanelTypes(db);
	List<? extends OntologyTerm> otherTerms = db.find(OntologyTerm.class, new QueryRule("definition",
			Operator.EQUALS, "other"));
	if (otherTerms.isEmpty())
	{
		// Fail with a readable message instead of a bare IndexOutOfBoundsException from get(0).
		throw new IllegalStateException("No OntologyTerm with definition 'other' found after loading panel types");
	}
	OntologyTerm panelType = otherTerms.get(0);

	logger.info("Creating panel as matrix 'target'");
	Panel p = new Panel();
	p.setName("panel_name");
	p.setPanelType(panelType);
	db.add(p);

	logger.info("Creating 'data' objects and adding them to database..");
	List<Data> createdData = new ArrayList<Data>();
	createdData.add(getTestTextData(inv, textFeature, p, "Individual", "Marker", storage));
	createdData.add(getTestDecimalData(inv, decimalFeature, p, "Marker", "Individual", storage));
	db.add(createdData);
	this.dataList = createdData;

	logger.info("Creating or refreshing input directory to hold data matrices files..");
	File matrixDir = new File(System.getProperty("java.io.tmpdir") + File.separator
			+ NameConvention.escapeFileName(inv.getName()) + "_datamatrices");
	if (matrixDir.exists())
	{
		FileUtils.cleanDirectory(matrixDir);
	}
	else if (!matrixDir.mkdirs())
	{
		// Surface the failure here rather than as an obscure file-open error further down.
		throw new IOException("Could not create input directory " + matrixDir.getAbsolutePath());
	}

	logger.info("Randomizing data matrix filling and adding data files to input directory..");
	for (Data data : createdData)
	{
		// Dimension 2 (markers) is passed as the row count and dimension 1 (individuals) as the column count;
		// the writer swaps them again for text data.
		createAndWriteRandomMatrix(matrixDir, data, db, params.matrixDimension2, params.matrixDimension1,
				params.maxTextLength, params.sparse, params.fixedTextLength);
	}
	this.inputFilesDir = matrixDir;
}
/**
 * Builds the in-memory description of the decimal matrix named "testDecimalData". Nothing is written to the
 * database here.
 *
 * @param inv
 *            investigation the data set is attached to; its name is copied onto the data set too
 * @param feature
 *            not used by the current implementation
 * @param target
 *            not used by the current implementation
 * @param rowType
 *            stored as the target type
 * @param colType
 *            stored as the feature type
 * @param source
 *            stored as the storage setting
 * @return the populated Data object
 */
private Data getTestDecimalData(Investigation inv, DerivedTrait feature, Panel target, String rowType,
		String colType, String source)
{
	final Data decimalData = new Data();
	decimalData.setName("testDecimalData");
	decimalData.setValueType("Decimal");
	decimalData.setInvestigation(inv);
	decimalData.setInvestigation_Name(inv.getName());
	decimalData.setTargetType(rowType);
	decimalData.setFeatureType(colType);
	decimalData.setStorage(source);
	// The feature and target arguments are deliberately left unattached.
	return decimalData;
}
/**
 * Creates the investigation used by the tests: named "testStudy", with the current time as start date. The object
 * is not stored in the database by this method.
 *
 * @return the new investigation
 */
private Investigation getTestInvestigation()
{
	final Investigation study = new Investigation();
	study.setName("testStudy");
	final Date now = new Date();
	study.setStartDate(now);
	return study;
}
/**
 * Builds a short, human-readable summary of a binary matrix: its name, investigation name, column/row/value
 * types, and up to the first three column and row names.
 * <p>
 * Despite the method name nothing is printed; the text is returned. Lines are separated by "\n", each listed
 * name is followed by a single space, and there is no newline after the row names.
 *
 * @param bm
 *            matrix to describe
 * @return the summary text
 */
public String printBinaryMatrixInfo(BinaryDataMatrixInstance bm)
{
	Data data = bm.getData();
	StringBuilder out = new StringBuilder();
	out.append("matrix name = ").append(data.getName()).append("\n");
	out.append("investigation label = ").append(data.getInvestigation_Name()).append("\n");
	out.append("coltype = ").append(data.getFeatureType()).append("\n");
	out.append("rowtype = ").append(data.getTargetType()).append("\n");
	out.append("valuetype = ").append(data.getValueType()).append("\n");
	out.append("first three colnames:").append("\n");
	appendLeadingNames(out, bm.getColNames(), 3);
	out.append("\n");
	out.append("first three rownames:").append("\n");
	appendLeadingNames(out, bm.getRowNames(), 3);
	return out.toString();
}

/** Appends at most {@code limit} leading entries of {@code names} to {@code target}, each followed by a space. */
private static void appendLeadingNames(StringBuilder target, List<?> names, int limit)
{
	int count = Math.min(limit, names.size());
	for (int i = 0; i < count; i++)
	{
		target.append(names.get(i)).append(" ");
	}
}
/**
 * Generates a random matrix, writes it to a tab-separated file and returns the same content as an in-memory
 * matrix.
 * <p>
 * The file is written to: inputMatrixDir.getAbsolutePath() + File.separator +
 * NameConvention.escapeFileName(data.getName()) + ".txt". Its first line holds the column names, each preceded
 * by a tab. Every following line starts with a row name followed by one tab-prefixed cell per column; in sparse
 * mode a cell may be empty, in which case the matching in-memory element is null.
 * <p>
 * Column names come from all entities of the data's feature type, row names from all entities of its target
 * type. For value type "Decimal" the matrix has totalRows x totalCols cells; for any other value type the two
 * sizes are swapped. The number of rows written must not exceed the number of target-type entities in the
 * database, otherwise an IndexOutOfBoundsException is raised while writing.
 *
 * @param inputMatrixDir
 *            directory in which the file is created
 * @param data
 *            describes the matrix (name, value type, feature and target type)
 * @param db
 *            database used to look up the row and column entities
 * @param totalRows
 *            row count for decimal data, column count otherwise
 * @param totalCols
 *            column count for decimal data, row count otherwise
 * @param maxStringLength
 *            passed to Util.getRandomString for text cells
 * @param sparse
 *            when true, each cell is filled only if Util.getRandomBoolean() returns true
 * @param fixedTextLength
 *            passed to Util.getRandomString for text cells
 * @return in-memory matrix built from the row names, column names and generated elements
 * @throws Exception
 *             on database lookup failures or when the file cannot be written
 */
public MemoryDataMatrixInstance<Object> createAndWriteRandomMemoryMatrix(File inputMatrixDir, Data data,
		Database db, int totalRows, int totalCols, int maxStringLength, boolean sparse, boolean fixedTextLength)
		throws Exception
{
	File res = new File(inputMatrixDir.getAbsolutePath() + File.separator
			+ NameConvention.escapeFileName(data.getName()) + ".txt");

	// Resolve names before opening the file so a failed lookup leaves no empty file behind.
	List<? extends Entity> colList = db.find(db.getClassForName(data.getFeatureType()));
	List<? extends Entity> rowList = db.find(db.getClassForName(data.getTargetType()));
	List<String> colNames = new ArrayList<String>();
	for (Entity e : colList)
	{
		colNames.add(e.get("name").toString());
	}
	List<String> rowNames = new ArrayList<String>();
	for (Entity e : rowList)
	{
		rowNames.add(e.get("name").toString());
	}

	// For text data the row and column dimension sizes are swapped.
	boolean decimal = data.getValueType().equals("Decimal");
	int rowCount = decimal ? totalRows : totalCols;
	int colCount = decimal ? totalCols : totalRows;
	Object[][] elements = new Object[rowCount][colCount];

	// BufferedWriter (unlike PrintWriter) reports write failures as IOException.
	BufferedWriter out = new BufferedWriter(new FileWriter(res));
	try
	{
		for (String colName : colNames)
		{
			out.write("\t");
			out.write(colName);
		}
		out.write("\n");

		for (int i = 0; i < rowCount; i++)
		{
			out.write(rowNames.get(i));
			for (int j = 0; j < colCount; j++)
			{
				out.write("\t");
				// In sparse mode the coin flip is drawn first; skipped cells stay null / empty.
				if (!sparse || Util.getRandomBoolean())
				{
					if (decimal)
					{
						double rand = Util.getRandomDouble();
						elements[i][j] = rand;
						out.write(String.valueOf(rand));
					}
					else
					{
						String rand = Util.getRandomString(maxStringLength, fixedTextLength);
						elements[i][j] = rand;
						out.write(String.valueOf(rand));
					}
				}
			}
			out.write("\n");
		}
	}
	finally
	{
		// Always release the file handle, also when generation or writing fails midway.
		out.close();
	}

	return new MemoryDataMatrixInstance<Object>(rowNames, colNames, elements, data);
}
/**
 * Writes a random matrix for the given Data object to a tab-separated file and returns that file.
 * <p>
 * The file is created at inputMatrixDir.getAbsolutePath() + File.separator +
 * NameConvention.escapeFileName(data.getName()) + ".txt". Its first line holds the names of all feature-type
 * entities, each preceded by a tab. Every following line starts with the name of a target-type entity followed by
 * one tab-prefixed cell per column; in sparse mode a cell may be empty.
 * <p>
 * For value type "Decimal" totalRows lines of totalCols cells are written; for any other value type the two
 * sizes are swapped. The number of lines written must not exceed the number of target-type entities in the
 * database, otherwise an IndexOutOfBoundsException is raised while writing.
 *
 * @param inputMatrixDir
 *            directory in which the file is created
 * @param data
 *            describes the matrix (name, value type, feature and target type)
 * @param db
 *            database used to look up the row and column entities
 * @param totalRows
 *            row count for decimal data, column count otherwise
 * @param totalCols
 *            column count for decimal data, row count otherwise
 * @param maxStringLength
 *            passed to Util.getRandomString for text cells
 * @param sparse
 *            when true, each cell is filled only if Util.getRandomBoolean() returns true
 * @param fixedTextLength
 *            passed to Util.getRandomString for text cells
 * @return the written file
 * @throws IOException
 *             if the file cannot be opened or written
 * @throws DatabaseException
 *             propagated from the entity lookups
 */
private File createAndWriteRandomMatrix(File inputMatrixDir, Data data, Database db, int totalRows, int totalCols,
		int maxStringLength, boolean sparse, boolean fixedTextLength) throws IOException, DatabaseException
{
	File res = new File(inputMatrixDir.getAbsolutePath() + File.separator
			+ NameConvention.escapeFileName(data.getName()) + ".txt");
	List<? extends Entity> colList = db.find(db.getClassForName(data.getFeatureType()));
	List<? extends Entity> rowList = db.find(db.getClassForName(data.getTargetType()));

	// For text data the row and column dimension sizes are swapped.
	boolean decimal = data.getValueType().equals("Decimal");
	int rowCount = decimal ? totalRows : totalCols;
	int colCount = decimal ? totalCols : totalRows;

	// BufferedWriter (unlike PrintWriter) reports write failures as IOException.
	BufferedWriter out = new BufferedWriter(new FileWriter(res));
	try
	{
		for (Entity e : colList)
		{
			out.write("\t" + e.get("name").toString());
		}
		out.write("\n");

		for (int i = 0; i < rowCount; i++)
		{
			out.write(rowList.get(i).get("name").toString());
			for (int j = 0; j < colCount; j++)
			{
				out.write("\t");
				// In sparse mode the coin flip is drawn first; skipped cells stay empty.
				if (!sparse || Util.getRandomBoolean())
				{
					if (decimal)
					{
						out.write(String.valueOf(Util.getRandomDouble()));
					}
					else
					{
						out.write(String.valueOf(Util.getRandomString(maxStringLength, fixedTextLength)));
					}
				}
			}
			out.write("\n");
		}
	}
	finally
	{
		// Always release the file handle, also when generation or writing fails midway.
		out.close();
	}
	return res;
}
/**
 * Prints a banner and the test parameters to standard output, one setting per line in the form
 * "name &lt;- value". Boolean settings are shown in upper case.
 *
 * @param source
 *            label inserted between "Test" and "Matrix" in the banner
 * @param params
 *            settings to report
 */
public void printSettings(String source, Params params)
{
	final String rule = "##################################################";
	System.out.println(rule);
	System.out.println("## Test" + source + "Matrix\tstarting with settings: ##");
	System.out.println(rule);
	System.out.println("matrixDimension1 <- " + params.matrixDimension1);
	System.out.println("matrixDimension2 <- " + params.matrixDimension2);
	System.out.println("maxTextLength <- " + params.maxTextLength);
	System.out.println("fixedTextLength <- " + upperCaseFlag(params.fixedTextLength));
	System.out.println("sparse <- " + upperCaseFlag(params.sparse));
	System.out.println("skipPerElement <- " + upperCaseFlag(params.skipPerElement));
}

/** Renders a boolean as upper-case text, as used in the settings report. */
private static String upperCaseFlag(boolean flag)
{
	return Boolean.toString(flag).toUpperCase();
}
/**
 * Creates the requested number of individuals with random names that have not been used before by this helper.
 * A candidate name that is already registered in uniqueNames is discarded and a new one is drawn. The
 * individuals are not stored in the database by this method.
 *
 * @param inv
 *            investigation assigned to every created individual
 * @param amount
 *            number of individuals to create; zero or less yields an empty list
 * @return the created individuals
 */
private List<Individual> getRandomIndividuals(Investigation inv, int amount)
{
	final List<Individual> individuals = new ArrayList<Individual>();
	while (individuals.size() < amount)
	{
		final String candidate = Util.getRandomString(10, false);
		if (uniqueNames.contains(candidate))
		{
			// Name already taken: draw again.
			continue;
		}
		uniqueNames.add(candidate);
		final Individual individual = new Individual();
		individual.setName(candidate);
		individual.setInvestigation(inv);
		individuals.add(individual);
	}
	return individuals;
}
/**
 * Creates the requested number of markers with random names that have not been used before by this helper. A
 * candidate name that is already registered in uniqueNames is discarded and a new one is drawn. The markers are
 * not stored in the database by this method.
 *
 * @param inv
 *            investigation assigned to every created marker
 * @param amount
 *            number of markers to create; zero or less yields an empty list
 * @return the created markers
 */
private List<Marker> getRandomMarkers(Investigation inv, int amount)
{
	final List<Marker> markers = new ArrayList<Marker>();
	while (markers.size() < amount)
	{
		final String candidate = Util.getRandomString(10, false);
		if (uniqueNames.contains(candidate))
		{
			// Name already taken: draw again.
			continue;
		}
		uniqueNames.add(candidate);
		final Marker marker = new Marker();
		marker.setName(candidate);
		marker.setInvestigation(inv);
		markers.add(marker);
	}
	return markers;
}
/**
 * Reads a text file completely into a string, using the platform default character encoding.
 * <p>
 * Every line is terminated with "\n" in the result regardless of the line separator used in the file, so a file
 * whose last line has no terminator still yields a trailing "\n". An empty file yields an empty string.
 *
 * @param file
 *            file to read
 * @return the file content with normalized line endings
 * @throws FileNotFoundException
 *             if the file cannot be opened for reading
 * @throws Exception
 *             if reading or closing the file fails
 */
public static String readFileToString(File file) throws FileNotFoundException, Exception
{
	BufferedReader br = new BufferedReader(new FileReader(file));
	try
	{
		StringBuilder res = new StringBuilder();
		String line;
		while ((line = br.readLine()) != null)
		{
			res.append(line).append("\n");
		}
		return res.toString();
	}
	finally
	{
		// Release the file handle on both the normal and the failure path.
		br.close();
	}
}
}