package org.molgenis.compute.commandline;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.molgenis.compute.design.ComputeParameter;
import org.molgenis.util.SimpleTuple;
import org.molgenis.util.Tuple;
import freemarker.template.Configuration;
import freemarker.template.Template;
public class Worksheet
{
// The combined worksheet built by fillWorksheet(): one tuple per user
// worksheet row, holding the parameter defaults overlaid with the row's own
// values, after substitution of references between values.
public List<Tuple> worksheet = new ArrayList<Tuple>();
// public List<Tuple> folded = new ArrayList<Tuple>();
// public List<Tuple> reduced = new ArrayList<Tuple>();
// Parameter definitions (name and default value are read from these), e.g.
// originating from parameters.txt.
List<ComputeParameter> parameterlist; // parameters.txt
// The user worksheet rows as passed to the constructor; read by
// fillWorksheet().
List<Tuple> userworksheet; // original user worksheet
// public Set<String> reducedfields = new HashSet<String>(); // fields
// (lists) that are reduced to a single value
// public Set<String> foldon = new HashSet<String>(); // fields on which we
// folded
// public Set<String> list; // fields that remain a list
// public Set<String> getConstants()
// {
// Set<String> constants = new HashSet<String>();
// for (String field : reducedfields)
// {
// if (!foldon.contains(field))
// {
// constants.add(field);
// }
// }
//
// return (constants);
// }
// map with (parameter name, parameter object) tuples
// public Map<String, ComputeParameter> computeparameters = new
// HashMap<String, ComputeParameter>();
/**
 * Construct worksheet from a compute bundle.
 *
 * Takes the compute parameters and the user parameters from the bundle and
 * hands them to the (parameter list, user worksheet) constructor, which
 * fills the worksheet.
 *
 * @param computebundle
 *            bundle supplying the compute parameters and the user worksheet
 */
public Worksheet(ComputeBundle computebundle)
{
    this(computebundle.getComputeParameters(), computebundle.getUserParameters());
}
/**
 * Construct worksheet from a parameter list and a user worksheet, and fill
 * the combined worksheet right away.
 *
 * @param parameterList
 *            parameter definitions, originating from parameters.txt or from
 *            database
 * @param worksheet
 *            the original user worksheet (one tuple per row)
 */
public Worksheet(List<ComputeParameter> parameterList, List<Tuple> worksheet)
{
    // keep references to both inputs; fillWorksheet() reads them
    this.userworksheet = worksheet;
    this.parameterlist = parameterList;

    fillWorksheet();
}
/**
 * Builds the combined worksheet from the user worksheet and the parameter
 * list and stores it via setWorksheet.
 *
 * For every user worksheet row:
 * <ol>
 * <li>all parameters are put in a map with their default value (empty string
 * when the default is null); a parameter name that occurs twice is an error;</li>
 * <li>the values of the row are put in the same map, overriding defaults
 * with the same name (null becomes the empty string);</li>
 * <li>every value is processed as a FreeMarker template with the map itself
 * as data model, repeatedly, until a complete pass changes nothing;</li>
 * <li>the resulting map is copied into a new tuple that is appended to the
 * worksheet.</li>
 * </ol>
 *
 * A value whose template cannot be processed is left unchanged (best
 * effort); the failure is reported on standard error.
 *
 * @throws RuntimeException
 *             if a parameter name occurs more than once in the parameter
 *             list (only detected when the user worksheet has at least one
 *             row)
 */
private void fillWorksheet()
{
    // A single configuration is enough: it only serves as owner of the
    // ad-hoc templates created below.
    Configuration conf = new Configuration();

    // (name, value) pairs of the row that is currently being built
    Map<String, String> parameters = new HashMap<String, String>();

    // novel worksheet that combines user worksheet with parameters
    List<Tuple> combined = new ArrayList<Tuple>();

    for (Tuple usertuple : userworksheet)
    {
        parameters.clear();

        // start from the parameter defaults
        for (ComputeParameter cp : parameterlist)
        {
            String field = cp.getName();
            if (parameters.containsKey(field))
            {
                throw new RuntimeException("Parameter " + field + " occurs > 1 times in your parameter.txt file.");
            }
            String defaultValue = cp.getDefaultValue();
            parameters.put(field, defaultValue == null ? "" : defaultValue);
        }

        // values of this row override defaults with the same name
        for (String field : usertuple.getFields())
        {
            String userValue = usertuple.getString(field);
            parameters.put(field, userValue == null ? "" : userValue);
        }

        // Iteratively substitute values that point to other parameters,
        // until a full pass leaves every value untouched.
        // NOTE(review): values that refer to each other in a cycle and grow
        // on every pass would keep this loop running - confirm inputs are
        // acyclic.
        boolean updated;
        do
        {
            updated = false;
            for (Map.Entry<String, String> entry : parameters.entrySet())
            {
                String field = entry.getKey();
                String original = entry.getValue();
                try
                {
                    Template template = new Template(field, new StringReader(original), conf);
                    StringWriter filled = new StringWriter();
                    template.process(parameters, filled);
                    String substituted = filled.toString();

                    // Exact comparison: a result that differs from the
                    // template only in letter case is still a change.
                    if (!substituted.equals(original))
                    {
                        // replacing the value of an existing key is not a
                        // structural change, so iteration may continue
                        entry.setValue(substituted);
                        updated = true;
                    }
                }
                catch (Exception e)
                {
                    // best effort: keep the value as it is and go on
                    System.err.println("Could not substitute parameters in value of '" + field + "': "
                            + original);
                    e.printStackTrace();
                }
            }
        }
        while (updated);

        // put the fully substituted values in a worksheet tuple
        Tuple wt = new SimpleTuple();
        for (Map.Entry<String, String> entry : parameters.entrySet())
        {
            wt.set(entry.getKey(), entry.getValue());
        }
        combined.add(wt);
    }

    setWorksheet(combined);
}
/**
 * Replaces the public worksheet field with the given list. The list is
 * stored by reference, not copied.
 *
 * @param ws
 *            the new worksheet
 */
private void setWorksheet(List<Tuple> ws)
{
this.worksheet = ws;
}
/**
 * Returns the folded list, based on folding targets.
 *
 * Rows with the same values for all targets are folded into one tuple in
 * which every field holds the list of values of the folded rows. Example
 * (targets = lane, sequencer {because lane hasOne sequencer}): lane,
 * barcode, sequencer (1, a, x); (1, b, x); (2, a, x)
 *
 * Becomes: [1, 1], [a, b], [x, x]; [2], [a], [x]
 *
 * The folded tuples are then unfolded again and compared with the original
 * worksheet as a consistency check, reduced with
 * reduceTargets(folded, parameterlist, targets) (for easy use in FTL
 * templates): (1, [a, b], x); (2, [a], x), and finally numbered through the
 * field "task_number" (1, 2, ...).
 *
 * @param worksheet
 *            the unfolded worksheet
 * @param parameterlist
 *            parameter definitions, passed on to reduceTargets
 * @param targets
 *            names of the fields to fold on
 * @return the folded and reduced worksheet, in order of first occurrence of
 *         each combination of target values
 * @throws RuntimeException
 *             if unfolding the folded worksheet does not give back the
 *             original worksheet
 */
public static List<Tuple> foldWorksheet(List<Tuple> worksheet, List<ComputeParameter> parameterlist,
        List<String> targets)
{
    // Suppose target is lane:
    // [1] -> [(lane: 1,1,1), (sample: a,b,c), (flowcell: x,y,z)]
    // The key is the list of target values rather than a concatenated
    // string, so that e.g. ("1_", "2") and ("1", "_2"), or null and "null",
    // cannot end up in the same group.
    Map<List<String>, Map<String, ArrayList<Object>>> groups = new LinkedHashMap<List<String>, Map<String, ArrayList<Object>>>();

    for (Tuple t : worksheet)
    {
        // unique key based on the values of the folding targets
        List<String> key = new ArrayList<String>(targets.size());
        for (String target : targets)
        {
            key.add(t.getString(target));
        }

        // Get the existing group or create a new one. Without targets
        // (empty key) a new group is always started and replaces the
        // previous one, as before.
        Map<String, ArrayList<Object>> tupleset = key.isEmpty() ? null : groups.get(key);
        if (tupleset == null)
        {
            // each field starts with an empty list of values
            tupleset = new LinkedHashMap<String, ArrayList<Object>>();
            for (String field : t.getFields())
            {
                tupleset.put(field, new ArrayList<Object>());
            }
            groups.put(key, tupleset);
        }

        // add the values of this row to its group
        for (String field : t.getFields())
        {
            tupleset.get(field).add(t.getObject(field));
        }
    }

    // turn every group into a tuple of the 'folded worksheet'
    List<Tuple> folded = new ArrayList<Tuple>();
    for (Map<String, ArrayList<Object>> tupleset : groups.values())
    {
        // SimpleTuple is constructed from a Map<String, Object>, hence the copy
        Map<String, Object> values = new LinkedHashMap<String, Object>(tupleset);
        folded.add(new SimpleTuple(values));
    }

    // check: after expansion the folded worksheet must be equal to the
    // original worksheet
    if (!equalWorksheets(unfoldWorksheet(folded), worksheet))
    {
        throw new RuntimeException(">> Error: folded and unfolded worksheets should be equal but are not!");
    }

    // reduce the folded worksheet (i.e., reduce list to single value where
    // 'allowed')
    List<Tuple> reducedWorksheet = reduceTargets(folded, parameterlist, targets);

    // add task_number as a number to each line in the (folded) worksheet
    int taskNumber = 0;
    for (Tuple task : reducedWorksheet)
    {
        task.set("task_number", ++taskNumber);
    }

    return reducedWorksheet;
}
/**
 * Determines the set R of parameter names that are reduced to a single value
 * when folding on the given targets. The complementary set L holds the
 * parameters that stay a list; it is only used internally.
 *
 * @param parameterlist
 *            all parameter definitions
 * @param targets
 *            names of the fields that are folded on
 * @return the names of the parameters to reduce (R)
 * @throws RuntimeException
 *             if R or L contains a name that is not a parameter, or if a
 *             parameter ends up in neither R nor L
 */
private static Set<String> reduceFieldSet(List<ComputeParameter> parameterlist, List<String> targets)
{
    // (i) R starts out with the targets
    Set<String> reduceParams = new HashSet<String>(targets);
    Set<String> listParams = new HashSet<String>();

    // (ii) add the (indirect) hasOnes of everything in R, until R stops
    // growing
    boolean grown;
    do
    {
        grown = false;
        for (ComputeParameter cp : parameterlist)
        {
            if (!reduceParams.contains(cp.getName()))
            {
                continue;
            }
            for (String hasOne : cp.getHasOne_Name())
            {
                // add returns true only for a name that was not in R yet
                if (reduceParams.add(hasOne))
                {
                    grown = true;
                }
            }
        }
    }
    while (grown);

    // (iii) parameters without default value that are not in R go to L
    for (ComputeParameter cp : parameterlist)
    {
        boolean inR = reduceParams.contains(cp.getName());
        if (!inR && cp.getDefaultValue() == null)
        {
            listParams.add(cp.getName());
        }
    }

    // (iv) parameters whose default value contains no "${" are constants
    // and go to R
    for (ComputeParameter cp : parameterlist)
    {
        String defaultValue = cp.getDefaultValue();
        if (defaultValue == null || reduceParams.contains(cp.getName()))
        {
            continue;
        }
        if (!defaultValue.contains("${"))
        {
            reduceParams.add(cp.getName());
        }
    }

    // (v) for every parameter that is in neither R nor L, collect the names
    // of the parameters its default value refers to
    Map<String, Set<String>> references = new HashMap<String, Set<String>>();
    for (ComputeParameter cp : parameterlist)
    {
        String name = cp.getName();
        if (reduceParams.contains(name) || listParams.contains(name))
        {
            continue;
        }
        String defaultValue = cp.getDefaultValue();
        Set<String> referenced = new HashSet<String>();
        for (ComputeParameter other : parameterlist)
        {
            if (defaultValue.contains("${" + other.getName() + "}"))
            {
                referenced.add(other.getName());
            }
        }
        references.put(name, referenced);
    }

    // (vi) classify the remaining parameters one at a time; after each
    // classification the scan starts over, because the map was modified
    boolean progress = true;
    while (progress)
    {
        progress = false;
        for (String candidate : references.keySet())
        {
            boolean refersToList = false;
            boolean refersToUnknown = false;
            for (String referenced : references.get(candidate))
            {
                if (listParams.contains(referenced))
                {
                    refersToList = true;
                    break;
                }
                if (!reduceParams.contains(referenced))
                {
                    // destination of the referenced parameter is not known
                    // yet, so the candidate cannot be classified in this scan
                    refersToUnknown = true;
                    break;
                }
            }

            if (refersToList)
            {
                listParams.add(candidate);
            }
            else if (!refersToUnknown)
            {
                // everything the candidate refers to is in R
                reduceParams.add(candidate);
            }
            else
            {
                continue;
            }

            references.remove(candidate);
            progress = true;
            break;
        }
    }

    // (vii) check: is every parameter in "R u L", and nothing else?
    Set<String> unclassified = new HashSet<String>();
    for (ComputeParameter cp : parameterlist)
    {
        unclassified.add(cp.getName());
    }
    if (!(unclassified.containsAll(reduceParams) && unclassified.containsAll(listParams)))
    {
        throw new RuntimeException(
                "You just found a bug! The union of (1) parameters on which you want to fold and (2) parameters on which you don't want to fold and which should thus be in a list, is not equal to the given set of ComputeParameters. Maybe you've defined a hasOne that doesn't exist?");
    }
    unclassified.removeAll(reduceParams);
    unclassified.removeAll(listParams);
    if (!unclassified.isEmpty())
    {
        throw new RuntimeException(
                "You just found a bug! There are parameters for which it is unclear whether you want to fold on them.");
    }

    return reduceParams;
}
/**
 * Reduces fields of folded tuples from a list of equal values (eg lane = 1,
 * 1, 1, 1) to one single value (lane = 1) for easy use in freemarker.
 *
 * Which fields are reduced is determined by reduceFieldSet. When the only
 * target is "line_number", all fields of the first folded tuple are reduced
 * as well. The given tuples are not modified; clones are returned. Values
 * are compared as strings, ignoring case, and the reduced value is the
 * string form of the first element.
 *
 * @param folded
 *            the folded worksheet; may be empty
 * @param parameterlist
 *            all parameter definitions
 * @param targets
 *            names of the fields that were folded on
 * @return new tuples in which the reduced fields hold a single value
 * @throws RuntimeException
 *             if a field to reduce is missing from a tuple, or if its list
 *             holds different values
 */
public static List<Tuple> reduceTargets(List<Tuple> folded, List<ComputeParameter> parameterlist,
        List<String> targets)
{
    Set<String> reducedfields = reduceFieldSet(parameterlist, targets);

    // Folding on line_number alone: every field can be reduced. An empty
    // folded worksheet has no first tuple to take the fields from.
    if (1 == targets.size() && "line_number".equals(targets.get(0)) && !folded.isEmpty())
    {
        reducedfields.addAll(folded.get(0).getFields());
    }

    List<Tuple> reduced = new ArrayList<Tuple>();
    for (Tuple t : folded)
    {
        Tuple tclone = cloneTuple(t);
        // setting a value for an existing field below does not change the
        // set of fields, so it can be fetched once per tuple
        List<String> knownFields = tclone.getFields();

        for (String rfield : reducedfields)
        {
            if (!knownFields.contains(rfield))
            {
                throw new RuntimeException("Field: " + rfield + " is not known!");
            }
            if (tclone.isNull(rfield))
            {
                // nothing to reduce
                continue;
            }

            // tclone has a value, which should be a list
            List<?> values = tclone.getList(rfield);
            Object head = values.get(0);
            String value = head == null ? null : head.toString();

            // check: all values should be equal to the first one
            for (int i = 1; i < values.size(); i++)
            {
                Object element = values.get(i);
                String other = element == null ? null : element.toString();
                boolean same = value == null ? other == null : value.equalsIgnoreCase(other);
                if (!same)
                {
                    throw new RuntimeException("Cannot reduce field " + rfield
                            + " because it contains different values!" + " Value 1: " + value
                            + " is not the same as value " + (i + 1) + ": " + other);
                }
            }

            // reduce to one value
            tclone.set(rfield, value);
        }

        reduced.add(tclone);
    }

    return reduced;
}
/**
 * Creates a new tuple with the same fields as the given one. A field whose
 * value is a list gets a new list with the same elements; every other field
 * gets the string value of the original field.
 *
 * @param t
 *            the tuple to copy
 * @return the copy
 */
private static Tuple cloneTuple(Tuple t)
{
    Tuple copy = new SimpleTuple();
    for (String field : t.getFields())
    {
        if (!(t.getObject(field) instanceof List))
        {
            copy.set(field, t.getString(field));
            continue;
        }

        // new list, same elements
        List<Object> elements = new ArrayList<Object>(t.getList(field));
        copy.set(field, elements);
    }
    return copy;
}
/**
 * Expands a folded worksheet into one tuple per original row.
 *
 * The number of rows of a folded tuple is the length of its "line_number"
 * list. For row i a field gets: null when the folded field is null, the
 * first element when the field's list is shorter than the number of rows,
 * and the i'th element otherwise.
 *
 * @param worksheet
 *            the folded worksheet
 * @return the unfolded rows, in order of the folded tuples
 */
public static List<Tuple> unfoldWorksheet(List<Tuple> worksheet)
{
    List<Tuple> rows = new ArrayList<Tuple>();
    for (Tuple foldedTuple : worksheet)
    {
        // the length of the line_number field tells how many rows were folded
        int rowCount = foldedTuple.getList("line_number").size();
        List<String> fields = foldedTuple.getFields();

        for (int row = 0; row < rowCount; row++)
        {
            // fill a new tuple with the row'th element of every field
            Tuple unfolded = new SimpleTuple();
            for (String field : fields)
            {
                if (foldedTuple.isNull(field))
                {
                    unfolded.set(field, null);
                    continue;
                }
                List<?> values = foldedTuple.getList(field);
                // a list that is too short contributes its first element to
                // every row
                int index = values.size() < rowCount ? 0 : row;
                unfolded.set(field, values.get(index));
            }
            rows.add(unfolded);
        }
    }
    return rows;
}
/**
 * Unfolds the given worksheet and renders every resulting row as one line
 * of the form {@code field1='value1', field2='value2'}.
 *
 * Unfolding produces null for fields that are null in the folded tuple;
 * such a field is rendered with an empty value ({@code field=''}).
 *
 * @param worksheet
 *            the folded worksheet
 * @return one line per unfolded row
 */
public static List<String> unfoldWorksheetCSV(List<Tuple> worksheet)
{
    List<String> lines = new ArrayList<String>();
    for (Tuple t : unfoldWorksheet(worksheet))
    {
        StringBuilder row = new StringBuilder();
        for (String field : t.getFields())
        {
            if (row.length() > 0)
            {
                row.append(", ");
            }
            // null-safe: calling toString() on a null value would throw
            Object value = t.getObject(field);
            row.append(field).append("='").append(value == null ? "" : value.toString()).append("'");
        }
        lines.add(row.toString());
    }
    return lines;
}
/**
 * Tells whether two tuples have the same number of fields and, for every
 * field of the first tuple, the same string value (or are both null for
 * that field).
 *
 * @param t1
 *            the tuple whose fields are checked
 * @param t2
 *            the tuple to compare with
 * @return true if the tuples are considered equal
 */
private static boolean equalTuples(Tuple t1, Tuple t2)
{
    List<String> fields1 = t1.getFields();

    // a different number of fields can never match
    if (fields1.size() != t2.getFields().size())
    {
        return false;
    }

    for (String field : fields1)
    {
        boolean same = t1.isNull(field) ? t2.isNull(field) : t1.getString(field).equals(t2.getString(field));
        if (!same)
        {
            return false;
        }
    }
    return true;
}
/**
 * Tells whether two worksheets hold the same tuples, irrespective of order.
 *
 * Every tuple of ws1 must match a tuple of ws2 that has not been matched
 * before, so duplicate rows have to occur equally often in both worksheets.
 * On failure, diagnostics are printed: the line numbers of ws1 when the
 * sizes differ (on standard out), otherwise the unmatched tuple and its
 * differences with the tuples of ws2 that have the same line_number (on
 * standard error).
 *
 * @param ws1
 *            first worksheet
 * @param ws2
 *            second worksheet
 * @return true if the worksheets are equal
 */
private static boolean equalWorksheets(List<Tuple> ws1, List<Tuple> ws2)
{
    if (ws1.size() != ws2.size())
    {
        Set<String> lineNumbers = new HashSet<String>();
        for (Tuple t1 : ws1)
        {
            String lineNumber = t1.getString("line_number");
            System.out.println(lineNumber);
            // add returns false for a line number that was seen before
            if (!lineNumbers.add(lineNumber))
            {
                System.out.println("DUPLICATE!");
            }
        }
        System.out.println("worksheets of unequal lengths");
        return false;
    }

    // for each tuple in ws1, find a matching tuple in ws2 that is not yet
    // taken, and mark it as taken
    boolean[] taken = new boolean[ws2.size()];
    for (Tuple t1 : ws1)
    {
        boolean match = false;
        int index = 0;
        for (Tuple t2 : ws2)
        {
            if (!taken[index] && equalTuples(t1, t2))
            {
                taken[index] = true;
                match = true; // match found!
                break;
            }
            index++;
        }

        if (!match)
        {
            System.err.println("Folded not correctly: " + t1);
            System.err.println("testing fields:");

            // report the differences with the tuples in ws2 that have the
            // same line number
            String lineNumber = t1.getString("line_number");
            for (Tuple t : ws2)
            {
                String otherLineNumber = t.getString("line_number");
                if (otherLineNumber == null || !otherLineNumber.equals(lineNumber))
                {
                    continue;
                }
                for (String field : t.getFields())
                {
                    // null-safe comparison: two null values are equal
                    String expected = t.getString(field);
                    String actual = t1.getString(field);
                    boolean differs = expected == null ? actual != null : !expected.equals(actual);
                    if (differs)
                    {
                        System.err.println("differences in field '" + field + "': " + expected + "!=" + actual);
                    }
                }
            }
            return false;
        }
    }
    return true;
}
/**
 * Find all tuples in unfolded worksheet that have matching target values as
 * the foldedTuple.
 *
 * The matching tuples used to be collected and then dropped; they are now
 * returned. Callers that ignore the result are not affected.
 *
 * @param targets
 *            names of the fields to compare
 * @param foldedTuple
 *            the tuple whose target values must be matched
 * @return the tuples of the worksheet whose values are equal to those of
 *         foldedTuple for all targets (two missing values count as equal);
 *         empty if there are none
 */
public List<Tuple> getUnfolded(String[] targets, Tuple foldedTuple)
{
    List<Tuple> result = new ArrayList<Tuple>();
    for (Tuple unfoldedTuple : this.worksheet)
    {
        // we assume match, unless we find a target that is not equal
        boolean matchAllTargets = true;
        for (String target : targets)
        {
            Object unfoldedValue = unfoldedTuple.getObject(target);
            Object foldedValue = foldedTuple.getObject(target);
            // null-safe comparison
            boolean same = unfoldedValue == null ? foldedValue == null : unfoldedValue.equals(foldedValue);
            if (!same)
            {
                matchAllTargets = false;
                break; // one mismatch is enough
            }
        }

        if (matchAllTargets)
        {
            result.add(unfoldedTuple);
        }
    }
    return result;
}
/**
 * Looks up the default value of a parameter by name, ignoring case.
 *
 * @param parameter
 *            name of the parameter
 * @return the default value of the first parameter in the parameter list
 *         with that name (which may be null), or null if there is no such
 *         parameter
 */
public String getdefaultvalue(String parameter)
{
    Iterator<ComputeParameter> candidates = parameterlist.iterator();
    while (candidates.hasNext())
    {
        ComputeParameter candidate = candidates.next();
        String name = candidate.getName();
        if (name.equalsIgnoreCase(parameter))
        {
            return candidate.getDefaultValue();
        }
    }
    // no parameter with this name
    return null;
}
}