package converters.mpd; import java.io.File; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import org.molgenis.organization.Investigation; import org.molgenis.pheno.Individual; import org.molgenis.pheno.Measurement; import org.molgenis.pheno.ObservedValue; import org.molgenis.pheno.Panel; import org.molgenis.util.CsvFileReader; import org.molgenis.util.CsvReader; import org.molgenis.util.Tuple; import app.CsvExport; /** * Convertor to convert standard MPD (mouse phenotype database) downloads to * pheno model. Each file type has its own loader, all in memory. After that you * can choose to write to file or write to database. */ public class ConvertMpdToPheno { // required static String importDir = "../molgenis4phenotype/data/MPD/orig"; // if you want to write to files static String outputDir = "../molgenis4phenotype/data/MPD/output"; public static void main(String[] args) throws Exception { ConvertMpdToPheno conv = new ConvertMpdToPheno(); conv.loadProjects(); conv.loadStrains(); conv.loadAssayStats(); conv.loadMeasurements(); conv.loadAnimalDataPoints(); CsvExport export = new CsvExport(); export.exportAll(new File(outputDir), projects, strains, animals, measurements, values); // CsvImport importer = new CsvImport(); // Database db = null; // importer.importAll(db,projects, strains); } // containers for the mpd data static List<Investigation> projects = new ArrayList<Investigation>(); static List<Panel> strains = new ArrayList<Panel>(); static List<Individual> animals = new ArrayList<Individual>(); static List<Measurement> measurements = new ArrayList<Measurement>(); static List<ObservedValue> values = new ArrayList<ObservedValue>(); // mpd data provides measnum to link values to the feature. Map<String, String> measNumToName = new LinkedHashMap<String, String>(); /** * Load projects.txt This is a flat listing of investigation.name and * investigation.description. * * @throws Exception */ public void loadProjects() throws Exception { CsvReader reader = new CsvFileReader(new File(importDir + "/projects.txt")); for (Tuple tuple : reader) { Investigation project = new Investigation(); project.setName(tuple.getString("name")); project.setDescription(tuple.getString("description")); projects.add(project); } } /** * Load measurements.txt * * This file lists all measurements.name, unit and a serie of custom fields * "measnum projsym displayorder varname desc units inseries protolink p1 cat1 cat2 cat3 p2 hints intervention intparm appmeth panelsym p3 datatype origin sextested nstrainstested ageweeks" * * @throws Exception */ public void loadMeasurements() throws Exception { CsvReader reader = new CsvFileReader(new File(importDir + "/measurements.txt")); for (Tuple tuple : reader) { Measurement m = new Measurement(); m.setInvestigation_Name("projsym"); m.setName(tuple.getString("name")); m.setUnit_Name("units"); m.setDescription(tuple.getString("desc")); measurements.add(m); } } /** * Load animaldatapoints.txt. These are observedvalue per individual plus * sex */ public void loadAnimalDataPoints() throws Exception { CsvReader reader = new CsvFileReader(new File(importDir + "/animaldatapoints.txt")); addObservableFeatures("sex"); // sex is repeated for each measurement final List<String> hasSexValue = new ArrayList<String>(); for (Tuple tuple : reader) { // sex, should be only once??? if (!hasSexValue.contains(tuple.getString("animal_id"))) { ObservedValue v = new ObservedValue(); v.setTarget_Name(tuple.getString("animal_id")); // map measnum to meas.name v.setFeature_Name("sex"); v.setValue(tuple.getString("sex")); values.add(v); hasSexValue.add(tuple.getString("animal_id")); } // value ObservedValue v = new ObservedValue(); v.setTarget_Name(tuple.getString("animal_id")); // map measnum to meas.name v.setFeature_Name(measNumToName.get(tuple.getString("measnum"))); v.setValue(tuple.getString("value")); } } /** * Load strains.txt. This file has per line panel.name plus some * observedvalue per panel ("longname", "mpd_id", "vendor", "stocknum", * "prigroup", "typecode", "genaltcode", "ndatasets") * * @throws Exception */ public void loadStrains() throws Exception { // includes a few ObservableFeatures; final String[] varNames = new String[] { "longname", "mpd_id", "vendor", "stocknum", "prigroup", "typecode", "genaltcode", "ndatasets" }; addObservableFeatures(varNames); CsvReader reader = new CsvFileReader(new File(importDir + "/strains.txt")); for (Tuple tuple : reader) { Panel strain = new Panel(); strain.setName(tuple.getString("strainname")); strains.add(strain); addObservedValuesForTarget(strain, varNames, tuple); } } /** * Loads assaystats.txt. * * Grouped by (sex=m, sex=f) it lists a matrix of feature * feature. For * example '{ahtracis in m} X {nstrains,mean,median,sd,se,cv,min,max}'. * * Alternatively, we could make this a ObservedInference. * * @throws Exception */ public void loadAssayStats() throws Exception { CsvReader reader = new CsvFileReader(new File(importDir + "/assaystats.txt")); // get the features List<String> temp = reader.colnames(); temp.remove("measnum"); temp.remove("varname"); final String[] features = temp.toArray(new String[temp.size()]); addObservableFeatures(features); for (Tuple tuple : reader) { Panel strain = new Panel(); strain.setName(tuple.getString("strainname")); strains.add(strain); addObservedValuesForTarget(strain, features, tuple); } } /* * Helper method for loading observed values per target from a matrix like * tuple */ private void addObservedValuesForTarget(Panel strain, String[] varNames, Tuple tuple) { for (String name : varNames) { ObservedValue v = new ObservedValue(); v.setTarget(strain); v.setFeature_Name(name); v.setValue(tuple.getString(name)); values.add(v); } } /* * Helper method for loading a set of features. */ private void addObservableFeatures(String... featureNames) { // NB if Java had named parameters this code would not be needed. // Idea: make possible to say Observeable f = // ObserveableFeature.make().name("x"); for (String name : featureNames) { Measurement f = new Measurement(); f.setName(name); measurements.add(f); } } }