package org.hadatac.data.loader;

import java.io.File;
import java.lang.String;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.csv.CSVRecord;
import org.apache.jena.query.QueryExecution;
import org.apache.jena.query.QueryExecutionFactory;
import org.apache.jena.query.QuerySolution;
import org.apache.jena.query.ResultSet;
import org.apache.jena.query.ResultSetFactory;
import org.apache.jena.query.ResultSetRewindable;
import org.apache.jena.rdf.model.Literal;
import org.hadatac.utils.Collections;
import org.hadatac.utils.NameSpaces;

import com.google.common.collect.Iterables;

/**
 * Builds sample rows and sample-collection rows from the CSV records of a
 * sample file.
 *
 * <p>Each CSV record is turned into a property map (URI, type, label,
 * original ID, subject, collection, sampling details). Sample numbers are
 * derived from the running {@code counter} plus the number of samples the
 * metadata SPARQL endpoint already reports for the record's pilot study.
 */
public class SampleGenerator extends BasicGenerator {

    /** Marker used in the CSV file for a missing value (compared case-insensitively). */
    private static final String NULL_MARKER = "NULL";

    final String kbPrefix = "chear-kb:";
    private String dataAcquisition = "";
    private int counter = 1; // starting index number

    /**
     * Creates a generator for the given CSV file.
     *
     * @param file the file handed to the {@link BasicGenerator} constructor
     */
    public SampleGenerator(File file) {
        super(file);
    }

    /** Registers the mapping from logical field names to CSV column headers. */
    @Override
    void initMapping() {
        mapCol.clear();
        mapCol.put("sampleID", "specimen_id");
        mapCol.put("sampleSuffix", "suffix");
        mapCol.put("subjectID", "patient_id");
        mapCol.put("pilotNum", "project_id");
        mapCol.put("sampleType", "sample_type");
        mapCol.put("samplingMethod", "sample_collection_method");
        mapCol.put("samplingVol", "sample_quantity");
        mapCol.put("samplingVolUnit", "sample_quantity_uom");
        mapCol.put("storageTemp", "sample_storage_temp");
        mapCol.put("FTcount", "sample_freeze_thaw_cycles");
    }

    /**
     * Builds the property map for one sample and advances the sample counter.
     *
     * @param rec the CSV record describing the sample
     * @return map from property name to value for this sample
     */
    @Override
    Map<String, Object> createRow(CSVRecord rec) {
        // The sample number requires a remote SPARQL query; compute it once
        // per row so URI, label and comment share the same value.
        String sampleNumber = formatSampleNumber(rec);

        Map<String, Object> row = new HashMap<>();
        row.put("hasURI", getUri(rec, sampleNumber));
        row.put("a", getType(rec));
        row.put("rdfs:label", getLabel(rec, sampleNumber));
        row.put("hasco:originalID", getOriginalID(rec));
        row.put("hasco:isSampleOf", getSubjectUri(rec));
        row.put("hasco:isObjectOf", getCollectionUri(rec));
        row.put("rdfs:comment", getComment(rec, sampleNumber));
        row.put("hasco:hasSamplingMethod", getSamplingMethod(rec));
        row.put("hasco:hasSamplingVolume", getSamplingVolume(rec));
        row.put("hasco:hasSamplingVolumeUnit", getSamplingVolumeUnit(rec));
        row.put("hasco:hasStorageTemperature", getStorageTemperature(rec));
        row.put("hasco:hasStorageTemperatureUnit", getStorageTemperatureUnit());
        row.put("hasco:hasNumFreezeThaw", getNumFreezeThaw(rec));
        counter++;
        return row;
    }

    /**
     * Returns the CSV value mapped to {@code key}, or {@code fallback} when
     * the cell is {@code null} or holds the "NULL" marker.
     */
    private String valueOrDefault(CSVRecord rec, String key, String fallback) {
        String value = rec.get(mapCol.get(key));
        if (value == null || NULL_MARKER.equalsIgnoreCase(value)) {
            return fallback;
        }
        return value;
    }

    /** Returns the raw pilot number cell of the record. */
    private String pilotNum(CSVRecord rec) {
        return rec.get(mapCol.get("pilotNum"));
    }

    /**
     * Escapes a value so it can be embedded between double quotes in a
     * SPARQL string literal without terminating or altering the literal.
     */
    private static String escapeSparqlLiteral(String value) {
        StringBuilder escaped = new StringBuilder(value.length() + 8);
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            switch (c) {
                case '\\':
                    escaped.append("\\\\");
                    break;
                case '"':
                    escaped.append("\\\"");
                    break;
                case '\n':
                    escaped.append("\\n");
                    break;
                case '\r':
                    escaped.append("\\r");
                    break;
                case '\t':
                    escaped.append("\\t");
                    break;
                default:
                    escaped.append(c);
            }
        }
        return escaped.toString();
    }

    /**
     * Queries the metadata SPARQL endpoint for the number of distinct samples
     * already attached to the sample collection of the given pilot study.
     *
     * @param pilotNum pilot number appended to {@code chear-kb:STD-Pilot-}
     * @return the reported count, or 0 when the query yields no count
     */
    private int getSampleCount(String pilotNum) {
        // NOTE(review): pilotNum is spliced into a prefixed name and cannot be
        // escaped like a literal; confirm upstream validation of this column.
        String sampleCountQuery = NameSpaces.getInstance().printSparqlNameSpaceList()
                + " SELECT (count(DISTINCT ?sampleURI) as ?sampleCount) WHERE { "
                + " ?sampleURI hasco:isObjectOf ?SC . "
                + " ?SC hasco:isSampleCollectionOf chear-kb:STD-Pilot-" + pilotNum + " . "
                + "}";

        ResultSetRewindable results;
        // try-with-resources closes the execution even if the query fails.
        try (QueryExecution qexecSample = QueryExecutionFactory.sparqlService(
                Collections.getCollectionsName(Collections.METADATA_SPARQL), sampleCountQuery)) {
            ResultSet sampleResults = qexecSample.execSelect();
            // Copy the results so they remain readable after the execution is closed.
            results = ResultSetFactory.copyResults(sampleResults);
        }

        if (!results.hasNext()) {
            return 0;
        }
        QuerySolution soln = results.next();
        Literal countLiteral = (Literal) soln.get("sampleCount");
        return countLiteral != null ? countLiteral.getInt() : 0;
    }

    /**
     * Formats the sample number of the current row: the running counter plus
     * the count reported by {@link #getSampleCount(String)}, zero-padded to
     * four digits.
     */
    private String formatSampleNumber(CSVRecord rec) {
        return String.format("%04d", counter + getSampleCount(pilotNum(rec)));
    }

    /** Builds the sample URI; the sample suffix is currently not appended. */
    private String getUri(CSVRecord rec, String sampleNumber) {
        return kbPrefix + "SPL-" + sampleNumber + "-Pilot-" + pilotNum(rec);
    }

    /** Returns the sample type from the record, defaulting to {@code sio:Sample}. */
    private String getType(CSVRecord rec) {
        return valueOrDefault(rec, "sampleType", "sio:Sample");
    }

    /** Builds the human-readable label of the sample. */
    private String getLabel(CSVRecord rec, String sampleNumber) {
        return "SID " + sampleNumber + " - Pilot " + pilotNum(rec) + " " + getSampleSuffix(rec);
    }

    /** Returns the original sample ID, or an empty string when missing. */
    private String getOriginalID(CSVRecord rec) {
        return valueOrDefault(rec, "sampleID", "");
    }

    /**
     * Looks up the URI of the subject whose {@code hasco:originalID} equals
     * the record's subject ID.
     *
     * @return the first matching subject URI, or an empty string when the
     *         subject ID is missing or no subject matches
     */
    private String getSubjectUri(CSVRecord rec) {
        String subjectId = valueOrDefault(rec, "subjectID", null);
        if (subjectId == null) {
            return "";
        }

        // The ID comes straight from the CSV file; escape it so quotes or
        // backslashes cannot break out of the string literal.
        String subjectQuery = NameSpaces.getInstance().printSparqlNameSpaceList()
                + " SELECT ?subjectURI WHERE { "
                + " ?subjectURI hasco:originalID \"" + escapeSparqlLiteral(subjectId) + "\" . }";

        ResultSetRewindable results;
        try (QueryExecution qexecSubject = QueryExecutionFactory.sparqlService(
                Collections.getCollectionsName(Collections.METADATA_SPARQL), subjectQuery)) {
            ResultSet subjectResults = qexecSubject.execSelect();
            results = ResultSetFactory.copyResults(subjectResults);
        }

        if (!results.hasNext()) {
            return "";
        }
        QuerySolution soln = results.next();
        return soln.get("subjectURI").toString();
    }

    /** Returns the data acquisition identifier held by this generator. */
    private String getDataAcquisition() {
        return dataAcquisition;
    }

    /** Builds the free-text comment of the sample. */
    private String getComment(CSVRecord rec, String sampleNumber) {
        return "Sample " + sampleNumber + " for Pilot " + pilotNum(rec) + " " + getSampleSuffix(rec);
    }

    /** Returns the sampling method, or an empty string when missing. */
    private String getSamplingMethod(CSVRecord rec) {
        return valueOrDefault(rec, "samplingMethod", "");
    }

    /** Returns the sampling volume, or an empty string when missing. */
    private String getSamplingVolume(CSVRecord rec) {
        return valueOrDefault(rec, "samplingVol", "");
    }

    /** Returns the sampling volume unit, falling back to the default volume unit. */
    private String getSamplingVolumeUnit(CSVRecord rec) {
        return valueOrDefault(rec, "samplingVolUnit", "obo:UO_0000095"); // default volume unit
    }

    /** Returns the storage temperature, or an empty string when missing. */
    private String getStorageTemperature(CSVRecord rec) {
        return valueOrDefault(rec, "storageTemp", "");
    }

    /** Returns the storage temperature unit. */
    private String getStorageTemperatureUnit() {
        // defaulting to Celsius since SID file does not contain temp unit
        return "obo:UO_0000027";
    }

    /**
     * Returns the number of freeze/thaw cycles, or an empty string when
     * missing. The value is read through the column mapping, like every
     * other field.
     */
    private String getNumFreezeThaw(CSVRecord rec) {
        return valueOrDefault(rec, "FTcount", "");
    }

    /** Returns the sample suffix, or an empty string when missing. */
    private String getSampleSuffix(CSVRecord rec) {
        return valueOrDefault(rec, "sampleSuffix", "");
    }

    /** Builds the URI of the pilot study the record belongs to. */
    private String getStudyUri(CSVRecord rec) {
        return kbPrefix + "STD-Pilot-" + pilotNum(rec);
    }

    /** Builds the URI of the sample collection of the record's pilot study. */
    private String getCollectionUri(CSVRecord rec) {
        return kbPrefix + "SC-Pilot-" + pilotNum(rec);
    }

    /** Builds the label of the sample collection of the record's pilot study. */
    private String getCollectionLabel(CSVRecord rec) {
        return "Sample Collection of Pilot Study " + pilotNum(rec);
    }

    /**
     * Builds the property map describing the sample collection that the
     * given record belongs to.
     *
     * @param rec a CSV record of the sample file
     * @return map from property name to value for the sample collection
     */
    public Map<String, Object> createCollectionRow(CSVRecord rec) {
        Map<String, Object> row = new HashMap<>();
        row.put("hasURI", getCollectionUri(rec));
        row.put("a", "hasco:SampleCollection");
        row.put("rdfs:label", getCollectionLabel(rec));
        // NOTE(review): size is reported as record count + 1; confirm the
        // extra one is intended.
        row.put("hasco:hasSize", Integer.toString(Iterables.size(records) + 1));
        row.put("hasco:isSampleCollectionOf", getStudyUri(rec));
        // NOTE(review): this advances the same counter that numbers samples in
        // createRow; kept as-is, but verify it is intentional.
        counter++;
        return row;
    }

    /**
     * Rebuilds the shared row list with one sample-collection row per CSV
     * record.
     *
     * @return the generator's row list, now holding the collection rows
     */
    public List< Map<String, Object> > createCollectionRows() {
        rows.clear();
        for (CSVRecord record : records) {
            rows.add(createCollectionRow(record));
        }
        return rows;
    }
}