/* * Created on : 07-11-2013 * Author : Bastian Weinlich */ package de.hpi.i2b2.girix; import java.util.HashMap; import java.util.Map; import de.hpi.i2b2.girix.datavo.pdo.ConceptSet; import de.hpi.i2b2.girix.datavo.pdo.ConceptType; import de.hpi.i2b2.girix.datavo.pdo.EventSet; import de.hpi.i2b2.girix.datavo.pdo.EventType; import de.hpi.i2b2.girix.datavo.pdo.ModifierSet; import de.hpi.i2b2.girix.datavo.pdo.ModifierType; import de.hpi.i2b2.girix.datavo.pdo.ObservationSet; import de.hpi.i2b2.girix.datavo.pdo.ObservationType; import de.hpi.i2b2.girix.datavo.pdo.ObserverSet; import de.hpi.i2b2.girix.datavo.pdo.ObserverType; import de.hpi.i2b2.girix.datavo.pdo.ParamType; import de.hpi.i2b2.girix.datavo.pdo.PatientSet; import de.hpi.i2b2.girix.datavo.pdo.PatientType; // This class extracts the various information of the CRC response and builds csv files which can then be imported into R // So this class holds information of the structure of the CRC response and determines how data is provided in R public class CRCResponseParser { // The R column class informations public static final String patientsColClasses = "c(\"character\",\"character\",\"factor\",\"i2b2DateTime\",\"i2b2DateTime\",\"factor\",\"numeric\",\"factor\",\"factor\"," + "\"factor\",\"factor\",\"character\",\"character\",\"factor\",\"character\",\"i2b2DateTime\",\"i2b2DateTime\",\"i2b2DateTime\",\"character\",\"character\")"; public static final String conceptsColClasses = "c(\"character\",\"character\",\"character\",\"numeric\",\"i2b2DateTime\",\"i2b2DateTime\",\"character\",\"numeric\",\"i2b2DateTime\",\"numeric\"" + ",\"character\",\"character\",\"character\",\"character\",\"character\",\"numeric\",\"character\",\"character\",\"character\"," + "\"character\",\"character\",\"numeric\",\"character\",\"i2b2DateTime\",\"character\",\"character\",\"i2b2DateTime\",\"character\",\"character\"," + "\"character\",\"character\")"; public static final String modifierColClasses = "c(\"i2b2DateTime\",\"i2b2DateTime\",\"character\",\"character\",\"character\",\"character\",\"i2b2DateTime\",\"numeric\")"; public static final String eventsColClasses = "c(\"i2b2DateTime\",\"i2b2DateTime\",\"character\",\"i2b2DateTime\",\"character\",\"character\"," + "\"i2b2DateTime\",\"i2b2DateTime\",\"numeric\",\"numeric\",\"character\",\"character\",\"character\",\"character\")"; public static final String observersColClasses = "c(\"i2b2DateTime\",\"i2b2DateTime\",\"character\",\"character\",\"character\",\"character\",\"i2b2DateTime\",\"numeric\")"; private static String SEP = GIRIXUtil.SEP; public static GIRIXCSVContainer parsePatientSet(PatientSet crcPS) { boolean empty = true; // Build patient set csv string StringBuilder psSB = new StringBuilder(10000); // First line of csv string: Column names String psString = "patient_num" + SEP + "source" + SEP + "vital_status_cd" + SEP + "birth_date" + SEP + "death_date" + SEP + "sex_cd" + SEP + "age_in_years_num" + SEP + "language_cd" + SEP + "race_cd" + SEP + "marital_status_cd" + SEP + "religion_cd" + SEP + "zip_cd" + SEP + "statecityzip_path" + SEP + "income_cd" + SEP + "patient_blob" + SEP + "update_date" + SEP + "download_date" + SEP + "import_date" + SEP + "sourcesystem_cd" + SEP + "upload_id\n"; psSB.append(psString); for (PatientType patient : crcPS.getPatient()) { empty = false; // If the parameter doesn't exist just add a single separator character psSB.append((patient.getPatientId().getValue() == null) ? (SEP) : (escape(patient.getPatientId().getValue()) + SEP)); psSB.append((patient.getPatientId().getSource() == null) ? (SEP) : (escape(patient.getPatientId().getSource()) + SEP)); // Order of params is undefined here. So put it all in a map first and append it afterwards in the right order to the string Map<String,String> m = new HashMap<String,String>(); for (ParamType pt : patient.getParam()) { m.put(pt.getColumn(), pt.getValue()); } // Now append in the right order psSB.append( ((m.get("vital_status_cd") == null) ? (SEP) : (escape(m.get("vital_status_cd")) + SEP)) + ((m.get("birth_date") == null) ? (SEP) : (m.get("birth_date") + SEP)) + ((m.get("death_date") == null) ? (SEP) : (m.get("death_date") + SEP)) + ((m.get("sex_cd") == null) ? (SEP) : (escape(m.get("sex_cd")) + SEP)) + ((m.get("age_in_years_num") == null) ? (SEP) : (m.get("age_in_years_num") + SEP)) + ((m.get("language_cd") == null) ? (SEP) : (escape(m.get("language_cd")) + SEP)) + ((m.get("race_cd") == null) ? (SEP) : (escape(m.get("race_cd")) + SEP)) + ((m.get("marital_status_cd") == null) ? (SEP) : (escape(m.get("marital_status_cd")) + SEP)) + ((m.get("religion_cd") == null) ? (SEP) : (escape(m.get("religion_cd")) + SEP)) + ((m.get("zip_cd") == null) ? (SEP) : (escape(m.get("zip_cd")) + SEP)) + ((m.get("statecityzip_path") == null) ? (SEP) : (escape(m.get("statecityzip_path")) + SEP)) + ((m.get("income_cd") == null) ? (SEP) : (escape(m.get("income_cd")) + SEP)) + ((patient.getPatientBlob() == null) ? (SEP) : (escape(patient.getPatientBlob().toString()) + SEP)) + ((patient.getUpdateDate() == null) ? (SEP) : (patient.getUpdateDate() + SEP)) + ((patient.getDownloadDate() == null) ? (SEP) : (patient.getDownloadDate() + SEP)) + ((patient.getImportDate() == null) ? (SEP) : (patient.getImportDate() + SEP)) + ((patient.getSourcesystemCd() == null) ? (SEP) : (escape(patient.getSourcesystemCd()) + SEP)) + ((patient.getUploadId() == null) ? "" : (escape(patient.getUploadId()))) + "\n" ); } if (empty) return new GIRIXCSVContainer(psSB.toString(), false); else return new GIRIXCSVContainer(psSB.toString(), true); } public static GIRIXCSVContainer parseObservationSet(ObservationSet crcOS, ConceptSet crcCS) { boolean empty = true; // Build concept_cd -> concept_path map Map<String, String> conceptMap = new HashMap<String,String>(); for(ConceptType ct : crcCS.getConcept()) { conceptMap.put(ct.getConceptCd(), ct.getConceptPath()); } // Build observation set csv string StringBuilder osSB = new StringBuilder(10000); // First line of csv string: Column names String osString = "concept_cd_name" + SEP + "concept_cd_value" + SEP + "concept_path" + SEP + "confidence_num" + SEP + "download_date" + SEP + "end_date" + SEP + "event_id_source" + SEP + "event_id_value" + SEP + "import_date" + SEP + "instance_num" + SEP + "location_cd_name" + SEP + "location_cd_value" + SEP + "modifier_cd_name" + SEP + "modifier_cd_value" + SEP + "nvalnum_units" + SEP + "nvalnum_value" + SEP + "observation_blob" + SEP + "observer_cd_name" + SEP + "observer_cd_value" + SEP + "patient_id_source" + SEP + "patient_id_value" + SEP + "quantity_num" + SEP + "sourcesystem_cd" + SEP + "start_date" + SEP + "tvalchar" + SEP + "units_cd" + SEP + "update_date" + SEP + "upload_id" + SEP + "valueflag_cd_name" + SEP + "valueflag_cd_value" + SEP + "valuetype_cd\n"; osSB.append(osString); for (ObservationType obs : crcOS.getObservation()) { empty = false; osSB.append( ((obs.getConceptCd().getName() == null) ? (SEP) : (escape(obs.getConceptCd().getName()) + SEP)) + ((obs.getConceptCd().getValue() == null) ? (SEP) : (escape(obs.getConceptCd().getValue()) + SEP)) + // Now look at the concept_cd -> concept_path map and add the path. This is equivalent to JOIN concept_path BY concept_cd ((obs.getConceptCd().getValue() == null || conceptMap.get(obs.getConceptCd().getValue()) == null) ? (SEP) : (escape(conceptMap.get(obs.getConceptCd().getValue())) + SEP)) + ((obs.getConfidenceNum() == null) ? (SEP) : (obs.getConfidenceNum() + SEP)) + ((obs.getDownloadDate() == null) ? (SEP) : (obs.getDownloadDate() + SEP)) + ((obs.getEndDate() == null) ? (SEP) : (obs.getEndDate() + SEP)) + ((obs.getEventId().getSource() == null) ? (SEP) : (escape(obs.getEventId().getSource()) + SEP)) + ((obs.getEventId().getValue() == null) ? (SEP) : (obs.getEventId().getValue() + SEP)) + ((obs.getImportDate() == null) ? (SEP) : (obs.getImportDate() + SEP)) + ((obs.getInstanceNum().getValue() == null) ? (SEP) : (obs.getInstanceNum().getValue() + SEP)) + ((obs.getLocationCd().getName() == null) ? (SEP) : (escape(obs.getLocationCd().getName()) + SEP)) + ((obs.getLocationCd().getValue() == null) ? (SEP) : (escape(obs.getLocationCd().getValue()) + SEP)) + ((obs.getModifierCd().getName() == null) ? (SEP) : (escape(obs.getModifierCd().getName()) + SEP)) + ((obs.getModifierCd().getValue() == null) ? (SEP) : (escape(obs.getModifierCd().getValue()) + SEP)) + ((obs.getNvalNum().getUnits() == null) ? (SEP) : (escape(obs.getNvalNum().getUnits()) + SEP)) + ((obs.getNvalNum().getValue() == null) ? (SEP) : (obs.getNvalNum().getValue() + SEP)) + ((obs.getObservationBlob() == null) ? (SEP) : (escape(obs.getObservationBlob().toString()) + SEP)) + ((obs.getObserverCd().getName() == null) ? (SEP) : (escape(obs.getObserverCd().getName()) + SEP)) + ((obs.getObserverCd().getValue() == null) ? (SEP) : (escape(obs.getObserverCd().getValue()) + SEP)) + ((obs.getPatientId().getSource() == null) ? (SEP) : (escape(obs.getPatientId().getSource()) + SEP)) + ((obs.getPatientId().getValue() == null) ? (SEP) : (escape(obs.getPatientId().getValue()) + SEP)) + ((obs.getQuantityNum() == null) ? (SEP) : (obs.getQuantityNum() + SEP)) + ((obs.getSourcesystemCd() == null) ? (SEP) : (escape(obs.getSourcesystemCd()) + SEP)) + ((obs.getStartDate() == null) ? (SEP) : (obs.getStartDate() + SEP)) + ((obs.getTvalChar() == null) ? (SEP) : (escape(obs.getTvalChar()) + SEP)) + ((obs.getUnitsCd() == null) ? (SEP) : (escape(obs.getUnitsCd()) + SEP)) + ((obs.getUpdateDate() == null) ? (SEP) : (obs.getUpdateDate() + SEP)) + ((obs.getUploadId() == null) ? (SEP) : (escape(obs.getUploadId() + SEP))) + ((obs.getValueflagCd().getName() == null) ? (SEP) : (escape(obs.getValueflagCd().getName()) + SEP)) + ((obs.getValueflagCd().getValue() == null) ? (SEP) : (escape(obs.getValueflagCd().getValue()) + SEP)) + ((obs.getValuetypeCd() == null) ? "" : (escape(obs.getValuetypeCd()))) + "\n" ); } if (empty) return new GIRIXCSVContainer(osSB.toString(), false); else return new GIRIXCSVContainer(osSB.toString(), true); } public static GIRIXCSVContainer parseModifierSet(ModifierSet crcMS) { boolean empty = true; // Build modifier set csv string StringBuilder msSB = new StringBuilder(10000); // First line of csv string: Column names String msString = "download_date" + SEP + "import_date" + SEP + "modifier_cd" + SEP + "modifier_path" + SEP + "name_char" + SEP + "sourcesystem_cd" + SEP + "update_date" + SEP + "upload_id" + "\n"; msSB.append(msString); for (ModifierType modType : crcMS.getModifier()) { empty = false; msSB.append( ((modType.getDownloadDate() == null) ? (SEP) : (modType.getDownloadDate() + SEP)) + ((modType.getImportDate() == null) ? (SEP) : (modType.getImportDate() + SEP)) + ((modType.getModifierCd() == null) ? (SEP) : (escape(modType.getModifierCd()) + SEP)) + ((modType.getModifierPath() == null) ? (SEP) : (escape(modType.getModifierPath()) + SEP)) + ((modType.getNameChar() == null) ? (SEP) : (escape(modType.getNameChar()) + SEP)) + ((modType.getSourcesystemCd() == null) ? (SEP) : (escape(modType.getSourcesystemCd()) + SEP)) + ((modType.getUpdateDate() == null) ? (SEP) : (modType.getUpdateDate() + SEP)) + ((modType.getUploadId() == null) ? (SEP) : (modType.getUploadId())) + "\n" ); } if (empty) return new GIRIXCSVContainer(msSB.toString(), false); else return new GIRIXCSVContainer(msSB.toString(), true); } public static GIRIXCSVContainer parseEventSet(EventSet crcES) { boolean empty = true; // Build modifier set csv string StringBuilder esSB = new StringBuilder(10000); // First line of csv string: Column names String esString = "download_date" + SEP + "end_date" + SEP + "event_id" + SEP + "import_date" + SEP + "patient_num" + SEP + "sourcesystem_cd" + SEP + "start_date" + SEP + "update_date" + SEP + "upload_id" + SEP + "length_of_stay" + SEP + "location_path" + SEP + "active_status" + SEP + "location_cd" + SEP + "inout_cd" + "\n"; esSB.append(esString); for (EventType evType : crcES.getEvent()) { empty = false; // Order of params is undefined here. So put it all in a map first and append it afterwards in the right order to the string Map<String,String> m = new HashMap<String,String>(); for (ParamType pt : evType.getParam()) { m.put(pt.getColumn(), pt.getValue()); } esSB.append( ((evType.getDownloadDate() == null) ? (SEP) : (evType.getDownloadDate() + SEP)) + ((evType.getEndDate() == null) ? (SEP) : (evType.getEndDate() + SEP)) + ((evType.getEventId().getValue() == null) ? (SEP) : (escape(evType.getEventId().getValue()) + SEP)) + ((evType.getImportDate() == null) ? (SEP) : (evType.getImportDate() + SEP)) + ((evType.getPatientId().getValue() == null) ? (SEP) : (escape(evType.getPatientId().getValue()) + SEP)) + ((evType.getSourcesystemCd() == null) ? (SEP) : (escape(evType.getSourcesystemCd()) + SEP)) + ((evType.getStartDate() == null) ? (SEP) : (evType.getStartDate() + SEP)) + ((evType.getUpdateDate() == null) ? (SEP) : (evType.getUpdateDate() + SEP)) + ((evType.getUploadId() == null) ? (SEP) : (evType.getUploadId() + SEP)) + ((m.get("length_of_stay") == null) ? (SEP) : (m.get("length_of_stay") + SEP)) + ((m.get("location_path") == null) ? (SEP) : (escape(m.get("location_path")) + SEP)) + ((m.get("active_status_cd") == null) ? (SEP) : (escape(m.get("active_status_cd")) + SEP)) + ((m.get("location_cd") == null) ? (SEP) : (escape(m.get("location_cd")) + SEP)) + ((m.get("inout_cd") == null) ? (SEP) : (escape(m.get("inout_cd")))) + "\n" ); } if (empty) return new GIRIXCSVContainer(esSB.toString(), false); else return new GIRIXCSVContainer(esSB.toString(), true); } public static GIRIXCSVContainer parseObserverSet(ObserverSet crcObS) { boolean empty = true; // Build modifier set csv string StringBuilder obsSB = new StringBuilder(10000); // First line of csv string: Column names String esString = "download_date" + SEP + "import_date" + SEP + "name_char" + SEP + "observer_cd" + SEP + "observer_path" + SEP + "sourcesystem_cd" + SEP + "update_date" + SEP + "upload_id" + "\n"; obsSB.append(esString); for (ObserverType obsType : crcObS.getObserver()) { empty = false; obsSB.append( ((obsType.getDownloadDate() == null) ? (SEP) : (obsType.getDownloadDate() + SEP)) + ((obsType.getImportDate() == null) ? (SEP) : (obsType.getImportDate() + SEP)) + ((obsType.getNameChar() == null) ? (SEP) : (escape(obsType.getNameChar()) + SEP)) + ((obsType.getObserverCd() == null) ? (SEP) : (escape(obsType.getObserverCd()) + SEP)) + ((obsType.getObserverPath() == null) ? (SEP) : (escape(obsType.getObserverPath()) + SEP)) + ((obsType.getSourcesystemCd() == null) ? (SEP) : (escape(obsType.getSourcesystemCd()) + SEP)) + ((obsType.getUpdateDate() == null) ? (SEP) : (obsType.getUpdateDate() + SEP)) + ((obsType.getUploadId() == null) ? (SEP) : (obsType.getUploadId())) + "\n" ); } if (empty) return new GIRIXCSVContainer(obsSB.toString(), false); else return new GIRIXCSVContainer(obsSB.toString(), true); } // Helper function for correct handling of separator strings private static String escape(String s) { return "\"" + s.replace("\"", "\"\"") + "\""; } }