package edu.harvard.i2b2.crc.loader.util.csv; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.StringWriter; import java.util.Date; import java.util.Hashtable; import javax.xml.bind.JAXBElement; import javax.xml.namespace.QName; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import edu.harvard.i2b2.common.exception.I2B2Exception; import edu.harvard.i2b2.common.util.jaxb.DTOFactory; import edu.harvard.i2b2.common.util.jaxb.JAXBUtil; import edu.harvard.i2b2.common.util.jaxb.JAXBUtilException; import edu.harvard.i2b2.crc.loader.datavo.CRCLoaderJAXBUtil; import edu.harvard.i2b2.crc.datavo.pdo.ObservationSet; import edu.harvard.i2b2.crc.datavo.pdo.ObservationType; import edu.harvard.i2b2.crc.datavo.pdo.PatientIdType; /** * This program creates ObservationFact portion of PatientData XML file, from * given input csv file. * * Sample column heading : "Record_Id," "Report_Id," 1 "EMPI," 2 "MRN_Type," 3 * "MRN," "LMRNote_Date_Time," 5 "Status," "Author," "COD," "Institution," 9 * "Author_MRN," 10 "Subject," 11 "Classification," "Timestamp," 13 * "Software_Version" * * The above numbered columns are currently used. * * @author rk903 * */ public class ObservationFactCSV2XmlBuilder { private String inputFileName = null; private String outputXmlFileName = null; private static Log log = LogFactory .getLog(ObservationFactCSV2XmlBuilder.class); private static Hashtable csvHeaderMap = null; private Hashtable csvHeaderColumnPosition = null; private JAXBUtil jaxbUtil = null; private DTOFactory dtoFactory = new DTOFactory(); private edu.harvard.i2b2.crc.datavo.pdo.ObjectFactory pdoObjectFactory = new edu.harvard.i2b2.crc.datavo.pdo.ObjectFactory(); static { csvHeaderMap = new Hashtable(); csvHeaderMap.put("encounter_ide", new String[] { "encounter_id" }); csvHeaderMap.put("patient_ide", new String[] { "patient_id" }); // csvHeaderMap.put("encounter_ide_source",new // String[]{"MRN_TYPE","MRN_Type"}); csvHeaderMap.put("encounter_ide_source", new String[] { "sourcesytem_cd" }); csvHeaderMap.put("start_date", new String[] { "start_date" }); csvHeaderMap.put("sourcesystem_cd", new String[] { "sourcesystem_cd" }); csvHeaderMap.put("provider_id", new String[] { "provider_id" }); csvHeaderMap.put("concept_cd", new String[] { "concept_cd" }); csvHeaderMap.put("update_date", new String[] { "Timestamp", "Extraction_Timestamp" }); csvHeaderMap.put("Negation", new String[] { "Negation" }); csvHeaderMap.put("end_date", new String[] { "end_date" }); } /** * constructor * * @param inputFileName * @param outputXmlFileName */ public ObservationFactCSV2XmlBuilder(String inputFileName, String outputXmlFileName) { this.inputFileName = inputFileName; this.outputXmlFileName = outputXmlFileName; jaxbUtil = CRCLoaderJAXBUtil.getJAXBUtil(); } /** * Read csv file and create ObservationFact xml file. * * @throws I2B2Exception */ public void buildXml() throws I2B2Exception { BufferedReader inputReader = null; BufferedWriter observationFactWriter = null; log.info("Before building ObservationFact xml: " + outputXmlFileName + " for " + inputFileName); try { // read file and return buffered reader inputReader = new BufferedReader(new InputStreamReader( new FileInputStream(inputFileName))); observationFactWriter = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(outputXmlFileName))); writeHeader(observationFactWriter); observationFactWriter.write("<ns2:observation_set>\n"); CSVFileReader csvReader = new CSVFileReader(inputFileName, ',', '\"'); // read header and map header column position with element name java.util.Vector<String> headerFields = csvReader.readFields(); int headerFieldCount = headerFields.size(); csvHeaderColumnPosition = PatientDataXmlBuilder .getCsvHeaderColumnPosition(csvHeaderMap, headerFields); java.util.Vector<String> fields = null; int i = 0; ObservationSet observationSet = new ObservationSet(); while ((fields = csvReader.readFields()) != null) { // skip line which have less columns, compared to header column if (headerFieldCount > fields.size()) { continue; } String col[] = (String[]) fields.toArray(new String[] {}); observationSet.getObservation().add(getObservationFact(col)); i++; if (i % 100 == 0) { i = 0; StringWriter strWriter = new StringWriter(); try { jaxbUtil.marshaller(new JAXBElement(new QName("", "observation_set"), ObservationSet.class, observationSet), strWriter); } catch (JAXBUtilException e) { e.printStackTrace(); } String xml = strWriter.toString(); String observationStr = xml.substring(xml.indexOf('>', xml .indexOf("observation_set")) + 1, xml .indexOf("</observation_set")); observationFactWriter.write(observationStr); observationSet = new ObservationSet(); } } if (i > 0) { StringWriter strWriter = new StringWriter(); try { jaxbUtil.marshaller(new JAXBElement(new QName("", "observation_set"), ObservationSet.class, observationSet), strWriter); } catch (JAXBUtilException e) { e.printStackTrace(); } String xml = strWriter.toString(); String observationStr = xml.substring(xml.indexOf('>', xml .indexOf("observation_set")) + 1, xml .indexOf("</observation_set")); observationFactWriter.write(observationStr); } observationFactWriter.write("</ns2:observation_set>\n"); writeEndDocument(observationFactWriter); } catch (Exception ex) { ex.printStackTrace(); throw new I2B2Exception("Build XML failed ", ex); } finally { try { if (inputReader != null) { inputReader.close(); } if (observationFactWriter != null) { observationFactWriter.close(); } } catch (IOException closeEx) { closeEx.printStackTrace(); } } log.info("Finished building ObservationFact xml: " + outputXmlFileName + " for " + inputFileName); } private String getColumnValue(String col[], String elementName) { String elementValue = PatientDataXmlBuilder.getColumnValue( csvHeaderColumnPosition, col, elementName); return elementValue; } private String getNegationPrefix(String col[]) { // check if negation present in this file. if (csvHeaderColumnPosition.get("Negation") == null) { return ""; } String negation = getColumnValue(col, "Negation"); if (negation != null) { if (negation.equals("negated")) { return "-NEG"; } else { return ""; } } return ""; } private String getProviderId(String col[]) { if (csvHeaderColumnPosition.get("provider_id") == null) { return "@"; } else { return getColumnValue(col, "provider_id"); } } private String getEndDate(String col[]) { String endDate = getColumnValue(col, "end_date"); if (endDate == null) { endDate = ""; } return endDate; } private ObservationType getObservationFact(String col[]) { ObservationType observation = new ObservationType(); ObservationType.EventId eventId = new ObservationType.EventId(); eventId.setSource(getColumnValue(col, "sourcesystem_cd")); eventId.setValue(getColumnValue(col, "encounter_ide")); observation.setEventId(eventId); PatientIdType patientId = new PatientIdType(); patientId.setSource("EMPI"); patientId.setValue(getColumnValue(col, "patient_ide")); observation.setPatientId(patientId); ObservationType.ConceptCd conceptCd = new ObservationType.ConceptCd(); conceptCd.setValue(getColumnValue(col, "concept_cd").toLowerCase()); observation.setConceptCd(conceptCd); ObservationType.ObserverCd observerCd = new ObservationType.ObserverCd(); observerCd.setValue(getProviderId(col)); observation.setObserverCd(observerCd); Date date = PatientDataXmlBuilder.getDate(getColumnValue(col, "start_date")); observation.setStartDate(dtoFactory.getXMLGregorianCalendar(date .getTime())); date = PatientDataXmlBuilder.getDate(getEndDate(col)); observation.setEndDate((date != null) ? dtoFactory .getXMLGregorianCalendar(date.getTime()) : null); return observation; } private void writeHeader(BufferedWriter observationFactWriter) throws IOException { observationFactWriter.write(PatientDataXmlBuilder.getDocumentHeader()); } private void writeEndDocument(BufferedWriter observationFactWriter) throws IOException { observationFactWriter.write(PatientDataXmlBuilder.getEndDocument()); } }