package org.hadatac.data.loader.ccsv; import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.IOException; import java.math.BigDecimal; import java.util.Date; import java.util.Iterator; import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVRecord; import org.apache.jena.rdf.model.Model; import org.apache.jena.rdf.model.ModelFactory; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.hadatac.data.loader.util.FileFactory; import org.hadatac.data.model.ParsingResult; import org.hadatac.entity.pojo.DataAcquisition; import org.hadatac.entity.pojo.Dataset; import org.hadatac.entity.pojo.Deployment; import org.hadatac.entity.pojo.HADataC; import org.hadatac.entity.pojo.Measurement; import org.hadatac.entity.pojo.MeasurementType; import org.hadatac.entity.pojo.Subject; import org.hadatac.metadata.loader.ValueCellProcessing; import org.hadatac.utils.Collections; import org.hadatac.utils.Feedback; import play.Play; public class Parser { private FileFactory files; private HADataC hadatacCcsv; private HADataC hadatacKb; public Parser() { hadatacCcsv = null; hadatacKb = null; } public ParsingResult validate(int mode, FileFactory files) throws IOException { ParsingResult result = null; String message = ""; String preamble; this.files = files; files.openFile("ccsv", "r"); preamble = getPreamble(); Model model = ModelFactory.createDefaultModel(); model.read(new ByteArrayInputStream(preamble.getBytes()), null, "TTL"); // Verify if model is successfully loaded if (model.isEmpty()) { message += Feedback.println(mode, "[ERROR] Preamble not a well-formed Turtle."); System.out.println("[ERROR] Preamble not a well-formed Turtle."); } else { message += Feedback.println(mode, "[OK] Preamble a well-formed Turtle."); System.out.println("[OK] Preamble a well-formed Turtle."); } result = loadFromPreamble(mode, model); if (result.getStatus() == 0) { message += result.getMessage(); result = loadFromKb(mode); message += result.getMessage(); } files.closeFile("ccsv", "r"); return new ParsingResult(result.getStatus(), message); } public ParsingResult index(int mode) { System.out.println("indexing..."); DataAcquisition dataAcquisition = DataAcquisition.create(hadatacCcsv, hadatacKb); if (hadatacCcsv.getDataAcquisition().getStatus() > 0) { hadatacKb.getDataAcquisition().merge(dataAcquisition); } else { hadatacKb.setDataAcquisition(dataAcquisition); } hadatacKb.getDataAcquisition().save(); ParsingResult result = indexMeasurements(); return new ParsingResult(result.getStatus(), result.getMessage()); } private ParsingResult indexMeasurements(){ System.out.println("indexMeasurements()..."); String message = ""; try { files.openFile("csv", "r"); } catch (IOException e) { e.printStackTrace(); message += "[ERROR] Fail to open the csv file\n"; return new ParsingResult(1, message); } Iterable<CSVRecord> records = null; try { records = CSVFormat.DEFAULT.withHeader().parse(files.getReader("csv")); } catch (IOException e) { e.printStackTrace(); message += "[ERROR] Fail to parse header of the csv file\n"; return new ParsingResult(1, message); } int total_count = 0; int batch_size = 10000; int nTimeStampCol = -1; int nTimeInstantCol = -1; int nIdCol = -1; for(MeasurementType mt : hadatacKb.getDataset().getMeasurementTypes()){ if(mt.getTimestampColumn() > -1){ nTimeStampCol = mt.getTimestampColumn(); } if(mt.getTimeInstantColumn() > -1){ nTimeInstantCol = mt.getTimeInstantColumn(); } if(mt.getIdColumn() > -1){ nIdCol = mt.getIdColumn(); } } boolean isSubjectPlatform = Subject.isPlatform(hadatacKb.getDeployment().getPlatform().getUri()); SolrClient solr = new HttpSolrClient(Play.application().configuration(). getString("hadatac.solr.data") + Collections.DATA_ACQUISITION); for (CSVRecord record : records) { Iterator<MeasurementType> iter = hadatacKb.getDataset().getMeasurementTypes().iterator(); while (iter.hasNext()) { MeasurementType measurementType = iter.next(); if (measurementType.getTimestampColumn() > -1) { continue; } if (measurementType.getTimeInstantColumn() > -1) { continue; } if (measurementType.getIdColumn() > -1) { continue; } Measurement measurement = new Measurement(); if(record.get(measurementType.getValueColumn() - 1).isEmpty()){ continue; } else { String originalValue = record.get(measurementType.getValueColumn() - 1); String codeValue = Subject.findCodeValue( measurementType.getCharacteristicUri(), originalValue); if (null == codeValue) { measurement.setValue(originalValue); } else { measurement.setValue(codeValue); } } if(nTimeStampCol > -1){ String sTime = record.get(nTimeStampCol - 1); int timeStamp = new BigDecimal(sTime).intValue(); Date time = new Date((long)timeStamp * 1000); measurement.setTimestamp(time.toString()); } else if(nTimeInstantCol > -1){ measurement.setTimestamp(record.get(nTimeInstantCol - 1)); } else { measurement.setTimestamp(""); } measurement.setStudyUri(ValueCellProcessing.replaceNameSpaceEx(hadatacKb.getDataAcquisition().getStudyUri())); if(nIdCol > -1){ if (measurementType.getEntityUri().equals(ValueCellProcessing.replacePrefixEx("sio:Human"))) { Subject subject = Subject.findSubject(measurement.getStudyUri(), record.get(nIdCol - 1)); if (null != subject) { String subjectUri = subject.getUri(); subjectUri = Subject.checkObjectUri(subjectUri, measurementType.getCharacteristicUri()); measurement.setObjectUri(subjectUri); } else { measurement.setObjectUri(""); } } else if (measurementType.getEntityUri().equals(ValueCellProcessing.replacePrefixEx("sio:Sample"))) { String sampleUri = Subject.findSampleUri(measurement.getStudyUri(), record.get(nIdCol - 1)); if (sampleUri != null) { measurement.setObjectUri(sampleUri); } else { measurement.setObjectUri(""); } } } else { if(isSubjectPlatform) { measurement.setObjectUri(hadatacKb.getDeployment().getPlatform().getUri()); } else { measurement.setObjectUri(""); } } measurement.setUri(ValueCellProcessing.replacePrefixEx(measurement.getStudyUri()) + "/" + ValueCellProcessing.replaceNameSpaceEx(hadatacKb.getDataAcquisition().getUri()).split(":")[1] + "/" + hadatacCcsv.getDataset().getLocalName() + "/" + measurementType.getLocalName() + "-" + total_count); measurement.setOwnerUri(hadatacKb.getDataAcquisition().getOwnerUri()); measurement.setAcquisitionUri(hadatacKb.getDataAcquisition().getUri()); measurement.setUnit(measurementType.getUnitLabel()); measurement.setUnitUri(measurementType.getUnitUri()); measurement.setCharacteristic(measurementType.getCharacteristicLabel()); measurement.setCharacteristicUri(measurementType.getCharacteristicUri()); measurement.setInstrumentModel(hadatacKb.getDeployment().getInstrument().getLabel()); measurement.setInstrumentUri(hadatacKb.getDeployment().getInstrument().getUri()); measurement.setPlatformName(hadatacKb.getDeployment().getPlatform().getLabel()); measurement.setPlatformUri(hadatacKb.getDeployment().getPlatform().getUri()); measurement.setEntity(measurementType.getEntityLabel()); measurement.setEntityUri(measurementType.getEntityUri()); measurement.setDatasetUri(hadatacCcsv.getDatasetKbUri()); try { solr.addBean(measurement); } catch (IOException | SolrServerException e) { System.out.println("[ERROR] SolrClient.addBean - e.Message: " + e.getMessage()); } if((++total_count) % batch_size == 0){ try { System.out.println("solr.commit()..."); solr.commit(); System.out.println(String.format("Committed %s measurements!", batch_size)); } catch (IOException | SolrServerException e) { System.out.println("[ERROR] SolrClient.commit - e.Message: " + e.getMessage()); message += "[ERROR] Fail to commit to solr\n"; try { solr.close(); } catch (IOException e1) { System.out.println("[ERROR] SolrClient.close - e.Message: " + e1.getMessage()); message += "[ERROR] Fail to close solr\n"; } return new ParsingResult(1, message); } } } } try { try { System.out.println("solr.commit()..."); solr.commit(); System.out.println(String.format("Committed %s measurements!", total_count % batch_size)); } catch (IOException | SolrServerException e) { solr.close(); System.out.println("[ERROR] SolrClient.commit - e.Message: " + e.getMessage()); message += "[ERROR] Fail to commit to solr\n"; return new ParsingResult(1, message); } files.closeFile("csv", "r"); } catch (IOException e) { e.printStackTrace(); message += "[ERROR] Fail to close the csv file\n"; return new ParsingResult(1, message); } hadatacKb.getDataAcquisition().addNumberDataPoints(total_count); hadatacKb.getDataAcquisition().save(); System.out.println("Finished indexMeasurements()"); try { solr.close(); } catch (IOException e) { System.out.println("[ERROR] SolrClient.close - e.Message: " + e.getMessage()); message += "[ERROR] Fail to close solr\n"; } return new ParsingResult(0, message); } private ParsingResult loadFromKb(int mode) { System.out.println("loadFromKb is called!"); String message = ""; hadatacKb = HADataC.find(); hadatacKb.setDataAcquisition(DataAcquisition.find(hadatacCcsv)); if (hadatacCcsv.getDataAcquisition().getStatus() > 0) { if (hadatacKb.getDataAcquisition() == null) { message += Feedback.println(mode, "[ERROR] Data Acquisition not found in the knowledge base."); return new ParsingResult(1, message); } else { message += Feedback.println(mode, "[OK] Data Acquisition found on the knowledge base."); } } else { if (hadatacKb.getDataAcquisition() != null) { message += Feedback.println(mode, "[ERROR] Data Acquisition already exists in the knowledge base."); return new ParsingResult(1, message); } else { message += Feedback.println(mode, "[OK] Data Acquisition does not exist in the knowledge base."); } } // dataset if (hadatacCcsv.getDataAcquisition().getStatus() > 0) { if (hadatacKb.getDataAcquisition().containsDataset(hadatacCcsv.getDatasetKbUri())) { message += Feedback.println(mode, "[ERROR] Dataset was already processed."); } else { message += Feedback.println(mode, "[OK] Dataset is not already processed."); } } else { message += Feedback.println(mode, "[OK] Dataset is not already processed. This is a new Data Acquisition."); } Dataset dataset = new Dataset(); dataset.setUri(hadatacCcsv.getDatasetKbUri()); hadatacKb.setDataset(dataset); // deployment if (hadatacCcsv.getDataAcquisition().getStatus() > 0) { hadatacKb.setDeployment(Deployment.findFromDataAcquisition(hadatacKb)); } else { hadatacKb.setDeployment(Deployment.findFromPreamble(hadatacCcsv)); } if (hadatacKb.getDeployment() == null) { message += Feedback.println(mode, "[ERROR] Deployment is not defined in the knowledge base."); } else { message += Feedback.println(mode, "[OK] Deployment is defined in the knowledge base: <" + hadatacKb.getDeployment().getLocalName() + ">"); if (hadatacKb.getDeployment().getEndedAt() == null) { message += Feedback.println(mode, "[ERROR] Deployment is already finished at: " + hadatacKb.getDeployment().getEndedAt() + ""); } else { message += Feedback.println(mode, "[OK] Deployment is still open."); } } // measurement types hadatacKb.getDataset().setMeasurementTypes(MeasurementType.find(hadatacCcsv)); return new ParsingResult(0, message); } private ParsingResult loadFromPreamble(int mode, Model model) { String message = ""; // load hadatac hadatacCcsv = HADataC.find(model); if (hadatacCcsv == null) { System.out.println("hadatacCcsv == null"); message += Feedback.println(mode, "[ERROR] Preamble does not contain a single hadatac:KnowledgeBase."); return new ParsingResult(1, message); } else { System.out.println("[OK] Preamble contains a single hadatac:KnowledgeBase: <" + hadatacCcsv.getLocalName() + ">"); message += Feedback.println(mode, "[OK] Preamble contains a single hadatac:KnowledgeBase: <" + hadatacCcsv.getLocalName() + ">"); } // load dataset hadatacCcsv.setDataset(Dataset.find(model)); if (hadatacCcsv.getDataset() == null) { message += Feedback.println(mode, "[ERROR] Preamble does not contain a single vstoi:Dataset."); return new ParsingResult(1, message); } else { System.out.println("[OK] Preamble contains a single vstoi:Dataset: <" + hadatacCcsv.getDataset().getLocalName() + ">"); message += Feedback.println(mode, "[OK] Preamble contains a single vstoi:Dataset: <" + hadatacCcsv.getDataset().getLocalName() + ">"); } // load datacollection hadatacCcsv.setDataAcquisition(DataAcquisition.find(model, hadatacCcsv.getDataset()));; if (hadatacCcsv.getDataAcquisition() == null) { message += Feedback.println(mode, "[ERROR] Preamble does not contain a single hasneto:DataAcquisition."); return new ParsingResult(1, message); } else { System.out.println("[OK] Preamble contains a single hasneto:DataAcquisition: <" + hadatacCcsv.getDataAcquisition().getLocalName() + ">"); message += Feedback.println(mode, "[OK] Preamble contains a single hasneto:DataAcquisition: <" + hadatacCcsv.getDataAcquisition().getLocalName() + ">"); } // deployment if (hadatacCcsv.getDataAcquisition().getStatus() == 0) { System.out.println("Deployment find"); hadatacCcsv.setDeployment(Deployment.find(model, hadatacCcsv.getDataAcquisition())); if (hadatacCcsv.getDeployment() == null) { message += Feedback.println(mode, "[ERROR] This hasneto:DataAcquisition requires a vstoi:Deployment that is not specified."); return new ParsingResult(1, message); } else { message += Feedback.println(mode, "[OK] This hasneto:DataAcquisition requires a vstoi:Deployment that is specified: <" + hadatacCcsv.getDeployment().getLocalName() + ">"); } } else { message += Feedback.println(mode, "[OK] This hasneto:DataAcquisition does not require a vstoi:Deployment in the preamble."); } // load measurement types hadatacCcsv.getDataset().setMeasurementTypes(MeasurementType.find(model, hadatacCcsv.getDataset())); if (hadatacCcsv.getDataset().getMeasurementTypes().isEmpty()) { System.out.println("Measurement is empty"); message += Feedback.println(mode, "[ERROR] Preamble does not contain any well described measurement types."); return new ParsingResult(1, message); } else { message += Feedback.print(mode, "[OK] Preamble contains the following well described measurement types: "); Iterator<MeasurementType> i = hadatacCcsv.getDataset().getMeasurementTypes().iterator(); while (i.hasNext()) { message += Feedback.print(mode, "<" + i.next().getLocalName() + "> "); } message += Feedback.println(mode, ""); } return new ParsingResult(0, message); } private String getPreamble() throws IOException { BufferedReader br; String line; StringBuilder preamble = new StringBuilder(); boolean inPreamble = false; boolean inCsv = false; files.openFile("csv", "w"); br = files.getReader("ccsv"); while ((line = br.readLine()) != null) { if (inCsv) { files.writeln("csv", line); } if (line.contains("== END-PREAMBLE ==")) { inPreamble = false; inCsv = true; } if (inPreamble) { preamble.append(line + "\n"); } if (line.contains("== START-PREAMBLE ==")) { inPreamble = true; } } files.closeFile("csv", "w"); return preamble.toString(); } }