/* * Copyright (c) 2012. The Genome Analysis Centre, Norwich, UK * MISO project contacts: Robert Davey, Mario Caccamo @ TGAC * ********************************************************************* * * This file is part of MISO. * * MISO is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * MISO is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with MISO. If not, see <http://www.gnu.org/licenses/>. * * ********************************************************************* */ package uk.ac.bbsrc.tgac.miso.notification.consumer.service.mechanism; import net.sf.json.JSONArray; import net.sf.json.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.integration.Message; import org.springframework.util.Assert; import uk.ac.bbsrc.tgac.miso.core.data.*; import uk.ac.bbsrc.tgac.miso.core.data.impl.RunImpl; import uk.ac.bbsrc.tgac.miso.core.data.impl.SequencerPartitionContainerImpl; import uk.ac.bbsrc.tgac.miso.core.data.impl.pacbio.PacBioRun; import uk.ac.bbsrc.tgac.miso.core.data.impl.pacbio.PacBioStatus; import uk.ac.bbsrc.tgac.miso.core.data.type.HealthType; import uk.ac.bbsrc.tgac.miso.core.data.type.PlatformType; import uk.ac.bbsrc.tgac.miso.core.exception.InterrogationException; import uk.ac.bbsrc.tgac.miso.core.manager.RequestManager; import uk.ac.bbsrc.tgac.miso.core.service.integration.mechanism.NotificationMessageConsumerMechanism; import uk.ac.bbsrc.tgac.miso.integration.util.IntegrationUtils; import uk.ac.bbsrc.tgac.miso.tools.run.RunFolderConstants; import java.io.IOException; import 
java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Notification consumer mechanism for PacBio runs.
 * <p/>
 * Consumes a message whose payload maps a {@link HealthType} name to a JSON-encoded list of runs,
 * and, for each run, either creates/updates the corresponding {@link Run} (and its sequencer
 * partition container) via the {@link RequestManager} supplied in the "handler" message header,
 * or, when {@link #attemptRunPopulation} is false, only saves the run status.
 *
 * @author Rob Davey
 * @date 03/02/12
 * @since 0.1.5
 */
public class PacBioNotificationMessageConsumerMechanism implements NotificationMessageConsumerMechanism<Message<Map<String, List<String>>>, Set<Run>> {
  protected static final Logger log = LoggerFactory.getLogger(PacBioNotificationMessageConsumerMechanism.class);

  /** When false, runs are never created or updated; only their status is saved. */
  public boolean attemptRunPopulation = true;

  public void setAttemptRunPopulation(boolean attemptRunPopulation) {
    this.attemptRunPopulation = attemptRunPopulation;
  }

  private final String runDirRegex = RunFolderConstants.PACBIO_FOLDER_NAME_GROUP_CAPTURE_REGEX;
  private final Pattern p = Pattern.compile(runDirRegex);

  /**
   * Processes every health-type entry of the message payload and collects the resulting runs.
   *
   * @param message message carrying a {@link RequestManager} in its "handler" header and a payload
   *                keyed by {@link HealthType} constant name
   * @return the set of runs that were created or updated
   * @throws InterrogationException declared by the implemented interface
   * @throws IllegalArgumentException if the "handler" header is missing, or (via
   *                                  {@link HealthType#valueOf(String)}) a payload key is not a
   *                                  HealthType constant
   */
  @Override
  public Set<Run> consume(Message<Map<String, List<String>>> message) throws InterrogationException {
    RequestManager requestManager = message.getHeaders().get("handler", RequestManager.class);
    Assert.notNull(requestManager, "Cannot consume MISO notification messages without a RequestManager.");

    Map<String, List<String>> statuses = message.getPayload();
    Set<Run> output = new HashSet<Run>();
    for (Map.Entry<String, List<String>> entry : statuses.entrySet()) {
      HealthType ht = HealthType.valueOf(entry.getKey());
      // the list of runs is the first element of the JSON array built from the payload value
      JSONArray runs = (JSONArray) JSONArray.fromObject(entry.getValue()).get(0);
      output.addAll(processRunJSON(ht, runs, requestManager).values());
    }
    return output;
  }

  /**
   * Processes each run in the supplied JSON array and batch-saves those that were populated.
   * IOExceptions raised while handling a single run are logged and do not stop the remaining runs.
   *
   * @param ht             health to record on each run's new status
   * @param runs           JSON array of run objects
   * @param requestManager manager used for all lookups and saves
   * @return populated runs keyed by alias
   */
  private Map<String, Run> processRunJSON(HealthType ht, JSONArray runs, RequestManager requestManager) {
    Map<String, Run> updatedRuns = new HashMap<String, Run>();
    List<Run> runsToSave = new ArrayList<Run>();
    // SimpleDateFormat is not thread-safe, so a fresh instance is used per invocation
    DateFormat startDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");

    for (JSONObject run : (Iterable<JSONObject>) runs) {
      String runName = run.getString("runName");
      log.info("Processing " + runName);

      String status = decodeCellStatuses(run);
      if ("".equals(status)) {
        log.error("No notification status available for " + runName);
        continue;
      }
      if (status.startsWith("ERROR")) {
        continue;
      }

      try {
        Run r = processRun(ht, run, runName, status, startDateFormat, requestManager);
        if (r != null) {
          updatedRuns.put(r.getAlias(), r);
          runsToSave.add(r);
        }
      }
      catch (IOException e) {
        log.error(e.getMessage(), e);
      }
    }

    try {
      if (!runsToSave.isEmpty()) {
        int[] saved = requestManager.saveRuns(runsToSave);
        log.info("Batch saved " + saved.length + " / "+ runs.size() + " runs");
      }
    }
    catch (IOException e) {
      log.error("Couldn't save run batch: " + e.getMessage(), e);
    }

    return updatedRuns;
  }

  /**
   * Concatenates the decoded "cellStatus" of every cell of the run, each followed by a blank line.
   * Cells whose status cannot be decoded or decompressed are logged and skipped.
   *
   * @return the concatenated statuses, or an empty string if none could be read
   */
  private static String decodeCellStatuses(JSONObject run) {
    StringBuilder status = new StringBuilder();
    if (run.has("cells")) {
      JSONArray cells = run.getJSONArray("cells");
      for (JSONObject cell : (Iterable<JSONObject>) cells) {
        if (cell.has("cellStatus")) {
          try {
            // NOTE(review): getBytes()/new String() use the platform default charset - confirm this is intended
            String s = new String(IntegrationUtils.decompress(URLDecoder.decode(cell.getString("cellStatus"), "UTF-8").getBytes()));
            status.append(s).append("\n\n");
          }
          catch (UnsupportedEncodingException e) {
            log.error("Cannot decode status xml: " + e.getMessage(), e);
          }
          catch (IOException e) {
            log.error("Cannot decompress and decode incoming status: " + e.getMessage(), e);
          }
        }
      }
    }
    return status.toString();
  }

  /**
   * Handles a single run: builds its new status, then either saves only the status (run population
   * disabled) or creates/updates the run and links its partition container.
   *
   * @return the run to include in the batch save, or null if nothing should be saved for this run
   * @throws IOException if a RequestManager lookup or save fails
   */
  private Run processRun(HealthType ht, JSONObject run, String runName, String status,
                         DateFormat startDateFormat, RequestManager requestManager) throws IOException {
    Status is = new PacBioStatus(status);
    is.setHealth(ht);
    is.setRunName(runName);

    Run r = null;
    Matcher m = p.matcher(runName);
    boolean nameMatches = m.matches();
    if (nameMatches) {
      try {
        r = requestManager.getRunByAlias(runName);
      }
      catch (IOException ioe) {
        log.warn("Cannot find run by this alias. This usually means the run hasn't been previously imported. If attemptRunPopulation is false, processing will not take place for this run!");
      }
    }

    if (!attemptRunPopulation) {
      log.warn("\\_ Run not saved. Saving status: " + is.getRunName());
      requestManager.saveStatus(is);
      return null;
    }

    if (!nameMatches) {
      // Matcher.group() throws IllegalStateException when the match failed; skip this run
      // instead of letting an unchecked exception abort the whole batch.
      log.error("\\_ Cannot process " + runName + ": run name does not match the expected PacBio run folder pattern.");
      return null;
    }

    String description = m.group(2);
    if (r == null) {
      r = createRun(run, status, description, is, startDateFormat, requestManager);
    }
    else {
      updateRun(r, run, runName, description, is, startDateFormat, requestManager);
    }

    if (r.getSequencerReference() == null) {
      // without a sequencer reference the run is neither linked to a container nor saved
      return null;
    }

    linkContainers(r, run, requestManager);
    return r;
  }

  /**
   * Builds a new PacBio run from the notification. The new status and sequencer reference are only
   * attached when a sequencer reference can be resolved from the run's "sequencerName".
   */
  private Run createRun(JSONObject run, String status, String description, Status is,
                        DateFormat startDateFormat, RequestManager requestManager) throws IOException {
    log.info("\\_ Saving new run and status: " + is.getRunName());
    Run r = new PacBioRun(status);
    r.setAlias(run.getString("runName"));
    r.setDescription(description);
    r.setPairedEnd(false);

    if (run.has("fullPath")) {
      r.setFilePath(run.getString("fullPath"));
    }

    SequencerReference sr = findSequencerReference(run, requestManager);
    if (sr != null) {
      // NOTE(review): the dates are written to the status currently attached to the run, which is
      // replaced by 'is' just below - confirm whether they should be set on 'is' instead.
      applyRunDates(r, run, startDateFormat);
      is.setInstrumentName(sr.getName());
      r.setStatus(is);
      r.setSequencerReference(sr);
    }
    else {
      log.error("\\_ Cannot save " + is.getRunName() + ": no sequencer reference available.");
    }
    return r;
  }

  /**
   * Refreshes a previously imported run from the notification. Dates, file path and status are only
   * touched when the run has (or can be given) a sequencer reference; the status is only replaced
   * while the run's current health is neither Completed nor Failed.
   */
  private void updateRun(Run r, JSONObject run, String runName, String description, Status is,
                         DateFormat startDateFormat, RequestManager requestManager) throws IOException {
    log.info("\\_ Updating existing run and status: " + is.getRunName());
    r.setAlias(runName);
    r.setPlatformType(PlatformType.PACBIO);
    r.setDescription(description);
    r.setPairedEnd(false);

    if (r.getSequencerReference() == null) {
      SequencerReference sr = findSequencerReference(run, requestManager);
      if (sr != null) {
        r.getStatus().setInstrumentName(sr.getName());
        r.setSequencerReference(sr);
      }
    }

    if (r.getSequencerReference() == null) {
      return;
    }

    // NOTE(review): dates are set on the current status, which may be replaced by 'is' below
    applyRunDates(r, run, startDateFormat);

    // update path if changed
    if (hasNonEmpty(run, "fullPath") && r.getFilePath() != null && !"".equals(r.getFilePath())) {
      String fullPath = run.getString("fullPath");
      if (!fullPath.equals(r.getFilePath())) {
        log.info("Updating run file path:" + r.getFilePath() + " -> " + fullPath);
        r.setFilePath(fullPath);
      }
    }

    // update status if run isn't completed or failed
    HealthType currentHealth = r.getStatus().getHealth();
    if (!currentHealth.equals(HealthType.Completed) && !currentHealth.equals(HealthType.Failed)) {
      log.info("Saving previously saved status: " + is.getRunName() + " (" + currentHealth.getKey() + " -> " + is.getHealth().getKey() + ")");
      r.setStatus(is);
    }
  }

  /**
   * Ensures the run is associated with a partition container sized for the reported cells: updates
   * the run's first existing container, links the single container found by the "plateId" barcode,
   * or creates and saves a new container when no container has that barcode.
   */
  private void linkContainers(Run r, JSONObject run, RequestManager requestManager) throws IOException {
    List<SequencerPartitionContainer<SequencerPoolPartition>> fs = ((PacBioRun) r).getSequencerPartitionContainers();
    if (!fs.isEmpty()) {
      updateExistingContainer(r, fs.iterator().next(), run, requestManager);
      return;
    }

    if (!hasNonEmpty(run, "plateId")) {
      return;
    }

    Collection<SequencerPartitionContainer<SequencerPoolPartition>> pfs =
        requestManager.listSequencerPartitionContainersByBarcode(run.getString("plateId"));
    if (pfs.isEmpty()) {
      if (run.has("cells")) {
        createContainer(r, run, requestManager);
      }
    }
    else if (pfs.size() == 1) {
      SequencerPartitionContainer<SequencerPoolPartition> lf = pfs.iterator().next();
      if (lf.getSecurityProfile() != null && r.getSecurityProfile() == null) {
        r.setSecurityProfile(lf.getSecurityProfile());
      }
      if (lf.getPlatform() == null && r.getSequencerReference().getPlatform() != null) {
        lf.setPlatform(r.getSequencerReference().getPlatform());
      }
      growPartitions(lf, run.getJSONArray("cells").size());
      ((RunImpl) r).addSequencerPartitionContainer(lf);
    }
    else {
      //more than one flowcell hit to this barcode
      log.warn(r.getAlias() + ":: More than one container has this barcode. Cannot automatically link to a pre-existing barcode.");
    }
  }

  /** Creates, saves and attaches a new container with one empty partition per reported cell. */
  private void createContainer(Run r, JSONObject run, RequestManager requestManager) throws IOException {
    JSONArray cells = run.getJSONArray("cells");
    SequencerPartitionContainer f = new SequencerPartitionContainerImpl();
    f.setPartitionLimit(cells.size());
    f.initEmptyPartitions();
    if (hasNonEmpty(run, "plateId")) {
      f.setIdentificationBarcode(run.getString("plateId"));
    }
    if (f.getPlatform() == null && r.getSequencerReference().getPlatform() != null) {
      f.setPlatform(r.getSequencerReference().getPlatform());
    }
    f.setRun(r);
    log.info("\\_ Created new container with "+f.getPartitions().size()+" partitions");
    long flowId = requestManager.saveSequencerPartitionContainer(f);
    f.setId(flowId);
    ((RunImpl) r).addSequencerPartitionContainer(f);
    //TODO match up samples to libraries and pools? Or match up pool numbers
  }

  /**
   * Synchronises the run's existing container with the notification: copies the run's security
   * profile, fills in a missing platform and barcode (saving the container when the barcode is
   * set), and adds partitions if more cells are reported than partitions exist.
   */
  private void updateExistingContainer(Run r, SequencerPartitionContainer f, JSONObject run,
                                       RequestManager requestManager) throws IOException {
    f.setSecurityProfile(r.getSecurityProfile());
    if (f.getPlatform() == null && r.getSequencerReference().getPlatform() != null) {
      f.setPlatform(r.getSequencerReference().getPlatform());
    }
    if (f.getIdentificationBarcode() == null || "".equals(f.getIdentificationBarcode())) {
      if (hasNonEmpty(run, "plateId")) {
        f.setIdentificationBarcode(run.getString("plateId"));
        requestManager.saveSequencerPartitionContainer(f);
      }
    }
    growPartitions(f, run.getJSONArray("cells").size());
  }

  /** Raises the partition limit and appends new partitions until the container holds cellCount. */
  private static void growPartitions(SequencerPartitionContainer container, int cellCount) {
    int existing = container.getPartitions().size();
    if (cellCount > existing) {
      container.setPartitionLimit(cellCount);
      for (int i = existing; i < cellCount; i++) {
        container.addNewPartition();
      }
    }
  }

  /** Looks up the sequencer reference named by "sequencerName"; null when the key is absent. */
  private static SequencerReference findSequencerReference(JSONObject run, RequestManager requestManager) throws IOException {
    if (run.has("sequencerName")) {
      return requestManager.getSequencerReferenceByName(run.getString("sequencerName"));
    }
    return null;
  }

  /**
   * Parses the non-empty "startDate" and "completionDate" values of the notification onto the
   * run's current status. Unparseable dates are logged and ignored.
   */
  private static void applyRunDates(Run r, JSONObject run, DateFormat dateFormat) {
    if (hasNonEmpty(run, "startDate")) {
      try {
        r.getStatus().setStartDate(dateFormat.parse(run.getString("startDate")));
      }
      catch (ParseException e) {
        log.error(e.getMessage(), e);
      }
    }
    if (hasNonEmpty(run, "completionDate")) {
      try {
        r.getStatus().setCompletionDate(dateFormat.parse(run.getString("completionDate")));
      }
      catch (ParseException e) {
        log.error(e.getMessage(), e);
      }
    }
  }

  /** True when the JSON object has the key and its string value is not empty. */
  private static boolean hasNonEmpty(JSONObject json, String key) {
    return json.has(key) && !"".equals(json.getString(key));
  }
}