package gov.nih.ncgc.bard.capextract.handler; import gov.nih.ncgc.bard.capextract.CAPAnnotation; import gov.nih.ncgc.bard.capextract.CAPConstants; import gov.nih.ncgc.bard.capextract.CAPUtil; import gov.nih.ncgc.bard.capextract.ICapResourceHandler; import gov.nih.ncgc.bard.capextract.ScoreHandler; import gov.nih.ncgc.bard.capextract.jaxb.AbstractContextItemType; import gov.nih.ncgc.bard.capextract.jaxb.ContextItemType; import gov.nih.ncgc.bard.capextract.jaxb.ContextType; import gov.nih.ncgc.bard.capextract.jaxb.ContextType.ContextItems; import gov.nih.ncgc.bard.capextract.jaxb.Contexts; import gov.nih.ncgc.bard.capextract.jaxb.Experiment; import gov.nih.ncgc.bard.capextract.jaxb.ExternalSystem; import gov.nih.ncgc.bard.capextract.jaxb.Link; import gov.nih.ncgc.bard.search.SearchUtil; import gov.nih.ncgc.bard.tools.Util; import java.io.IOException; import java.math.BigInteger; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Vector; import org.apache.solr.client.solrj.SolrServerException; /** * Process CAP <code>Experiment</code> elements. * <p/> * Currently, the class focuses on getting the lookup from PubChem AID to CAP Experiment and Assay IDs. * * @author Rajarshi Guha */ public class ExperimentHandler extends CapResourceHandler implements ICapResourceHandler { static String PUBCHEM = "PubChem,NIH,http://pubchem.ncbi.nlm.nih.gov/assay/assay.cgi?"; private HashMap<BigInteger, String> _CAP_ExptID_PubChemAID_lookup = new HashMap<BigInteger, String>(); private HashMap<BigInteger, Integer> _CAP_ExptID_AssayID_lookup = new HashMap<BigInteger, Integer>(); private HashMap<String, String> _CAP_ExptID_ProjID_lookup = new HashMap<String, String>(); private Vector<String[]> _CAP_Proj_Expt_link = new Vector<String[]>(); private int bardExptId; private int pubchemAid; public ExperimentHandler() { super(); } public int getBardExptId() { return bardExptId; } public int getPubchemAid() { return pubchemAid; } /** * Process a CAP entity that is located at some URL. * * @param url The URL from which to retrieve the entity fron * @param resource The CAP resource that is meant to be processed. An implementing class * can choose to proceed or not based on this parameter. */ public int process(String url, CAPConstants.CapResource resource) throws IOException { //set this to -1 initially. The handler persists and may call on the captured bardExptId bardExptId = -1; if (resource != CAPConstants.CapResource.EXPERIMENT) return CAPConstants.CAP_EXTRACT_LOAD_STATUS_FAILED; Experiment expt = getResponse(url, resource); if (expt == null) return CAPConstants.CAP_EXTRACT_LOAD_STATUS_FAILED; BigInteger exptID = expt.getExperimentId(); BigInteger confLevel = expt.getConfidenceLevel(); String status = expt.getStatus(); String extractionStatus = expt.getReadyForExtraction(); log.info("Processing CAP experiment " + exptID + " " + url); log.info("Cap experiment = "+exptID + " status ="+status); log.info("Cap experiment = "+exptID + " extraction status ="+extractionStatus); //first check if it's approved if(!"Approved".equals(status) && !"Retired".equals(status) && !"Provisional".equals(status)) { log.warn("Unable to process "+ status +" experiments (aborting experiment load), experiment:" + url + " " + status); setExtractionStatus("Failed", url, resource); return CAPConstants.CAP_EXTRACT_LOAD_STATUS_FAILED; } if("Retired".equals(status)) { log.info("RETIRED EXPERIMENT! CAP Experiment " + exptID + " has Retired status. Initiating Retirement."); this.retireExperiment(exptID.longValue()); return CAPConstants.CAP_EXTRACT_LOAD_STATUS_COMPLETE; } //check the EXTRACTION status, if can't determine readyForExtraction, or it's 'Not Ready', don't load. if(extractionStatus == null || extractionStatus.equals("Not Ready")) { log.warn("Aborting Load!!! Cap experiment = "+exptID + " extraction status ="+extractionStatus); return CAPConstants.CAP_EXTRACT_LOAD_STATUS_FAILED; } ExternalReferenceHandler extrefHandler = new ExternalReferenceHandler(); ExternalSystemHandler extsysHandler = new ExternalSystemHandler(); AssayHandler assayHandler = new AssayHandler(); int bardAssayId = -1; // first lets go through all the links and look for an assay id // given an assay id, check to see if we already loaded it. If // so, carry on with the experiment. Otherwise first load the // assay and then get the BARD assay id for (Link link : expt.getLink()) { if (!link.getType().equals(CAPConstants.CapResource.ASSAY.getMimeType())) continue; String capAssayId = Util.getEntityIdFromUrl(link.getHref()); try { Connection conn = CAPUtil.connectToBARD(CAPConstants.getBardDBJDBCUrl()); PreparedStatement pst = conn.prepareStatement("select bard_assay_id, cap_assay_id from bard_assay where cap_assay_id = ?"); pst.setLong(1, Long.parseLong(capAssayId)); ResultSet rs = pst.executeQuery(); while (rs.next()) bardAssayId = rs.getInt("bard_assay_id"); rs.close(); pst.close(); conn.close(); if (bardAssayId == -1) { assayHandler.process(link.getHref(), CAPConstants.CapResource.ASSAY); bardAssayId = assayHandler.getBardAssayId(); if (bardAssayId == -1) { log.error("Invalid (missing referenced assay) bardAssayId even after inserting CAP assay id " + capAssayId + ". ABORTING EXPERIMENT LOAD"); bardExptId = -1; return CAPConstants.CAP_EXTRACT_LOAD_STATUS_FAILED; } } _CAP_ExptID_AssayID_lookup.put(exptID, bardAssayId); } catch (SQLException e) { e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. } } for (Link link : expt.getLink()) { // <link rel='related' title='Link to Assay' type='application/vnd.bard.cap+xml;type=assay' href='https://bard.broadinstitute.org/dataExport/api/assays/441' /> if (!link.getType().equals(CAPConstants.CapResource.EXTREF.getMimeType())) continue; // get a Pubchem AID extrefHandler.process(link.getHref(), CAPConstants.CapResource.EXTREF); String externalAssayRef = extrefHandler.getExternalAssayRef(); String aid = null; if (externalAssayRef != null && externalAssayRef.startsWith("aid=")) { aid = externalAssayRef.split("=")[1]; } for (Link refLink : extrefHandler.getLinks()) { if (refLink.getType().equals(CAPConstants.CapResource.EXTSYS.getMimeType())) { extsysHandler.process(refLink.getHref(), CAPConstants.CapResource.EXTSYS); ExternalSystem extsys = extsysHandler.getExtsys(); String source = extsys.getName() + "," + extsys.getOwner() + "," + extsys.getSystemUrl(); if (PUBCHEM.equals(source)) { if (_CAP_ExptID_PubChemAID_lookup.containsValue(aid)) { log.error("The same AID maps to multple experiments: " + aid + " eid:" + exptID + " eid:" + _CAP_ExptID_PubChemAID_lookup.get(exptID)); pubchemAid = -1; } else { _CAP_ExptID_PubChemAID_lookup.put(exptID, aid); pubchemAid = Integer.parseInt(aid); } } } } } List<CAPAnnotation> annos = new ArrayList<CAPAnnotation>(); Contexts contexts = expt.getContexts(); if (contexts != null) { for (ContextType context : contexts.getContext()) { BigInteger contextId = context.getId(); String contextName = context.getContextName(); String contextGroup = context.getContextGroup(); ContextItems contextItems = context.getContextItems(); if(contextItems != null) { for (ContextItemType contextItem : contextItems.getContextItem()) { String valueDisplay = contextItem.getValueDisplay(); int displayOrder = contextItem.getDisplayOrder(); // dict id for the annotation key String key = null; AbstractContextItemType.AttributeId attr = contextItem.getAttributeId(); if (attr != null) key = Util.getEntityIdFromUrl(attr.getLink().getHref()); // dict id for the annotation value String value = null; AbstractContextItemType.ValueId vc = contextItem.getValueId(); if (vc != null) value = Util.getEntityIdFromUrl(vc.getLink().getHref()); annos.add(new CAPAnnotation(contextId.intValue(), expt.getExperimentId().intValue(), valueDisplay, contextName, key, value, null, "cap-context", null, displayOrder, "experiment", null, contextGroup)); } } } } // lets do a first check to see if we have this experiment already // 07.17.2013 - moved this block after handling assays in case an assay links to the experment // and loads it in the code above. We need to check for this experiment after handling linked assays. int localBardExptId = -1; boolean doUpdate = false; try { Connection conn = CAPUtil.connectToBARD(CAPConstants.getBardDBJDBCUrl()); Statement query = conn.createStatement(); query.execute("select bard_expt_id, pubchem_aid from bard_experiment where cap_expt_id=" + expt.getExperimentId()); ResultSet rs = query.getResultSet(); while (rs.next()) { localBardExptId = rs.getInt(1); pubchemAid = rs.getInt(2); } rs.close(); query.close(); conn.close(); bardExptId = localBardExptId; if (bardExptId != -1) doUpdate = true; } catch (SQLException e) { } // ready to load in the data try { Connection conn = CAPUtil.connectToBARD(CAPConstants.getBardDBJDBCUrl()); String pubchemAidStr = null; PreparedStatement pstExpt; if (localBardExptId == -1) { pstExpt = conn.prepareStatement( "insert into bard_experiment (bard_assay_id, cap_expt_id, category, classification, description, pubchem_aid, type, name, confidence_level, status) values(?,?,?,?,?,?,?,?,?,?)", Statement.RETURN_GENERATED_KEYS); log.info("Inserting CAP experiment id " + expt.getExperimentId() + " as BARD experiment id " + localBardExptId); } else { pstExpt = conn.prepareStatement( "update bard_experiment set bard_assay_id=?, cap_expt_id=?, category=?, classification=?, description=?, pubchem_aid=?, type=?, name=?, confidence_level=?, status=? where bard_expt_id = ?"); log.info("Updating CAP experiment id " + expt.getExperimentId()); } pstExpt.setInt(1, _CAP_ExptID_AssayID_lookup.get(exptID)); pstExpt.setInt(2, exptID.intValue()); pstExpt.setInt(3, -1); pstExpt.setInt(4, -1); pstExpt.setString(5, expt.getDescription()); pubchemAidStr = _CAP_ExptID_PubChemAID_lookup.get(exptID); if(pubchemAidStr != null) pstExpt.setInt(6, Integer.parseInt(pubchemAidStr)); else pstExpt.setInt(6, -1); pstExpt.setInt(7, -1); pstExpt.setString(8, expt.getExperimentName()); if(confLevel != null) pstExpt.setFloat(9, (float) confLevel.intValue()); else pstExpt.setNull(9, java.sql.Types.FLOAT); pstExpt.setString(10, status); if (doUpdate) pstExpt.setLong(11, bardExptId); if(doUpdate) { // set the updated field even if none of the core entity fields change. setEntityUpdateField(bardExptId, resource); } pstExpt.executeUpdate(); if (!doUpdate) { // get the bard id that we just inserted ResultSet rs = pstExpt.getGeneratedKeys(); while (rs.next()) localBardExptId = rs.getInt(1); bardExptId = localBardExptId; rs.close(); } pstExpt.close(); // TODO this block implies we don't update expt annotations for pre-existing expts if (!doUpdate) { PreparedStatement pstAssayAnnot = conn.prepareStatement("insert into cap_annotation (source, entity, entity_id, anno_id, anno_key, anno_value, anno_value_text, anno_display, context_name, related, url, display_order) values(?,'experiment',?,?,?,?,?,?,?,?,?,?)"); for (CAPAnnotation anno : annos) { pstAssayAnnot.setString(1, anno.source); pstAssayAnnot.setInt(2, localBardExptId); pstAssayAnnot.setInt(3, anno.id); pstAssayAnnot.setString(4, anno.key); pstAssayAnnot.setString(5, anno.value); pstAssayAnnot.setString(6, anno.extValueId); // anno_value_text pstAssayAnnot.setString(7, anno.display); pstAssayAnnot.setString(8, anno.contextRef); // context_name pstAssayAnnot.setString(9, anno.related); // put into related field pstAssayAnnot.setString(10, anno.url); pstAssayAnnot.setInt(11, anno.displayOrder); pstAssayAnnot.addBatch(); } int[] updateCounts = pstAssayAnnot.executeBatch(); conn.commit(); pstAssayAnnot.close(); log.info("Inserted " + updateCounts.length + " annotations for CAP experiment id " + expt.getExperimentId()); } // Finally we update the scores of connected assays and projects ScoreHandler scoreHandler = new ScoreHandler(conn); scoreHandler.updateScores(bardExptId); conn.commit(); conn.close(); } catch (SQLException e) { e.printStackTrace(); log.error("Error inserting/updating the experiment or related annotations (see stack trace) for CAP expt id " + expt.getExperimentId() + "\n" + e.getMessage()); } return CAPConstants.CAP_EXTRACT_LOAD_STATUS_COMPLETE; } public void retireExperiment(long capExptId) { long bardExptId = 0l; try { Connection conn = CAPUtil.connectToBARD(CAPConstants.getBardDBJDBCUrl()); Statement stmt = conn.createStatement(); //get bard_expt_id ResultSet rs = stmt.executeQuery("select bard_expt_id from bard_experiment where cap_expt_id = "+capExptId); if(rs.next()) { bardExptId = rs.getLong(1); } else { //if bard assay doesn't exist, then we're done. log.info("Retirement Log ("+capExptId+"): No bardExptId exists. Exit Retirement."); return; } rs.close(); //delete experiment stmt.executeUpdate("delete from bard_experiment where bard_expt_id = " + bardExptId); log.info("Retirement Log ("+capExptId+"): Deleting experiment, bard_expt_id: " + bardExptId); //delete experiment data stmt.executeUpdate("delete from bard_experiment_data where bard_expt_id = " + bardExptId); log.info("Retirement Log ("+capExptId+"): Deleting experiment data, bard_expt_id: " + bardExptId); //delete experiment json responses stmt.executeUpdate("delete from bard_experiment_result where bard_expt_id = " + bardExptId); log.info("Retirement Log ("+capExptId+"): Deleting experiment results, bard_expt_id: " + bardExptId); //delete exploded data stmt.executeUpdate("delete from exploded_histograms where bard_expt_id = " + bardExptId); log.info("Retirement Log ("+capExptId+"): Deleted experiment exploded histograms, bard_expt_id:" + bardExptId); stmt.executeUpdate("delete from exploded_results where bard_expt_id = " + bardExptId); log.info("Retirement Log ("+capExptId+"): Deleted experiment exploded results, bard_expt_id:" + bardExptId); stmt.executeUpdate("delete from exploded_statistics where bard_expt_id = " + bardExptId); log.info("Retirement Log ("+capExptId+"): Deleted experiment exploded statistics, bard_expt_id:" + bardExptId); //delete project experiment mapping stmt.executeUpdate("delete from bard_project_experiment where bard_expt_id = " + bardExptId); log.info("Retirement Log ("+capExptId+"): Deleting project-experiment mapping, bard_expt_id: " + bardExptId); //delete project experiment steps stmt.executeUpdate("delete from project_step where prev_bard_expt_id = " + bardExptId + " or next_bard_expt_id = " + bardExptId); log.info("Retirement Log ("+capExptId+"): Deleting project-experiment steps, bard_expt_id: " + bardExptId); //delete experiment annotations stmt.executeUpdate("delete from cap_annotation where entity = 'experiment' and entity_id =" + bardExptId); log.info("Retirement Log ("+capExptId+"): Deleting experiment annotations, bard_expt_id: " + bardExptId); //commit to finish experiment updates to DB conn.commit(); conn.close(); } catch (SQLException sqle) { sqle.printStackTrace(); } log.info("Retirement Log ("+capExptId+"): Completed DB clean-up for bardExptID: "+bardExptId); //clean up related search indices String solrCoreUrl = null; try { log.info("Retirement Log ("+capExptId+"): Removing documents from SOLR for bardExptID: "+bardExptId); solrCoreUrl = CAPConstants.getSolrURL(CAPConstants.SOLR_RESOURCE_KEY_EXPERIMENT); if(solrCoreUrl != null) { SearchUtil.deleteDocs(solrCoreUrl, Long.toString(bardExptId)); log.info("Retirement Log ("+capExptId+"): Issued command to remove documents from SOLR for bardExptID: "+bardExptId+" SOLR URL:"+solrCoreUrl); } else { log.warn("Retirement Log ("+capExptId+"): FAILED to remove documents from SOLR for bardExptID: "+bardExptId+" SOLR URL: NULL!"); } } catch (IOException e) { log.warn("Retirement Log ("+capExptId+"): IOException removing documents from SOLR for bardExptID: "+bardExptId+" SOLR URL:"+solrCoreUrl); e.printStackTrace(); } catch (SolrServerException e) { log.warn("Retirement Log ("+capExptId+"): SolrServerException, FAILED to remove documents from SOLR for bardExptID: "+bardExptId+" SOLR URL:"+solrCoreUrl); e.printStackTrace(); } } // // public void printLookup() { // try { // Connection conn = CAPUtil.connectToBARD(); // Statement st = conn.createStatement(); // // ResultSet result = st.executeQuery("select cap_expt_id, pubchem_aid from bard_experiment"); // where cap_expt_id=3134"); // while (result.next()) { // String capExptId = result.getString(1); // String pubchemAID = "aid="+result.getString(2); // if (!_CAP_ExptID_PubChemAID_lookup.containsKey(capExptId)) { // log.error("CAP Experiment no longer exists: CAP Expt ID="+capExptId); // } else { // if (!_CAP_ExptID_PubChemAID_lookup.get(capExptId).equals(pubchemAID)) // log.error("CAP Experiment now maps to different PubChemAID: CAP Expt ID, PubChemAID="+capExptId+","+_CAP_ExptID_PubChemAID_lookup.get(capExptId)); // _CAP_ExptID_PubChemAID_lookup.remove(capExptId); // } // } // result.close(); // for (String capExptId: _CAP_ExptID_PubChemAID_lookup.keySet()) // log.error("New CAP Experiment (and AID?): CAP Expt ID="+capExptId+" (AID="+_CAP_ExptID_PubChemAID_lookup.get(capExptId)+")"); // // ResultSet result2 = st.executeQuery("select cap_expt_id, cap_assay_id from bard_experiment"); // where cap_expt_id=3134"); // while (result2.next()) { // String capExptId = result2.getString(1); // String capAssayId = result2.getString(2); // if (!_CAP_ExptID_AssayID_lookup.containsKey(capExptId)) { // log.error("CAP Experiment no longer exists: CAP Expt ID="+capExptId); // } else { // if (!_CAP_ExptID_AssayID_lookup.get(capExptId).equals(capAssayId)) // log.error("CAP Experiment now maps to differen CAP Assay ID: CAP Expt ID="+capExptId); // _CAP_ExptID_AssayID_lookup.remove(capExptId); // } // } // result2.close(); // for (String capExptId: _CAP_ExptID_AssayID_lookup.keySet()) // log.error("New CAP Experiment (and CAP AID?): CAP Expt ID="+capExptId+" (AID="+_CAP_ExptID_AssayID_lookup.get(capExptId)+")"); // // ResultSet result3 = st.executeQuery("select b.cap_proj_id, c.cap_expt_id, c.cap_assay_id, a.bard_expt_id, a.bard_proj_id, a.pubchem_aid from bard_project_experiment a, bard_project b, bard_experiment c where a.bard_proj_id=b.bard_proj_id and a.bard_expt_id=c.bard_expt_id"); // and cap_expt_id=3134"); // while (result3.next()) { // String capProjId = result3.getString(1); // String capExptId = result3.getString(2); // // int match = -1; // for (int i=_CAP_Proj_Expt_link.size()-1; i>-1; i--) { // if (_CAP_Proj_Expt_link.get(i)[0].equals(capProjId) && // _CAP_Proj_Expt_link.get(i)[1].equals(capExptId)) { // match = i; // _CAP_Proj_Expt_link.remove(match); // } // } // if (match == -1) // log.error("Project Expt link no longer exists: CAP Proj, Expt="+capProjId+","+capExptId); // } // result3.close(); // for (String[] newer: _CAP_Proj_Expt_link) // log.error("New Project Expt link: CAP Proj, Expt="+newer[0]+","+newer[1]); // } catch (Exception e) {e.printStackTrace();} // //// for (String key: _CAP_ExptID_PubChemAID_lookup.keySet()) //// System.out.println(key+","+_CAP_ExptID_PubChemAID_lookup.get(key)+","+_CAP_ExptID_AssayID_lookup.get(key)+","+_CAP_ExptID_ProjID_lookup.get(key)); //// System.out.println("CAP Project -> Expt Links"); //// for (String[] entry: _CAP_Proj_Expt_link) { //// System.out.println(entry[0]+","+entry[1]+","+_CAP_ExptID_AssayID_lookup.get(entry[1])+","+_CAP_ExptID_PubChemAID_lookup.get(entry[1])); // } }