package gov.nih.ncgc.bard.capextract.handler;
import com.sun.org.apache.xerces.internal.dom.ElementNSImpl;
import gov.nih.ncgc.bard.capextract.CAPAnnotation;
import gov.nih.ncgc.bard.capextract.CAPConstants;
import gov.nih.ncgc.bard.capextract.CAPDictionary;
import gov.nih.ncgc.bard.capextract.CAPDictionaryElement;
import gov.nih.ncgc.bard.capextract.CAPUtil;
import gov.nih.ncgc.bard.capextract.CapResourceHandlerRegistry;
import gov.nih.ncgc.bard.capextract.ICapResourceHandler;
import gov.nih.ncgc.bard.capextract.jaxb.AbstractContextItemType;
import gov.nih.ncgc.bard.capextract.jaxb.ContextItemType;
import gov.nih.ncgc.bard.capextract.jaxb.ContextType;
import gov.nih.ncgc.bard.capextract.jaxb.Contexts;
import gov.nih.ncgc.bard.capextract.jaxb.DocumentType;
import gov.nih.ncgc.bard.capextract.jaxb.ExternalSystem;
import gov.nih.ncgc.bard.capextract.jaxb.Link;
import gov.nih.ncgc.bard.capextract.jaxb.Project;
import gov.nih.ncgc.bard.capextract.jaxb.ProjectExperiment;
import gov.nih.ncgc.bard.capextract.jaxb.ProjectStep;
import gov.nih.ncgc.bard.entity.Biology;
import gov.nih.ncgc.bard.search.SearchUtil;
import gov.nih.ncgc.bard.tools.Util;
import nu.xom.ParsingException;
import org.apache.solr.client.solrj.SolrServerException;
import org.w3c.dom.Node;
import javax.xml.bind.JAXBElement;
import java.io.IOException;
import java.math.BigInteger;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* A one line summary.
*
* @author Rajarshi Guha
*/
public class ProjectHandler extends CapResourceHandler implements ICapResourceHandler {
boolean projectExists = false;
int bardProjId = -1;
Connection conn;
final static String PUBCHEM = "PubChem,NIH,http://pubchem.ncbi.nlm.nih.gov/assay/assay.cgi?";
public ProjectHandler() {
super();
}
/**
* Process a CAP entity that is located at some URL.
*
* @param url The URL from which to retrieve the entity fron
* @param resource The CAP resource that is meant to be processed. An implementing class
* can choose to proceed or not based on this parameter.
*/
public int process(String url, CAPConstants.CapResource resource) throws IOException {
if (resource != CAPConstants.CapResource.PROJECT) return CAPConstants.CAP_EXTRACT_LOAD_STATUS_FAILED;
// get the Project object here
Project project = getResponse(url, resource);
if (project == null) return CAPConstants.CAP_EXTRACT_LOAD_STATUS_FAILED;
if (project.getProjectSteps() == null) {
log.info("$$$ null project steps");
} else {
log.info("$$$ have project steps!!!");
if (project.getProjectSteps().getProjectStep() != null) {
log.info("$$$ have step list, list size=" + project.getProjectSteps().getProjectStep().size());
}
}
String readyToXtract = project.getReadyForExtraction();
String title = project.getProjectName();
BigInteger pid = project.getProjectId();
// log.info("\taurl = [" + readyToXtract + "] for " + title + " pid " + pid);
//JB: Note, project will not be exposed unless it's 'Ready'
int loadStatus = process(project, url, resource);
return loadStatus;
}
public int process(Project project, String url, CAPConstants.CapResource resource) {
String readyToXtract = project.getReadyForExtraction();
if (!"Ready".equals(readyToXtract)) log.error("Proceeding even though project not ready: " + readyToXtract);
String groupType = project.getGroupType();
if (!"Project".equals(groupType)) log.error("Group type other than Project: " + groupType);
/// retireProject
ExternalReferenceHandler extrefHandler = new ExternalReferenceHandler();
ExternalSystemHandler extsysHandler = new ExternalSystemHandler();
// 3/26/13 note: we used to perform a check for projectSteps, they are no longer mandatory for project loading
int capProjectId = project.getProjectId().intValue();
int pubchemAid = -1;
String status = project.getStatus();
if(status != null) {
if(!status.equals("Approved") && !status.equals("Retired") && !"Provisional".equals(status)) {
log.warn("ABORT PROJECT Load. CAP Project ID:"+capProjectId+" Status:"+status+" Only Approved, Provisional, and Retired projects can be processed.");
setExtractionStatus("Failed", url, resource);
return CAPConstants.CAP_EXTRACT_LOAD_STATUS_FAILED;
}
if(status.equals("Retired")) {
log.warn("RETIRE PROJECT, CAP Project ID:"+capProjectId+" Starting retirement process.");
retireProject(capProjectId);
return CAPConstants.CAP_EXTRACT_LOAD_STATUS_COMPLETE;
}
}
try {
// look for a Pubchem AID (ie summary aid)
for (Link link : project.getLink()) {
if (!link.getType().equals(CAPConstants.CapResource.EXTREF.getMimeType())) continue;
// get a Pubchem AID
extrefHandler.process(link.getHref(), CAPConstants.CapResource.EXTREF);
String externalAssayRef = extrefHandler.getExternalAssayRef();
String aid = null;
if (externalAssayRef != null && externalAssayRef.startsWith("aid=")) {
aid = externalAssayRef.split("=")[1];
}
for (Link refLink : extrefHandler.getLinks()) {
if (refLink.getType().equals(CAPConstants.CapResource.EXTSYS.getMimeType())) {
extsysHandler.process(refLink.getHref(), CAPConstants.CapResource.EXTSYS);
ExternalSystem extsys = extsysHandler.getExtsys();
String source = extsys.getName() + "," + extsys.getOwner() + "," + extsys.getSystemUrl();
if (PUBCHEM.equals(source) && aid != null) pubchemAid = Integer.parseInt(aid);
}
}
}
log.info("Got Pubchem AID = " + pubchemAid + " for CAP project id = " + capProjectId);
conn = CAPUtil.connectToBARD(CAPConstants.getBardDBJDBCUrl());
PreparedStatement pst = null;
Statement st = conn.createStatement();
ResultSet result = st.executeQuery("select bard_proj_id, name, description from bard_project where cap_proj_id=" + capProjectId);
bardProjId = -1;
while (result.next()) bardProjId = result.getInt(1);
result.close();
if (bardProjId == -1) {
log.info("Will insert new project for CAP project id = " + capProjectId);
pst = conn.prepareStatement(
"insert into bard_project (cap_proj_id, name, description, pubchem_aid) values (?,?,?,?)",
Statement.RETURN_GENERATED_KEYS);
pst.setInt(1, capProjectId);
pst.setString(2, project.getProjectName());
pst.setString(3, project.getDescription());
pst.setInt(4, pubchemAid);
int insertedRows = pst.executeUpdate();
if (insertedRows == 0) {
log.error("Could not insert project into bard_project for CAP project id = " + capProjectId);
throw new SQLException();
}
result = pst.getGeneratedKeys();
while (result.next()) bardProjId = result.getInt(1);
result.close();
pst.close();
log.info("Inserted CAP project id " + capProjectId + " as BARD project id " + bardProjId);
} else {
projectExists = true;
log.info("Will do an update for the CAP project id = " + project.getProjectId());
// set the updated field even if none of the core entity fields change.
setEntityUpdateField(bardProjId, CAPConstants.CapResource.PROJECT);
pst = conn.prepareStatement("update bard_project set name=?, description=?, pubchem_aid=? where cap_proj_id = ?");
pst.setString(1, project.getProjectName());
pst.setString(2, project.getDescription());
pst.setInt(3, pubchemAid);
pst.setInt(4, capProjectId);
pst.executeUpdate();
}
// at this point we have a valid bard project id, lets insert all the extra stuff
List<CAPAnnotation> annos = new ArrayList<CAPAnnotation>();
// deal with project annotations
annos.addAll(processAnnotations(project));
// handle project documents - we'll get back documents as CAPAnnotation objects
annos.addAll(processDocuments(project));
// deal with targets
processTargets(project);
// load expts
processExperiments(project, pubchemAid);
// handle project steps and include anny anno's we get from this, possibly empty
annos.addAll(processProjectSteps(project));
/*
* The sections below clear project annoations and probe info
*/
// delete all the annos for the project before reload...
pst = conn.prepareStatement("delete from cap_project_annotation where bard_proj_id = ?");
pst.setLong(1, bardProjId);
pst.executeUpdate();
pst.close();
//we need to clear probe information before storing. An update will not remove probes that have been removed.
//its assumed that a CAP project record is complete, not incremental.
pst = conn.prepareStatement("delete from project_probe where bard_proj_id = ?");
pst.setLong(1, bardProjId);
pst.execute();
pst.close();
// store the annotations we've collected
if (annos.size() > 0) {
// now lets insert them all
PreparedStatement pstAnnot = conn.prepareStatement("replace into cap_project_annotation (bard_proj_id, cap_proj_id, source, entity, anno_id, anno_key, anno_value, anno_display, related, context_name, display_order, url) values (?,?,?,?,?,?,?,?,?,?,?,?)");
for (CAPAnnotation anno : annos) {
pstAnnot.setInt(1, anno.entityId); // for project this is bard_project.bardProjId, for project-step this is project_step.stepId
pstAnnot.setInt(2, project.getProjectId().intValue());
pstAnnot.setString(3, anno.source);
if (anno.entity.equals("project-step")) {
pstAnnot.setString(4, "project-step");
} else {
pstAnnot.setString(4, "project");
}
pstAnnot.setInt(5, anno.id);
pstAnnot.setString(6, anno.key); // anno_value_text
pstAnnot.setString(7, anno.value);
pstAnnot.setString(8, anno.display); // context_name
pstAnnot.setString(9, anno.related); // put into related field
pstAnnot.setString(10, anno.contextRef);
pstAnnot.setInt(11, anno.displayOrder);
pstAnnot.setString(12, anno.url);
try {
pstAnnot.executeUpdate();
} catch (com.mysql.jdbc.exceptions.jdbc4.MySQLIntegrityConstraintViolationException e) {
log.info("Got a integrity violation constraint. Probably this anno " + anno.id + " already exists");
}
}
conn.commit();
pstAnnot.close();
log.info("\tLoaded " + annos.size() + " annotations (from " + annos.size() + " CAP annotations) for cap project id " + project.getProjectId());
//project source
//if we have annotations, lets backfill the project source.
Statement projectSourceStmt = conn.createStatement();
ResultSet projectSourceRS = projectSourceStmt.executeQuery("select anno_display from cap_project_annotation where anno_key = 559 and bard_proj_id = "+bardProjId);
String projectSource = "empty";
if(projectSourceRS.next()) {
projectSource = projectSourceRS.getString(1);
}
projectSourceRS.close();
if(projectSource != null && !projectSource.equals("empty")) {
//we have a name
projectSourceStmt.executeUpdate("update bard_project set source = '"+projectSource+"' where bard_proj_id = "+bardProjId);
conn.commit();
}
projectSourceStmt.close();
}
updateProbeLinks(annos, (long) bardProjId);
conn.commit();
st.close();
pst.close();
conn.close();
} catch (SQLException ex) {
ex.printStackTrace();
} catch (IOException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
} catch (ParsingException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
} catch (ClassNotFoundException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
}
return CAPConstants.CAP_EXTRACT_LOAD_STATUS_COMPLETE;
}
void updateProbeLinks(List<CAPAnnotation> annos, Long bardProjId) {
// we look at biology for this project and just pull out protein targets
List<String> targetAccs = new ArrayList<String>();
try {
PreparedStatement targetPst = conn.prepareStatement("select ext_id from bard_biology where entity = 'project' and biology_dict_id = 1398 and entity_id = ?");
targetPst.setLong(1, bardProjId);
ResultSet rs = targetPst.executeQuery();
while (rs.next()) targetAccs.add(rs.getString(1));
rs.close();
targetPst.close();
} catch (SQLException e) {
e.printStackTrace();
}
CAPDictionary dict = CAPConstants.getDictionary();
Map<Integer, List<CAPAnnotation>> annoGroups = groupAnnotationsByAnnoId(annos);
for (Integer annoId : annoGroups.keySet()) {
List<CAPAnnotation> grp = annoGroups.get(annoId);
boolean isProbeContext = false;
for (CAPAnnotation anno : grp) {
if (Util.isNumber(anno.key) && Integer.parseInt(anno.key) == 1776) {
isProbeContext = true;
break;
}
}
if (!isProbeContext) continue;
Long cid = null, sid = null;
String mlid = null, mlidurl = null;
// pull out cid,sid,mlid for this probe context
for (CAPAnnotation anno : grp) {
if (Util.isNumber(anno.key) && anno.display != null) {
if (dict.getNode(new BigInteger(anno.key)).getLabel().toLowerCase().contains("pubchem cid"))
cid = Long.parseLong(anno.extValueId);
else if (dict.getNode(new BigInteger(anno.key)).getLabel().toLowerCase().contains("pubchem sid")) {
sid = Long.parseLong(anno.extValueId.split(" ")[0]);
} else if (dict.getNode(new BigInteger(anno.key)).getLabel().toLowerCase().contains("probe report")) {
mlid = anno.display;
mlidurl = anno.url;
}
}
}
PreparedStatement pst;
try {
// fill in cid, if we didn't get it
if (cid == null && sid != null) {
pst = conn.prepareStatement("select distinct cid from cid_sid where sid = ?");
pst.setLong(1, sid);
ResultSet rs = pst.executeQuery();
while (rs.next()) cid = rs.getLong("cid");
rs.close();
pst.close();
}
// if there's still no cid, skip it
if (cid == null) {
log.warn("No CID for SID " + sid + " in BARD project ID " + bardProjId + ", probe id " + mlid);
continue;
}
pst = conn.prepareStatement("select * from project_probe where bard_proj_id = ? and cid = ?");
pst.setLong(1, bardProjId);
pst.setLong(2, cid);
ResultSet rs = pst.executeQuery();
boolean linkExists = false;
while (rs.next()) linkExists = true;
rs.close();
pst.close();
if (linkExists) { // will this ever happen?
pst = conn.prepareStatement("update project_probe set probe_id = ? where bard_proj_id = ? and cid = ?");
pst.setString(1, mlid);
pst.setLong(2, bardProjId);
pst.setLong(3, cid);
pst.executeUpdate();
log.info("Updated probe-project link for BARD project id " + bardProjId + " and probe id " + mlid + " CID " + cid);
} else {
pst = conn.prepareStatement("insert into project_probe (bard_proj_id, cid, sid, probe_id, bard_expt_id) values (?,?,null,?, -1)");
pst.setLong(1, bardProjId);
pst.setLong(2, cid);
pst.setString(3, mlid);
try {
pst.executeUpdate();
} catch (com.mysql.jdbc.exceptions.jdbc4.MySQLIntegrityConstraintViolationException e) {
}
log.info("Made probe-project link for BARD project id " + bardProjId + " and probe id " + mlid + " CID " + cid);
}
pst.close();
// next we update the compound table
pst = conn.prepareStatement("update compound set compound_class = 'ML Probe', probe_id = ?, url = ?, updated = now() where cid = ?");
pst.setString(1, mlid);
pst.setString(2, mlidurl);
pst.setLong(3, cid);
pst.executeUpdate();
pst.close();
log.info("Updated compound class for CID " + cid);
// finally update the compound target table by assuming that all project targets
// are also probe targets
pst = conn.prepareStatement("insert into compound_target (cid, target_acc, evidence) values (?, ?, 'probe')");
for (String acc : targetAccs) {
pst.setLong(1, cid);
pst.setString(2, acc);
try {
pst.executeUpdate();
} catch (com.mysql.jdbc.exceptions.jdbc4.MySQLIntegrityConstraintViolationException e) {
}
pst.clearParameters();
log.info("Updated compound target for CID " + cid + " with Uniprot " + acc);
}
pst.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
}
Map<Integer, List<CAPAnnotation>> groupAnnotationsByAnnoId(List<CAPAnnotation> annos) {
Map<Integer, List<CAPAnnotation>> ret = new HashMap<Integer, List<CAPAnnotation>>();
for (CAPAnnotation anno : annos) {
Integer id = anno.id;
List<CAPAnnotation> l;
if (ret.containsKey(id)) {
l = ret.get(id);
} else {
l = new ArrayList<CAPAnnotation>();
}
l.add(anno);
ret.put(id, l);
}
return ret;
}
List<CAPAnnotation> processAnnotations(Project project) {
List<CAPAnnotation> annos = new ArrayList<CAPAnnotation>();
CAPDictionary dict = CAPConstants.getDictionary();
Contexts contexts = project.getContexts();
// if there are contexts, process them, else skip processing
// and return the empty annotation list. Contexts may be missing.
if (contexts != null) {
List<ContextType> contextTypes = contexts.getContext();
for (ContextType contextType : contextTypes) {
int contextId = contextType.getId().intValue();
String contextName = contextType.getContextName();
String contextGroup = contextType.getContextGroup();
ContextType.ContextItems contextItems = contextType.getContextItems();
if (contextItems == null) {
log.warn("Context ID " + contextId + " for CAP project " + project.getProjectId() + " was null (ie had no context items)");
continue;
}
for (ContextItemType contextItemType : contextItems.getContextItem()) {
// dict id for the annotation key
String key = null;
AbstractContextItemType.AttributeId attr = contextItemType.getAttributeId();
if (attr != null) {
key = Util.getEntityIdFromUrl(attr.getLink().getHref());
}
// dict id for the annotation value
String valueUrl = null;
String value = null;
String extValueId = contextItemType.getExtValueId();
String valueDisplay = contextItemType.getValueDisplay();
String related = null;
AbstractContextItemType.ValueId vc = contextItemType.getValueId();
if (vc != null) {
value = Util.getEntityIdFromUrl(vc.getLink().getHref());
String dictUrl = dict.getNode(vc.getLabel()).getExternalUrl();
if (dictUrl != null && !dictUrl.equals("null") && extValueId != null)
valueUrl = dictUrl + extValueId;
} else {
// if there is no valueId field and there is an extValueId field, we
// construct the valueUrl from the key + extValueId
if (extValueId != null) {
CAPDictionaryElement dictNode = dict.getNode(new BigInteger(key));
valueUrl = dictNode.getExternalUrl() == null ? "" : dictNode.getExternalUrl() + extValueId;
}
}
// hack so that CID gets displayed rather than IUPAC name due to weird inconsistency in CAP annotations
if (attr != null && attr.getLabel().contains("CID") && extValueId != null)
valueDisplay = extValueId;
annos.add(new CAPAnnotation(contextId, bardProjId, valueDisplay, contextName, key, value,
contextItemType.getExtValueId(), "cap-context", valueUrl,
contextItemType.getDisplayOrder(), "project", related, contextGroup));
}
}
}
return annos;
}
void processTargets(Project project) throws SQLException, ClassNotFoundException, IOException {
// pull all biologies out
CAPDictionary dict = CAPUtil.getCAPDictionary();
List<BiologyInfo> bioInfo = new ArrayList<BiologyInfo>();
Contexts contexts = project.getContexts();
if (contexts == null) return;
List<ContextType> contextTypes = contexts.getContext();
if (contextTypes == null) return;
// Loop over all contexts
for (ContextType contextType : contextTypes) {
String contextName = contextType.getContextName();
ContextType.ContextItems contextItems = contextType.getContextItems();
if (contextItems == null) continue;
// is this a biology context?
boolean isBiologyContext = false;
for (ContextItemType contextItemType : contextItems.getContextItem()) {
AbstractContextItemType.AttributeId attr = contextItemType.getAttributeId();
String dictId = Util.getEntityIdFromUrl(attr.getLink().getHref());
if (dictId != null && dictId.equals("541")) {
isBiologyContext = true;
break;
}
}
isBiologyContext = isBiologyContext || contextName.equals("biology");
if (!isBiologyContext) continue;
List<Integer> targetDictIds = Arrays.asList(new Integer[]{
525, 507, 1419, 885, 1795, 880, 881, 882, 883, 1398, 1504
});
for (ContextItemType contextItemType : contextItems.getContextItem()) {
AbstractContextItemType.AttributeId attrid = contextItemType.getAttributeId();
String dictId = Util.getEntityIdFromUrl(attrid.getLink().getHref());
if (Util.isNumber(dictId) && targetDictIds.contains(Integer.parseInt(dictId))) {
CAPDictionaryElement node = dict.getNode(new BigInteger(dictId));
String dictLabel = node.getLabel();
String extId = contextItemType.getExtValueId();
String description = contextItemType.getValueDisplay();
String extRef = null;
if (node.getExternalUrl() != null && extId != null)
extRef = node.getExternalUrl()+extId;
BiologyInfo bi = new BiologyInfo(dictLabel, Integer.parseInt(dictId), extId, extRef, description);
bioInfo.add(bi);
}
}
}
// delete pre-existing biology for this id
PreparedStatement pst = conn.prepareStatement("delete from bard_biology where entity = 'project' and entity_id = ?");
pst.setInt(1, bardProjId);
pst.executeUpdate();
pst.close();
// lets dump to db
PreparedStatement pstTarget =
conn.prepareStatement("insert into bard_biology (biology, biology_dict_id, biology_dict_label, description, entity, entity_id, ext_id, ext_ref) " +
" values (?,?,?,?,?,?,?,?)");
for (BiologyInfo abi : bioInfo) {
String biology = Biology.BiologyType.getBiologyTypeFromDictId(abi.dictId).toString();
pstTarget.setString(1, biology);
pstTarget.setInt(2, abi.dictId);
pstTarget.setString(3, abi.dictLabel);
pstTarget.setString(4, abi.description);
pstTarget.setString(5, "project");
pstTarget.setInt(6, bardProjId);
pstTarget.setString(7, abi.extId);
pstTarget.setString(8, abi.extRef);
try {
pstTarget.executeUpdate();
} catch (com.mysql.jdbc.exceptions.jdbc4.MySQLIntegrityConstraintViolationException e) {
}
pstTarget.clearParameters();
}
pstTarget.close();
log.info("Inserted " + bioInfo.size() + " target entries for BARD project id = " + bardProjId);
}
void processExperiments(Project project, int pubchemAid) throws SQLException, IOException {
Project.ProjectExperiments projexpt = project.getProjectExperiments();
if (projexpt == null) {
log.warn("ProjectExperiment for CAP project id " + project.getProjectId() + " was null. No experiments to process");
return;
}
List<ProjectExperiment> experiments = projexpt.getProjectExperiment();
//We could update existing pe's by usint 'replace' over 'insert' BUT
//suppose some pe's are removed.
//Solution? - delete existing pe records for the incoming project
Statement stmt = conn.createStatement();
stmt.execute("delete from bard_project_experiment where bard_proj_id = " + bardProjId);
PreparedStatement pstProjExpt = conn.prepareStatement("insert into bard_project_experiment (bard_proj_id, bard_expt_id, pubchem_aid, expt_type, pubchem_summary_aid) values (?,?,?,?,?)");
for (ProjectExperiment experiment : experiments) {
String exptType;
ProjectExperiment.StageRef stageRef = experiment.getStageRef();
exptType = stageRef == null ? null : stageRef.getLabel();
Link exptLink = experiment.getExperimentRef().getLink();
CAPConstants.CapResource res = CAPConstants.getResource(exptLink.getType());
if (res != CAPConstants.CapResource.EXPERIMENT) continue;
ICapResourceHandler handler = CapResourceHandlerRegistry.getInstance().getHandler(res);
if (handler != null) {
handler.process(exptLink.getHref(), res);
// NOTE: if the load aborts, the bard_exptId will be -1 when returned below
// an example is when an assay isn't loaded because of status and the experiment
// can't load since its assay isn't loaded.
int bardExptId = ((ExperimentHandler) handler).getBardExptId();
int exptPubchemAid = ((ExperimentHandler) handler).getPubchemAid();
if (bardExptId == -1) continue;
pstProjExpt.setInt(1, bardProjId);
pstProjExpt.setInt(2, bardExptId);
pstProjExpt.setInt(3, exptPubchemAid);
pstProjExpt.setString(4, exptType);
pstProjExpt.setInt(5, pubchemAid);
pstProjExpt.addBatch();
}
}
int[] rowsInserted = pstProjExpt.executeBatch();
conn.commit();
pstProjExpt.close();
log.info("Inserted " + rowsInserted.length + " project-experiment entries");
}
List<CAPAnnotation> processDocuments(Project project) throws SQLException, IOException, ParsingException {
List<CAPAnnotation> annos = new ArrayList<CAPAnnotation>();
PreparedStatement pstDoc = conn.prepareStatement("insert into cap_document (cap_doc_id, type, name, url) values (?, ?, ?, ?)");
boolean runPst = false;
for (Link link : project.getLink()) {
CAPConstants.CapResource res = CAPConstants.getResource(link.getType());
if (res != CAPConstants.CapResource.PROJECTDOC) continue;
// for some reason unmarshalling doesn't work properly on assayDocument docs
JAXBElement jaxbe = getResponse(link.getHref(), CAPConstants.getResource(link.getType()));
DocumentType doc = (DocumentType) jaxbe.getValue();
String docContent = doc.getDocumentContent();
String docType = doc.getDocumentType(); // Description, Protocol, Comments, Paper, External URL, Other
String docName = doc.getDocumentName();
if ("Description".equals(docType)) {
} else if ("Protocol".equals(docType)) {
} else if ("Comments".equals(docType)) {
} else {
// hack to add cap project documents as annotations on a project
int docId = Integer.parseInt(Util.getEntityIdFromUrl(link.getHref()));
// check to see if document in cap_document
// query the table by cap_doc_id
boolean hasDoc = false;
Statement query = conn.createStatement();
query.execute("select cap_doc_id from cap_document where cap_doc_id=" + docId);
ResultSet rs = query.getResultSet();
while (rs.next()) {
hasDoc = true;
}
rs.close();
query.close();
if (!hasDoc) {
pstDoc.setInt(1, docId);
pstDoc.setString(2, docType);
pstDoc.setString(3, docName);
pstDoc.setString(4, docContent);
pstDoc.addBatch();
runPst = true;
}
// add annotation for document back to project
annos.add(new CAPAnnotation(docId, bardProjId, docName, docType, "doc", docContent,
docContent, "cap-doc", link.getHref(), 0, "project", null, null));
// see if we can insert a PubMed paper
if (docType.equals("Paper") && docContent.startsWith("http://www.ncbi.nlm.nih.gov/pubmed")) {
String pmid = Util.getEntityIdFromUrl(docContent);
boolean status = CAPUtil.insertPublication(conn, pmid);
if (status) log.info("Inserted Pubmed publication " + pmid);
// see if we should make a link in project_pub
PreparedStatement pstPub = conn.prepareStatement("select * from project_pub where bard_proj_id = ?");
pstPub.setInt(1, bardProjId);
ResultSet prs = pstPub.executeQuery();
boolean linkExists = false;
while (prs.next()) linkExists = true;
pstPub.close();
if (!linkExists) {
pstPub = conn.prepareStatement("insert into project_pub (bard_proj_id, pmid) values (?,?)");
pstPub.setInt(1, bardProjId);
pstPub.setInt(2, Integer.parseInt(pmid));
pstPub.execute();
pstPub.close();
}
}
}
}
if (runPst)
pstDoc.execute();
conn.commit();
pstDoc.close();
return annos;
}
List<CAPAnnotation> processProjectSteps(Project project) throws SQLException {
PreparedStatement exptLookup = conn.prepareStatement("select bard_expt_id from bard_experiment where cap_expt_id = ?");
ResultSet rs;
// are there any experiments to process?
if (project.getProjectExperiments() == null) {
exptLookup.close();
log.warn("ProjectExperiment for CAP project id " + project.getProjectId() + " was null. No experiment steps to process");
return new ArrayList<CAPAnnotation>();
}
// first go through projectExperiment elements and build a map of
// pprojectExperiment id <-> bard experiment id
Map<Integer, Integer> map = new HashMap<Integer, Integer>();
List<ProjectExperiment> pes = project.getProjectExperiments().getProjectExperiment();
for (ProjectExperiment pe : pes) {
int peId = pe.getProjectExperimentId().intValue();
int capExptid = Integer.parseInt(Util.getEntityIdFromUrl(pe.getExperimentRef().getLink().getHref()));
exptLookup.setLong(1, capExptid);
rs = exptLookup.executeQuery();
int bardExptId = -1;
while (rs.next()) bardExptId = rs.getInt(1);
if (bardExptId == -1) {
log.warn("Could not get a bard expt id for cap expt id = " + capExptid + " from projectExperiment id " + peId + " from bard project id " + bardProjId);
} else map.put(peId, bardExptId);
exptLookup.clearParameters();
rs.close();
}
exptLookup.close();
List<CAPAnnotation> annos = new ArrayList<CAPAnnotation>();
PreparedStatement pstep = conn.prepareStatement("insert into project_step(bard_proj_id, step_id, prev_bard_expt_id, next_bard_expt_id, edge_name) " +
" values (?,?,?,?,?)");
//We're building project steps for the projects, we could update the project steps but we should
//delete and rebuild to eliminate links that might be missing.
Statement stmt = conn.createStatement();
stmt.execute("delete from project_step where bard_proj_id=" + bardProjId);
//if there are no project steps or there are 0 project steps, trunter the empty annos for project steps
if (project.getProjectSteps() == null || project.getProjectSteps().getProjectStep().size() == 0)
return annos; //empty annos
List<ProjectStep> steps = project.getProjectSteps().getProjectStep();
for (ProjectStep step : steps) {
int stepId = step.getProjectStepId().intValue();
Integer nextBardExptId = map.get(step.getNextProjectExperimentRef().intValue());
Integer prevBardExptId = map.get(step.getPrecedingProjectExperimentRef().intValue());
ElementNSImpl o = (ElementNSImpl) step.getEdgeName();
//have to deal with null edge names
String ename = null;
Node childNode = null;
if (o != null) {
childNode = o.getFirstChild();
if (childNode != null) {
ename = childNode.getNodeValue();
}
}
if (nextBardExptId == null || prevBardExptId == null) continue;
pstep.setLong(1, bardProjId);
pstep.setLong(2, stepId);
pstep.setLong(3, prevBardExptId);
pstep.setLong(4, nextBardExptId);
//have to deal with possibly not having edge names
if (ename != null) {
pstep.setString(5, ename);
} else {
pstep.setNull(5, java.sql.Types.VARCHAR);
}
pstep.addBatch();
// handle the annotation(s) for this step
CAPDictionary dict = CAPConstants.getDictionary();
Contexts contexts = step.getContexts();
//may not have contexts
if (contexts != null) {
List<ContextType> contextTypes = contexts.getContext();
for (ContextType contextType : contextTypes) {
int contextId = contextType.getId().intValue();
String contextName = contextType.getContextName();
String contextGroup = contextType.getContextGroup();
ContextType.ContextItems contextItems = contextType.getContextItems();
for (ContextItemType contextItemType : contextItems.getContextItem()) {
// dict id for the annotation key
String key = null;
AbstractContextItemType.AttributeId attr = contextItemType.getAttributeId();
if (attr != null) {
key = Util.getEntityIdFromUrl(attr.getLink().getHref());
}
// dict id for the annotation value
String valueUrl = null;
String value = null;
AbstractContextItemType.ValueId vc = contextItemType.getValueId();
if (vc != null) {
value = Util.getEntityIdFromUrl(vc.getLink().getHref());
valueUrl = dict.getNode(vc.getLabel()).getExternalUrl() + contextItemType.getExtValueId();
}
String valueDisplay = contextItemType.getValueDisplay();
String related = String.valueOf(bardProjId);
annos.add(new CAPAnnotation(contextId, stepId, valueDisplay,
contextName, key, value, contextItemType.getExtValueId(),
"cap-context", valueUrl, contextItemType.getDisplayOrder(),
"project-step", related, contextGroup));
}
}
}
}
int[] rowsAdded = pstep.executeBatch();
conn.commit();
pstep.close();
log.info("Added " + rowsAdded.length + " project steps for BARD project id " + bardProjId + " CAP project id " + project.getProjectId());
return annos;
}
private void retireProject(long capProjId) {
long bardProjId = 0l;
try {
//note autocommit is false
Connection conn = CAPUtil.connectToBARD(CAPConstants.getBardDBJDBCUrl());
Statement stmt = conn.createStatement();
ResultSet rs = stmt.executeQuery("select bard_proj_id from bard_project where cap_proj_id = " + capProjId);
if(rs.next()) {
bardProjId = rs.getLong(1);
} else {
//if we don't have a project ID, the project already isn't in the system.
log.warn("RETIRE PROJECT, CAP Project ID:"+capProjId+" Note: Project not present.");
return;
}
rs.close();
//drop project
stmt.executeUpdate("delete from bard_project where bard_proj_id =" + bardProjId);
log.warn("RETIRE PROJECT, CAP Project ID:"+capProjId+" Deleted from bard_project.");
//drop project experiment mapping
stmt.executeUpdate("delete from bard_project_experiment where bard_proj_id =" + bardProjId);
log.warn("RETIRE PROJECT, CAP Project ID:"+capProjId+" Deleted from bard_project_experiment.");
//drop project steps
stmt.executeUpdate("delete from project_step where bard_proj_id =" + bardProjId);
log.warn("RETIRE PROJECT, CAP Project ID:"+capProjId+" Deleted from project_step.");
//drop project annotation
stmt.executeUpdate("delete from cap_project_annotation where bard_proj_id =" + bardProjId);
log.warn("RETIRE PROJECT, CAP Project ID:"+capProjId+" Deleted from cap_project_annotation.");
//drop project biology
stmt.executeUpdate("delete from bard_biology where entity = 'project' and entity_id =" + bardProjId);
log.warn("RETIRE PROJECT, CAP Project ID:"+capProjId+" Deleted from bard_biology.");
//drop project's probes
stmt.executeUpdate("delete from project_probe where bard_proj_id =" + bardProjId);
log.warn("RETIRE PROJECT, CAP Project ID:"+capProjId+" Deleted from project_probe.");
//don't drop the corresponding experiments, other projects might refer to them
//clean up related search indices
conn.commit();
conn.close();
} catch (SQLException sqle) {
sqle.printStackTrace();
}
log.info("Retirement Log ("+capProjId+"): Completed DB clean-up for bardProjID: "+bardProjId);
//clean up related search indices
String solrCoreUrl = null;
String solrCompoundUrl = null;
try {
log.info("Retirement Log ("+capProjId+"): Removing documents from SOLR for bardProjID: "+bardProjId);
solrCoreUrl = CAPConstants.getSolrURL(CAPConstants.SOLR_RESOURCE_KEY_PROJECT);
if(solrCoreUrl != null) {
SearchUtil.deleteDocs(solrCoreUrl, Long.toString(bardProjId));
log.info("Retirement Log ("+capProjId+"): Issued command to remove documents from SOLR for bardProjID: "+bardProjId+" SOLR URL:"+solrCoreUrl);
} else {
log.warn("Retirement Log ("+capProjId+"): FAILED to remove documents from SOLR for bardProjID: "+bardProjId+" SOLR URL: NULL!");
}
} catch (IOException e) {
log.warn("Retirement Log ("+capProjId+"): IOException removing documents from SOLR for bardProjID: "+bardProjId+" SOLR URL:"+solrCoreUrl);
e.printStackTrace();
} catch (SolrServerException e) {
log.warn("Retirement Log ("+capProjId+"): SolrServerException, FAILED to remove documents from SOLR for bardAssayID: "+bardProjId+" SOLR URL:"+solrCoreUrl);
e.printStackTrace();
}
}
class BiologyInfo {
String dictLabel, extId, extRef, description;
Integer dictId;
BiologyInfo(String dictLabel, Integer dictId, String extId, String extRef, String description) {
this.dictLabel = dictLabel;
this.extId = extId;
this.extRef = extRef;
this.description = description;
this.dictId = dictId;
}
}
}