package gov.nih.ncgc.bard.capextract.handler;
import gov.nih.ncgc.bard.capextract.CAPConstants;
import gov.nih.ncgc.bard.capextract.CAPUtil;
import gov.nih.ncgc.bard.capextract.ICapResourceHandler;
import gov.nih.ncgc.bard.capextract.ResultExploder;
import gov.nih.ncgc.bard.capextract.ResultHistogram;
import gov.nih.ncgc.bard.capextract.ResultStatistics;
import gov.nih.ncgc.bard.capextract.SslHttpClient;
import gov.nih.ncgc.bard.capextract.jaxb.Contexts;
import gov.nih.ncgc.bard.capextract.jaxb.Experiment;
import gov.nih.ncgc.bard.capextract.jaxb.ExperimentMeasure;
import gov.nih.ncgc.bard.capextract.resultextract.BardExptDataResponse;
import gov.nih.ncgc.bard.capextract.resultextract.BardResultFactory;
import gov.nih.ncgc.bard.capextract.resultextract.BardResultType;
import gov.nih.ncgc.bard.capextract.resultextract.CAPExperimentResult;
import gov.nih.ncgc.bard.capextract.resultextract.ResultTuple;
import gov.nih.ncgc.bard.resourcemgr.BardDBUtil;
import gov.nih.ncgc.bard.tools.Util;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.math.BigInteger;
import java.net.MalformedURLException;
import java.net.URL;
import java.sql.Blob;
import java.sql.Connection;
import java.sql.Date;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.List;
import java.util.Vector;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.ObjectMapper;
/**
* This class reads in CAP experiment result data and formats JSON responses based on
* the BardExptDataResponse class. The process stages data in cap_expt_result, which
* can then be advanced on to the bard_experiment_data and bard_experiment_result tables.
*
* @author braistedjc
*
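* <p>
* Minimal usage sketch (the experiment id here is hypothetical, and resultResource
* stands in for the JSON result resource constant defined in CAPConstants.CapResource):
* <pre>
* ExperimentResultHandler handler = new ExperimentResultHandler();
* int status = handler.process(CAPConstants.getCAPRoot()+"/experiments/1234", resultResource);
* </pre>
*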
*/
public class ExperimentResultHandler extends CapResourceHandler implements ICapResourceHandler {
private Logger logger = Logger.getLogger(ExperimentResultHandler.class.getName());
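//number of staged CAP result records sampled to determine the experiment-level response class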
private static final int RESPONSE_CLASS_SAMPLE_SIZE = 100;
private Connection conn;
private Connection conn2;
private ArrayList<Long> tempProjectList;
private ArrayList<ResultTuple> priorityElems;
/**
* This method pulls result data from CAP using the supplied CAP experiment id and saves it in a
* local file in bard-scratch. The data file is read and the transformed data is loaded into cap_expt_result.
*
* @param url The URL for the experiment object.
* @param resource the JSON Result resource
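* @return CAPConstants.CAP_EXTRACT_LOAD_STATUS_COMPLETE on a successful load, otherwise CAP_EXTRACT_LOAD_STATUS_FAILED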
*/
public int process(String url, CAPConstants.CapResource resource) {
try {
long capExptId = Long.parseLong(url.substring(url.lastIndexOf('/')+1));
//get tuples that identify the priority element(s)
priorityElems = fetchPriorityElements(capExptId);
long start = System.currentTimeMillis();
logger.info("Starting load for CAP Expt ID="+capExptId+" DBserverHost="+CAPConstants.getBardDBJDBCUrl());
//make sure we have a priority element
if(priorityElems.size() < 1) {
logger.warning("CAP Expt Id ="+capExptId+" lacks a CAP specified priority element. ABORT EXPT DATA LOAD!!!");
return CAPConstants.CAP_EXTRACT_LOAD_STATUS_FAILED;
}
//open connection
conn = CAPUtil.connectToBARD(CAPConstants.getBardDBJDBCUrl());
conn.setAutoCommit(false);
conn2 = CAPUtil.connectToBARD(CAPConstants.getBardDBJDBCUrl());
conn2.setAutoCommit(false);
Statement stmt = conn.createStatement();
ResultSet rs;
//first clear the data for the experiment in the staging table
stmt.execute("delete from cap_expt_result where cap_expt_id="+capExptId);
//dump the cap data into a file in bard-scratch
String stageFile = CAPConstants.getBardScratchDir()+"/result_load_cap_expt_"+capExptId+".txt";
this.stageDataToFile(url, resource, capExptId, stageFile);
logger.info("Data is staged in file: "+stageFile);
start = System.currentTimeMillis();
//get related entity ids for the capExptId
Hashtable <String, Long> ids = getIds(capExptId);
//we need to verify that we have a corresponding bard expt id, if not log warning and get out.
Long bardExptId = ids.get("bardExptId");
if(bardExptId == null) {
logger.warning("A bardExtId does not exist corresponding to capExptId:"+capExptId+". Experiment data load aborted. Load experiment first.");
return CAPConstants.CAP_EXTRACT_LOAD_STATUS_FAILED;
}
//get project ids for the cap experiment
ArrayList <ArrayList<Long>> projIds = getProjectIds(ids.get("bardExptId"));
//construct a result factory and initialize with entity ids common to all responses
BardResultFactory resultFactory = new BardResultFactory();
logger.info("CAP Expt ID="+capExptId+" ResultFactory Initialized, entity ID's Exist.");
//order the priority tuples if more than one tuple
if(this.priorityElems.size() > 1) {
resultFactory.sortPriorityElementTuples(priorityElems);
}
//prepare for insert of staged data
PreparedStatement insertPS = conn.prepareStatement("insert into cap_expt_result set seq_result_id = ?, cap_expt_id = ?, sid = ?, cid = ?, outcome = ?, score=?, potency=?, cap_json = ?, bard_json = ?");
ObjectMapper mapper = new ObjectMapper();
long procCnt = 0;
String capData;
Long cid;
Double score, potency;
Integer outcome;
BufferedReader br = new BufferedReader(new FileReader(stageFile));
//here we need to determine the response class by polling a collection of input responses from CAP
//use the BufferedReader to iterate, then reopen it below for processing
BardExptDataResponse sampleResponse = determineResultClass(capExptId, br);
br.close();
//abort if sampling produced no responses; continuing would NPE below
if(sampleResponse == null) {
logger.warning("CAP Expt Id ="+capExptId+" has no sampled responses to determine a response class. ABORT EXPT DATA LOAD!!!");
return CAPConstants.CAP_EXTRACT_LOAD_STATUS_FAILED;
}
//now pass the experiment level response class on to the response factory when initializing
resultFactory.initialize(ids.get("bardExptId"), capExptId, ids.get("bardAssayId"), ids.get("capAssayId"), projIds, fetchContexts(capExptId),
sampleResponse.getResponseType(), sampleResponse.getExptScreeningConc(), sampleResponse.getExptConcUnit(), priorityElems);
br = new BufferedReader(new FileReader(stageFile));
//process each result (one for each substance). The helper class just acts as a container.
while((capData = br.readLine()) != null) {
capData = capData.trim();
if(capData.length() > 0) { //skip empty lines
//build the CAP expt result object
CAPExperimentResult result = mapper.readValue(capData, CAPExperimentResult.class);
//get the cid for the sid
rs = stmt.executeQuery("select cid from cid_sid where rel_type = 1 and sid ="+result.getSid());
if(rs.next()) {
cid = rs.getLong(1);
} else {
cid = null;
}
//convert the CAP result object to the BardExptDataResponse class
//the BardResultFactory takes care of formatting and hierarchy.
//the factory already has the entity ids
BardExptDataResponse bardResponse = resultFactory.processCapResult(result);
//set cid, if the cid was null, the staged value is '0'.
//we need to check for cid = 0 and set cid to NULL in the response's Long cid field.
if(cid != null && cid > 0)
bardResponse.setCid(cid);
else
bardResponse.setCid(null);
//set insert data
insertPS.setLong(1, procCnt);
insertPS.setLong(2, capExptId);
insertPS.setLong(3, bardResponse.getSid());
if(cid != null)
insertPS.setLong(4, cid);
else
insertPS.setNull(4, java.sql.Types.INTEGER);
outcome = bardResponse.getOutcome();
score = bardResponse.getScore();
potency = bardResponse.getPotency();
if(outcome != null && outcome != 0)
insertPS.setInt(5, outcome);
else
insertPS.setNull(5, java.sql.Types.INTEGER);
if(score != null)
insertPS.setDouble(6, score);
else
insertPS.setNull(6,java.sql.Types.DOUBLE);
if(potency != null)
insertPS.setDouble(7, potency);
else
insertPS.setNull(7, java.sql.Types.DOUBLE);
insertPS.setString(8, capData);
insertPS.setString(9, mapper.writeValueAsString(bardResponse));
insertPS.addBatch();
//execute the batch and commit every 100 records
if(procCnt % 100 == 0) {
insertPS.executeBatch();
conn.commit();
}
procCnt++;
}
}
br.close();
//last batch insert and commit
insertPS.executeBatch();
conn.commit();
//lets gzip the file in bard-scratch
gzipStagingFile(stageFile);
//verify result staging load, count in table vs. process count
if(verifyStagedLoad(procCnt, capExptId)) {
//delete results if they exist
stmt.execute("delete from bard_experiment_data where bard_expt_id ="+bardExptId);
stmt.execute("delete from bard_experiment_result where bard_expt_id ="+bardExptId);
//load data tables (bard_experiment_data and bard_experiment_result)
loadDataServingTables(capExptId, bardExptId);
}
//now update the tested sid count, cid count, active count, probe count, and has-probe flag.
updateExperimentTestingStats(bardExptId);
//close the connections
conn.close();
conn2.close();
logger.info("Starting to explode data for BARD Experiment "+bardExptId);
ResultExploder re = new ResultExploder();
re.explodeResults(bardExptId);
logger.info("Finished exploding data for BARD Experiment "+bardExptId);
ResultStatistics rstats = new ResultStatistics();
rstats.generateStatistics(bardExptId);
logger.info("Evaluated statistics for BARD Experiment "+bardExptId);
ResultHistogram rh = new ResultHistogram();
rh.generateHistogram(bardExptId);
logger.info("Generated histograms for BARD Experiment "+bardExptId);
logger.info("Process time for CAP expt "+capExptId+" , BARD expt "+bardExptId+": "+(System.currentTimeMillis()-start));
} catch (FileNotFoundException e) {
e.printStackTrace();
return CAPConstants.CAP_EXTRACT_LOAD_STATUS_FAILED;
} catch (IOException e) {
e.printStackTrace();
return CAPConstants.CAP_EXTRACT_LOAD_STATUS_FAILED;
} catch (SQLException e) {
e.printStackTrace();
return CAPConstants.CAP_EXTRACT_LOAD_STATUS_FAILED;
}
return CAPConstants.CAP_EXTRACT_LOAD_STATUS_COMPLETE;
}
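/*
* Samples up to RESPONSE_CLASS_SAMPLE_SIZE staged CAP responses and returns the sampled
* response having the largest result list; its response type serves as the experiment-level
* response class. Returns null if no responses could be sampled.
*/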
private BardExptDataResponse determineResultClass(Long capExptId, BufferedReader bufferedReader) throws IOException {
String capData;
ArrayList <BardExptDataResponse> respList = new ArrayList <BardExptDataResponse>();
ArrayList <Integer> respSizeList = new ArrayList<Integer>();
ArrayList <BardResultType> resultList;
BardResultFactory resultFactory = new BardResultFactory();
ObjectMapper mapper = new ObjectMapper();
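//dummy entity and project ids: only the response class of the sampled results matters here,
//so the factory is initialized with placeholder values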
long ced, bed, cad, bad;
ced = bed = cad = bad = 0l;
ArrayList <ArrayList<Long>> pids = new ArrayList <ArrayList<Long>>();
ArrayList <Long> pidList = new ArrayList<Long>();
pidList.add(1l);
pidList.add(2l);
pids.add(pidList);
Contexts contexts = this.fetchContexts(capExptId);
CAPExperimentResult capResult;
BardExptDataResponse response;
int procCnt = 0;
BardExptDataResponse maxResponse = null;
while((capData = bufferedReader.readLine()) != null) {
//break if hit the sample limit
if(procCnt >= RESPONSE_CLASS_SAMPLE_SIZE)
break;
capData = capData.trim();
if(capData.length() > 0) {
//track process count
procCnt++;
//need to initialize factory to make a new response object to pass out
resultFactory.initialize(bed, ced, bad, cad, pids, contexts, BardExptDataResponse.ResponseClass.UNDEF.ordinal(),
null, null, priorityElems);
capResult = mapper.readValue(capData, CAPExperimentResult.class);
//build a response **** Has a type determined
response = resultFactory.processCapResult(capResult);
resultList = resultFactory.getResultList();
respList.add(response);
//add the size
if(resultList != null) {
respSizeList.add(resultList.size());
} else {
respSizeList.add(0);
}
}
}
if(respList.size() > 0) {
log.info("Sampling "+ respList.size()+ " responses to determine responseClass for capExptId"+capExptId);
int maxSize = 0;
maxResponse = respList.get(0);
for(int i = 0; i < respSizeList.size(); i++) {
if(respSizeList.get(i) > maxSize) {
maxSize = respSizeList.get(i);
maxResponse = respList.get(i);
}
}
//maxResponse (largest result list) carries the experiment's response class
} else {
log.info("PROBLEM Trying to sample response class: NO responses available to determine responseClass for capExptId"+capExptId);
}
return maxResponse;
}
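/*
* Advances verified staged rows from cap_expt_result into the data serving tables
* (bard_experiment_data and bard_experiment_result), streaming the staged rows on
* conn2 while batching inserts and commits on conn.
*/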
private void loadDataServingTables(long capExptId, long bardExptId) throws SQLException, JsonParseException, JsonMappingException, IOException {
Statement stmt = conn2.createStatement();
long dataId;
ResultSet rs = stmt.executeQuery("select max(expt_data_id) from bard_experiment_data");
if(rs.next()) {
dataId = rs.getLong(1) + 1;
} else {
log.warn("Abort result load for capExptID:"+capExptId+" Can't get max expt_data_id");
return;
}
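//a fetch size of Integer.MIN_VALUE enables MySQL row streaming for the large staged result set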
stmt.setFetchSize(Integer.MIN_VALUE);
ResultSet stagedDataRS = stmt.executeQuery("select sid, cid, outcome, score, potency, cap_json, bard_json from cap_expt_result where cap_expt_id="+capExptId);
/*
* bard_expt_data fields:
* expt_data_id (AI), bard_expt_id, eid (defunct), sid, cid, classification (defunct), updated (date),
* runset(varchar), outcome, score, potency, ADD efficacy and test conc?
*
* bard_experiment_result fields:
* expt_result_id (AI), expt_data_id, bard_expt_id, replicate_id (defunct), eid (defunct),
* sid, json_data_array, json_dose_response (defunct), json_response.
*
*/
PreparedStatement insertDataPS = conn.prepareStatement("insert into bard_experiment_data " +
"set bard_expt_id=?, sid=?, cid=?, updated=?, outcome=?, score=?, potency=?, expt_data_id=?");
PreparedStatement insertResultPS = conn.prepareStatement("insert into bard_experiment_result " +
"set expt_data_id=?, bard_expt_id=?, sid=?, json_data_array=?, json_response=?");
Statement aiStmt = conn.createStatement();
Long cid, sid;
String bardData;
String capData;
BardExptDataResponse bardResponse;
Double score, potency;
Integer outcome;
Date date = new Date(System.currentTimeMillis());
long procCnt = 0;
while(stagedDataRS.next()) {
procCnt++;
cid = stagedDataRS.getLong("cid");
capData = stagedDataRS.getString("cap_json");
bardData = stagedDataRS.getString("bard_json");
sid = stagedDataRS.getLong("sid");
score = stagedDataRS.getDouble("score");
if(stagedDataRS.wasNull())
score = null;
outcome = stagedDataRS.getInt("outcome");
if(stagedDataRS.wasNull())
outcome = null;
potency = stagedDataRS.getDouble("potency");
if(stagedDataRS.wasNull())
potency = null;
//bard_experiment_data fields
insertDataPS.setLong(1, bardExptId);
insertDataPS.setLong(2, sid);
if(cid != null)
insertDataPS.setLong(3, cid);
else
insertDataPS.setNull(3, java.sql.Types.INTEGER);
insertDataPS.setDate(4, date);
if(outcome != null)
insertDataPS.setInt(5, outcome);
else
insertDataPS.setNull(5, java.sql.Types.TINYINT);
if(score != null)
insertDataPS.setDouble(6, score);
else
insertDataPS.setNull(6, java.sql.Types.DOUBLE);
if(potency != null)
insertDataPS.setDouble(7, potency);
else
insertDataPS.setNull(7, java.sql.Types.DOUBLE);
insertDataPS.setLong(8, dataId);
//bard_experiment_result fields
insertResultPS.setLong(1, dataId);
insertResultPS.setLong(2, bardExptId);
insertResultPS.setLong(3, sid);
insertResultPS.setString(4, capData);
insertResultPS.setString(5, bardData);
insertDataPS.addBatch();
insertResultPS.addBatch();
//increment the shared data id
dataId++;
if(procCnt % 100 == 0) {
insertDataPS.executeBatch();
insertResultPS.executeBatch();
conn.commit();
}
}
insertDataPS.executeBatch();
insertResultPS.executeBatch();
conn.commit();
}
/*
* Verifies the count of staged data in the DB.
* Preconditions: Assumes an existing db connection (conn).
*/
private boolean verifyStagedLoad(long dataCnt, long capExptId) throws SQLException {
boolean verified = false;
Statement stmt = conn.createStatement();
ResultSet rs = stmt.executeQuery("select count(*) from cap_expt_result where cap_expt_id = "+capExptId);
if(rs.next()) {
if(rs.getLong(1) == dataCnt) {
verified = true;
log.info("Verified: File data count == DB Count ("+dataCnt+") for capExptId:"+capExptId);
} else {
log.warn("Data Staging Verification FAILED: File data count:"+dataCnt+" != DB data count:"+rs.getLong(1)+" for capExptId:"+capExptId);
}
}
rs.close();
return verified;
}
/*
* Stages CAP result data into the given file and returns the number of lines written.
*/
private long stageDataToFile(String url, CAPConstants.CapResource resource, long capExptId, String fileName) throws IOException, SQLException {
long procCnt = 0;
String urlStr = url+"/results";
String mime = resource.getMimeType();
HttpGet get = new HttpGet(urlStr);
get.setHeader("Accept", mime);
get.setHeader(CAPConstants.CAP_APIKEY_HEADER, CAPConstants.getApiKey());
HttpClient client = SslHttpClient.getHttpClient();
try {
HttpResponse response = client.execute(get);
BufferedReader br = new BufferedReader(new InputStreamReader(new GZIPInputStream(response.getEntity().getContent())));
PrintWriter bw = new PrintWriter(new FileWriter(fileName));
String entry;
while((entry = br.readLine()) != null) {
bw.println(entry);
procCnt++;
}
br.close();
bw.close();
} catch (ClientProtocolException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return procCnt;
}
/*
* Pulls entity ids related to this experiment
*/
private Hashtable <String, Long> getIds(long capExptId) throws SQLException {
Hashtable <String, Long> ids = new Hashtable <String, Long> ();
Statement stmt = conn.createStatement();
ResultSet rs = stmt.executeQuery("select a.bard_expt_id, b.bard_assay_id, b.cap_assay_id " +
"from bard_experiment a, bard_assay b " +
"where a.bard_assay_id = b. bard_assay_id and a.cap_expt_id = "+capExptId);
if(rs.next()) {
Long beid = rs.getLong(1);
Long baid = rs.getLong(2);
Long caid = rs.getLong(3);
if(beid != null)
ids.put("bardExptId", beid);
if(baid != null)
ids.put("bardAssayId", baid);
if(caid != null)
ids.put("capAssayId", caid);
}
rs.close();
return ids;
}
/*
* GZIPs the file in scratch
*/
private void gzipStagingFile(String filePath) throws IOException {
File file = new File(filePath);
FileInputStream fis = new FileInputStream(file);
File gzFile = new File(filePath+".gz");
GZIPOutputStream gos = new GZIPOutputStream(new FileOutputStream(gzFile));
BufferedOutputStream bos = new BufferedOutputStream(gos);
byte [] buffer = new byte[1024];
int length;
while((length = fis.read(buffer, 0, buffer.length)) != -1) {
bos.write(buffer, 0, length);
}
fis.close();
bos.flush();
bos.close();
file.delete();
}
/*
* Helper method to pull project ids for the given bard experiment id
*/
private ArrayList <ArrayList<Long>> getProjectIds(Long bardExptId) throws SQLException {
ArrayList <ArrayList<Long>> ids = null;
if(bardExptId != null) {
ids = new ArrayList <ArrayList<Long>> ();
Statement stmt = conn.createStatement();
ResultSet rs = stmt.executeQuery("select a.bard_proj_id, b.cap_proj_id from bard_project_experiment a, bard_project b where a.bard_expt_id = "+ bardExptId + " "
+"and a.bard_proj_id = b.bard_proj_id");
long capProjId;
ArrayList <Long> uniqueCapProjectIds = new ArrayList<Long>();
while(rs.next()) {
capProjId = rs.getLong(2);
//if cap id is null (shouldn't be), just add the bard id and add the list to ids
if(rs.wasNull()) {
tempProjectList = new ArrayList<Long>();
tempProjectList.add(rs.getLong(1));
ids.add(tempProjectList);
} else if(!uniqueCapProjectIds.contains(capProjId)) {
//if the cap id isn't null, check that it hasn't been added before
uniqueCapProjectIds.add(capProjId);
tempProjectList = new ArrayList<Long>();
tempProjectList.add(rs.getLong(1));
tempProjectList.add(capProjId);
ids.add(tempProjectList);
}
}
}
return ids;
}
/*
* Helper method to pull the Contexts for the given CAP experiment ID.
*/
private Contexts fetchContexts(Long capExptId) {
Contexts contexts = null;
try {
Experiment w = getResponse(CAPConstants.getCAPRoot()+"/experiments/"+capExptId,
CAPConstants.CapResource.EXPERIMENT);
if(w != null) {
contexts = w.getContexts();
}
} catch (IOException e) {
e.printStackTrace();
}
return contexts;
}
/*
* Helper method to pull the priority element measures for the given CAP experiment ID as ResultTuples.
*/
private ArrayList<ResultTuple> fetchPriorityElements(Long capExptId) {
ArrayList<ResultTuple> resultTuples = new ArrayList<ResultTuple>();
try {
Experiment w = getResponse(CAPConstants.getCAPRoot()+"/experiments/"+capExptId,
CAPConstants.CapResource.EXPERIMENT);
if(w != null) {
if(w.getExperimentMeasures() == null || w.getExperimentMeasures().getExperimentMeasure().size() < 1) {
//if we don't have any experiment measures (null or size = 0) return the empty result tuples
return resultTuples;
}
List<ExperimentMeasure> measures = w.getExperimentMeasures().getExperimentMeasure();
//iterate over measures and build a corresponding ResultTuple for each priority element
for(ExperimentMeasure measure : measures) {
if(measure.isPriorityElement()) {
ResultTuple resultTuple = makeResultTuple(measure);
resultTuples.add(resultTuple);
if(!resultTuple.isAtRoot()) {
populateParentInfo(measure, measures, resultTuple);
}
}
}
}
} catch (IOException e) {
e.printStackTrace();
}
return resultTuples;
}
/*
* Helper method to pull the priority element measures for the given Experiment as ResultTuples.
*/
public ArrayList<ResultTuple> fetchPriorityElements(Experiment experiment) {
ArrayList<ResultTuple> resultTuples = new ArrayList<ResultTuple>();
if(experiment != null && experiment.getExperimentMeasures() != null) {
List<ExperimentMeasure> measures = experiment.getExperimentMeasures().getExperimentMeasure();
//iterate over measures and build a corresponding ResultTuple for each priority element
for(ExperimentMeasure measure : measures) {
if(measure.isPriorityElement()) {
//System.out.println("Priority Element");
ResultTuple resultTuple = makeResultTuple(measure);
resultTuples.add(resultTuple);
if(!resultTuple.isAtRoot()) {
populateParentInfo(measure, measures, resultTuple);
}
}
}
}
return resultTuples;
}
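/*
* Builds a ResultTuple for a measure: the result type dictionary id, an optional
* stats modifier id, and whether the measure sits at the root of the measure tree.
*/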
private ResultTuple makeResultTuple(ExperimentMeasure measure) {
ResultTuple tuple = new ResultTuple();
tuple.setDictId(Integer.parseInt(Util.getEntityIdFromUrl(measure.getResultTypeRef().getLink().getHref())));
if(measure.getStatsModifierRef() != null)
tuple.setStatsModifier(Integer.parseInt(Util.getEntityIdFromUrl(measure.getStatsModifierRef().getLink().getHref())));
else
tuple.setStatsModifier(null);
tuple.setAtRoot(measure.getParentExperimentMeasureRef() == null);
return tuple;
}
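/*
* Locates the parent of the given measure and records the parent's result type
* dictionary id on the tuple.
*/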
private void populateParentInfo(ExperimentMeasure measure, List<ExperimentMeasure> measures, ResultTuple tuple) {
BigInteger parentNodeId = measure.getParentExperimentMeasureRef();
for(ExperimentMeasure m : measures) {
if(m.getExperimentMeasureId().equals(parentNodeId)) {
tuple.setParentDictId(Integer.parseInt(Util.getEntityIdFromUrl(m.getResultTypeRef().getLink().getHref())));
//have parent, now break
break;
}
}
}
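/*
* Updates bard_experiment testing statistics for the given BARD experiment:
* distinct sid and cid counts, active count (outcome 2 or 5), probe count
* (outcome 5), and the have_probe flag.
*/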
public void updateExperimentTestingStats(Long bardExptId) throws SQLException {
if(bardExptId != null) {
Statement stmt = conn.createStatement();
Long sampleCount = 0l;
Long cidCount = 0l;
Long activeCount = 0l;
Long probeCount = 0l;
Boolean haveProbe = false;
ResultSet rs = stmt.executeQuery("select count(distinct(sid)) from bard_experiment_data where bard_expt_id="+bardExptId);
if(rs.next())
sampleCount = rs.getLong(1);
rs = stmt.executeQuery("select count(distinct(cid)) from bard_experiment_data where bard_expt_id ="+bardExptId);
if(rs.next())
cidCount = rs.getLong(1);
rs = stmt.executeQuery("select count(distinct(sid)) from bard_experiment_data where (outcome=2 or outcome=5) and bard_expt_id="+bardExptId);
if(rs.next())
activeCount = rs.getLong(1);
rs = stmt.executeQuery("select count(distinct(sid)) from bard_experiment_data where outcome=5 and bard_expt_id="+bardExptId);
if(rs.next())
probeCount = rs.getLong(1);
haveProbe = (probeCount != null && probeCount > 0);
stmt.executeUpdate("update bard_experiment set " +
" sample_count="+sampleCount+
", cid_count="+cidCount+
", active_count="+activeCount+
", probe_count="+probeCount+
", have_probe="+haveProbe+
" where bard_expt_id = "+bardExptId
);
conn.commit();
}
}
/*
* Utility method to pull an example result for each experiment in the db and write a tab-delimited summary to outFile.
*/
public void testResultTypes(String serverURL, String experimentQuery, String outFile, String version) {
try {
try {
conn = BardDBUtil.connect(serverURL);
} catch (ClassNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
Statement stmt = conn.createStatement();
ResultSet rs = stmt.executeQuery(experimentQuery);
Vector <Long> v = new Vector<Long>();
while(rs.next()) {
v.add(rs.getLong(1));
}
rs.close();
String activeSidSQL = "select sid from bard_experiment_data where (outcome = 2 or outcome = 5)" +
" and bard_expt_id=";
Vector <Long> totallyInactiveBeds = new Vector<Long>();
String anySidSQL = "select sid from bard_experiment_data where bard_expt_id=";
Hashtable <Long, Long> bidSidHash = new Hashtable <Long, Long>();
int inactiveCnt = 0;
for(long bed : v) {
rs = stmt.executeQuery(activeSidSQL+bed+" limit 1");
if(rs.next()) {
bidSidHash.put(bed, rs.getLong(1));
System.out.println("sid ="+rs.getLong(1));
} else {
rs = stmt.executeQuery(anySidSQL+bed+" limit 1");
if(rs.next()) {
bidSidHash.put(bed, rs.getLong(1));
inactiveCnt++;
} else {
totallyInactiveBeds.add(bed);
}
}
}
rs.close();
System.out.println("No results for "+totallyInactiveBeds.size()+" experiments!");
System.out.println("Experiment count with no actives ="+inactiveCnt);
System.out.println("Number of bard expt ids="+v.size());
PrintWriter pw = new PrintWriter(new FileWriter(outFile));
pw.println("RespClass\tPE\tCapExptId\tCapAssayId\tCapProjId\tPubchemAID\tBardExptId\tsid\tisSIDActive?\tpubchem URL (Result Def)\tBARD REST URL\tBard JSON\tCAP Measure Cnt\tPubhem tid Cnt(+2)\tcapCnt/pubchemCnt");
PreparedStatement ps = conn.prepareStatement("select b.cap_expt_id, a.json_response, a.sid, b.pubchem_aid, a.json_data_array from bard_experiment_result a, bard_experiment b " +
" where a.bard_expt_id=b.bard_expt_id and a.bard_expt_id = ? and a.sid = ? limit 1");
int progress = 0;
Blob response;
BufferedReader in;
String line;
long aid;
long sid;
long pubchemAID;
Long activeSID;
int numWrites = 0;
int capCnt;
int pubchemTidCnt;
BardExptDataResponse bardResponse;
ObjectMapper mapper = new ObjectMapper();
for(Long bid : v) {
ps.setLong(1, bid);
activeSID = bidSidHash.get(bid);
if(activeSID != null) {
ps.setLong(2, activeSID);
rs = ps.executeQuery();
if(rs.next()) {
progress++;
aid = rs.getLong(1);
response = rs.getBlob(2);
sid = rs.getLong("sid");
pubchemAID = rs.getLong(4);
String responseStr = "";
if(response != null) {
in = new BufferedReader(new InputStreamReader(response.getBinaryStream()));
numWrites++;
while((line = in.readLine()) != null) {
responseStr += line;
}
BardExptDataResponse r = mapper.readValue(responseStr, BardExptDataResponse.class);
pw.print(r.getResponseClass()+"\t");
String pes = "";
for(BardResultType result : r.getPriorityElements()) {
pes += result.getDisplayName() + ",";
}
if(pes.length() > 0)
pes = pes.substring(0, pes.length()-1);
pw.print(pes+"\t");
String projId = "";
for(BardExptDataResponse.ProjectIdPair p : r.getProjects()) {
projId += p.getCapProjId() + "/";
}
if(projId.length() > 0)
projId = projId.substring(0, projId.length()-1);
pw.print(aid+"\t"+r.getCapAssayId()+"\t"+projId+"\t"+pubchemAID+"\t"+bid+"\t"+sid+"\t");
if(responseStr.contains("value\":\"Active"))
pw.print("Active\t");
else
pw.print("Not Active\t");
pw.print("=hyperlink(\"http://pubchem.ncbi.nlm.nih.gov/assay/assay.cgi?aid="+pubchemAID+"#aDefinitions\")\t");
pw.print("=hyperlink(\"http://bard.nih.gov/api/"+version+"/exptdata/"+bid+"."+sid+"\")\t");
//write bard response
pw.print(responseStr+"\t");
in.close();
} else {
System.out.println("Null experiment response for expt: "+bid);
}
//cap data
response = rs.getBlob(5);
responseStr = "";
if(response != null) {
in = new BufferedReader(new InputStreamReader(response.getBinaryStream()));
while((line = in.readLine()) != null) {
responseStr += line;
}
} else {
System.out.println("Null CAP experiment response for expt: "+bid);
}
capCnt = getCAPMeasureCount(responseStr);
//adding 2 for pubchem outcome and score which are measures in cap
//so we're adding tid count + the 2 standard pubchem values
pubchemTidCnt = this.getPubchemTIDCount(pubchemAID) + 2;
pw.print(capCnt+"\t");
pw.print(pubchemTidCnt+"\t");
DecimalFormat format = new DecimalFormat("0.000");
if(pubchemTidCnt > 0)
pw.println(format.format((float)capCnt/(float)pubchemTidCnt));
else
pw.println("\t");
// responseStr = responseStr.replace("\n", "");
//end the line
// pw.println(responseStr);
if(progress %100 == 0)
System.out.println("Progress = "+progress);
}
} else {
System.out.println("No SID for expt = "+bid);
}
}
System.out.println("num writes="+numWrites);
pw.flush();
pw.close();
conn.close();
} catch (SQLException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
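/*
* Counts the CAP measures in a result JSON string by counting occurrences of the
* "resultTypeId" key.
*/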
public int getCAPMeasureCount(String capResponse) {
int mCnt = 0;
String key = "\"resultTypeId\":";
mCnt = capResponse.split(key).length-1;
return mCnt;
}
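/*
* Counts the TIDs for a PubChem assay by fetching the PUG REST assay description
* JSON and counting occurrences of the "tid" key.
*/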
public int getPubchemTIDCount(long aid) {
int tidCnt = 0;
String url = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/aid/"+aid+"/description/JSON";
String data = "";
try {
URL pubchemURL = new URL(url);
InputStream is = pubchemURL.openStream();
BufferedReader br = new BufferedReader(new InputStreamReader(is));
String line;
while((line = br.readLine()) != null) {
data += line.trim();
}
is.close();
tidCnt = data.split("\"tid\":").length-1;
if(aid == 624024) {
//System.out.println(data);
System.out.println("tid cnt ="+tidCnt);
}
} catch (MalformedURLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return tidCnt;
}
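/*
* Recomputes the testing statistics for every experiment in bard_experiment on the
* given database.
*/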
public void updateExperimentTestStats(String dbURL) {
try {
conn = CAPUtil.connectToBARD(dbURL);
Statement stmt = conn.createStatement();
ResultSet rs = stmt.executeQuery("select bard_expt_id from bard_experiment");
ArrayList <Long> beds = new ArrayList <Long>();
while(rs.next()) {
beds.add(rs.getLong(1));
}
rs.close();
stmt.close();
for(Long bed : beds) {
this.updateExperimentTestingStats(bed);
}
conn.close();
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
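/*
* Regenerates result histograms for each distinct BARD experiment id found in the
* exploded_histograms table.
*/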
public void explodeAllHistograms() {
try {
Connection conn = CAPUtil.connectToBARD(CAPConstants.getBardDBJDBCUrl());
Statement stmt = conn.createStatement();
stmt.setFetchSize(Integer.MIN_VALUE); //allow streaming...
ResultSet rs = stmt.executeQuery("select distinct(bardExptId) from exploded_histograms");
ArrayList <Long> exptList = new ArrayList<Long>();
while(rs.next()) {
exptList.add(rs.getLong(1));
}
rs.close();
stmt.close();
conn.close();
for (Long bardExptId : exptList) {
log.info("===== Exploding Histogram for Bard Expt ID: "+bardExptId+" =====");
this.explodeHistogram(bardExptId);
}
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
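/*
* Regenerates the result histogram for a single BARD experiment.
*/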
public void explodeHistogram(long bardExptId) throws SQLException {
ResultHistogram rh = new ResultHistogram();
rh.generateHistogram(bardExptId);
logger.info("Generated histograms for BARD Experiment "+bardExptId);
}
public static void main(String [] args) {
ExperimentResultHandler worker = new ExperimentResultHandler();
try {
worker.explodeHistogram(320l);
//worker.explodeHistogram(349l);
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
long start = System.currentTimeMillis();
//worker.fetchPriorityElements(4976l);
//worker.getPubchemTIDCount(624024);
// worker.testResultTypes("jdbc:mysql://bohr.ncats.nih.gov/bard3",
// "select bard_expt_id from bard_experiment where bard_expt_id in (" +
// "select bard_expt_id from bard_project_experiment where bard_proj_id = 3)",
// "C:/Users/braistedjc/Desktop/json_response_samples_max_20131030_PID_879_test.txt");
// worker.testResultTypes("jdbc:mysql://maxwell.ncats.nih.gov/bard3",
// "select distinct(bard_expt_id) from bard_experiment",
// "C:/Users/braistedjc/Desktop/json_response_samples_20131105_All_projects_test_redo_3.txt", "v18");
//worker.updateExperimentTestStats("jdbc:mysql://maxwell.ncats.nih.gov/bard3");
//worker.processCapExperimentResultViaFileCache(36, "jdbc:mysql://protein.nhgri.nih.gov/bard3", "/ifs/prod/bard/entity_mgr/bard-scratch/");
System.out.println("et="+((System.currentTimeMillis()-start)));
//
}
}