package gov.nih.ncgc.bard.resourcemgr.precomp;
import gov.nih.ncgc.bard.capextract.CAPUtil;
import gov.nih.ncgc.bard.resourcemgr.BardDBUtil;
import gov.nih.ncgc.bard.resourcemgr.extresource.pubchem.BardCompoundPubchemExtrasLoader;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Hashtable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import chemaxon.formats.MolFormatException;
import chemaxon.struc.Molecule;
import chemaxon.util.MolHandler;
public class BardCompoundTestStatsWorker {
static final private Logger logger =
LoggerFactory.getLogger(BardCompoundPubchemExtrasLoader.class.getName());
String sqlSelectCIDActiveData = "select cid,count(distinct(bard_expt_id)) from bard_experiment_data " +
"where (outcome = 2 or outcome = 5) group by cid";
String sqlSelectCIDTestedData = "select cid,count(distinct(bard_expt_id)) from bard_experiment_data " +
"group by cid";
String sqlSelectCIDActiveAssayData = "select b.cid, count(distinct(a.bard_assay_id)) "+
"from bard_experiment a, bard_experiment_data b " +
"where a.bard_expt_id=b.bard_expt_id and (b.outcome = 2 or b.outcome = 5) group by b.cid";
String sqlSelectCIDTestedAssayData = "select b.cid, count(distinct(a.bard_assay_id)) "+
"from bard_experiment a, bard_experiment_data b " +
"where a.bard_expt_id=b.bard_expt_id group by b.cid";
String sqlUpdateCIDActiveCnt = "update compound set active_expt_cnt = ? where cid = ?";
String sqlUpdateCIDTestedCnt = "update compound set tested_expt_cnt = ? where cid = ?";
String sqlUpdateCIDAssayActiveCnt = "update compound set active_assay_cnt = ? where cid = ?";
String sqlUpdateCIDAssayTestedCnt = "update compound set tested_assay_cnt = ? where cid = ?";
String sqlSetActiveExptNullToZero = "update compound set active_expt_cnt = 0 where active_expt_cnt is null";
String sqlSetTestedExptNullToZero = "update compound set tested_expt_cnt = 0 where tested_expt_cnt is null";
String sqlSetActiveAssayNullToZero = "update compound set active_assay_cnt = 0 where active_assay_cnt is null";
String sqlSetTestedAssayNullToZero = "update compound set tested_assay_cnt = 0 where tested_assay_cnt is null";
private Connection conn;
public void updateCompoundTestStatus(String serverURL) {
try {
logger.info("Starting compound test stat update");
conn = CAPUtil.connectToBARD(serverURL);
conn.setAutoCommit(false);
// zero out untested compounds
this.zeroUntestedCompounds();
long start = System.currentTimeMillis();
updateCompoundStatus(sqlSelectCIDActiveData,sqlUpdateCIDActiveCnt, "active expt update");
logger.info("ET active update = " + ((float)(System.currentTimeMillis()-start))/1000.0/60.0);
conn.commit();
start = System.currentTimeMillis();
updateCompoundStatus(sqlSelectCIDTestedData,sqlUpdateCIDTestedCnt, "tested expt update");
logger.info("ET tested update = " + ((float)(System.currentTimeMillis()-start))/1000.0/60.0);
conn.commit();
start = System.currentTimeMillis();
updateCompoundStatus(sqlSelectCIDActiveAssayData,sqlUpdateCIDAssayActiveCnt, "active assay update");
logger.info("ET active Assay update = " + ((float)(System.currentTimeMillis()-start))/1000.0/60.0);
conn.commit();
start = System.currentTimeMillis();
updateCompoundStatus(sqlSelectCIDTestedAssayData,sqlUpdateCIDAssayTestedCnt, "tested assay update");
logger.info("ET tested Assay update = " + ((float)(System.currentTimeMillis()-start))/1000.0/60.0);
conn.commit();
logger.info("Finished compound tested state update");
conn.close();
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
private void zeroUntestedCompounds() {
try {
logger.info("Begin Zero Untested Compounds");
//collect tested cids
Statement stmt = conn.createStatement();
stmt.setFetchSize(Integer.MIN_VALUE); //stream
ResultSet rs = stmt.executeQuery("select cid from bard_experiment_data");
HashSet <Long> testedCidSet = new HashSet <Long>();
while(rs.next()) {
testedCidSet.add(rs.getLong(1));
}
rs.close();
logger.info("Tested cid set size = "+testedCidSet.size());
//don't loop through all cids, just check if any with tested stats are not in the tested cid list
//collect tested cids in compound
rs = stmt.executeQuery("select cid from compound where tested_expt_cnt > 0");
ArrayList <Long> cidsToZero = new ArrayList <Long>();
while(rs.next()) {
if(!testedCidSet.contains(rs.getLong(1))) {
cidsToZero.add(rs.getLong(1));
}
}
rs.close();
stmt.close();
testedCidSet = null;
logger.info("Number of CIDs to zero (previously tested, now retired):"+cidsToZero.size());
PreparedStatement ps = conn.prepareStatement(
"update compound set tested_expt_cnt = 0, active_expt_cnt = 0, " +
"tested_assay_cnt = 0, active_assay_cnt = 0 where cid = ?");
long cnt = 0;
for(Long cid : cidsToZero) {
ps.setLong(1, cid);
ps.addBatch();
if(cnt % 100 == 0) {
ps.executeBatch();
conn.commit();
}
}
ps.executeBatch();
conn.commit();
ps.close();
logger.info("Finished zeroing untested compounds");
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public void updateCompoundStatus(String selectSQL, String updateSQL, String msg) {
try {
PreparedStatement ps = conn.prepareStatement(selectSQL);
ps.setFetchSize(Integer.MIN_VALUE);
ResultSet rs = ps.executeQuery();
Hashtable <Long, Long> hash = new Hashtable<Long,Long>();
while(rs.next()) {
hash.put(rs.getLong(1), rs.getLong(2));
}
rs.close();
ps = conn.prepareStatement(updateSQL);
Enumeration <Long> keys = hash.keys();
Long cid;
Long cnt;
long updateCnt = 0;
while(keys.hasMoreElements()) {
cid = keys.nextElement();
cnt = hash.get(cid);
ps.setLong(1,cnt);
ps.setLong(2,cid);
ps.addBatch();
updateCnt++;
if(updateCnt % 1000 == 0) {
ps.executeBatch();
conn.commit();
logger.info(msg+" Update CNT="+updateCnt);
}
}
ps.executeBatch();
conn.commit();
ps.close();
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
logger.warn(e.getMessage());
}
}
}