package gov.nih.ncgc.bard.resourcemgr.extresource.pubchem; import ftp.FtpBean; import ftp.FtpException; import ftp.FtpListResult; import gov.nih.ncgc.bard.resourcemgr.BardDBUtil; import gov.nih.ncgc.bard.resourcemgr.BardExtResourceLoader; import gov.nih.ncgc.bard.resourcemgr.BardExternalResource; import gov.nih.ncgc.bard.resourcemgr.IBardExtResourceLoader; import gov.nih.ncgc.bard.resourcemgr.precomp.BardCompoundTestStatsWorker; import gov.nih.ncgc.util.MolFpFactory; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.PrintWriter; import java.sql.Connection; import java.sql.DatabaseMetaData; import java.sql.Date; import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; import java.util.Properties; import java.util.Vector; import java.util.logging.Logger; import java.util.zip.GZIPInputStream; import chemaxon.formats.MolFormatException; import chemaxon.formats.MolImporter; import chemaxon.struc.Molecule; import chemaxon.util.MolHandler; public class PubchemCompoundLoader extends BardExtResourceLoader implements IBardExtResourceLoader { static final private Logger logger = Logger.getLogger(PubchemCompoundLoader.class.getName()); private String SERVICE_KEY_FULL_LOAD = "COMPOUND-REFRESH-FULL"; private String SERVICE_KEY_DAILY_LOAD = "COMPOUND-REFRESH-DAILY"; private String SERVICE_KEY_SPECIFIC_LOAD = "COMPOUND-REFRESH-SPECIFIC-DIR"; private int MAX_RINGs_FOR_FP_COMP = 75; public final String cidKey = "PUBCHEM_COMPOUND_CID"; public final String iupacNameKey = "PUBCHEM_IUPAC_NAME"; public final String pubchemFormulaKey = "PUBCHEM_MOLECULAR_FORMULA"; public final String pubchemCanSmilesKey = "PUBCHEM_OPENEYE_CAN_SMILES"; public final String pubchemIsoSmilesKey = "PUBCHEM_OPENEYE_ISO_SMILES"; public final String pubchemMwKey = "PUBCHEM_MOLECULAR_WEIGHT"; public final String pubchemMonoisoMwKey = "PUBCHEM_MONOISOTOPIC_WEIGHT"; //compound property keys and corresponding column types private Vector <String> propertyKeys; private Vector <String> types; //handle batches of 25 files at a time private int fileBatchSize = 10; //db connection private Connection conn; //molecule sdf file parser private MolImporter mi; private String sqlCompoundInsert = "insert into temp_compound (cid, formula, iupac_name, can_smiles, iso_smiles, mw, mw_monoiso)" + "values (?,?,?,?,?,?,?)"; private String sqlMolfileInsert = "insert into temp_compound_molfile (cid, molfile_mol) values (?,?)"; private String sqlCompoundPropInsert = "insert into temp_compound_props values ("; private String sqlCompoundFpInsert = "insert into temp_compound_fp values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"; private String sqlCompoundReplace = "replace into compound (cid, formula, iupac_name, can_smiles, iso_smiles, mw, mw_monoiso)" + "values (?,?,?,?,?,?,?)"; private String sqlMolfileReplace = "replace into compound_molfile (cid, molfile_mol) values (?,?)"; private String sqlCompoundPropReplace = "replace into compound_props values ("; private String sqlCompoundFpReplace = "replace into compound_fp values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"; private String sqlTempCompoundFpReplace = "replace into temp_compound_fp values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"; private String updateCompoundDate = "update compound set creation = ? where cid = ?"; private String sqlUpdateKilledCID = "update compound set is_killed = true where cid = ?"; //prepared statements private PreparedStatement insertCompoundPS, insertCompoundPropsPS, insertMolfilePS, insertCompoundFpPS; private PreparedStatement replaceCompoundPS, replaceCompoundPropsPS, replaceMolfilePS, replaceCompoundFpPS, replaceTempCompoundFpPS; //connection and processing global variables private String dbURL, driver, user, pw; private MolFpFactory molFpFactory; private long molCount; private String path; private String tempPath; private File file, tempFile; private GZIPInputStream zis; private String tempFileName; private byte [] buffer; private int len; private FileOutputStream fos; private File directory; private String [] fileArr; private Molecule mol; private InputStream is; private MolHandler mh; private boolean insert = true; private boolean insertFP = false; private long cid = -1; private String type; private int [] fpArr; private int fpIndex; private String val; int heavyAtomCount = 0; public PubchemCompoundLoader() { } @Override public boolean load() { //set local paths, the temp path is where subsets of the data are sent for extraction and loading //this enables batching path = service.getLocalResPath(); tempPath = path + "/temp/"; log.info("STARTING COMPOUND LOAD"); //need to fetch all resources under a given URL //resource fetch is pretty specfic for this load so it's handled within the loader boolean haveFiles = false; try { haveFiles = fetchResources(); } catch (IOException e) { e.printStackTrace(); } catch (FtpException e) { e.printStackTrace(); } //kick off the batch loading process String commandKey = service.getServiceKey(); boolean loaded = false; if(haveFiles) { if(commandKey.contains(SERVICE_KEY_FULL_LOAD)) { //full load into temp tables. batchLoadCompounds(service.getLocalResPath(), service.getDbURL()); loaded = true; } else { batchReplaceCompounds(); loaded = true; } } else { log.info("FAILED TO UPDATE RESOURCE FILES: ABORT"); } log.info("NOT Calling to update compound test status!!! Rework pending..."); // rework stats worker for low impact selects. //after compound upload, refresh testing results BardCompoundTestStatsWorker statWorker = new BardCompoundTestStatsWorker(); statWorker.updateCompoundTestStatus(service.getDbURL()); log.info("Finished test status update."); return loaded; } private boolean fetchResources() throws IOException, FtpException { boolean haveFiles = false; //evaluate the kind of load String commandKey = service.getServiceKey(); FtpBean ftp = new FtpBean(); ArrayList <BardExternalResource> resources = service.getExtResources(); if(resources == null || resources.size() < 1) return false; BardExternalResource resource = resources.get(0); String dest = service.getLocalResPath(); //clear dest directory File destDir = new File(dest); String [] files = destDir.list(); File file; for(String fileName : files) { file = new File(destDir+"/"+fileName); if(file.isFile()) { file.delete(); } } //connect ftp.ftpConnect(resource.getResourceServer(), resource.getResourceUserName(), resource.getResourcePassword()); if(commandKey.contains(SERVICE_KEY_FULL_LOAD)) { //update the whole thing ftp.setDirectory(resource.getResourcePath()); FtpListResult list = ftp.getDirectoryContent(); while (list.next() ) { ftp.getBinaryFile(list.getName(), dest+"/"+list.getName()); } haveFiles = true; } else if(commandKey.contains(SERVICE_KEY_DAILY_LOAD)) { //fetch today's latest resources ftp.setDirectory(resource.getResourcePath()); FtpListResult list = ftp.getDirectoryContent(); //collect the data directories ArrayList<String> dateList = new ArrayList<String>(); while (list.next()) { dateList.add(list.getName()); } String [] dateArr = new String[dateList.size()]; for(int i = 0; i < dateArr.length; i++) { dateArr[i] = dateList.get(i); } Arrays.sort(dateArr); String dateDir = dateArr[dateArr.length-1]; ftp.setDirectory(resource.getResourcePath()+"/"+dateDir+"/SDF"); list = ftp.getDirectoryContent(); while (list.next() ) { ftp.getBinaryFile(list.getName(), dest+"/"+list.getName()); } haveFiles = true; } else if(commandKey.contains(SERVICE_KEY_SPECIFIC_LOAD)) { log.info("SPECIFIC COMPOUND LOAD FETCHING FILES"); //fetch all resources within the external resource directory ftp.setDirectory(resource.getResourcePath()); FtpListResult list = ftp.getDirectoryContent(); while (list.next() ) { ftp.getBinaryFile(list.getName(), dest+"/"+list.getName()); } haveFiles=true; } ftp.close(); return haveFiles; } @Override public String getLoadStatusReport() { return null; } /** * Loads compound files found at path supplied in constructor * and expects .sdf.gz files (sdf files in gzip format). */ public void batchLoadCompounds(String filePath, String dbURL) { try { path = filePath; tempPath = path + "/temp_load/"; //get connection conn = BardDBUtil.connect(dbURL); conn.setAutoCommit(false); //prepare sql statements prepareSQLInsertStatements(); //get compound property column names and types to automate property load getPropertyColumnNamesAndTypes(); //need to modify statement based on number of property columns in schema prepareInsertPropertyStatement(); //make a vector of file name batches to process Vector <Vector<String>> fileBatches = partitionFiles(path); //set a molecule counter, create an ftp factory this.molCount = 0; molFpFactory = MolFpFactory.getInstance(16, 2, 6); mh = new MolHandler(mol); //iterate over file batches for(Vector<String> files: fileBatches) { copyAndUnzipFilesToTemp(files); loadCompounds(files); this.deleteFilesInTemp(); } logger.info("load complete"); conn.commit(); conn.close(); logger.info("closed connection"); //fp generation might not be complete for compounds for many rings. //large compounds are excluded with heavy atom count > 100. //this method evaluates the number of rings and evaluates those with fewer than maxRings updateFPValuesForLargeCompoundsPassingRingCount(dbURL, true, 75); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (ClassNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } } /** * Loads compound files found at path supplied in constructor * and expects .sdf.gz files (sdf files in gzip format). */ public void batchReplaceCompounds() { try { //get connection conn = BardDBUtil.connect(service.getDbURL()); conn.setAutoCommit(false); //prepare sql statements prepareReplaceSQLInsertStatements(); //get compound property column names and types to automate property load getPropertyColumnNamesAndTypes(); //need to modify statement based on number of property columns in schema prepareReplacePropertyStatement(); //make a vector of file name batches to process Vector <Vector<String>> fileBatches = partitionFiles(path); //set a molecule counter, create an ftp factory this.molCount = 0; molFpFactory = MolFpFactory.getInstance(16, 2, 6); //iterate over file batches for(Vector<String> files: fileBatches) { copyAndUnzipFilesToTemp(files); insertOrUpdateCompounds(); this.deleteFilesInTemp(); } logger.info("load complete"); conn.commit(); conn.close(); logger.info("closed connection"); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (ClassNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } } /** * Loads compound files found at path supplied in constructor * and expects .sdf.gz files. */ public long batchReplaceCompounds(Properties loaderProps) { long newCmpdCnt = -1; try { //get connection conn = BardDBUtil.connect(); conn.setAutoCommit(false); newCmpdCnt = getCompoundCount(); //prepare sql statements prepareReplaceSQLInsertStatements(); //get compound property column names and types to automate property load getPropertyColumnNamesAndTypes(); //need to modify statement based on number of property columns in schema prepareReplacePropertyStatement(); //make a vector of file name batches to process path = loaderProps.getProperty("bard.loader.scratch.dir") + "/Compound"; tempPath = path+"/temp/"; //make file list partition Vector <Vector<String>> fileBatches = partitionFiles(path); //make temp dir File tempDir = new File(tempPath); tempDir.mkdir(); //set a molecule counter, create an ftp factory this.molCount = 0; molFpFactory = MolFpFactory.getInstance(16, 2, 6); //iterate over file batches for(Vector<String> files: fileBatches) { copyAndUnzipFilesToTemp(files); insertOrUpdateCompounds(); deleteFilesInTemp(); } logger.info("load complete"); conn.commit(); //Now update the killed cid updateKilledCIDFlags( loaderProps.getProperty("bard.filepath.pubhchem.compound.killedcids")+ "/"+loaderProps.getProperty("bard.filename.pubchem.compound.killedcids")); conn.commit(); conn.close(); logger.info("closed connection"); newCmpdCnt = getCompoundCount() - newCmpdCnt; } catch (FileNotFoundException e) { e.printStackTrace(); } catch (SQLException e) { e.printStackTrace(); } catch (ClassNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return newCmpdCnt; } /* * Precondition: Connection conn is connected and autocommit is false. * @param killedCIDFileName full path to filename * @throws IOException * @throws SQLException */ private void updateKilledCIDFlags(String killedCIDFilePath) throws IOException, SQLException { logger.info("Updating Killed CID Flag in Compound"); File file = new File(killedCIDFilePath); if(file.exists()) { BufferedReader br = new BufferedReader(new FileReader(file)); String line = null; Vector <String> cidList = new Vector<String>(); while((line = br.readLine()) != null) { line = line.trim(); if(line != "") { cidList.add(line); } } br.close(); long cidLong; PreparedStatement ps = conn.prepareStatement(sqlUpdateKilledCID); for(String cid : cidList) { try { cidLong = Long.parseLong(cid); ps.setLong(1, cidLong); ps.execute(); conn.commit(); } catch (NumberFormatException nfe) { nfe.printStackTrace(); continue; } } } } /** * Loads compound files found at path supplied in constructor * and expects .sdf.gz files (sdf files in gzip format). */ public void batchLoadCompoundFp() { try { //get connection connect(); //prepare sql statements prepareSQLInsertStatements(); //get compound property column names and types to automate property load getPropertyColumnNamesAndTypes(); //need to modify statement based on number of property columns in schema prepareInsertPropertyStatement(); //make a vector of file name batches to process Vector <Vector<String>> fileBatches = partitionFiles(path); //iterate over file batches for(Vector<String> files: fileBatches) { copyAndUnzipFilesToTemp(files); loadCompoundFp(); this.deleteFilesInTemp(); } logger.info("load complete"); conn.commit(); conn.close(); logger.info("closed connection"); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); } } public long getCompoundCount() { long compoundCnt = 0; try { Connection connection = BardDBUtil.connect(); Statement stmt = connection.createStatement(); ResultSet rs = stmt.executeQuery("select count(cid) from compound"); if(rs.next()) { compoundCnt = rs.getLong(1); } connection.close(); } catch (ClassNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); } return compoundCnt; } private void connect() { try { Class.forName(driver); conn = DriverManager.getConnection(dbURL, user, pw); conn.setAutoCommit(false); } catch (ClassNotFoundException e) { e.printStackTrace(); logger.warning("Error: No driver class found for driver="+driver); } catch (SQLException e) { e.printStackTrace(); } } private void prepareSQLInsertStatements() { try { insertCompoundPS = conn.prepareStatement(sqlCompoundInsert); insertMolfilePS = conn.prepareStatement(sqlMolfileInsert); insertCompoundFpPS = conn.prepareStatement(sqlCompoundFpInsert); } catch (SQLException e) { e.printStackTrace(); } } private void prepareReplaceSQLInsertStatements() { try { replaceCompoundPS = conn.prepareStatement(sqlCompoundReplace); replaceMolfilePS = conn.prepareStatement(sqlMolfileReplace); replaceCompoundFpPS = conn.prepareStatement(sqlCompoundFpReplace); } catch (SQLException e) { e.printStackTrace(); } } //populates the property keys and types private void getPropertyColumnNamesAndTypes() throws SQLException { propertyKeys = new Vector <String> (); types = new Vector <String> (); DatabaseMetaData metadata = conn.getMetaData(); ResultSet rs = metadata.getColumns(null,null, "compound_props", null); while(rs.next()) { propertyKeys.add(rs.getString("COLUMN_NAME")); types.add(rs.getString("TYPE_NAME")); } //dump key names for(String key: propertyKeys) { logger.info("key ="+key); } for(String type:types) { logger.info("type ="+type); } } //augment the property insert with the appropreiate number of fields according to schema //requires that propertyKeys is populated private void prepareInsertPropertyStatement() throws SQLException { for(int i = 0; i < propertyKeys.size()-1; i++) { sqlCompoundPropInsert += "?,"; } sqlCompoundPropInsert += "?)"; insertCompoundPropsPS = conn.prepareStatement(sqlCompoundPropInsert); } private void prepareReplacePropertyStatement() throws SQLException { for(int i = 0; i < propertyKeys.size()-1; i++) { sqlCompoundPropReplace += "?,"; } sqlCompoundPropReplace += "?)"; replaceCompoundPropsPS = conn.prepareStatement(sqlCompoundPropReplace); } public void loadCompounds(Vector<String> files) throws SQLException { directory = new File(tempPath); //fileArr = directory.list(); int fileCount = 0; for(String fileName : files) { fileName = fileName.replace(".gz", ""); if(fileName.endsWith(".sdf")) { loadCompound(tempPath+fileName); fileCount++; } logger.info("loaded file "+fileCount+" name= "+fileName); } } private void insertOrUpdateCompounds() { File directory = new File(tempPath); String [] files = directory.list(); int fileCount = 1; for(String fileName : files) { if(fileName.endsWith(".sdf")) { insertOrUpdateCompound(tempPath+"/"+fileName); fileCount++; } logger.info("loaded file "+fileCount+" name= "+fileName); } } public void loadCompoundFp() throws SQLException { File directory = new File(tempPath); String [] files = directory.list(); int fileCount = 1; for(String fileName : files) { if(fileName.endsWith(".sdf")) { loadCompoundFp(tempPath+fileName); fileCount++; } logger.info("loaded file "+fileCount+" name= "+fileName); } } private void loadCompoundFp(String filePath) { InputStream is; try { is = new FileInputStream(filePath); MolImporter mi = new MolImporter (is); String cidStr; long cid; int fpIndex; int [] fpArr; for (Molecule mol = new Molecule (); mi.read(mol); ) { cidStr = mol.getProperty("PUBCHEM_COMPOUND_CID"); if(cidStr != null) { cid = Long.parseLong(cidStr); this.insertCompoundFpPS.setLong(1, cid); MolHandler mh = new MolHandler(mol); mh.aromatize(); //aromatize BEFORE fingerprinting fpArr = mh.generateFingerprintInInts(16,2,6); fpIndex = 2; for(int fp : fpArr) { this.insertCompoundFpPS.setLong(fpIndex, convertToUnsignedInt(fp)); fpIndex++; } } insertCompoundFpPS.execute(); molCount++; if(molCount % 1000 == 0) { conn.commit(); } } is.close(); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (NumberFormatException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } private void loadCompound(String filePath) { insert = true; insertFP = false; cid = -1; try { is = new FileInputStream(filePath); mi = new MolImporter (is); for (mol = new Molecule (); mi.read(mol); ) { //log.info("Loading molecule (load count):" + molCount); molCount++; fpArr = null; insertFP = false; int keyIndex = 1; for(String key : propertyKeys) { type = types.get(keyIndex-1); try { val = null; val = mol.getProperty(key); if(type.equalsIgnoreCase("varchar")) { if(mol.getProperty(key) != null) { this.insertCompoundPropsPS.setString(keyIndex, mol.getProperty(key)); } else { this.insertCompoundPropsPS.setNull(keyIndex, java.sql.Types.VARCHAR); } } else if(type.equalsIgnoreCase("int")) { if(mol.getProperty(key) != null) { this.insertCompoundPropsPS.setInt(keyIndex, Integer.parseInt(mol.getProperty(key))); } else { this.insertCompoundPropsPS.setNull(keyIndex, java.sql.Types.INTEGER); } } else if(type.equalsIgnoreCase("bigint")) { if(mol.getProperty(key) != null) { this.insertCompoundPropsPS.setLong(keyIndex, Long.parseLong(mol.getProperty(key))); } else { this.insertCompoundPropsPS.setNull(keyIndex, java.sql.Types.BIGINT); } } else if(type.equalsIgnoreCase("float")) { if(mol.getProperty(key) != null) { this.insertCompoundPropsPS.setFloat(keyIndex, Float.parseFloat(mol.getProperty(key))); } else { this.insertCompoundPropsPS.setNull(keyIndex, java.sql.Types.FLOAT); } } else if(type.equalsIgnoreCase("text")) { if(mol.getProperty(key) != null) { this.insertCompoundPropsPS.setString(keyIndex, mol.getProperty(key)); } else { this.insertCompoundPropsPS.setNull(keyIndex, java.sql.Types.VARCHAR); } } } catch (Exception e) { System.out.println("key numberFormatException: "+key); e.printStackTrace(); keyIndex++; continue; } keyIndex++; } //now take care of compound cid = -1; //cid if(mol.getProperty(cidKey) != null) { insert = true; insertCompoundPS.setLong(1, Long.parseLong(mol.getProperty(cidKey))); cid = Long.parseLong(mol.getProperty(cidKey)); } else { insert = false; //don't enter if there's no clear cid logger.warning("Entry didn't load, no CID, filepath: "+filePath); } //formula insertCompoundPS.setString(2, mol.getFormula()); //IUPAC name insertCompoundPS.setString(3, mol.getProperty(this.iupacNameKey)); //can smiles insertCompoundPS.setString(4, mol.getProperty(this.pubchemCanSmilesKey)); //iso smiles insertCompoundPS.setString(5, mol.getProperty(this.pubchemIsoSmilesKey)); //mw if(mol.getProperty(this.pubchemMwKey) != null) { insertCompoundPS.setFloat(6, Float.parseFloat(mol.getProperty(this.pubchemMwKey))); } else { insertCompoundPS.setNull(6, java.sql.Types.FLOAT); } //monoisotopic mw if(mol.getProperty(this.pubchemMonoisoMwKey) != null) { insertCompoundPS.setFloat(7, Float.parseFloat(mol.getProperty(this.pubchemMonoisoMwKey))); } else { insertCompoundPS.setNull(7, java.sql.Types.FLOAT); } heavyAtomCount = 0; if(mol.getProperty("PUBCHEM_HEAVY_ATOM_COUNT") != null) { try { heavyAtomCount = Integer.parseInt(mol.getProperty("PUBCHEM_HEAVY_ATOM_COUNT")); } catch (NumberFormatException nfe) { log.info("Can't get heavy atom count for cid="+cid); heavyAtomCount = 0; } } if(heavyAtomCount < 100) { //Insert the compoundFP fpArr = molFpFactory.generate(mol); fpIndex = 2; mh.setMolecule(mol); mh.aromatize(); //aromatize BEFORE fingerprinting fpArr = mh.generateFingerprintInInts(16,2,6); if(fpArr != null && insert) { if(fpArr.length == 16) insertFP = true; this.insertCompoundFpPS.setLong(1, cid); for(int fp : fpArr) { this.insertCompoundFpPS.setLong(fpIndex, convertToUnsignedInt(fp)); fpIndex++; } } } else { //if too big, set to zero, log the CID, come back to compute this one based on molfile. //in cleanup round log.info("No Fingerprint Computed for CID="+cid+" Heavy atom count="+heavyAtomCount); insertFP = true; insertCompoundFpPS.setLong(1, cid); for(int i = 0; i < 16; i++) { insertCompoundFpPS.setLong(i+2, convertToUnsignedInt(0)); } } if(insert) { //insert the compound insertCompoundPS.addBatch(); //insert the compound properties insertCompoundPropsPS.addBatch(); //insert the molfile loadMolFile(cid, mol); //insert fp if(insertFP) insertCompoundFpPS.addBatch(); } else { logger.warning("Didn't Insert CID="+cid); } //check for commit if(molCount % 100 == 0) { insertCompoundPS.executeBatch(); insertCompoundPropsPS.executeBatch(); insertMolfilePS.executeBatch(); insertCompoundFpPS.executeBatch(); conn.commit(); if(molCount % 100000 == 0) { logger.info("Mol Commit Total ="+molCount); } } } //execute and commit the end of the file insertCompoundPS.executeBatch(); insertCompoundPropsPS.executeBatch(); insertMolfilePS.executeBatch(); insertCompoundFpPS.executeBatch(); conn.commit(); is.close(); mi.close(); is=null; mi=null; } catch (MolFormatException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (SQLException e) { // TODO Auto-generated catch block logger.warning("Didn't insert in tables CID="+cid); e.printStackTrace(); } } private void insertOrUpdateCompound(String filePath) { boolean insert = true; boolean insertFP = false; long cid = -1; String type; int [] fpArr; int fpIndex; String val; try { InputStream is = new FileInputStream(filePath); MolImporter mi = new MolImporter (is); for (Molecule mol = new Molecule (); mi.read(mol); ) { molCount++; fpArr = null; insertFP = false; int keyIndex = 1; for(String key : propertyKeys) { type = types.get(keyIndex-1); try { val = null; val = mol.getProperty(key); if(type.equalsIgnoreCase("varchar")) { if(mol.getProperty(key) != null) { this.replaceCompoundPropsPS.setString(keyIndex, mol.getProperty(key)); } else { this.replaceCompoundPropsPS.setNull(keyIndex, java.sql.Types.VARCHAR); } } else if(type.equalsIgnoreCase("int")) { if(mol.getProperty(key) != null) { this.replaceCompoundPropsPS.setInt(keyIndex, Integer.parseInt(mol.getProperty(key))); } else { this.replaceCompoundPropsPS.setNull(keyIndex, java.sql.Types.INTEGER); } } else if(type.equalsIgnoreCase("bigint")) { if(mol.getProperty(key) != null) { this.replaceCompoundPropsPS.setLong(keyIndex, Long.parseLong(mol.getProperty(key))); } else { this.replaceCompoundPropsPS.setNull(keyIndex, java.sql.Types.BIGINT); } } else if(type.equalsIgnoreCase("float")) { if(mol.getProperty(key) != null) { this.replaceCompoundPropsPS.setFloat(keyIndex, Float.parseFloat(mol.getProperty(key))); } else { this.replaceCompoundPropsPS.setNull(keyIndex, java.sql.Types.FLOAT); } } else if(type.equalsIgnoreCase("text")) { if(mol.getProperty(key) != null) { this.replaceCompoundPropsPS.setString(keyIndex, mol.getProperty(key)); } else { this.replaceCompoundPropsPS.setNull(keyIndex, java.sql.Types.VARCHAR); } } } catch (Exception e) { System.out.println("key numberFormatException: "+key); e.printStackTrace(); keyIndex++; continue; } keyIndex++; } //now take care of compound cid = -1; //cid if(mol.getProperty(cidKey) != null) { insert = true; replaceCompoundPS.setLong(1, Long.parseLong(mol.getProperty(cidKey))); cid = Long.parseLong(mol.getProperty(cidKey)); } else { insert = false; //don't enter if there's no clear cid logger.warning("Entry didn't load, no CID, filepath: "+filePath); } //formula replaceCompoundPS.setString(2, mol.getFormula()); //IUPAC name replaceCompoundPS.setString(3, mol.getProperty(this.iupacNameKey)); //can smiles replaceCompoundPS.setString(4, mol.getProperty(this.pubchemCanSmilesKey)); //iso smiles replaceCompoundPS.setString(5, mol.getProperty(this.pubchemIsoSmilesKey)); //mw if(mol.getProperty(this.pubchemMwKey) != null) { replaceCompoundPS.setFloat(6, Float.parseFloat(mol.getProperty(this.pubchemMwKey))); } else { replaceCompoundPS.setNull(6, java.sql.Types.FLOAT); } //monoisotopic mw if(mol.getProperty(this.pubchemMonoisoMwKey) != null) { replaceCompoundPS.setFloat(7, Float.parseFloat(mol.getProperty(this.pubchemMonoisoMwKey))); } else { replaceCompoundPS.setNull(7, java.sql.Types.FLOAT); } //Insert the compoundFP fpArr = molFpFactory.generate(mol); fpIndex = 2; MolHandler mh = new MolHandler(mol); mh.aromatize(); //aromatize BEFORE fingerprinting fpArr = mh.generateFingerprintInInts(16,2,6); if(fpArr != null && insert) { if(fpArr.length == 16) insertFP = true; this.replaceCompoundFpPS.setLong(1, cid); for(int fp : fpArr) { this.replaceCompoundFpPS.setLong(fpIndex, convertToUnsignedInt(fp)); fpIndex++; } } if(insert) { //insert the compound replaceCompoundPS.execute(); //insert the compound properties replaceCompoundPropsPS.execute(); //insert the molfile insertOrUpdateMolFile(cid, mol); //insert fp if(insertFP) replaceCompoundFpPS.execute(); } else { logger.warning("Didn't Insert CID="+cid); } //check for commit if(molCount % 1000 == 0) { conn.commit(); logger.info("Mol Commit Total ="+molCount); } } is.close(); } catch (MolFormatException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (SQLException e) { // TODO Auto-generated catch block logger.warning("Didn't insert in tables CID="+cid); e.printStackTrace(); } } public void copyAndUnzipFilesToTemp(Vector <String> fileNameV) { buffer = new byte[1024]; int len; for(String fileName: fileNameV) { System.out.println("file="+fileName); file = new File(path+"/"+fileName); if(!file.isDirectory()) { tempFileName = tempPath+fileName; tempFileName = tempFileName.substring(0, tempFileName.lastIndexOf(".gz")); tempFile = new File(tempFileName); try { zis = new GZIPInputStream(new FileInputStream(file)); fos = new FileOutputStream(tempFile); while( (len = zis.read(buffer)) > 0 ) { fos.write(buffer, 0, len); } fos.flush(); fos.close(); zis.close(); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } } public void deleteFilesInTemp() { // if(!tempPath.endsWith("temp/")) { // logger.info("Didn't delete files. Wrong DIR"); // } File dir = new File(tempPath); String [] fileNames = dir.list(); File file; for(String fileName : fileNames) { file = new File(tempPath+fileName); file.delete(); } logger.info("Deleted Temp Files"); } private void loadMolFile(long cid, Molecule mol) { try { insertMolfilePS.setLong(1, cid); insertMolfilePS.setString(2, mol.toFormat("mol")); insertMolfilePS.addBatch(); } catch (SQLException e) { e.printStackTrace(); } } private void insertOrUpdateMolFile(long cid, Molecule mol) { try { replaceMolfilePS.setLong(1, cid); replaceMolfilePS.setString(2, mol.toFormat("mol")); replaceMolfilePS.execute(); } catch (SQLException e) { e.printStackTrace(); } } public void closeConnections() { try { System.out.println("close conn and factory"); molFpFactory = null; if(conn != null && !conn.isClosed()) conn.close(); } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); } } /* * Makes collections of file names to run as loading batches. */ private Vector <Vector<String>> partitionFiles(String path) throws FileNotFoundException { File dir = new File(path); String [] files = dir.list(); //sort the arrays Arrays.sort(files); Vector <Vector<String>> fileBatches = new Vector <Vector<String>> (); Vector <String> fileVector = new Vector <String>(); int fileCount = 0; boolean partialBatch = true; for(String fileName : files) { if(fileName.endsWith(".gz")) { fileVector.add(fileName); fileCount++; partialBatch = true; if(fileCount % fileBatchSize == 0) { fileBatches.add(fileVector); fileVector = new Vector <String>(); partialBatch = false; } } } if(partialBatch) { fileBatches.add(fileVector); } return fileBatches; } public void fp() { try { //360022 String filePath = "C:/Test_Data/compound/test/temp/ChEBI_50252.mol"; InputStream is = new FileInputStream(filePath); mi = new MolImporter (is); MolFpFactory molFpFactory = MolFpFactory.getInstance(16,2,5); int [] fpArr; int [] fpMolArr; int fpIndex; String cidStr; for (Molecule mol = new Molecule (); mi.read(mol); ) { MolHandler mh = new MolHandler(mol); mh.aromatize(); System.out.println("finished aromatize"); fpArr = mh.generateFingerprintInInts(16,2,6); for(int i = 0; i < fpArr.length; i++) { long longVal; longVal = (convertToUnsignedInt((fpArr[i]))); //System.out.print(longVal+"\t"); } } } catch (Exception e) { } } public static long convertToUnsignedInt(int input) { return input & 0xFFFFFFFFL; } public boolean updateCompoundCreateDate(Properties loaderProps) { try { String cidDateFilePath = loaderProps.getProperty("bard.loader.scratch.dir") + "/Compound-Extras/CID-Date.gz"; //if it's a gzip, gunzip if(cidDateFilePath.endsWith(".gz")) { GZIPInputStream gzis = new GZIPInputStream(new FileInputStream(cidDateFilePath)); byte [] buffer = new byte[1024]; cidDateFilePath = cidDateFilePath.replace(".gz", ""); FileOutputStream fos = new FileOutputStream(cidDateFilePath); int len; while((len = gzis.read(buffer)) > 0) { fos.write(buffer, 0, len); } gzis.close(); fos.close(); } conn = BardDBUtil.connect(); conn.setAutoCommit(false); PreparedStatement ps = conn.prepareStatement(updateCompoundDate); BufferedReader bw = new BufferedReader(new FileReader(cidDateFilePath)); String line; String [] toks; long updateCnt = 0; while( (line = bw.readLine()) != null) { toks = line.split("\t"); if(toks.length < 2) continue; try { ps.setLong(2, Long.parseLong(toks[0])); ps.setDate(1 , Date.valueOf(toks[1])); ps.addBatch(); updateCnt++; if(updateCnt % 1000 == 0) { ps.executeBatch(); conn.commit(); } if(updateCnt % 100000 == 0) { logger.info("Compound Date update cnt="+updateCnt); } } catch (Exception e) { e.printStackTrace(); continue; } } ps.executeUpdate(); conn.commit(); conn.close(); logger.info("Compound Date update complete. Update Count="+updateCnt); return true; }catch (SQLException sqle) { sqle.printStackTrace(); return false; } catch (IOException e) { e.printStackTrace(); return false; } catch (ClassNotFoundException e) { e.printStackTrace(); return false; } } public boolean updateCompoundRank(Properties loaderProps) { try { Connection conn = BardDBUtil.connect(loaderProps.getProperty("bard.db.connection.url"), loaderProps.getProperty("bard.db.driver.name"), "bard_manager", "bard_manager"); Statement stmt = conn.createStatement(); logger.info("Start compound rank update."); stmt.execute("create table if not exists temp_compound_rank like compound_rank"); logger.info("Created temp_compound_rank."); stmt.execute("truncate table temp_compound_rank"); stmt.execute("insert into temp_compound_rank select * from compound_rank"); logger.info("Inserted compound_rank data into temp_compound_rank. Starting popularity update."); stmt.execute("insert into temp_compound_rank (cid, popularity) "+ "select id,@syn_count:=count(id) from synonyms where type = 1 "+ "group by id ON DUPLICATE KEY UPDATE popularity=@syn_count"); logger.info("Starting sid count rank update."); stmt.execute("insert into temp_compound_rank (cid, sid_count) "+ "select cid, @sid_c := count(sid) from cid_sid " + "group by cid ON DUPLICATE KEY UPDATE sid_count=@sid_c"); logger.info("Finished temp rank update."); //do a table swap if passes delta BardDBUtil.swapTempTableToProductionIfPassesSizeDelta("temp_compound_rank", "compound_rank", 0.98); conn.close(); } catch (SQLException e) { e.printStackTrace(); return false; } catch (ClassNotFoundException e) { e.printStackTrace(); return false; } return true; } public void dumpPubchemXLOGP3(String dataDir, String stageFilePath) { this.path = dataDir; this.tempPath = dataDir+"/temp/"; try { Vector <Vector <String>>batches = this.partitionFiles(dataDir); File tempDir = new File(this.tempPath); String xlogStr; String cid; PrintWriter pw = new PrintWriter(new FileWriter(stageFilePath)); long cidsProcessed = 0; long cidsWithXLogP = 0; for(Vector <String> files : batches) { this.copyAndUnzipFilesToTemp(files); File [] sdfFiles = tempDir.listFiles(); for(File file : sdfFiles) { if(!file.getName().endsWith(".sdf")) continue; InputStream is = new FileInputStream(file); MolImporter mi = new MolImporter (is); for (Molecule mol = new Molecule (); mi.read(mol); ) { cidsProcessed++; xlogStr = mol.getProperty("PUBCHEM_XLOGP3"); cid = mol.getProperty("PUBCHEM_COMPOUND_CID"); if(cid != null && xlogStr != null) { pw.println(cid.trim()+"\t"+xlogStr.trim()); cidsWithXLogP++; } if(cidsProcessed % 100000 == 0) { logger.info("Processed lines = "+cidsProcessed+" cids with xlogP "+cidsWithXLogP); } } mi.close(); is.close(); } this.deleteFilesInTemp(); } pw.flush(); pw.close(); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (MolFormatException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } public void updateFPValuesForLargeCompoundsPassingRingCount(String dbURL, boolean loadIntoTemp, int maxRings) { try { conn = BardDBUtil.connect(dbURL); conn.setAutoCommit(false); Statement stmt = conn.createStatement(); stmt.setFetchSize(Integer.MIN_VALUE); ArrayList <Long> cidList = new ArrayList<Long>(); String updateMsg = "Collecting compounds without fp costructed due to molecule size. Max Rings: "; if(maxRings > 0) { updateMsg += maxRings+" (based on INPUT RING LIMIT)"; } else { updateMsg += this.MAX_RINGs_FOR_FP_COMP+" (based on DEFAULT RING LIMIT)"; } log.info(updateMsg); //set table names based on whether it's a temp load or not. String compoundMolfileTableName = "compound_molfile"; String compoundFpTableName = "compound_fp"; if(loadIntoTemp) { compoundMolfileTableName = "temp_compound_molfile"; compoundFpTableName = "temp_compound_fp"; } //just grab the compounds with zero for fp, just check some entries, not all. ResultSet rs = stmt.executeQuery("select cid from "+compoundFpTableName+" where " + "fp1=0 and fp2=0 and fp4=0 and fp8=0 and fp12=0 and fp15=0"); while(rs.next()) { cidList.add(rs.getLong(1)); } rs.close(); log.info("Total CID count without Fp = "+cidList.size()); log.info("Starting SSSR evaluation to build list of fp candidates"); String molFile; Molecule mol; MolHandler mh = new MolHandler(); MolImporter mi = new MolImporter(); molFpFactory = MolFpFactory.getInstance(16, 2, 6); int rings; int [] bins = new int[6]; for(int i = 0; i < 6; i++) { bins[i]=0; } Integer cnt; ArrayList <Long> cidsToProcess = new ArrayList<Long>(); for(long cid : cidList) { rs = stmt.executeQuery("select molfile_mol from "+compoundMolfileTableName+" where cid = "+cid); if(rs.next()) { mol = MolImporter.importMol(rs.getString(1)); rings = mol.getEdgeCount() - mol.getAtomCount() + 1; //if max rings is set to a resonable value use it, if(maxRings > 0) { if(rings <= maxRings) { cidsToProcess.add(cid); } } else { // else update the rings based on default value if(rings <= MAX_RINGs_FOR_FP_COMP) { cidsToProcess.add(cid); } } } rs.close(); } //release the cid list cidList = null; log.info("Finished SSSR evaluation to build list of fp candidates. CID for Fp count="+cidsToProcess.size()); log.info("Start FP Generation"); int [] fpArr; long start, finished; long molCnt = 0; //prepare fp insert replaceTempCompoundFpPS = conn.prepareStatement(sqlTempCompoundFpReplace); //make a new non-streaming stmt. stmt = conn.createStatement(); for(long cid : cidsToProcess) { rs = stmt.executeQuery("select molfile_mol from "+compoundMolfileTableName+" where cid = "+cid); if(rs.next()) { start = System.currentTimeMillis(); mol = MolImporter.importMol(rs.getString(1)); fpArr = molFpFactory.generate(mol); finished = System.currentTimeMillis(); molCnt++; if( (finished - start) > 900000) { log.info("Fingerprint required > 15min. to complete, cid="+cid+" time="+((finished-start)/1000f/60f)+" min."); } replaceTempCompoundFpPS.setLong(1, cid); fpIndex = 2; for(int fp : fpArr) { replaceTempCompoundFpPS.setLong(fpIndex, convertToUnsignedInt(fp)); fpIndex++; } //execute the update, most time spent generating fp replaceTempCompoundFpPS.executeUpdate(); conn.commit(); if(molCnt % 1000 == 0) { log.info("Processing large compound fp, processed count: "+molCnt+" of "+cidsToProcess.size()+" compounds"); } } } conn.commit(); conn.close(); log.info("Finished fp large molecule cleanup"); } catch (ClassNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (MolFormatException e) { // TODO Auto-generated catch block e.printStackTrace(); } } /** * A utility method to add cids from source db to dest db. * @param sourceDBURL * @param destDBURL * @return */ public long synchronizeCompoundBetweenServers(String sourceDBURL, String destDBURL) { long updateCnt = 0; log.info("start bohr to maxwell sync"); try { Connection sourceConn = BardDBUtil.connect(sourceDBURL); Connection destConn = BardDBUtil.connect(destDBURL); destConn.setAutoCommit(true); Statement destStmt = destConn.createStatement(); destStmt.setFetchSize(Integer.MIN_VALUE); ResultSet rs = destStmt.executeQuery("select cid from compound"); long cid; ResultSet rsSource; HashSet <Long> destCidList = new HashSet <Long>(); // HashSet <Long> sourceCidList2 = new HashSet <Long>(); // HashSet <Long> sourceCidList3 = new HashSet <Long>(); ArrayList <Long> sourceCidList = new ArrayList <Long>(); long cmpcnt = 0; while (rs.next()) { cmpcnt++; cid = rs.getLong(1); if(cid != 0) { destCidList.add(cid); } if(cmpcnt % 1000000 == 0) { log.info("Compound interogation count="+cmpcnt); } } rs.close(); log.info("Dest cid list size ="+destCidList.size()); Statement stmt = sourceConn.createStatement(); stmt.setFetchSize(Integer.MIN_VALUE); rsSource = stmt.executeQuery("select cid from compound"); cmpcnt = 0; while (rsSource.next()) { cmpcnt++; cid = rsSource.getLong(1); if(!destCidList.contains(cid)) sourceCidList.add(cid); if(cmpcnt % 1000000 == 0) { log.info("Compound dest interogation count="+cmpcnt); } } log.info("new cid count = "+sourceCidList.size()); destCidList = null; rsSource.close(); //We have the list of cids to copy over to maxwell String sqlCompoundInsert = "replace into compound (cid, formula, iupac_name, can_smiles, iso_smiles, mw, mw_monoiso)" + "values (?,?,?,?,?,?,?)"; String sqlMolfileInsert = "replace into compound_molfile values (?,?)"; String sqlCompoundPropInsert = "replace into compound_props values ("; String sqlCompoundFpInsert = "replace into compound_fp values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"; for(int i = 0; i < 34 ; i++) { sqlCompoundPropInsert += "?,"; } sqlCompoundPropInsert = sqlCompoundPropInsert.substring(0, sqlCompoundPropInsert.length()-1); sqlCompoundPropInsert += ")"; PreparedStatement compoundPS = destConn.prepareStatement(sqlCompoundInsert); PreparedStatement molfilePS = destConn.prepareStatement(sqlMolfileInsert); PreparedStatement propsPS = destConn.prepareStatement(sqlCompoundPropInsert); PreparedStatement fpPS = destConn.prepareStatement(sqlCompoundFpInsert); PreparedStatement compoundQuery = sourceConn.prepareStatement("select * from compound where cid = ?"); PreparedStatement molfileQuery = sourceConn.prepareStatement("select * from compound_molfile where cid = ?"); PreparedStatement propsQuery = sourceConn.prepareStatement("select * from compound_props where pubchem_compound_cid = ?"); PreparedStatement fpQuery = sourceConn.prepareStatement("select * from compound_fp where cid = ?"); ResultSet queryRS; cmpcnt = 0; for(long missingCid : sourceCidList) { cmpcnt++; //compound compoundQuery.setLong(1, missingCid); queryRS = compoundQuery.executeQuery(); if(queryRS.next()) { //(cid, formula, iupac_name, can_smiles, iso_smiles, mw, mw_monoiso) compoundPS.setLong(1, queryRS.getLong("cid")); compoundPS.setString(2, queryRS.getString("formula")); compoundPS.setString(3, queryRS.getString("iupac_name")); compoundPS.setString(4, queryRS.getString("can_smiles")); compoundPS.setString(5, queryRS.getString("iso_smiles")); compoundPS.setDouble(6, queryRS.getDouble("mw")); compoundPS.setDouble(7, queryRS.getDouble("mw_monoiso")); compoundPS.execute(); } //molfile molfileQuery.setLong(1, missingCid); queryRS = molfileQuery.executeQuery(); if(queryRS.next()) { molfilePS.setLong(1, missingCid); molfilePS.setString(2, queryRS.getString(2)); molfilePS.execute(); } //props propsQuery.setLong(1, missingCid); queryRS = propsQuery.executeQuery(); if(queryRS.next()) { propsPS.setLong(1, missingCid); for(int i = 2 ; i < 35 ; i++) { propsPS.setString(i, queryRS.getString(i)); } propsPS.execute(); } //fp fpQuery.setLong(1, missingCid); queryRS = fpQuery.executeQuery(); if(queryRS.next()) { fpPS.setLong(1, missingCid); for(int i = 2; i < 18; i++) { fpPS.setLong(i, queryRS.getLong(i)); } fpPS.execute(); } if(cmpcnt % 10000 == 0) { log.info("Insert/Sync compound count ="+cmpcnt); } } sourceConn.close(); destConn.close(); } catch (ClassNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); } return updateCnt; } public static void main(String [] args) { PubchemCompoundLoader loader = new PubchemCompoundLoader(); //loader.dumpPubchemXLOGP3("/ifs/prod/braistedjc/db_scripts/pubchem_compound", "/ifs/prod/braistedjc/db_scripts/conf/cid_xlogp3_tmp.txt"); //loader.synchronizeCompoundBetweenServers("jdbc:mysql://bohr.ncats.nih.gov:3306/bard3?zeroDateTimeBehavior=convertToNull", "jdbc:mysql://maxwell.ncats.nih.gov:3306/bard3?zeroDateTimeBehavior=convertToNull"); // loader.updateFPValuesForLargeCompoundsPassingRingCount("jdbc:mysql://bohr.ncats.nih.gov:3306/bard3?zeroDateTimeBehavior=convertToNull", true, 75); // loader.batchLoadCompounds("/ifs/prod/bard/resource_mgr/bard-scratch/Compound/", "jdbc:mysql://bohr.ncats.nih.gov:3306/bard3?zeroDateTimeBehavior=convertToNull"); //// if(args.length != 3) { //// logger.warning("Need paramters: path=<path_to_sdf_files> url=<database_url> driver=<driver_name>"); //// System.exit(1); //// } // // String arg, path, dbURL, driver, user, pw; // path = dbURL = driver = user = pw = null; // pw = "bard_manager"; // user = "bard_manager"; // // driver = "com.mysql.jdbc.Driver"; // dbURL = "jdbc:mysql://protein.nhgri.nih.gov:3306/bard2?zeroDateTimeBehavior=convertToNull"; // path = "/ifs/prod/braistedjc/db_scripts/pubchem_compound/Date/CID-Date"; // // String [] toks; // //// for(int i = 0; i < args.length; i++) { //// arg = args[i]; //// toks = arg.split("="); //// if(toks.length != 2) { //// logger.warning("Need paramters: path=<path_to_sdf_files> url=<database_url> driver=<driver_name>"); //// logger.warning("Make sure \'=\' is used for each parameter"); //// System.exit(1); //// } //// if(toks[0].equals("path")) { //// path = toks[1].trim(); //// } else if(toks[0].equals("url")) { //// dbURL = toks[1].trim(); //// } else if(toks[0].equals("driver")) { //// driver = toks[1].trim(); //// } //// } // //// if(path == null || dbURL == null || driver == null ||user == null || pw == null) { //// logger.warning("Have a null param. Need paramters: path=<path_to_sdf_files> url=<database_url> driver=<driver_name>"); //// System.exit(1); //// } // // //dbURL = "jdbc:mysql://protein.nhgri.nih.gov:3306/bard2?zeroDateTimeBehavior=convertToNull" // //driver = "com.mysql.jdbc.Driver" // MLBDCompoundLoader loader = new MLBDCompoundLoader(dbURL, driver, user, pw, path); // //loader.batchReplaceCompounds(); //loader.updateCompoundCreateDate(path); //MLBDCompoundLoader loader = new MLBDCompoundLoader(); //loader.fp(); //MLBDCompoundLoader loader = new MLBDCompoundLoader(); // loader.batchLoadCompoundFp(); System.exit(0); } }