package gov.nih.ncgc.bard.resourcemgr.util;
import java.io.BufferedInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Properties;
import java.util.Set;
import java.util.Vector;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import ftp.FtpBean;
import ftp.FtpException;
import ftp.FtpListResult;
import gov.nih.ncgc.bard.resourcemgr.BardDBManager;
/**
 * Fetches external resources (PubChem compound/substance dumps, GO association and term files,
 * KEGG disease data, UniProt data, assay description archives) over FTP or HTTP into local
 * scratch directories, and provides small utilities to gunzip and untar the downloaded files.
 *
 * <p>The {@code boolean}-returning {@code fetch...(Properties)} methods never propagate
 * {@link IOException} or {@link FtpException}: failures are logged and reported as
 * {@code false}. Every FTP connection opened by this class is closed before the method
 * returns, on success and on failure alike.
 */
public class BardResourceFetch {

    private static final Logger logger = Logger.getLogger(BardResourceFetch.class.getName());

    /**
     * Value of {@code FtpListResult.getType()} used to pick the date-named sub-folders.
     * NOTE(review): presumably the library's "directory" type code - confirm against the
     * FtpListResult constants.
     */
    private static final int FTP_TYPE_DIRECTORY = 1;

    /** Buffer size used by all stream copies in this class. */
    private static final int COPY_BUFFER_SIZE = 2048;

    /** Files that must be present in a compound "extras" directory. */
    private static final String[] COMPOUND_EXTRAS = {
        "CID-Date.gz", "CID-SID.gz", "CID-Synonym-filtered.gz"
    };

    /** Files that must be present in the substance "extras" directory. */
    private static final String[] SUBSTANCE_EXTRAS = {
        "SID-Date.gz", "Source-Names"
    };

    /**
     * Path suffixes that {@link #clearBardScratch(String)} is allowed to empty. Some entries
     * require a trailing slash and some do not; the strings are matched exactly as written.
     * NOTE(review): there is no "Uniprot" entry, so the clear requested by
     * {@link #fetchUniprotDatFile(Properties)} is always refused - confirm whether that is intended.
     */
    private static final String[] CLEARABLE_DIR_SUFFIXES = {
        "Compound/",
        "Compound-Extras/",
        "Substance/",
        "Substance-Extras/",
        "KEGG-Disease",
        "GO-Associations",
        "GO-Term-Database",
        "assay_desc_zip",
        "Compound-Killed-CIDs/"
    };

    /**
     * Downloads every entry listed in a remote FTP directory into a local directory.
     *
     * @param server          FTP host
     * @param user            FTP user
     * @param pw              FTP password
     * @param pathToSourceDir remote directory to list
     * @param dest            local directory; each entry is written to {@code dest + "/" + name}
     * @return {@code false} if the reply message after any transfer did not start with "2"
     * @throws IOException  on a transport error
     * @throws FtpException on an FTP protocol error
     */
    public boolean fetchFTPDirectoryResource(String server, String user, String pw, String pathToSourceDir, String dest) throws IOException, FtpException {
        boolean transfer = true;
        FtpBean ftpbean = new FtpBean();
        try {
            ftpbean.ftpConnect(server, user, pw);
            ftpbean.setDirectory(pathToSourceDir);
            FtpListResult list = ftpbean.getDirectoryContent();
            while (list.next()) {
                ftpbean.getBinaryFile(list.getName(), dest + "/" + list.getName());
                if (!ftpbean.getReplyMessage().startsWith("2")) {
                    transfer = false;
                }
            }
        } finally {
            disconnect(ftpbean);
        }
        return transfer;
    }

    /**
     * Downloads a single remote file.
     *
     * @param server     FTP host
     * @param user       FTP user
     * @param pw         FTP password
     * @param pathToFile remote file path
     * @param dest       local file path to write
     * @return {@code true} if the reply message after the transfer starts with "2"
     * @throws IOException  on a transport error
     * @throws FtpException on an FTP protocol error
     */
    public boolean fetchFTPFileResource(String server, String user, String pw, String pathToFile, String dest) throws IOException, FtpException {
        FtpBean ftpbean = new FtpBean();
        try {
            ftpbean.ftpConnect(server, user, pw);
            ftpbean.getBinaryFile(pathToFile, dest);
            return ftpbean.getReplyMessage().startsWith("2");
        } finally {
            disconnect(ftpbean);
        }
    }

    /**
     * Downloads every entry of a remote directory to the single local path {@code dest}.
     * NOTE(review): every entry is written to the same {@code dest}, so when the directory
     * holds several entries only the last one survives - confirm this is intended.
     *
     * @param server          FTP host
     * @param user            FTP user
     * @param pw              FTP password
     * @param pathToSourceDir remote directory to list
     * @param dest            local file path passed to every transfer
     * @return always {@code true} when no exception is thrown
     * @throws IOException  on a transport error
     * @throws FtpException on an FTP protocol error
     */
    public boolean fetchFTPCurrentDirectoryResource(String server, String user, String pw, String pathToSourceDir,String dest) throws IOException, FtpException {
        FtpBean ftpbean = new FtpBean();
        try {
            ftpbean.ftpConnect(server, user, pw);
            ftpbean.setDirectory(pathToSourceDir);
            FtpListResult list = ftpbean.getDirectoryContent();
            while (list.next()) {
                ftpbean.getBinaryFile(list.getName(), dest);
            }
        } finally {
            disconnect(ftpbean);
        }
        return true;
    }

    /**
     * Prints date, name and type of every entry in a remote directory to standard output.
     * Nothing is downloaded; {@code dest} is unused.
     *
     * @param server          FTP host
     * @param user            FTP user
     * @param pw              FTP password
     * @param pathToSourceDir remote directory to list
     * @param dest            unused
     * @return always {@code null}
     * @throws IOException  on a transport error
     * @throws FtpException on an FTP protocol error
     */
    public String fetchLatestUpdateResources(String server, String user, String pw, String pathToSourceDir, String dest) throws IOException, FtpException {
        FtpBean ftpbean = new FtpBean();
        try {
            ftpbean.ftpConnect(server, user, pw);
            ftpbean.setDirectory(pathToSourceDir);
            FtpListResult list = ftpbean.getDirectoryContent();
            while (list.next()) {
                System.out.println(list.getDate());
                System.out.println(list.getName());
                System.out.println(list.getType());
            }
        } finally {
            disconnect(ftpbean);
        }
        return null;
    }

    /**
     * Deletes the regular files directly inside a scratch directory. Sub-directories are left
     * alone. As a safety check the path must end with one of the known scratch directory
     * suffixes; any other path (including {@code null}) is refused with a warning.
     *
     * @param dirPath path of the directory to empty
     */
    public void clearBardScratch(String dirPath) {
        if (dirPath == null || !isClearableScratchDir(dirPath)) {
            logger.warning("Didn't clear scratch, incorrect dir:" + dirPath);
            return;
        }
        logger.info("Deleting files in:" + dirPath);
        // listFiles() returns null when the path is not a directory or cannot be read.
        File[] entries = new File(dirPath).listFiles();
        if (entries == null) {
            return;
        }
        for (File entry : entries) {
            if (!entry.isFile()) {
                continue;
            }
            if (entry.delete()) {
                logger.info("Delete previous file = " + entry.getName());
            } else {
                logger.warning("Could not delete previous file = " + entry.getName());
            }
        }
    }

    /**
     * Downloads the newest PubChem compound SDF files, the killed-CID list and the compound
     * extras. The newest data set is the sub-directory of the configured base directory whose
     * name sorts last.
     *
     * @param props                   loader configuration
     * @param resourceUpdatePeriodKey one of the {@code BardDBManager.COMPOUND_*} period keys;
     *                                any other value falls back to the daily directory
     * @return {@code true} only if no error occurred and all compound extras were found
     */
    public boolean fetchLatestCompoundResources(Properties props, int resourceUpdatePeriodKey){
        String compoundBaseDir = props.getProperty("pubchem.compound.daily.dir");
        if (resourceUpdatePeriodKey == BardDBManager.COMPOUND_DAILY) {
            compoundBaseDir = props.getProperty("pubchem.compound.daily.dir");
        } else if (resourceUpdatePeriodKey == BardDBManager.COMPOUND_WEEKLY) {
            compoundBaseDir = props.getProperty("pubchem.compound.weekly.dir");
        } else if (resourceUpdatePeriodKey == BardDBManager.COMPOUND_MONTHLY) {
            compoundBaseDir = props.getProperty("pubchem.compound.monthly.dir");
        }

        String scratchDir = props.getProperty("bard.loader.scratch.dir");
        if (isNullOrEmpty(scratchDir)) {
            logger.warning("Missing property bard.loader.scratch.dir");
            return false;
        }
        // Validate before any download so a configuration gap does not surface half-way through.
        String killedCidDir = props.getProperty("bard.filepath.pubhchem.compound.killedcids");
        if (isNullOrEmpty(killedCidDir)) {
            logger.warning("Missing property bard.filepath.pubhchem.compound.killedcids");
            return false;
        }
        String killedCidFileName = props.getProperty("bard.filename.pubchem.compound.killedcids");

        String destDir = scratchDir + "/Compound/";
        prepareScratchDir(destDir);
        logger.info("Made the temp Compound Dir:" + destDir);

        FtpBean ftpbean = new FtpBean();
        try {
            connectNcbi(ftpbean, props);
            String latest = findLatestSubdirectory(ftpbean, compoundBaseDir);
            if (latest == null) {
                logger.warning("No dated sub-directories found under " + compoundBaseDir);
                return false;
            }
            String latestDir = compoundBaseDir + "/" + latest;

            ftpbean.setDirectory(latestDir + "/SDF");
            logger.info("Set FTP LATEST source directory. RESP:" + ftpbean.getReply() + "Dir = " + ftpbean.getDirectory());
            downloadFilesWithSuffix(ftpbean, ".gz", destDir);

            // The killed-CID list sits next to the SDF folder of the same data set.
            logger.info("Retrieving Killed CID list.");
            ftpbean.setDirectory(latestDir);
            logger.info("Current Remote Dir Changed. Reply = " + ftpbean.getReply());
            clearBardScratch(killedCidDir);
            FtpListResult list = ftpbean.getDirectoryContent();
            while (list.next()) {
                if (list.getName().equalsIgnoreCase(killedCidFileName)) {
                    ftpbean.getBinaryFile(list.getName(), joinPath(killedCidDir, list.getName()));
                }
            }

            return fetchExtras(ftpbean, props.getProperty("pubchem.compound.extras.dir"),
                    scratchDir + "/Compound-Extras/", COMPOUND_EXTRAS);
        } catch (FtpException e) {
            logFailure("Fetching latest compound resources", e);
            return false;
        } catch (IOException e) {
            logFailure("Fetching latest compound resources", e);
            return false;
        } finally {
            disconnect(ftpbean);
        }
    }

    /**
     * Downloads the human, mouse and rat GO association files over HTTP into the configured
     * GO association directory, after clearing that directory.
     *
     * @param dbManagerProps loader configuration
     * @return {@code true} if all three files were downloaded
     */
    public boolean fetchGOHTTPAssociationResources(Properties dbManagerProps) {
        String humanURL = dbManagerProps.getProperty("go.http.associations.human.file");
        String mouseURL = dbManagerProps.getProperty("go.http.associations.mouse.file");
        String ratURL = dbManagerProps.getProperty("go.http.associations.rat.file");
        String destPath = dbManagerProps.getProperty("bard.filepath.go.association.dir");
        if (isNullOrEmpty(destPath)) {
            logger.warning("Missing property bard.filepath.go.association.dir");
            return false;
        }
        clearBardScratch(destPath);
        try {
            getHttpFile(humanURL, joinPath(destPath, "human_go.gz"));
            logger.info("Have human go gzip");
            getHttpFile(mouseURL, joinPath(destPath, "mouse_go.gz"));
            logger.info("Have mosue go gzip");
            getHttpFile(ratURL, joinPath(destPath, "rat_go.gz"));
            logger.info("Have rat go gzip");
        } catch (IOException e) {
            // MalformedURLException is an IOException, so bad URLs land here too.
            logFailure("Fetching GO association files over HTTP", e);
            return false;
        }
        return true;
    }

    /**
     * Copies the content behind a URL into a local file. Both streams are closed even when
     * the copy fails.
     *
     * @param urlStr         URL to read
     * @param outputFilePath local file to (over)write
     * @throws IOException if the URL is malformed or the copy fails
     */
    public static void getHttpFile(String urlStr, String outputFilePath) throws IOException {
        URL url = new URL(urlStr);
        InputStream in = null;
        OutputStream out = null;
        try {
            in = new BufferedInputStream(url.openStream());
            out = new FileOutputStream(outputFilePath);
            copy(in, out);
            // Close explicitly on the success path so a failing close is reported to the caller.
            out.close();
        } finally {
            closeQuietly(in);
            closeQuietly(out);
        }
    }

    /**
     * Downloads the human, mouse and rat GO association files over FTP into the configured
     * GO association directory. The directory is not cleared first.
     *
     * @param dbManagerProps loader configuration
     * @return {@code true} if all three transfers completed without an exception
     */
    public boolean fetchGOAssociationResources(Properties dbManagerProps) {
        String goRemoteFtpAssocDir = dbManagerProps.getProperty("go.ftp.associations.dir");
        String goLocalScratchDir = dbManagerProps.getProperty("bard.filepath.go.association.dir");
        if (isNullOrEmpty(goLocalScratchDir)) {
            logger.warning("Missing property bard.filepath.go.association.dir");
            return false;
        }
        String[] remoteFiles = {
            dbManagerProps.getProperty("go.ftp.associations.human.file"),
            dbManagerProps.getProperty("go.ftp.associations.mouse.file"),
            dbManagerProps.getProperty("go.ftp.associations.rat.file")
        };
        FtpBean ftpbean = new FtpBean();
        try {
            ftpbean.ftpConnect(dbManagerProps.getProperty("go.ftp.server"), dbManagerProps.getProperty("go.ftp.user"));
            logger.info("Established go ftp connection with:" + dbManagerProps.getProperty("go.ftp.server"));
            ftpbean.setDirectory(goRemoteFtpAssocDir);
            logger.info("Set Remote FTP Directory:" + goRemoteFtpAssocDir);
            for (String remoteFile : remoteFiles) {
                ftpbean.getBinaryFile(remoteFile, joinPath(goLocalScratchDir, remoteFile));
                logger.info("Have file:" + remoteFile);
            }
            return true;
        } catch (IOException e) {
            logFailure("Fetching GO association files over FTP", e);
            return false;
        } catch (FtpException e) {
            logFailure("Fetching GO association files over FTP", e);
            return false;
        } finally {
            disconnect(ftpbean);
        }
    }

    /**
     * Downloads the GO term database archive over HTTP into the configured GO term directory,
     * after clearing that directory.
     *
     * @param props loader configuration
     * @return {@code true} if the archive was downloaded
     */
    public boolean fetchGOTermDBData(Properties props) {
        String goTermDBDir = props.getProperty("bard.filepath.go.termdb.dir");
        String goTermTarZip = props.getProperty("bard.filename.go.termdb.targzip");
        if (isNullOrEmpty(goTermDBDir)) {
            logger.warning("Missing property bard.filepath.go.termdb.dir");
            return false;
        }
        clearBardScratch(goTermDBDir);
        String goDBURL = props.getProperty("go.http.termdb.file");
        try {
            getHttpFile(goDBURL, joinPath(goTermDBDir, goTermTarZip));
        } catch (IOException e) {
            logFailure("Fetching GO term database", e);
            return false;
        }
        return true;
    }

    /**
     * Downloads the {@code .gz} files of one specific remote compound directory plus the
     * compound extras.
     *
     * @param props          loader configuration
     * @param ftpResourceDir remote directory holding the compound files
     * @return {@code true} only if no error occurred and all compound extras were found
     */
    public boolean fetchSpecificCompoundResources(Properties props, String ftpResourceDir){
        String scratchDir = props.getProperty("bard.loader.scratch.dir");
        if (isNullOrEmpty(scratchDir)) {
            logger.warning("Missing property bard.loader.scratch.dir");
            return false;
        }
        // Trailing slash is required for clearBardScratch to accept the directory.
        String destDir = scratchDir + "/Compound/";
        prepareScratchDir(destDir);
        logger.info("Made the temp Compound Dir:" + destDir);

        FtpBean ftpbean = new FtpBean();
        try {
            connectNcbi(ftpbean, props);
            ftpbean.setDirectory(ftpResourceDir);
            logger.info("Set FTP source directory. RESP:" + ftpbean.getReply());
            downloadFilesWithSuffix(ftpbean, ".gz", destDir);
            return fetchExtras(ftpbean, props.getProperty("pubchem.compound.extras.dir"),
                    scratchDir + "/Compound-Extras/", COMPOUND_EXTRAS);
        } catch (FtpException e) {
            logFailure("Fetching specific compound resources", e);
            return false;
        } catch (IOException e) {
            logFailure("Fetching specific compound resources", e);
            return false;
        } finally {
            disconnect(ftpbean);
        }
    }

    /**
     * Downloads the compound extras from the {@code Extras} folder of the newest weekly
     * compound data set.
     *
     * @param props loader configuration
     * @return {@code true} only if no error occurred and all compound extras were found
     */
    public boolean fetchCompoundWeeklyExtraRsources(Properties props) {
        String compoundWeeklyBaseDir = props.getProperty("pubchem.compound.weekly.dir");
        String scratchDir = props.getProperty("bard.loader.scratch.dir");
        if (isNullOrEmpty(scratchDir)) {
            logger.warning("Missing property bard.loader.scratch.dir");
            return false;
        }
        String destDir = scratchDir + "/Compound-Extras/";

        FtpBean ftpbean = new FtpBean();
        try {
            connectNcbi(ftpbean, props);
            String latest = findLatestSubdirectory(ftpbean, compoundWeeklyBaseDir);
            if (latest == null) {
                logger.warning("No dated sub-directories found under " + compoundWeeklyBaseDir);
                return false;
            }
            String extrasDir = compoundWeeklyBaseDir + "/" + latest + "/Extras/";
            logger.info("Remote directory found: " + extrasDir);
            return fetchExtras(ftpbean, extrasDir, destDir, COMPOUND_EXTRAS);
        } catch (FtpException e) {
            logFailure("Fetching weekly compound extras", e);
            return false;
        } catch (IOException e) {
            logFailure("Fetching weekly compound extras", e);
            return false;
        } finally {
            disconnect(ftpbean);
        }
    }

    /**
     * Downloads the newest PubChem substance SDF files and the substance extras. The newest
     * data set is the sub-directory of the configured base directory whose name sorts last.
     *
     * @param props                   loader configuration
     * @param resourceUpdatePeriodKey one of the {@code BardDBManager.SUBSTANCE_*} period keys;
     *                                any other value falls back to the daily directory
     * @return {@code true} only if no error occurred and all substance extras were found
     */
    public boolean fetchLatestSubstanceResources(Properties props, int resourceUpdatePeriodKey){
        String substanceBaseDir = props.getProperty("pubchem.substance.daily.dir");
        if (resourceUpdatePeriodKey == BardDBManager.SUBSTANCE_DAILY) {
            substanceBaseDir = props.getProperty("pubchem.substance.daily.dir");
        } else if (resourceUpdatePeriodKey == BardDBManager.SUBSTANCE_WEEKLY) {
            substanceBaseDir = props.getProperty("pubchem.substance.weekly.dir");
        } else if (resourceUpdatePeriodKey == BardDBManager.SUBSTANCE_MONTHLY) {
            substanceBaseDir = props.getProperty("pubchem.substance.monthly.dir");
        }

        String scratchDir = props.getProperty("bard.loader.scratch.dir");
        if (isNullOrEmpty(scratchDir)) {
            logger.warning("Missing property bard.loader.scratch.dir");
            return false;
        }
        String destDir = scratchDir + "/Substance/";
        prepareScratchDir(destDir);
        logger.info("Made the temp Substance Dir:" + destDir);

        FtpBean ftpbean = new FtpBean();
        try {
            connectNcbi(ftpbean, props);
            String latest = findLatestSubdirectory(ftpbean, substanceBaseDir);
            if (latest == null) {
                logger.warning("No dated sub-directories found under " + substanceBaseDir);
                return false;
            }
            ftpbean.setDirectory(substanceBaseDir + "/" + latest + "/SDF");
            logger.info("Set FTP LATEST source directory. RESP:" + ftpbean.getReply() + "Dir = " + ftpbean.getDirectory());
            downloadFilesWithSuffix(ftpbean, ".gz", destDir);
            return fetchExtras(ftpbean, props.getProperty("pubchem.substance.extras.dir"),
                    scratchDir + "/Substance-Extras/", SUBSTANCE_EXTRAS);
        } catch (FtpException e) {
            logFailure("Fetching latest substance resources", e);
            return false;
        } catch (IOException e) {
            logFailure("Fetching latest substance resources", e);
            return false;
        } finally {
            disconnect(ftpbean);
        }
    }

    /**
     * Downloads the {@code .gz} files of one specific remote substance directory plus the
     * substance extras.
     *
     * @param props           loader configuration
     * @param ftpResourcePath remote directory holding the substance files
     * @return {@code true} only if no error occurred and all substance extras were found
     */
    public boolean fetchSpecificSubstanceResources(Properties props, String ftpResourcePath){
        String scratchDir = props.getProperty("bard.loader.scratch.dir");
        if (isNullOrEmpty(scratchDir)) {
            logger.warning("Missing property bard.loader.scratch.dir");
            return false;
        }
        String destDir = scratchDir + "/Substance/";
        prepareScratchDir(destDir);
        logger.info("Made the temp Substance Dir:" + destDir);

        FtpBean ftpbean = new FtpBean();
        try {
            connectNcbi(ftpbean, props);
            ftpbean.setDirectory(ftpResourcePath);
            logger.info("Set FTP source directory. RESP:" + ftpbean.getReply());
            logger.info("Set FTP LATEST source directory. RESP:" + ftpbean.getReply() + "Dir = " + ftpbean.getDirectory());
            downloadFilesWithSuffix(ftpbean, ".gz", destDir);
            return fetchExtras(ftpbean, props.getProperty("pubchem.substance.extras.dir"),
                    scratchDir + "/Substance-Extras/", SUBSTANCE_EXTRAS);
        } catch (FtpException e) {
            logFailure("Fetching specific substance resources", e);
            return false;
        } catch (IOException e) {
            logFailure("Fetching specific substance resources", e);
            return false;
        } finally {
            disconnect(ftpbean);
        }
    }

    /**
     * Downloads the KEGG disease file into {@code <scratch>/KEGG-Disease}, after clearing
     * that directory.
     *
     * @param props loader configuration
     * @return {@code true} if the FTP reply after the transfer starts with "2"
     */
    public boolean fetchKEGGDiseaseFile(Properties props) {
        String scratchDir = props.getProperty("bard.loader.scratch.dir");
        if (isNullOrEmpty(scratchDir)) {
            logger.warning("Missing property bard.loader.scratch.dir");
            return false;
        }
        FtpBean ftpbean = new FtpBean();
        try {
            ftpbean.ftpConnect(props.getProperty("kegg.ftp.server"), props.getProperty("kegg.ftp.user"));
            ftpbean.setDirectory(props.getProperty("kegg.disease.dir"));
            String destDir = scratchDir + "/KEGG-Disease";
            prepareScratchDir(destDir);
            String fileName = props.getProperty("kegg.disease.filename");
            ftpbean.getBinaryFile(fileName, destDir + "/" + fileName);
            return ftpbean.getReply().trim().startsWith("2");
        } catch (IOException e) {
            logFailure("Fetching KEGG disease file", e);
            return false;
        } catch (FtpException e) {
            logFailure("Fetching KEGG disease file", e);
            return false;
        } finally {
            disconnect(ftpbean);
        }
    }

    /**
     * Downloads every {@code .zip} file from the configured PubChem assay description
     * directory into the configured local directory, after clearing that directory.
     *
     * @param props loader configuration
     * @return {@code true} if at least one zip file was downloaded without error
     */
    public boolean fetchAssayDescriptionMetatdataXMLZips(Properties props) {
        String bioassayDescriptionBase = props.getProperty("pubchem.assay.description.root");
        String localBioassayZipDir = props.getProperty("bard.filepath.mlpcn.assaydesczip");
        if (isNullOrEmpty(localBioassayZipDir)) {
            logger.warning("Missing property bard.filepath.mlpcn.assaydesczip");
            return false;
        }
        clearBardScratch(localBioassayZipDir);
        FtpBean ftpbean = new FtpBean();
        try {
            connectNcbi(ftpbean, props);
            ftpbean.setDirectory(bioassayDescriptionBase);
            logger.info("Changed DIR, Reply: " + ftpbean.getReply());
            FtpListResult list = ftpbean.getDirectoryContent();
            int zipCnt = 0;
            while (list.next()) {
                String fileName = list.getName();
                if (fileName.endsWith(".zip")) {
                    ftpbean.getBinaryFile(fileName, joinPath(localBioassayZipDir, fileName));
                    zipCnt++;
                    logger.info("Download Assay XML ZIP File (" + zipCnt + "): " + fileName);
                }
            }
            logger.info("Finished Assay Description Download, File Count =" + zipCnt);
            // Only require at least one file here; a too-low assay count is caught later
            // before the production table is updated.
            return zipCnt > 0;
        } catch (IOException e) {
            logFailure("Fetching assay description zips", e);
            return false;
        } catch (FtpException e) {
            logFailure("Fetching assay description zips", e);
            return false;
        } finally {
            disconnect(ftpbean);
        }
    }

    /**
     * Downloads the configured UniProt data file into {@code <scratch>/Uniprot}.
     *
     * @param props loader configuration
     * @return {@code true} if the FTP reply after the transfer starts with "2"
     */
    public boolean fetchUniprotDatFile(Properties props) {
        String scratchDir = props.getProperty("bard.loader.scratch.dir");
        if (isNullOrEmpty(scratchDir)) {
            logger.warning("Missing property bard.loader.scratch.dir");
            return false;
        }
        FtpBean ftpbean = new FtpBean();
        try {
            ftpbean.ftpConnect(props.getProperty("uniprot.ftp.server"), props.getProperty("uniprot.ftp.user"), "");
            ftpbean.setDirectory(props.getProperty("uniprot.data.current.dir"));
            logger.info("Uniprot Change Dir Response=" + ftpbean.getReply());
            String destDir = scratchDir + "/Uniprot";
            // NOTE(review): "Uniprot" is not an accepted clearBardScratch suffix, so the clear
            // inside prepareScratchDir is refused and older files stay in place.
            prepareScratchDir(destDir);
            String fileName = props.getProperty("uniprot.data.file");
            logger.info("ftploc = " + ftpbean.getDirectory() + " file name =" + fileName);
            ftpbean.getBinaryFile(fileName, destDir + "/" + fileName);
            return ftpbean.getReply().trim().startsWith("2");
        } catch (IOException e) {
            logFailure("Fetching Uniprot data file", e);
            return false;
        } catch (FtpException e) {
            logFailure("Fetching Uniprot data file", e);
            return false;
        } finally {
            disconnect(ftpbean);
        }
    }

    /**
     * Decompresses a gzip file. Both streams are closed even when decompression fails.
     *
     * @param sourcePath gzip file to read
     * @param destPath   file to (over)write with the decompressed content
     * @throws FileNotFoundException if either file cannot be opened
     * @throws IOException           if reading, decompressing or writing fails
     */
    public static void gunzipFile(String sourcePath, String destPath) throws FileNotFoundException, IOException {
        InputStream raw = new FileInputStream(sourcePath);
        InputStream in = null;
        OutputStream out = null;
        try {
            in = new GZIPInputStream(raw);
            out = new FileOutputStream(destPath);
            copy(in, out);
            out.flush();
            out.close();
        } finally {
            // If the gzip header was invalid the wrapper never existed; close the raw stream.
            closeQuietly(in != null ? in : raw);
            closeQuietly(out);
        }
    }

    /**
     * Extracts a tar archive into the directory that contains it. Missing parent directories
     * of entries are created. Entries whose path would resolve outside that directory are
     * rejected, because archives handled here are downloaded from external servers.
     *
     * @param sourceFilePath tar file to extract
     * @throws IOException if the archive cannot be read, a file cannot be written, or an
     *                     entry points outside the destination directory
     */
    public static void untarFile(String sourceFilePath) throws IOException {
        File destDir = new File(sourceFilePath).getAbsoluteFile().getParentFile();
        if (destDir == null) {
            throw new IOException("Cannot determine destination directory for " + sourceFilePath);
        }
        String destRoot = destDir.getCanonicalPath();
        String destPrefix = destRoot.endsWith(File.separator) ? destRoot : destRoot + File.separator;

        TarArchiveInputStream tis = new TarArchiveInputStream(new FileInputStream(sourceFilePath));
        try {
            TarArchiveEntry entry;
            while ((entry = tis.getNextTarEntry()) != null) {
                File target = new File(destDir, entry.getName());
                String targetPath = target.getCanonicalPath();
                if (!targetPath.equals(destRoot) && !targetPath.startsWith(destPrefix)) {
                    throw new IOException("Tar entry escapes destination directory: " + entry.getName());
                }
                if (entry.isDirectory()) {
                    if (!target.exists()) {
                        target.mkdirs();
                    }
                    continue;
                }
                // Archives do not always carry explicit directory entries.
                File parent = target.getParentFile();
                if (parent != null && !parent.exists()) {
                    parent.mkdirs();
                }
                OutputStream out = new FileOutputStream(target);
                try {
                    // Reads from the tar stream end at the end of the current entry.
                    copy(tis, out);
                    out.close();
                } finally {
                    closeQuietly(out);
                }
            }
        } finally {
            closeQuietly(tis);
        }
    }

    /**
     * Placeholder overload; no transfer is implemented.
     *
     * @param server     unused
     * @param pathToFile unused
     * @return always {@code false}
     */
    public boolean fetchFTPFileResource(String server, String pathToFile) {
        return false;
    }

    /**
     * Manual smoke test: gunzips and then untars a hard-coded local test archive.
     *
     * @param args unused
     */
    public static void main(String [] args) {
        try {
            gunzipFile("C:/Temp/GO_test/tarziptest/testtar2.tar.gz", "C:/Temp/GO_test/tarziptest/testtar2.tar");
            untarFile("C:/Temp/GO_test/tarziptest/testtar2.tar");
        } catch (IOException e) {
            // FileNotFoundException is an IOException and is reported the same way.
            logFailure("Unpacking test archive", e);
        }
    }

    /** Returns whether the path ends with one of the suffixes that may be emptied. */
    private static boolean isClearableScratchDir(String dirPath) {
        for (String suffix : CLEARABLE_DIR_SUFFIXES) {
            if (dirPath.endsWith(suffix)) {
                return true;
            }
        }
        return false;
    }

    /** Returns whether the string is {@code null} or has no characters. */
    private static boolean isNullOrEmpty(String value) {
        return value == null || value.length() == 0;
    }

    /** Joins a directory and a file name with exactly one separator between them. */
    private static String joinPath(String dir, String name) {
        if (dir.endsWith("/") || dir.endsWith(File.separator)) {
            return dir + name;
        }
        return dir + "/" + name;
    }

    /** Creates the directory if needed, then removes the files left from a previous run. */
    private void prepareScratchDir(String dirPath) {
        File dir = new File(dirPath);
        if (!dir.exists() && !dir.mkdirs()) {
            logger.warning("Could not create scratch dir:" + dirPath);
        }
        clearBardScratch(dirPath);
    }

    /** Opens the NCBI FTP connection using the {@code ncbi.ftp.*} properties. */
    private void connectNcbi(FtpBean ftpbean, Properties props) throws IOException, FtpException {
        ftpbean.ftpConnect(props.getProperty("ncbi.ftp.root"), props.getProperty("ncbi.ftp.user"), props.getProperty("ncbi.ftp.password"));
        logger.info("Established NCBI FTP Connection");
    }

    /**
     * Changes to {@code baseDir} and returns the name of the sub-directory that sorts last
     * in natural string order, or {@code null} when there is none.
     */
    private String findLatestSubdirectory(FtpBean ftpbean, String baseDir) throws IOException, FtpException {
        ftpbean.setDirectory(baseDir);
        logger.info("Set FTP source directory. RESP:" + ftpbean.getReply());
        String latest = null;
        FtpListResult list = ftpbean.getDirectoryContent();
        while (list.next()) {
            if (list.getType() != FTP_TYPE_DIRECTORY) {
                continue;
            }
            String name = list.getName();
            if (latest == null || name.compareTo(latest) > 0) {
                latest = name;
            }
        }
        return latest;
    }

    /** Downloads every file of the current remote directory whose name ends with {@code suffix}. */
    private int downloadFilesWithSuffix(FtpBean ftpbean, String suffix, String destDir) throws IOException, FtpException {
        int cnt = 0;
        FtpListResult list = ftpbean.getDirectoryContent();
        while (list.next()) {
            String name = list.getName();
            if (name.endsWith(suffix)) {
                String target = joinPath(destDir, name);
                ftpbean.getBinaryFile(name, target);
                cnt++;
                logger.info("File #=" + cnt + " source=" + name + " dest=" + target);
            }
        }
        return cnt;
    }

    /**
     * Prepares {@code destDir}, then downloads each required file that is listed in
     * {@code remoteDir}. Files that are present are downloaded even when others are missing.
     *
     * @return {@code true} only if every required file was listed in the remote directory
     */
    private boolean fetchExtras(FtpBean ftpbean, String remoteDir, String destDir, String[] requiredFiles) throws IOException, FtpException {
        prepareScratchDir(destDir);
        ftpbean.setDirectory(remoteDir);
        logger.info("Set FTP source directory. RESP:" + ftpbean.getReply());

        Set<String> available = new HashSet<String>();
        FtpListResult list = ftpbean.getDirectoryContent();
        while (list.next()) {
            available.add(list.getName());
        }

        StringBuilder missing = new StringBuilder();
        for (String required : requiredFiles) {
            if (available.contains(required)) {
                ftpbean.getBinaryFile(required, joinPath(destDir, required));
            } else {
                // Keep going so the remaining extras are still fetched; report at the end.
                missing.append(' ').append(required);
            }
        }
        if (missing.length() > 0) {
            logger.warning("ERROR Couldn't Fetch Compound Extra Resources" + missing);
            return false;
        }
        return true;
    }

    /** Copies {@code in} to {@code out} until end of stream; closes neither. */
    private static void copy(InputStream in, OutputStream out) throws IOException {
        byte[] buff = new byte[COPY_BUFFER_SIZE];
        int len;
        while ((len = in.read(buff)) != -1) {
            out.write(buff, 0, len);
        }
    }

    /** Closes a stream if it is non-null; a failure while closing is logged, not thrown. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            logger.log(Level.FINE, "Ignoring error while closing stream", e);
        }
    }

    /**
     * Closes an FTP connection on a best-effort basis. Any exception is caught, including the
     * one a never-connected bean may raise, so cleanup cannot mask the result or the primary
     * failure of the calling method.
     */
    private static void disconnect(FtpBean ftpbean) {
        try {
            ftpbean.close();
        } catch (Exception e) {
            logger.log(Level.FINE, "Ignoring error while closing FTP connection", e);
        }
    }

    /** Logs a failed operation together with its cause. */
    private static void logFailure(String operation, Exception e) {
        logger.log(Level.SEVERE, operation + " failed", e);
    }
}