/** * Copyright 2010-15 Simon Andrews * * This file is part of BamQC. * * BamQC is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * BamQC is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with BamQC; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* * Changelog: * - Piero Dalle Pezze: Imported from SeqMonk and adjusted for BamQC * - Simon Andrews: Class creation. */ package uk.ac.babraham.BamQC.Network.DownloadableGenomes; import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.io.InputStreamReader; import java.net.URL; import java.sql.Date; import java.util.Hashtable; import java.util.Vector; import org.apache.log4j.Logger; import uk.ac.babraham.BamQC.Preferences.BamQCPreferences; /** * * @author Simon Andrews * */ public class DownloadableGenomeSet { private static Logger log = Logger.getLogger(DownloadableGenomeSet.class); private Hashtable <String, GenomeSpecies> seenSpecies; private Vector<GenomeSpecies> species = new Vector<GenomeSpecies>(); public DownloadableGenomeSet () throws IOException { URL genomeIndexURL = new URL(BamQCPreferences.getInstance().getGenomeDownloadLocation()+"genome_index.txt"); BufferedReader genomeIndexReader = new BufferedReader(new InputStreamReader(genomeIndexURL.openStream())); seenSpecies = new Hashtable<String, GenomeSpecies>(); String indexLine = null; while ((indexLine = genomeIndexReader.readLine())!= null) { String [] sections = indexLine.split("\\t"); if (sections.length < 4) { throw new IOException("Genome list file is corrupt. Expected 4 sections on line '"+indexLine+"' but got "+sections.length); } if (!seenSpecies.containsKey(new String(sections[0]))) { GenomeSpecies newSpecies = new GenomeSpecies(sections[0]); species.add(newSpecies); seenSpecies.put(sections[0],newSpecies); } long epoch = Long.parseLong(sections[3]); Date date = new Date(epoch*1000); // Network date is in seconds. Local date is in milliseconds. // System.out.println("For assembly "+sections[1]+" epoch was "+epoch+" and date was "+date); new GenomeAssembly(seenSpecies.get(sections[0]),sections[1],Integer.parseInt(sections[2]),date); // System.out.println("Found organism "+sections[0]+" and assembly "+sections[1]); } } public GenomeSpecies [] species () { return species.toArray(new GenomeSpecies[0]); } public GenomeAssembly [] findUpdateableGenomes () throws IOException { // We need to go through the installed genomes and see if we have an assembly // which is newer than the one which is installed. Vector<GenomeAssembly>updates = new Vector<GenomeAssembly>(); File [] speciesFolders = BamQCPreferences.getInstance().getGenomeBase().listFiles(); for (int s=0;s<speciesFolders.length;s++) { if (!speciesFolders[s].isDirectory()) continue; File [] assemblyFolders = speciesFolders[s].listFiles(); for (int a=0;a<assemblyFolders.length;a++) { if (!assemblyFolders[a].isDirectory()) continue; // Now find the latest modification time on a dat file File [] datFiles = assemblyFolders[a].listFiles(); long latestEpoch = 0; for (int d=0;d<datFiles.length;d++) { if (datFiles[d].getName().toLowerCase().endsWith(".dat")) { if (datFiles[d].lastModified() > latestEpoch) { latestEpoch = datFiles[d].lastModified(); } } } Date latestDate = new Date(latestEpoch); // Now see if there is an assembly in the downloadable genomes // which matches this one, and if it's newer than the one we // have installed. if (seenSpecies.containsKey(speciesFolders[s].getName())) { GenomeAssembly [] genomes = seenSpecies.get(speciesFolders[s].getName()).assemblies(); for (int ga=0;ga<genomes.length;ga++) { if (genomes[ga].assembly().equals(assemblyFolders[a].getName())){ // We have a match, but is it newer if (genomes[ga].date().after(latestDate)) { // We have an update to record. updates.add(genomes[ga]); } // else { // System.out.println("Local date for "+genomes[ga].assembly()+" is "+latestDate.toString()+" but network date is "+genomes[ga].date().toString()); // // } } } } } } return updates.toArray(new GenomeAssembly[0]); } @Override public String toString () { return "Downloadable Genomes"; } public static void main (String [] args) { try { DownloadableGenomeSet dgs = new DownloadableGenomeSet(); GenomeAssembly [] updates = dgs.findUpdateableGenomes(); System.out.println ("There are "+updates.length+" genomes to update"); for (int i=0;i<updates.length;i++) { System.out.println(updates[i].species().name()+"\t"+updates[i].assembly()+" from "+updates[i].date()); } System.out.println("List of species+assemblies:"); GenomeSpecies[] gs = dgs.species(); for(int i=0;i<gs.length;i++) { System.out.println(gs[i].name()); GenomeAssembly[] ga = gs[i].assemblies(); for(int j=0;j<ga.length;j++) { System.out.println("\t" + ga[j].assembly()); } } } catch (IOException e) { log.error(e, e); } } }