/*
 *                  Eoulsan development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public License version 2.1 or
 * later and CeCILL-C. This should be distributed with the code.
 * If you do not have a copy, see:
 *
 *      http://www.gnu.org/licenses/lgpl-2.1.txt
 *      http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt
 *
 * Copyright for this code is held jointly by the Genomic platform
 * of the Institut de Biologie de l'École normale supérieure and
 * the individual authors. These should be listed in @author doc
 * comments.
 *
 * For more information on the Eoulsan project and its aims,
 * or to join the Eoulsan Google group, visit the home page
 * at:
 *
 *      http://outils.genomique.biologie.ens.fr/eoulsan
 *
 */

package fr.ens.biologie.genomique.eoulsan.data.storages;

import static com.google.common.base.Strings.nullToEmpty;
import static fr.ens.biologie.genomique.eoulsan.EoulsanLogger.getLogger;
import static fr.ens.biologie.genomique.eoulsan.util.Utils.checkNotNull;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Pattern;

import fr.ens.biologie.genomique.eoulsan.Globals;
import fr.ens.biologie.genomique.eoulsan.bio.GenomeDescription;
import fr.ens.biologie.genomique.eoulsan.bio.readsmappers.SequenceReadsMapper;
import fr.ens.biologie.genomique.eoulsan.data.DataFile;
import fr.ens.biologie.genomique.eoulsan.util.FileUtils;

/**
 * This class defines a basic GenomeIndexStorage based on an index file. The
 * index file is a tab-separated text file stored in the storage directory
 * next to the index archives it describes.
 * @since 1.1
 * @author Laurent Jourdren
 */
public class SimpleGenomeIndexStorage implements GenomeIndexStorage {

  private static final String INDEX_FILENAME = "genomes_index_storage.txt";

  /** Field separator of the index file, compiled once for all loads. */
  private static final Pattern TAB_PATTERN = Pattern.compile("\t");

  private final DataFile dir;
  private final Map<String, IndexEntry> entries = new LinkedHashMap<>();

  /**
   * This inner class defines an entry of the index file.
   * @author Laurent Jourdren
   */
  private static final class IndexEntry {

    String genomeName;
    int sequences;
    long length;
    String genomeMD5;
    String mapperName;
    DataFile file;
    String description;

    /**
     * Get the key of the entry in the entries map.
     * @return the key built from the mapper name and the checksum
     */
    String getKey() {
      return createKey(this.mapperName, this.genomeMD5);
    }

    @Override
    public String toString() {
      return this.getClass().getSimpleName()
          + "{genomeName=" + this.genomeName + ", sequences=" + this.sequences
          + ", length=" + this.length + ", genomeMD5=" + this.genomeMD5
          + ", mapperName= " + this.mapperName + ", file=" + this.file + "}";
    }
  }

  //
  // Interface methods
  //

  /**
   * Get the index archive stored for a mapper, a genome and an additional
   * description.
   * @param mapper the mapper
   * @param genome the genome description
   * @param additionalDescription additional entries used to compute the
   *          checksum of the index
   * @return the index archive or null if no matching entry is known
   */
  @Override
  public DataFile get(final SequenceReadsMapper mapper,
      final GenomeDescription genome,
      final Map<String, String> additionalDescription) {

    checkNotNull(mapper, "Mapper is null");
    // The original code tested "mapper" twice, so a null genome was never
    // reported by this check
    checkNotNull(genome, "Genome description is null");
    checkNotNull(additionalDescription, "additionalDescription is null");

    final IndexEntry entry =
        this.entries.get(createKey(mapper, genome, additionalDescription));

    return entry == null ? null : entry.file;
  }

  /**
   * Copy an index archive into the storage and register it in the index file.
   * Nothing is done if the archive does not exist, if an entry with the same
   * key is already registered or if the checksum cannot be computed. I/O
   * errors during the copy or the save are logged, not thrown.
   * @param mapper the mapper
   * @param genome the genome description
   * @param additionalDescription additional entries used to compute the
   *          checksum of the index
   * @param indexArchive the index archive to store
   */
  @Override
  public void put(final SequenceReadsMapper mapper,
      final GenomeDescription genome,
      final Map<String, String> additionalDescription,
      final DataFile indexArchive) {

    checkNotNull(mapper, "Mapper is null");
    checkNotNull(genome, "Genome description is null");
    checkNotNull(additionalDescription, "additionalDescription is null");
    checkNotNull(indexArchive, "IndexArchive is null");

    // Update the index to avoid losing entries when several instances of
    // Eoulsan are running
    try {
      load();
    } catch (IOException e1) {
      getLogger().warning(
          "Unable to reload the index mapper storage: " + e1.getMessage());
    }

    if (!indexArchive.exists()) {
      return;
    }

    final String key = createKey(mapper, genome, additionalDescription);

    if (this.entries.containsKey(key)) {
      return;
    }

    final IndexEntry entry =
        createIndexEntry(mapper, genome, additionalDescription);

    if (entry == null) {
      return;
    }

    try {
      // NOTE(review): presumably FileUtils.copy() closes both streams -
      // confirm against its implementation
      FileUtils.copy(indexArchive.rawOpen(), entry.file.create());
      this.entries.put(entry.getKey(), entry);
      save();
      getLogger().info("Successfully added "
          + indexArchive.getName() + " index archive to genome index storage.");
    } catch (IOException e) {
      getLogger().warning("Failed to add "
          + indexArchive.getName() + " index archive to genome index storage: "
          + e.getMessage());
    }
  }

  //
  // Sum creation method
  //

  /**
   * Create a new entry for a mapper, a genome and an additional description.
   * @param mapper the mapper
   * @param genome the genome description
   * @param additionalDescription additional entries used to compute the
   *          checksum of the index
   * @return a new entry or null if the checksum cannot be computed
   */
  private IndexEntry createIndexEntry(final SequenceReadsMapper mapper,
      final GenomeDescription genome,
      final Map<String, String> additionalDescription) {

    final Map<String, String> md5Map =
        createMD5SumMap(mapper, genome, additionalDescription);
    final String md5Sum = createMD5Sum(md5Map);

    if (md5Sum == null) {
      return null;
    }

    final IndexEntry entry = new IndexEntry();

    // A null genome name is allowed here: save() writes "???" in that case
    final String genomeName = genome.getGenomeName();
    entry.genomeName = genomeName == null ? null : genomeName.trim();

    entry.sequences = genome.getSequenceCount();
    entry.length = genome.getGenomeLength();
    entry.mapperName = mapper.getMapperName().toLowerCase().trim();
    entry.genomeMD5 = md5Sum;
    entry.file = new DataFile(this.dir,
        entry.mapperName + "-" + entry.genomeMD5 + ".zip");
    entry.description = md5Map.toString();

    return entry;
  }

  /**
   * Create the ordered map of the values used to compute the checksum of an
   * index: mapper name, version and flavor, genome checksum, then the
   * additional description sorted by key.
   * @param mapper the mapper
   * @param genome the genome description
   * @param additionalDescription additional entries to add to the map
   * @return a map with the values to digest
   */
  private static Map<String, String> createMD5SumMap(
      final SequenceReadsMapper mapper, final GenomeDescription genome,
      final Map<String, String> additionalDescription) {

    final LinkedHashMap<String, String> map = new LinkedHashMap<>();

    map.put("mapper.name", nullToEmpty(mapper.getMapperName()));
    map.put("mapper.version",
        nullToEmpty(mapper.getMapperVersionToUse()).trim());
    map.put("mapper.flavor", nullToEmpty(mapper.getMapperFlavor()).trim());
    map.put("genome.md5sum", nullToEmpty(genome.getMD5Sum()).trim());

    // Add sorted additional description
    map.putAll(new TreeMap<>(additionalDescription));

    return map;
  }

  /**
   * Compute the MD5 checksum of the keys and the values of a map, in the
   * iteration order of the map.
   * @param map the map to digest
   * @return the checksum as an hexadecimal string or null if the MD5
   *         algorithm is not available
   */
  private static String createMD5Sum(final Map<String, String> map) {

    final MessageDigest md5Digest;

    try {
      md5Digest = MessageDigest.getInstance("MD5");
    } catch (NoSuchAlgorithmException e) {
      getLogger().warning(
          "Failled to create checksum for mapper index: " + e.getMessage());
      return null;
    }

    for (Map.Entry<String, String> e : map.entrySet()) {
      md5Digest.update(e.getKey().getBytes(Globals.DEFAULT_CHARSET));
      md5Digest.update(e.getValue().getBytes(Globals.DEFAULT_CHARSET));
    }

    // BigInteger.toString(16) does not left-pad with zeros, so the result can
    // be shorter than 32 characters. This form is kept because it is part of
    // the archive file names and of the keys written in the index file.
    final BigInteger bigInt = new BigInteger(1, md5Digest.digest());

    return bigInt.toString(16);
  }

  //
  // Index management methods
  //

  /**
   * Load the information from the index file. If the index file does not
   * exist, it is created from the entries currently in memory. Lines that are
   * empty, that start with '#' or that do not have 6 or 7 fields are ignored,
   * as well as entries whose archive file does not exist.
   * @throws IOException if an error occurs while loading the index file
   */
  private void load() throws IOException {

    if (!this.dir.exists()) {
      throw new IOException(
          "Genome index storage directory not found: " + this.dir.getSource());
    }

    final DataFile indexFile = new DataFile(this.dir, INDEX_FILENAME);

    // Create an empty index file if no index exists
    if (!indexFile.exists()) {
      save();
      return;
    }

    // Clear the entries (useful when reloading the index)
    this.entries.clear();

    try (final BufferedReader br = new BufferedReader(
        new InputStreamReader(indexFile.open(), Globals.DEFAULT_CHARSET))) {

      String line;

      while ((line = br.readLine()) != null) {

        final String trimmedLine = line.trim();

        if (trimmedLine.isEmpty() || trimmedLine.startsWith("#")) {
          continue;
        }

        final List<String> fields =
            Arrays.asList(TAB_PATTERN.split(trimmedLine));

        if (fields.size() < 6 || fields.size() > 7) {
          continue;
        }

        final IndexEntry e = new IndexEntry();
        e.genomeName = fields.get(0);
        e.genomeMD5 = fields.get(1);

        // Restore the sequence count and the genome length. Without this,
        // every reload followed by a save() rewrote both columns as 0.
        e.sequences = parseIntOrZero(fields.get(2));
        e.length = parseLongOrZero(fields.get(3));

        e.mapperName = fields.get(4);
        e.file = new DataFile(this.dir, fields.get(5));

        if (fields.size() == 7) {
          e.description = fields.get(6);
        }

        // Only keep the entries whose archive is still in the storage
        if (e.file.exists()) {
          this.entries.put(e.getKey(), e);
        }
      }
    }
  }

  /**
   * Parse an int field of the index file.
   * @param s the field to parse
   * @return the parsed value or 0 if the field is not a valid int, so that a
   *         malformed number does not discard the whole entry
   */
  private static int parseIntOrZero(final String s) {

    try {
      return Integer.parseInt(s.trim());
    } catch (NumberFormatException e) {
      return 0;
    }
  }

  /**
   * Parse a long field of the index file.
   * @param s the field to parse
   * @return the parsed value or 0 if the field is not a valid long, so that a
   *         malformed number does not discard the whole entry
   */
  private static long parseLongOrZero(final String s) {

    try {
      return Long.parseLong(s.trim());
    } catch (NumberFormatException e) {
      return 0L;
    }
  }

  /**
   * Save the information in the index file. The file is rewritten entirely
   * with a header line followed by one line per entry.
   * @throws IOException if an error occurs while saving the index file
   */
  private void save() throws IOException {

    if (!this.dir.exists()) {
      throw new IOException(
          "Genome index storage directory not found: " + this.dir.getSource());
    }

    final DataFile indexFile = new DataFile(this.dir, INDEX_FILENAME);

    // Create an empty index file
    try (final BufferedWriter writer = new BufferedWriter(
        new OutputStreamWriter(indexFile.create(), Globals.DEFAULT_CHARSET))) {

      writer.write(
          "#Genome\tChecksum\tGenomeSequences\tGenomeLength\tMapper\tIndexFile\tDescription\n");

      for (IndexEntry ie : this.entries.values()) {

        // An empty name would produce a line starting with a tab, that load()
        // strips with trim(), shifting all the fields of the line
        final boolean noName =
            ie.genomeName == null || ie.genomeName.isEmpty();

        writer.append(noName ? "???" : ie.genomeName);
        writer.append("\t");
        writer.append(ie.genomeMD5);
        writer.append("\t");
        writer.append(Integer.toString(ie.sequences));
        writer.append("\t");
        writer.append(Long.toString(ie.length));
        writer.append("\t");
        writer.append(ie.mapperName);
        writer.append("\t");
        writer.append(ie.file.getName());

        if (ie.description != null) {
          writer.append("\t");
          writer.append(ie.description);
        }

        writer.append("\n");
      }
    }
  }

  //
  // Other methods
  //

  /**
   * Create the key of an entry from a mapper, a genome and an additional
   * description.
   * @param mapper the mapper
   * @param genome the genome description
   * @param additionalDescription additional entries used to compute the
   *          checksum of the index
   * @return the key of the entry
   */
  private static String createKey(final SequenceReadsMapper mapper,
      final GenomeDescription genome,
      final Map<String, String> additionalDescription) {

    return createKey(mapper.getMapperName(),
        createMD5Sum(createMD5SumMap(mapper, genome, additionalDescription)));
  }

  /**
   * Create the key of an entry from a mapper name and a checksum.
   * @param mapperName the name of the mapper
   * @param genomeMD5 the checksum of the index
   * @return the lower-cased and trimmed mapper name, a tab and the checksum
   */
  private static String createKey(final String mapperName,
      final String genomeMD5) {

    return mapperName.toLowerCase().trim() + '\t' + genomeMD5;
  }

  //
  // Static methods
  //

  /**
   * Create a GenomeIndexStorage
   * @param dir the path of the index storage
   * @return a GenomeIndexStorage object if the path contains an index storage
   *         or null if no index storage is found
   */
  public static final GenomeIndexStorage getInstance(final DataFile dir) {

    try {
      return new SimpleGenomeIndexStorage(dir);
    } catch (IOException | NullPointerException e) {
      // A null or unusable directory means that there is no storage
      return null;
    }
  }

  //
  // Constructor
  //

  /**
   * Private constructor.
   * @param dir Path to the index storage
   * @throws IOException if an error occurs while testing the index storage
   */
  private SimpleGenomeIndexStorage(final DataFile dir) throws IOException {

    checkNotNull(dir, "Index directory is null");

    this.dir = dir;
    load();

    getLogger().info("Genome index storage found. "
        + this.entries.size() + " entries in : " + dir.getSource());
  }
}