/*
* Eoulsan development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public License version 2.1 or
* later and CeCILL-C. This should be distributed with the code.
* If you do not have a copy, see:
*
* http://www.gnu.org/licenses/lgpl-2.1.txt
* http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt
*
* Copyright for this code is held jointly by the Genomic platform
* of the Institut de Biologie de l'École normale supérieure and
* the individual authors. These should be listed in @author doc
* comments.
*
* For more information on the Eoulsan project and its aims,
* or to join the Eoulsan Google group, visit the home page
* at:
*
* http://outils.genomique.biologie.ens.fr/eoulsan
*
*/
package fr.ens.biologie.genomique.eoulsan.data.storages;
import static fr.ens.biologie.genomique.eoulsan.EoulsanLogger.getLogger;
import static fr.ens.biologie.genomique.eoulsan.util.Utils.checkNotNull;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import fr.ens.biologie.genomique.eoulsan.Globals;
import fr.ens.biologie.genomique.eoulsan.bio.GenomeDescription;
import fr.ens.biologie.genomique.eoulsan.data.DataFile;
import fr.ens.biologie.genomique.eoulsan.data.DataFileMetadata;
import fr.ens.biologie.genomique.eoulsan.util.FileUtils;
/**
* This class defines a basic GenomeDescStorage based on an index file.
* @since 1.2
* @author Laurent Jourdren
*/
public class SimpleGenomeDescStorage implements GenomeDescStorage {

  /** Name of the index file stored in the storage directory. */
  private static final String INDEX_FILENAME = "genomes_desc_storage.txt";

  /** Field separator of the index file, compiled once for all loads. */
  private static final Pattern TAB_PATTERN = Pattern.compile("\t");

  /** Directory that contains the index file and the description files. */
  private final DataFile dir;

  /** Index entries, keyed by the value returned by createKey(). */
  private final Map<String, IndexEntry> entries = new LinkedHashMap<>();

  // One-slot cache of the last MD5 sum computed, valid as long as the same
  // genome file is requested and its last modification time has not changed
  private String lastMD5Computed;
  private DataFile lastGenomeFile;
  private long lastGenomeFileModified;

  /**
   * This inner class defines an entry of the index file.
   * @author Laurent Jourdren
   */
  private static final class IndexEntry {

    String genomeName;
    long genomeFileLength;
    String genomeFileMD5Sum;
    DataFile file;

    /**
     * Get the key of the entry in the index map.
     * @return the key built from the length and the MD5 sum of the genome
     *         file
     */
    private String getKey() {
      return createKey(this.genomeFileLength, this.genomeFileMD5Sum);
    }

    @Override
    public String toString() {
      return this.getClass().getSimpleName()
          + "{genomeName=" + this.genomeName + ", genomeFileLength="
          + this.genomeFileLength + ", genomeFileMD5Sum="
          + this.genomeFileMD5Sum + ", file=" + this.file + "}";
    }
  }

  //
  // Index management methods
  //

  /**
   * Load the information from the index file. If the index file does not
   * exist, an empty one is created. Blank lines, lines starting with '#',
   * lines that do not contain exactly four tab separated fields and lines
   * with an invalid genome file length are ignored. Entries whose description
   * file no longer exists are not loaded.
   * @throws IOException if the storage directory does not exist or if an
   *           error occurs while loading the index file
   */
  private void load() throws IOException {

    if (!this.dir.exists()) {
      throw new IOException("Genome description storage directory not found: "
          + this.dir.getSource());
    }

    final DataFile indexFile = new DataFile(this.dir, INDEX_FILENAME);

    // Create an empty index file if no index exists
    if (!indexFile.exists()) {
      save();
      return;
    }

    // try-with-resources: the reader is closed even if reading fails
    try (BufferedReader br = new BufferedReader(
        new InputStreamReader(indexFile.open(), Globals.DEFAULT_CHARSET))) {

      String line;

      while ((line = br.readLine()) != null) {

        final String trimmedLine = line.trim();

        // Skip empty lines and comments
        if ("".equals(trimmedLine) || trimmedLine.startsWith("#")) {
          continue;
        }

        final List<String> fields =
            Arrays.asList(TAB_PATTERN.split(trimmedLine));

        // Skip malformed lines
        if (fields.size() != 4) {
          continue;
        }

        final IndexEntry e = new IndexEntry();
        e.genomeName = fields.get(0);
        e.genomeFileMD5Sum = fields.get(1);

        try {
          e.genomeFileLength = Long.parseLong(fields.get(2));
        } catch (NumberFormatException nfe) {
          // A corrupted length must not abort the loading of the whole
          // index: handle it like any other malformed line
          getLogger().warning("Invalid genome file length in genome "
              + "description storage index, entry ignored: " + trimmedLine);
          continue;
        }

        e.file = new DataFile(this.dir, fields.get(3));

        // Only keep the entries whose description file still exists
        if (e.file.exists()) {
          this.entries.put(e.getKey(), e);
        }
      }
    }
  }

  /**
   * Save the information in the index file. The index file is rewritten from
   * the entries currently held in memory.
   * @throws IOException if the storage directory does not exist or if an
   *           error occurs while saving the index file
   */
  private void save() throws IOException {

    if (!this.dir.exists()) {
      throw new IOException("Genome description storage directory not found: "
          + this.dir.getSource());
    }

    final DataFile indexFile = new DataFile(this.dir, INDEX_FILENAME);

    // try-with-resources: the writer is closed even if a write fails
    try (BufferedWriter writer = new BufferedWriter(
        new OutputStreamWriter(indexFile.create(), Globals.DEFAULT_CHARSET))) {

      // Header line (it names only the first three of the four columns;
      // it is a comment that load() skips)
      writer.write("#Genome\tGenomeFileMD5\tGenomeFileLength\n");

      for (IndexEntry ie : this.entries.values()) {

        writer.append(ie.genomeName);
        writer.append("\t");
        writer.append(ie.genomeFileMD5Sum);
        writer.append("\t");
        writer.append(Long.toString(ie.genomeFileLength));
        writer.append("\t");
        writer.append(ie.file.getName());
        writer.append("\n");
      }
    }
  }

  //
  // Other methods
  //

  /**
   * Create the index key of a genome file.
   * @param genomeFile the genome file
   * @return the key of the genome file or null if the metadata or the MD5 sum
   *         of the file cannot be read
   */
  private String createKey(final DataFile genomeFile) {

    try {
      final DataFileMetadata md = genomeFile.getMetaData();
      final String md5Sum = computeMD5Sum(genomeFile);

      return createKey(md.getContentLength(), md5Sum);
    } catch (IOException e) {
      // Callers handle a null key: no entry can match it
      return null;
    }
  }

  /**
   * Create an index key.
   * @param genomeFileLength the length of the genome file
   * @param genomeFileMD5Sum the MD5 sum of the genome file
   * @return the MD5 sum and the length separated by a tabulation
   */
  private static String createKey(final long genomeFileLength,
      final String genomeFileMD5Sum) {

    return genomeFileMD5Sum + '\t' + genomeFileLength;
  }

  /**
   * Compute the MD5 sum of a genome file. The last computed sum is reused
   * when the same file is requested again and its last modification time is
   * unchanged.
   * @param genomeFile the genome file
   * @return the MD5 sum of the genome file
   * @throws IOException if an error occurs while reading the genome file
   */
  private String computeMD5Sum(final DataFile genomeFile) throws IOException {

    DataFileMetadata md = null;

    try {
      md = genomeFile.getMetaData();
    } catch (IOException ignored) {
      // The metadata is only used to manage the cache: without it the MD5
      // sum is computed and the cache is neither read nor updated
    }

    if (md != null
        && genomeFile.equals(this.lastGenomeFile)
        && this.lastGenomeFileModified == md.getLastModified()
        && this.lastMD5Computed != null) {
      return this.lastMD5Computed;
    }

    // NOTE(review): assumes FileUtils.computeMD5Sum() closes the stream it
    // is given -- confirm
    final String md5Sum = FileUtils.computeMD5Sum(genomeFile.rawOpen());

    if (md != null && md5Sum != null) {
      this.lastGenomeFile = genomeFile;
      this.lastGenomeFileModified = md.getLastModified();
      this.lastMD5Computed = md5Sum;
    }

    return md5Sum;
  }

  //
  // Interface methods
  //

  /**
   * Get the genome description stored for a genome file.
   * @param genomeFile the genome file, must not be null
   * @return the genome description, or null if there is no entry for the
   *         genome file or if the description file cannot be read
   */
  @Override
  public GenomeDescription get(final DataFile genomeFile) {

    checkNotNull(genomeFile, "Genome file is null");

    final IndexEntry entry = this.entries.get(createKey(genomeFile));

    if (entry == null || entry.file == null) {
      return null;
    }

    try {
      // NOTE(review): assumes GenomeDescription.load() closes the stream it
      // is given -- confirm
      return GenomeDescription.load(entry.file.open());
    } catch (IOException e) {
      getLogger()
          .warning("Cannot read genome description file: " + e.getMessage());
      return null;
    }
  }

  /**
   * Add a genome description to the storage. Nothing is done if an entry
   * already exists for the genome file. Errors are logged and not thrown.
   * @param genomeFile the genome file, must not be null
   * @param genomeDesc the genome description to store, must not be null
   */
  @Override
  public void put(final DataFile genomeFile,
      final GenomeDescription genomeDesc) {

    checkNotNull(genomeFile, "GenomeFile is null");
    checkNotNull(genomeDesc, "Genome description is null");

    final String key = createKey(genomeFile);

    // Nothing to do if the genome description is already stored
    if (this.entries.containsKey(key)) {
      return;
    }

    try {
      final DataFileMetadata md = genomeFile.getMetaData();

      final IndexEntry entry = new IndexEntry();
      entry.genomeName = genomeFile.getName();
      entry.genomeFileLength = md.getContentLength();
      entry.genomeFileMD5Sum = computeMD5Sum(genomeFile);
      entry.file = new DataFile(this.dir,
          entry.genomeFileMD5Sum + "_" + entry.genomeFileLength + ".gdesc");

      // Write the description file before registering it in the index
      // NOTE(review): assumes GenomeDescription.save() closes the stream it
      // is given -- confirm
      genomeDesc.save(entry.file.create());

      this.entries.put(entry.getKey(), entry);
      save();

      getLogger().info("Successfully added "
          + entry.genomeName
          + " genome description to genome description storage.");
    } catch (IOException e) {
      getLogger().warning(
          "Cannot add genome description file to genome description storage: "
              + e.getMessage());
    }
  }

  //
  // Static methods
  //

  /**
   * Create a GenomeDescStorage
   * @param dir the path of the genome descriptions storage
   * @return a GenomeDescStorage object if the path contains an index storage or
   *         null if no index storage is found (including when dir is null or
   *         when the index cannot be read)
   */
  public static final GenomeDescStorage getInstance(final DataFile dir) {

    try {
      return new SimpleGenomeDescStorage(dir);
    } catch (IOException | NullPointerException e) {
      // Best effort: an unusable storage is reported to the caller as null
      return null;
    }
  }

  //
  // Constructor
  //

  /**
   * Private constructor.
   * @param dir Path to the index storage
   * @throws IOException if an error occurs while testing the index storage
   */
  private SimpleGenomeDescStorage(final DataFile dir) throws IOException {

    checkNotNull(dir, "Index directory is null");

    this.dir = dir;
    load();

    getLogger().info("Genome description storage found. "
        + this.entries.size() + " entries in : " + dir.getSource());
  }
}