package com.compomics.util.protein_sequences_manager;
import com.compomics.util.protein_sequences_manager.enums.SequenceInputType;
import com.compomics.util.Util;
import com.compomics.util.experiment.identification.protein_sequences.FastaIndex;
import com.compomics.util.experiment.identification.protein_sequences.SequenceFactory;
import com.compomics.util.io.SerializationUtils;
import com.compomics.util.preferences.UtilitiesUserPreferences;
import com.compomics.util.waiting.WaitingHandler;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
/**
* The protein sequences manager helps the user manage FASTA files.
*
* @author Marc Vaudel
*/
public class ProteinSequencesManager {
/**
* Name of the folder containing temporary files.
*/
public static final String TEMP_FOLDER = ".temp";
/**
* Name of the folder containing UniProt files.
*/
public static final String UNIPROT_FOLDER = "uniprot";
/**
* Name of the folder containing user FASTA files.
*/
public static final String USER_FOLDER = "user";
/**
* Name of the folder containing DNA translated files.
*/
public static final String DNA_FOLDER = "dna";
/**
* The list of databases loaded.
*/
private ArrayList<String> databaseNames;
/**
* Map of input types for every database name.
*/
private HashMap<String, SequenceInputType> databaseInputTypes;
/**
* Map of the index for every database: name - version - FastaIndex.
*/
private HashMap<String, HashMap<String, FastaIndex>> databaseIndexes;
/**
* The working folder.
*/
private File workingFolder;
/**
* Constructor. The Protein Sequences Manager folder must be set in the
* utilities preferences before calling the constructor.
*/
public ProteinSequencesManager() {
UtilitiesUserPreferences utilitiesUserPreferences = UtilitiesUserPreferences.loadUserPreferences();
workingFolder = utilitiesUserPreferences.getProteinSequencesManagerFolder();
if (workingFolder == null || !workingFolder.exists()) {
throw new IllegalArgumentException("Working folder not set.");
}
parseWorkingFolder();
}
/**
* Parses the databases found in the working folder.
*/
private void parseWorkingFolder() {
databaseNames = new ArrayList<String>();
databaseInputTypes = new HashMap<String, SequenceInputType>();
databaseIndexes = new HashMap<String, HashMap<String, FastaIndex>>();
parseSubFolder(getUniprotFolder(), SequenceInputType.uniprot);
parseSubFolder(getUserFolder(), SequenceInputType.user);
parseSubFolder(getDnaFolder(), SequenceInputType.dna);
}
/**
* Parses a folder containing database folders and loads the content in the
* maps.
*
* @param folder the folder to inspect
* @param sequenceInputType the type of input
*/
private void parseSubFolder(File folder, SequenceInputType sequenceInputType) {
if (!folder.exists()) {
folder.mkdirs();
if (!folder.exists()) {
throw new IllegalArgumentException("Impossible to write into the working folder.");
}
return;
}
for (File fastaFolder : folder.listFiles()) {
if (fastaFolder.isDirectory()) {
String name = fastaFolder.getName();
boolean dbFound = false;
for (File versionFolder : fastaFolder.listFiles()) {
if (versionFolder.isDirectory()) {
String version = versionFolder.getName();
FastaIndex fastaIndex = null;
for (File subFile : versionFolder.listFiles()) {
if (subFile.getName().endsWith(".cui")) {
try {
FastaIndex tempIndex = (FastaIndex) SerializationUtils.readObject(subFile);
String correctedName = correctFastaName(tempIndex.getName());
if (correctedName.equals(name)) {
File fastaFile = new File(versionFolder, tempIndex.getFileName());
if (fastaFile.exists()) {
fastaIndex = tempIndex;
}
}
} catch (Exception e) {
// ignore
}
}
}
if (fastaIndex != null) {
dbFound = true;
databaseNames.add(fastaIndex.getName());
databaseInputTypes.put(name, sequenceInputType);
HashMap<String, FastaIndex> fastaMap = databaseIndexes.get(name);
if (fastaMap == null) {
fastaMap = new HashMap<String, FastaIndex>(1);
databaseIndexes.put(name, fastaMap);
}
fastaMap.put(version, fastaIndex);
} else {
// corrupted folder, delete
Util.deleteDir(versionFolder);
}
}
}
if (!dbFound) {
// corrupted folder, delete
Util.deleteDir(fastaFolder);
}
}
}
}
/**
* Returns the folder where UniProt databases are stored.
*
* @return the folder where UniProt databases are stored
*/
public File getUniprotFolder() {
return new File(workingFolder, UNIPROT_FOLDER);
}
/**
* Returns the folder where user databases are stored.
*
* @return the folder where user databases are stored
*/
public File getUserFolder() {
return new File(workingFolder, USER_FOLDER);
}
/**
* Returns the folder where DNA databases are stored.
*
* @return the folder where DNA databases are stored
*/
public File getDnaFolder() {
return new File(workingFolder, DNA_FOLDER);
}
/**
* Adds a FASTA file to the working folder.
*
* @param fastaFile the FASTA file to add
* @param sequenceInputType the type of input
* @param waitingHandler a waiting handler displaying progress and allowing
* canceling the operation.
*
* @throws IOException exception thrown whenever an error occurred while
* copying the file.
*/
public void addFastaFile(File fastaFile, SequenceInputType sequenceInputType, WaitingHandler waitingHandler) throws IOException {
FastaIndex tempIndex = SequenceFactory.getFastaIndex(fastaFile, false, waitingHandler);
if (!waitingHandler.isRunCanceled()) {
String fastaName = tempIndex.getName();
File folder = getFolder(sequenceInputType);
folder = new File(folder, fastaName);
String version = tempIndex.getVersion();
folder = new File(folder, version);
folder.mkdirs();
if (!folder.exists()) {
throw new IllegalArgumentException("Impossible to write into the working folder.");
}
String fileName = fastaFile.getName();
fileName = correctFastaName(fileName);
File importedFile = new File(folder, fileName);
Util.copyFile(fastaFile, importedFile);
//add all user specifications to the new index
FastaIndex newIndex = SequenceFactory.getFastaIndex(fastaFile, true, waitingHandler);
newIndex.setName(tempIndex.getName());
newIndex.setAccessionParsingRule(tempIndex.getAccessionParsingRule());
newIndex.setDecoyTag(tempIndex.getDecoyTag());
newIndex.setDescription(tempIndex.getDescription());
newIndex.setMainDatabaseType(tempIndex.getMainDatabaseType());
newIndex.setVersion(tempIndex.getVersion());
SequenceFactory.writeIndex(newIndex, folder);
// add fasta file to the mapping
if (!databaseNames.contains(fastaName)) {
databaseNames.add(fastaName);
databaseInputTypes.put(fastaName, sequenceInputType);
}
HashMap<String, FastaIndex> databaseMap = databaseIndexes.get(fastaName);
if (databaseMap == null) {
databaseMap = new HashMap<String, FastaIndex>(1);
databaseIndexes.put(fastaName, databaseMap);
}
databaseMap.put(version, newIndex);
}
}
/**
* Corrects the name of the given FASTA file.
*
* @param fastaName the name of the given FASTA file
*
* @return a corrected name for the given FASTA file
*/
public static String correctFastaName(String fastaName) {
return fastaName.replaceAll(" ", "_");
}
/**
* Returns the folder to be used for the given input type.
*
* @param sequenceInputType the type of input
*
* @return the folder to be used for the given input type
*/
public File getFolder(SequenceInputType sequenceInputType) {
switch (sequenceInputType) {
case uniprot:
return new File(workingFolder, UNIPROT_FOLDER);
case user:
return new File(workingFolder, USER_FOLDER);
case dna:
return new File(workingFolder, DNA_FOLDER);
default:
throw new UnsupportedOperationException("Folder not implemented for input type " + sequenceInputType + ".");
}
}
/**
* Returns the temporary folder.
*
* @return the temporary folder
*/
public File getTempFolder() {
return new File(workingFolder, TEMP_FOLDER);
}
/**
* Returns the list of database names parsed from the working folder.
*
* @return the list of database names parsed from the working folder
*/
public ArrayList<String> getDatabaseNames() {
return databaseNames;
}
/**
* Returns the list of versions for the given database name as parsed from
* the working folder.
*
* @param databaseName the name of the database of interest
*
* @return the list of versions for the given database name as parsed from
* the working folder
*/
public ArrayList<String> getVersionsForDb(String databaseName) {
ArrayList<String> result = new ArrayList<String>(databaseIndexes.get(databaseName).keySet());
Collections.sort(result);
return result;
}
/**
* Returns the FASTA index of the given database name and version.
*
* @param databaseName the name of the database of interest
* @param version the version of the database of interest
*
* @return the corresponding FASTA index
*/
public FastaIndex getFastaIndex(String databaseName, String version) {
HashMap<String, FastaIndex> databaseMap = databaseIndexes.get(databaseName);
if (databaseMap == null) {
return null;
}
return databaseMap.get(version);
}
/**
* Returns the input type of the given database.
*
* @param databaseName the name of the database
*
* @return the input type of the given database
*/
public SequenceInputType getInputType(String databaseName) {
return databaseInputTypes.get(databaseName);
}
}