/*******************************************************************************
* GenPlay, Einstein Genome Analyzer
* Copyright (C) 2009, 2014 Albert Einstein College of Medicine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* Authors: Julien Lajugie <julien.lajugie@einstein.yu.edu>
* Nicolas Fourel <nicolas.fourel@einstein.yu.edu>
* Eric Bouhassira <eric.bouhassira@einstein.yu.edu>
*
* Website: <http://genplay.einstein.yu.edu>
******************************************************************************/
package edu.yu.einstein.genplay.core.manager.project;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import edu.yu.einstein.genplay.core.manager.ProjectFiles;
import edu.yu.einstein.genplay.core.multiGenome.VCF.VCFFile.VCFFile;
import edu.yu.einstein.genplay.core.multiGenome.data.display.content.MGFileContentManager;
import edu.yu.einstein.genplay.core.multiGenome.data.synchronization.MGSMultiGenome;
import edu.yu.einstein.genplay.core.multiGenome.data.synchronization.MGSOffset;
import edu.yu.einstein.genplay.core.multiGenome.operation.synchronization.MGSynchronizer;
import edu.yu.einstein.genplay.core.multiGenome.utils.FormattedMultiGenomeName;
import edu.yu.einstein.genplay.dataStructure.chromosome.Chromosome;
import edu.yu.einstein.genplay.dataStructure.enums.VariantType;
import edu.yu.einstein.genplay.gui.action.multiGenome.synchronization.MGASynchronizing;
/**
* The multi genome data structure can be seen in 3 main parts:
* - {@link MGSMultiGenome} : Manages offsets between genomes and the meta genome. It is all about the synchronization of the positions.
* - {@link MGSynchronizer} : Synchronizes the Indels and Structural Variant variations.
* - {@link MGFileContentManager} : Manages the variant information for their display.
*
* This class also contains the map between the genome names and their VCF file readers.
* Information about a genome can be stored in one or several VCF files, no matter the type (Indels, SV, SNPs).
* Genomes separated in different files MUST HAVE THE SAME NAME IN EVERY FILE!
*
* The genome names list is required quite often. That list is made from the map between the genome names and their readers.
* Once created, the list is stored in order to be used later without creating it again and again.
*
* ALL GENOME NAMES ARE STORED IN THIS DATA STRUCTURE AS "FULL GENOME NAME" (with group/genome/raw name).
* See {@link FormattedMultiGenomeName} for more details.
*
* THE WHOLE SYNCHRONIZATION PROCESS IS HANDLED BY {@link MGASynchronizing}.
*
* @author Nicolas Fourel
* @version 0.1
*/
public class MultiGenomeProject implements Serializable {

    /** Generated serial version ID */
    private static final long serialVersionUID = -6096336417566795182L;

    /** Version number of the saved format; written as the first value of the serialized form */
    private static final int SAVED_FORMAT_VERSION_NUMBER = 0;

    private List<String> genomeNames;                           // The genome names list.
    private Map<String, List<VCFFile>> genomeFileAssociation;   // The map between genome names and their files.
    private MGSMultiGenome multiGenome;                         // The genome synchronization data structure.
    private MGSynchronizer multiGenomeSynchronizer;             // The synchronizer for Indels and Structural Variant variations.
    private MGFileContentManager fileContentManager;            // The file content manager.


    /**
     * Constructor of {@link MultiGenomeProject}.
     * Every attribute stays null until {@link #initializeSynchronization(Map)} is called
     * or until the instance is filled by the unserialization.
     */
    MultiGenomeProject () {}


    /**
     * Retrieves all the genome raw names of the project
     * @return the full list of genome raw names (empty if no genome name has been set yet)
     */
    public List<String> getAllGenomeRawNames () {
        List<String> genomeRawNames = new ArrayList<String>();
        if (genomeNames != null) {
            for (String genomeName: genomeNames) {
                genomeRawNames.add(FormattedMultiGenomeName.getRawName(genomeName));
            }
        }
        return genomeRawNames;
    }


    /**
     * Retrieves all the VCF files.
     * A file associated to several genomes appears only once in the result.
     * @return the full list of VCF files (empty if no genome/file association has been set yet)
     */
    public List<VCFFile> getAllVCFFiles () {
        List<VCFFile> allFiles = new ArrayList<VCFFile>();
        if (genomeFileAssociation == null) {
            // nothing has been associated yet (e.g. project that has not been synchronized)
            return allFiles;
        }
        for (List<VCFFile> genomeFiles: genomeFileAssociation.values()) {
            if (genomeFiles == null) {
                continue;
            }
            for (VCFFile currentFile: genomeFiles) {
                if (!allFiles.contains(currentFile)) {
                    allFiles.add(currentFile);
                }
            }
        }
        return allFiles;
    }


    /**
     * @return the fileContentManager
     */
    public MGFileContentManager getFileContentManager() {
        return fileContentManager;
    }


    /**
     * Creates an array with all genome names association (including the meta genome and the reference genome).
     * Used for display.
     * @return genome names association array
     */
    public Object[] getFormattedGenomeArray () {
        return getFormattedGenomeArray(true, true);
    }


    /**
     * Creates an array with all genome names association.
     * The meta genome name (if requested) comes first, then the reference genome name (if requested),
     * then the genome names of the project.
     * Used for display.
     * @param withReferenceGenome true to add the reference genome to the list
     * @param withMetaGenome true to add the meta genome to the list
     * @return genome names association array (an array of String)
     */
    public Object[] getFormattedGenomeArray (boolean withReferenceGenome, boolean withMetaGenome) {
        List<String> names = new ArrayList<String>();
        if (withMetaGenome) {
            names.add(FormattedMultiGenomeName.META_GENOME_NAME);
        }
        if (withReferenceGenome) {
            names.add(ProjectManager.getInstance().getAssembly().getDisplayName());
        }
        if (genomeNames != null) {
            names.addAll(genomeNames);
        }
        return names.toArray(new String[names.size()]);
    }


    /**
     * @return the genomeFileAssociation
     */
    public Map<String, List<VCFFile>> getGenomeFileAssociation() {
        return genomeFileAssociation;
    }


    /**
     * @return the genomeNames
     */
    public List<String> getGenomeNames() {
        return genomeNames;
    }


    /**
     * @return the multiGenome
     */
    public MGSMultiGenome getMultiGenome() {
        return multiGenome;
    }


    /**
     * @return the multiGenomeSynchronizer
     */
    public MGSynchronizer getMultiGenomeSynchronizer() {
        return multiGenomeSynchronizer;
    }


    /**
     * Get a vcf file object with a vcf file name.
     * @param fileName the name of the vcf file
     * @return the first VCF file whose file name equals the given name, null if there is none
     */
    public VCFFile getVCFFileFromName (String fileName) {
        for (VCFFile vcfFile: getAllVCFFiles()) {
            if (vcfFile.getFile().getName().equals(fileName)) {
                return vcfFile;
            }
        }
        return null;
    }


    /**
     * Retrieves the VCF files according to a genome name and a variant type
     * @param genomeName the full genome name
     * @param type the variant type
     * @return the list of VCF files for the given genome and variant type (empty if the genome is unknown)
     */
    public List<VCFFile> getVCFFiles (String genomeName, VariantType type) {
        List<VCFFile> fileList = new ArrayList<VCFFile>();
        if (genomeFileAssociation == null) {
            return fileList;
        }
        List<VCFFile> genomeFiles = genomeFileAssociation.get(genomeName);
        if (genomeFiles == null) {
            // Map.get returns null when the genome name has no association
            return fileList;
        }
        for (VCFFile currentFile: genomeFiles) {
            List<VariantType> typeList = currentFile.getVariantTypes(genomeName);
            if ((typeList != null) && typeList.contains(type)) {
                fileList.add(currentFile);
            }
        }
        return fileList;
    }


    /**
     * This method notifies the file manager of the dependent files:
     * the paths of all the VCF files are given to {@link ProjectFiles} as the current files.
     */
    private void initializeFileDependancy () {
        List<VCFFile> vcfFiles = getAllVCFFiles();
        String[] paths = new String[vcfFiles.size()];
        for (int i = 0; i < paths.length; i++) {
            paths[i] = vcfFiles.get(i).getFile().getPath();
        }
        ProjectFiles.getInstance().setCurrentFiles(paths);
    }


    /**
     * Initializes synchronizer attributes:
     * - stores the association and builds the sorted list of genome names from its keys
     * - registers every genome name on each of its VCF files
     * - creates the synchronization data structure, the synchronizer and the file content manager
     * - notifies the file manager of the dependent files
     * @param genomeFileAssociation the genome file association
     */
    public void initializeSynchronization (Map<String, List<VCFFile>> genomeFileAssociation) {
        this.genomeFileAssociation = genomeFileAssociation;
        genomeNames = new ArrayList<String>(this.genomeFileAssociation.keySet());
        Collections.sort(genomeNames);
        for (String genomeName: genomeNames) {
            List<VCFFile> vcfFiles = this.genomeFileAssociation.get(genomeName);
            if (vcfFiles == null) {
                continue;
            }
            for (VCFFile vcfFile: vcfFiles) {
                vcfFile.addGenomeName(genomeName);
            }
        }
        // the synchronizer receives this project: keep it after the attributes it may read are set
        multiGenome = new MGSMultiGenome(genomeNames);
        multiGenomeSynchronizer = new MGSynchronizer(this);
        fileContentManager = new MGFileContentManager(getAllVCFFiles());
        initializeFileDependancy();
    }


    /**
     * Method used for unserialization.
     * Reads the attributes in the order they are written by the serialization method,
     * then notifies the file manager of the dependent files.
     * @param in the stream to read the object from
     * @throws IOException
     * @throws ClassNotFoundException
     */
    @SuppressWarnings("unchecked")
    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
        in.readInt(); // saved format version number, read to stay aligned with the stream but not used
        genomeNames = (List<String>) in.readObject();
        genomeFileAssociation = (Map<String, List<VCFFile>>) in.readObject();
        multiGenome = (MGSMultiGenome) in.readObject();
        multiGenomeSynchronizer = (MGSynchronizer) in.readObject();
        fileContentManager = (MGFileContentManager) in.readObject();
        initializeFileDependancy();
    }


    /**
     * Sets the genome file association.
     * Only the map is replaced: the genome names list and the other attributes are not updated.
     * @param genomeFileAssociation the genomeFileAssociation to set
     */
    public void setGenomeFileAssociation(Map<String, List<VCFFile>> genomeFileAssociation) {
        this.genomeFileAssociation = genomeFileAssociation;
    }


    /**
     * Set the current {@link MultiGenomeProject} using another instance of {@link MultiGenomeProject}
     * Used for the unserialization.
     * The attributes are shared with the given instance, they are not copied.
     * @param project the instance of {@link MultiGenomeProject} to use
     */
    protected void setMultiGenomeProject (MultiGenomeProject project) {
        genomeNames = project.getGenomeNames();
        genomeFileAssociation = project.getGenomeFileAssociation();
        multiGenome = project.getMultiGenome();
        multiGenomeSynchronizer = project.getMultiGenomeSynchronizer();
        fileContentManager = project.getFileContentManager();
    }


    /**
     * Show the information of the {@link MultiGenomeProject} on the standard output
     */
    public void show () {
        System.out.println("POSITION");
        multiGenome.show();
        System.out.println("CONTENT");
        fileContentManager.show();
    }


    /**
     * Update the chromosome list using the new chromosome length.
     * The new length of a chromosome is its current length plus the value of the last offset
     * of the reference genome for this chromosome (if it has any offset).
     */
    public void updateChromosomeList () {
        ProjectChromosomes projectChromosomes = ProjectManager.getInstance().getProjectChromosomes();
        List<Chromosome> currentChromosomeList = projectChromosomes.getChromosomeList();
        List<Integer> newChromosomeLengths = new ArrayList<Integer>();
        List<List<MGSOffset>> offsetList = multiGenome.getReferenceGenome().getAllele().getOffsetList();
        for (Chromosome current: currentChromosomeList) {
            List<MGSOffset> chromosomeOffsets = offsetList.get(projectChromosomes.getIndex(current));
            int length = current.getLength();
            if (!chromosomeOffsets.isEmpty()) {
                length += chromosomeOffsets.get(chromosomeOffsets.size() - 1).getValue();
            }
            newChromosomeLengths.add(length);
        }
        projectChromosomes.updateChromosomeLengths(newChromosomeLengths);
    }


    /**
     * Method used for serialization.
     * Writes the saved format version number followed by every attribute.
     * @param out the stream to write the object to
     * @throws IOException
     */
    private void writeObject(ObjectOutputStream out) throws IOException {
        out.writeInt(SAVED_FORMAT_VERSION_NUMBER);
        out.writeObject(genomeNames);
        out.writeObject(genomeFileAssociation);
        out.writeObject(multiGenome);
        out.writeObject(multiGenomeSynchronizer);
        out.writeObject(fileContentManager);
    }
}