/******************************************************************************* * GenPlay, Einstein Genome Analyzer * Copyright (C) 2009, 2014 Albert Einstein College of Medicine * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * Authors: Julien Lajugie <julien.lajugie@einstein.yu.edu> * Nicolas Fourel <nicolas.fourel@einstein.yu.edu> * Eric Bouhassira <eric.bouhassira@einstein.yu.edu> * * Website: <http://genplay.einstein.yu.edu> ******************************************************************************/ package edu.yu.einstein.genplay.core.multiGenome.data.display.content; import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.io.Serializable; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import edu.yu.einstein.genplay.core.manager.project.ProjectManager; import edu.yu.einstein.genplay.core.multiGenome.VCF.VCFFile.VCFFile; import edu.yu.einstein.genplay.core.multiGenome.data.display.array.MGByteArray; import edu.yu.einstein.genplay.core.multiGenome.data.display.array.MGFloatArray; import edu.yu.einstein.genplay.core.multiGenome.data.display.array.MGIntegerArray; import edu.yu.einstein.genplay.core.multiGenome.data.display.variant.Variant; import edu.yu.einstein.genplay.core.multiGenome.data.display.variant.VariantDisplay; import edu.yu.einstein.genplay.dataStructure.chromosome.Chromosome; /** * A {@link MGChromosomeContent} represents the content of a {@link Chromosome} for a specific {@link VCFFile}. * It stores arrays for: * - the reference genome positions * - the scores (QUAL) * - a list of alternatives * - a list of genotype for each genome * - a list of {@link Variant} * * Every line from the chromosome is represented in these arrays. * The {@link Variant} stored are stored only once and it's here, a same {@link Variant} can be used for different display. * This way, a {@link Variant} will never be created more than once. * For display specific details, a {@link Variant} is encapsulated in a {@link VariantDisplay}. * * @author Nicolas Fourel * @version 0.1 */ public class MGChromosomeContent implements Iterable<MGLineContent>, Serializable { /** Default serial version ID */ private static final long serialVersionUID = -8385957556240550523L; private static final int SAVED_FORMAT_VERSION_NUMBER = 0; // saved format version private String chromosomeName; // The chromosome represented here. private MGIntegerArray positions; // The array of reference genome positions. private MGFloatArray scores; // The array of scores. private List<MGIntegerArray> alternatives; // The list of alternatives. private Map<String, List<MGByteArray>> genotypes; // The lists of genotypes. private MGChromosomeVariants variants; // The lists of variants. /** * Constructor of {@link MGChromosomeContent} * @param chromosome * @param genomeNames */ public MGChromosomeContent (String chromosome, List<String> genomeNames) { chromosomeName = chromosome; positions = new MGIntegerArray(); scores = new MGFloatArray(); alternatives = new ArrayList<MGIntegerArray>(); alternatives.add(new MGIntegerArray()); genotypes = new HashMap<String, List<MGByteArray>>(); for (String genomeName: genomeNames) { genotypes.put(genomeName, new ArrayList<MGByteArray>()); genotypes.get(genomeName).add(new MGByteArray()); } variants = null; } /** * Add the length of an alternative into the file content structure * @param alternativeIndex the index of the alternative * @param positionIndex the index of position where to set the alternative * @param alternative the length of the alternative */ private void addAlternative (int alternativeIndex, int positionIndex, int alternative) { int add = (alternativeIndex - alternatives.size()) + 1; for (int i = 0; i < add; i++) { alternatives.add(new MGIntegerArray()); } alternatives.get(alternativeIndex).set(positionIndex, alternative); } /** * Add a genotype into the file content structure * @param positionIndex the index of position where to set the genotype * @param genomeName the name of the genome the genotype belongs to * @param genotype the genotype */ private void addGenotype (int positionIndex, String genomeName, byte[] genotype) { for (int i = 0; i < genotype.length; i++) { int add = (i - genotypes.get(genomeName).size()) + 1; for (int j = 0; j < add; j++) { genotypes.get(genomeName).add(new MGByteArray()); } genotypes.get(genomeName).get(i).set(positionIndex, genotype[i]); } } /** * Add a {@link MGLineContent} into the file content structure * @param index index to insert the {@link MGLineContent} * @param position the {@link MGLineContent} to insert */ public void addPosition (int index, MGLineContent position) { // Each element of a position is added separately in the different lists positions.set(index, position.getReferenceGenomePosition()); // Add the position. scores.set(index, position.getScore()); // Add the score. for (int i = 0; i < position.getAlternatives().length; i++) { // Add the alternatives. addAlternative(i, index, position.getAlternatives()[i]); } List<String> genomes = new ArrayList<String>(position.getGenotypes().keySet()); // Add the genotypes. for (String genome: genomes) { addGenotype(index, genome, position.getGenotypes().get(genome)); } } /** * Compact all lists resizing arrays for better memory usage */ public void compact () { positions.compact(); int size = positions.size(); scores.resize(size); for (MGIntegerArray alternative: alternatives) { alternative.resize(size); } for (String genomeName: genotypes.keySet()) { for (MGByteArray genotype: genotypes.get(genomeName)) { genotype.resize(size); } } } /** * Generates the variants based on {@link MGChromosomeContent} information. */ public void generateVariants () { variants = new MGChromosomeVariants(); variants.generateVariants(this); } /** * @return the alternatives */ public List<MGIntegerArray> getAlternatives() { return alternatives; } /** * @param index index of the alternatives * @return an array of alternatives length */ private int[] getAlternatives (int index) { List<Integer> result = new ArrayList<Integer>(); for (MGIntegerArray alternative: alternatives) { if (alternative.get(index) != MGLineContent.NO_ALTERNATIVE) { result.add(alternative.get(index)); } } int[] array = new int[result.size()]; for (int i = 0; i < array.length; i++) { array[i] = result.get(i); } return array; } /** * @return the chromosome */ public Chromosome getChromosome() { return ProjectManager.getInstance().getProjectChromosomes().get(chromosomeName); } /** * @param index index of the genotypes * @return the map of genotypes */ private Map<String, byte[]> getGenotypes (int index) { Map<String, byte[]> genotypes = new HashMap<String, byte[]>(); for (String genomeName: this.genotypes.keySet()) { List<MGByteArray> byteArrays = this.genotypes.get(genomeName); byte[] array = new byte[byteArrays.size()]; for (int i = 0; i < array.length; i++) { array[i] = byteArrays.get(i).getByte(index); } genotypes.put(genomeName, array); } return genotypes; } /** * @return the maximum number of alternatives found in a line */ public int getMaxAlternativeNumber () { return alternatives.size(); } /** * Scan the genotype arrays of each sample in order to determine the widest haplotype. * @return the biggest haplotype (1 if haploide, 2 if diploide...), 0 otherwise */ public int getMaxGenotypeNumber () { int count = 0; for (List<MGByteArray> genotype: genotypes.values()) { if ((genotype != null) && (genotype.size() > 0)) { count = Math.max(count, genotype.size()); } } return count; } /** * @param index index of the {@link MGLineContent} * @return the {@link MGLineContent} for the given index */ public MGLineContent getPosition (int index) { MGLineContent position = new MGLineContent(); return getPosition(position, index); } /** * @param position a {@link MGLineContent} to update * @param index index of the {@link MGLineContent} * @return the {@link MGLineContent} for the given index */ public MGLineContent getPosition (MGLineContent position, int index) { position.setReferenceGenomePosition(positions.get(index)); position.setScore(scores.get(index)); position.setAlternatives(getAlternatives(index)); position.setGenotypes(getGenotypes(index)); return position; } /** * @return the positions */ public MGIntegerArray getPositions() { return positions; } /** * @param index position index on the list * @return the score for the given index, -1 otherwise */ public float getScore (int index) { if (index < getSize()) { return scores.get(index); } return -1; } /** * @return the scores */ public MGFloatArray getScores() { return scores; } /** * @return file content size (number of position) */ public int getSize () { return positions.size(); } /** * @return the variants */ public MGChromosomeVariants getVariants() { return variants; } @Override public Iterator<MGLineContent> iterator() { return new ChromosomeContentIterator(this); } /** * Print part of the {@link MGChromosomeContent} information * USED FOR DEVELOPMENT PURPOSE ONLY * @param start the index where to start * @param stop the index where to stop */ public void printChunkWithIndex (int start, int stop) { for (int i = start; i < stop; i++) { String info = ""; info += "[" + i + "]\t"; info += positions.get(i) + "\t"; info += scores.get(i) + "\t"; for (MGIntegerArray alternative: alternatives) { info += alternative.get(i) + "\t"; } System.out.println(info); } } /** * Print part of the {@link MGChromosomeContent} information * USED FOR DEVELOPMENT PURPOSE ONLY * @param start the position on the reference genome where to start printing * @param stop the position on the reference genome where to stop printing */ public void printChunkWithReferencePosition (int start, int stop) { System.out.println("MGChromosomeContent.printChunkWithReferencePosition()"); int index = positions.getIndex(start); if (index == -1) { System.out.println("No index has been found for the reference position: " + start); } else { boolean inBound = true; MGLineContent line = getPosition(index); while (inBound && (index < getSize())) { int referencePosition = line.getReferenceGenomePosition(); if ((referencePosition >= start) && (referencePosition <= stop)) { System.out.println(index + ": " + line.toString()); index++; line = getPosition(line, index); } else { inBound = false; } } } } /** * Method used for unserialization * @param in * @throws IOException * @throws ClassNotFoundException */ @SuppressWarnings("unchecked") private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { in.readInt(); chromosomeName = (String) in.readObject(); positions = (MGIntegerArray) in.readObject(); scores = (MGFloatArray) in.readObject(); alternatives = (List<MGIntegerArray>) in.readObject(); genotypes = (Map<String, List<MGByteArray>>) in.readObject(); variants = (MGChromosomeVariants) in.readObject(); } /** * Removes the variants */ public void removeVariants () { variants = null; } /** * Shows file content */ public void show () { String info = ""; int size = positions.size(); for (int i = 0; i < size; i++) { info += getPosition(i).toString() + "\n"; } System.out.println(info); if (variants != null) { variants.show(); } } /** * Method used for serialization * @param out * @throws IOException */ private void writeObject(ObjectOutputStream out) throws IOException { out.writeInt(SAVED_FORMAT_VERSION_NUMBER); out.writeObject(chromosomeName); out.writeObject(positions); out.writeObject(scores); out.writeObject(alternatives); out.writeObject(genotypes); out.writeObject(variants); } }