/*******************************************************************************
 *     GenPlay, Einstein Genome Analyzer
 *     Copyright (C) 2009, 2014 Albert Einstein College of Medicine
 *
 *     This program is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     This program is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with this program. If not, see <http://www.gnu.org/licenses/>.
 *     Authors: Julien Lajugie <julien.lajugie@einstein.yu.edu>
 *              Nicolas Fourel <nicolas.fourel@einstein.yu.edu>
 *              Eric Bouhassira <eric.bouhassira@einstein.yu.edu>
 *
 *     Website: <http://genplay.einstein.yu.edu>
 ******************************************************************************/
package edu.yu.einstein.genplay.core.multiGenome.VCF.VCFStatistics;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;

/**
 * Statistics table for one sample: counts variations (SNP, insertion, deletion;
 * indel vs. SV) and genotypes (homozygote, heterozygote, hemizygote).
 * <p>
 * Counters are accumulated through the {@code incrementNumberOf...} methods.
 * {@link #processStatistics()} then turns them into a {@value #LINE_NUMBER} x
 * {@value #COLUMN_NUMBER} table ({@link #getData()}) and a formatted string
 * version of it ({@link #getDisplayData()}).
 * <p>
 * Only the two tables are written by the custom serialization methods; the raw
 * counters are not part of the serialized form and are left at zero after
 * deserialization.
 *
 * @author Nicolas Fourel
 * @version 0.1
 */
public class VCFSampleFullStatistic implements Serializable, VCFSampleStatistics {

    /** Default generated serial version ID */
    private static final long serialVersionUID = -1037070449560631967L;
    private static final int SAVED_FORMAT_VERSION_NUMBER = 0; // saved format version

    // Number of lines and columns
    private static final int LINE_NUMBER = 21;  // Number of lines in the data object
    private static final int COLUMN_NUMBER = 4; // Number of columns in the data object

    // Column indexes
    private static final int SECTION_INDEX = 0;            // Index for the section column
    private static final int NUMBER_INDEX = 1;             // Index for the number column
    private static final int PERCENTAGE_SECTION_INDEX = 2; // Index for the section percentage column
    private static final int PERCENTAGE_TOTAL_INDEX = 3;   // Index for the total percentage column

    // Line indexes
    private static final int VARIATION_INDEX = 0;
    private static final int SNP_VARIATION_INDEX = 1;
    private static final int INSERTION_VARIATION_INDEX = 2;
    private static final int INSERTION_INDEL_INDEX = 3;
    private static final int INSERTION_SV_INDEX = 4;
    private static final int DELETION_VARIATION_INDEX = 5;
    private static final int DELETION_INDEL_INDEX = 6;
    private static final int DELETION_SV_INDEX = 7;
    private static final int GENOTYPE_INDEX = 8;
    private static final int SNP_GENOTYPE_INDEX = 9;
    private static final int SNP_HOMOZYGOTE_INDEX = 10;
    private static final int SNP_HETEROZYGOTE_INDEX = 11;
    private static final int SNP_HEMIZYGOTE_INDEX = 12;
    private static final int INSERTION_GENOTYPE_INDEX = 13;
    private static final int INSERTION_HOMOZYGOTE_INDEX = 14;
    private static final int INSERTION_HETEROZYGOTE_INDEX = 15;
    private static final int INSERTION_HEMIZYGOTE_INDEX = 16;
    private static final int DELETION_GENOTYPE_INDEX = 17;
    private static final int DELETION_HOMOZYGOTE_INDEX = 18;
    private static final int DELETION_HETEROZYGOTE_INDEX = 19;
    private static final int DELETION_HEMIZYGOTE_INDEX = 20;

    // Column names
    private static final String SECTION_NAME = "Sections";                         // Name for the section column
    private static final String NUMBER_NAME = "Number";                            // Name for the number column
    private static final String PERCENTAGE_SECTION_NAME = "% on the section type"; // Name for the section percentage column
    private static final String PERCENTAGE_TOTAL_NAME = "% on the whole genome";   // Name for the total percentage column

    // Line names
    private static final String VARIATION_TITLE = "Number of variations";
    private static final String GENOTYPE_TITLE = "Genotype variations";
    private static final String SNP_LABEL = " SNP";
    private static final String INSERTION_LABEL = " Insertion";
    private static final String DELETION_LABEL = " Deletion";
    private static final String HOMOZYGOTE_LABEL = " Homozygote";
    private static final String HETEROZYGOTE_LABEL = " Heterozygote";
    private static final String HEMIZYGOTE_LABEL = " Hemizygote";
    private static final String INDEL_LABEL = " Indel";
    private static final String SV_LABEL = " SV";

    /** Processed table (label, number, two percentages per line); null until processed. */
    private Object[][] data;
    /** String version of {@link #data} built by {@link #formatData()}; null until processed. */
    private String[][] dataDisplay;

    // Raw counters. They are NOT written by writeObject, so they are zero after deserialization.
    private int numberOfSNPs;
    private int numberOfShortInsertions;
    private int numberOfLongInsertions;
    private int numberOfShortDeletions;
    private int numberOfLongDeletions;

    private int numberOfHomozygoteSNPs;
    private int numberOfHomozygoteInsertions;
    private int numberOfHomozygoteDeletions;

    private int numberOfHeterozygoteSNPs;
    private int numberOfHeterozygoteInsertions;
    private int numberOfHeterozygoteDeletions;

    private int numberOfHemizygoteSNPs;
    private int numberOfHemizygoteInsertions;
    private int numberOfHemizygoteDeletions;


    /**
     * Method used for serialization.
     * Writes the saved format version followed by the two tables.
     * @param out the stream to write to
     * @throws IOException if the stream cannot be written
     */
    private void writeObject(ObjectOutputStream out) throws IOException {
        out.writeInt(SAVED_FORMAT_VERSION_NUMBER);
        out.writeObject(data);
        out.writeObject(dataDisplay);
    }


    /**
     * Method used for unserialization.
     * Reads back what {@link #writeObject(ObjectOutputStream)} wrote, in the same order.
     * @param in the stream to read from
     * @throws IOException if the stream cannot be read
     * @throws ClassNotFoundException if a serialized class cannot be resolved
     */
    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
        // The saved format version must be consumed to keep the stream aligned; its value is not used.
        in.readInt();
        data = (Object[][]) in.readObject();
        dataDisplay = (String[][]) in.readObject();
    }


    /**
     * Constructor of {@link VCFSampleFullStatistic}.
     * All counters start at zero and no table exists yet.
     */
    protected VCFSampleFullStatistic() {
        numberOfSNPs = 0;
        numberOfShortInsertions = 0;
        numberOfLongInsertions = 0;
        numberOfShortDeletions = 0;
        numberOfLongDeletions = 0;

        numberOfHomozygoteSNPs = 0;
        numberOfHomozygoteInsertions = 0;
        numberOfHomozygoteDeletions = 0;

        numberOfHeterozygoteSNPs = 0;
        numberOfHeterozygoteInsertions = 0;
        numberOfHeterozygoteDeletions = 0;

        numberOfHemizygoteSNPs = 0;
        numberOfHemizygoteInsertions = 0;
        numberOfHemizygoteDeletions = 0;

        data = null;
        dataDisplay = null;
    }


    /**
     * @return the names of the four columns of the data table, in column order
     */
    @Override
    public String[] getColumnNamesForData() {
        return new String[] {SECTION_NAME, NUMBER_NAME, PERCENTAGE_SECTION_NAME, PERCENTAGE_TOTAL_NAME};
    }


    /**
     * Builds the data table from the counters, then (re)builds the display table.
     * The data table is built only if it does not exist yet: once it has been
     * created (or restored by deserialization) later counter increments are not
     * reflected in it.
     */
    @Override
    public void processStatistics() {
        if (data == null) {
            data = new Object[LINE_NUMBER][COLUMN_NUMBER];

            int totalSNP = numberOfSNPs;
            int totalInsertion = numberOfShortInsertions + numberOfLongInsertions;
            int totalDeletion = numberOfShortDeletions + numberOfLongDeletions;
            int totalVariation = totalSNP + totalInsertion + totalDeletion;

            int totalGTSNP = numberOfHeterozygoteSNPs + numberOfHomozygoteSNPs + numberOfHemizygoteSNPs;
            int totalGTInsertion = numberOfHeterozygoteInsertions + numberOfHomozygoteInsertions + numberOfHemizygoteInsertions;
            int totalGTDeletion = numberOfHeterozygoteDeletions + numberOfHomozygoteDeletions + numberOfHemizygoteDeletions;
            int totalGT = totalGTSNP + totalGTInsertion + totalGTDeletion;

            // Variation part: sub-lines are relative to their own type, top-level lines to all variations.
            fillTitleRow(VARIATION_INDEX, VARIATION_TITLE, totalVariation);
            fillRow(SNP_VARIATION_INDEX, SNP_LABEL, totalSNP, totalVariation, totalVariation);
            fillRow(INSERTION_VARIATION_INDEX, INSERTION_LABEL, totalInsertion, totalVariation, totalVariation);
            fillRow(INSERTION_INDEL_INDEX, INDEL_LABEL, numberOfShortInsertions, totalInsertion, totalVariation);
            fillRow(INSERTION_SV_INDEX, SV_LABEL, numberOfLongInsertions, totalInsertion, totalVariation);
            fillRow(DELETION_VARIATION_INDEX, DELETION_LABEL, totalDeletion, totalVariation, totalVariation);
            fillRow(DELETION_INDEL_INDEX, INDEL_LABEL, numberOfShortDeletions, totalDeletion, totalVariation);
            fillRow(DELETION_SV_INDEX, SV_LABEL, numberOfLongDeletions, totalDeletion, totalVariation);

            // Genotype part: sub-lines are relative to their own type, top-level lines to all genotypes.
            fillTitleRow(GENOTYPE_INDEX, GENOTYPE_TITLE, totalGT);
            fillRow(SNP_GENOTYPE_INDEX, SNP_LABEL, totalGTSNP, totalGT, totalGT);
            fillRow(SNP_HETEROZYGOTE_INDEX, HETEROZYGOTE_LABEL, numberOfHeterozygoteSNPs, totalGTSNP, totalGT);
            fillRow(SNP_HOMOZYGOTE_INDEX, HOMOZYGOTE_LABEL, numberOfHomozygoteSNPs, totalGTSNP, totalGT);
            fillRow(SNP_HEMIZYGOTE_INDEX, HEMIZYGOTE_LABEL, numberOfHemizygoteSNPs, totalGTSNP, totalGT);
            fillRow(INSERTION_GENOTYPE_INDEX, INSERTION_LABEL, totalGTInsertion, totalGT, totalGT);
            fillRow(INSERTION_HETEROZYGOTE_INDEX, HETEROZYGOTE_LABEL, numberOfHeterozygoteInsertions, totalGTInsertion, totalGT);
            fillRow(INSERTION_HOMOZYGOTE_INDEX, HOMOZYGOTE_LABEL, numberOfHomozygoteInsertions, totalGTInsertion, totalGT);
            fillRow(INSERTION_HEMIZYGOTE_INDEX, HEMIZYGOTE_LABEL, numberOfHemizygoteInsertions, totalGTInsertion, totalGT);
            fillRow(DELETION_GENOTYPE_INDEX, DELETION_LABEL, totalGTDeletion, totalGT, totalGT);
            fillRow(DELETION_HETEROZYGOTE_INDEX, HETEROZYGOTE_LABEL, numberOfHeterozygoteDeletions, totalGTDeletion, totalGT);
            fillRow(DELETION_HOMOZYGOTE_INDEX, HOMOZYGOTE_LABEL, numberOfHomozygoteDeletions, totalGTDeletion, totalGT);
            fillRow(DELETION_HEMIZYGOTE_INDEX, HEMIZYGOTE_LABEL, numberOfHemizygoteDeletions, totalGTDeletion, totalGT);
        }
        formatData();
    }


    /**
     * Fills a title line of the data table: both percentage cells hold the string "100".
     * @param row   index of the line to fill
     * @param title text of the section column
     * @param count value of the number column
     */
    private void fillTitleRow(int row, String title, int count) {
        data[row][SECTION_INDEX] = title;
        data[row][NUMBER_INDEX] = count;
        data[row][PERCENTAGE_SECTION_INDEX] = "100";
        data[row][PERCENTAGE_TOTAL_INDEX] = "100";
    }


    /**
     * Fills a regular line of the data table.
     * @param row          index of the line to fill
     * @param label        text of the section column
     * @param count        value of the number column
     * @param sectionTotal total used for the section percentage column
     * @param overallTotal total used for the total percentage column
     */
    private void fillRow(int row, String label, int count, int sectionTotal, int overallTotal) {
        data[row][SECTION_INDEX] = label;
        data[row][NUMBER_INDEX] = count;
        data[row][PERCENTAGE_SECTION_INDEX] = getPercentage(count, sectionTotal);
        data[row][PERCENTAGE_TOTAL_INDEX] = getPercentage(count, overallTotal);
    }


    /**
     * @param value the value
     * @param total the total
     * @return the percentage (truncated integer division) of the value over its total;
     *         100 when both are zero, 0 when only the total is zero
     */
    private int getPercentage(int value, int total) {
        if (total == 0) {
            return (value == 0) ? 100 : 0;
        }
        // The product is computed as a long: value * 100 overflows an int above ~21.4 million.
        return (int) ((value * 100L) / total);
    }


    /**
     * @param indexLine index of a line
     * @return the integer located in the column containing the number, -1 otherwise
     */
    @Override
    public int getDataInt(int indexLine) {
        return getDataInt(indexLine, NUMBER_INDEX);
    }


    /**
     * @param indexLine index of a line
     * @param indexColumn index of a column
     * @return the associated integer; -1 if the table does not exist, the indexes are
     *         out of range, the cell is empty or its content is not an integer
     */
    private int getDataInt(int indexLine, int indexColumn) {
        if ((data == null) || (indexLine < 0) || (indexLine >= data.length)) {
            return -1;
        }
        Object[] line = data[indexLine];
        if ((line == null) || (indexColumn < 0) || (indexColumn >= line.length)) {
            return -1;
        }
        Object cell = line[indexColumn];
        if (cell == null) {
            return -1;
        }
        if (cell instanceof Integer) {
            return ((Integer) cell).intValue();
        }
        try {
            return Integer.parseInt(cell.toString());
        } catch (NumberFormatException e) {
            // The cell does not hold an integer: report it with the documented -1.
            return -1;
        }
    }


    /**
     * Format the data for display purposes to the dataDisplay attribute.
     * The section column is copied as text; every other cell goes through
     * {@code VCFFileFullStatistic.getNumberFormat}. Does nothing if the data table does not exist.
     */
    private void formatData() {
        if (data != null) {
            dataDisplay = new String[LINE_NUMBER][COLUMN_NUMBER];
            for (int row = 0; row < LINE_NUMBER; row++) {
                for (int col = 0; col < COLUMN_NUMBER; col++) {
                    if (col == SECTION_INDEX) {
                        dataDisplay[row][col] = data[row][col].toString();
                    } else {
                        dataDisplay[row][col] = VCFFileFullStatistic.getNumberFormat(data[row][col]);
                    }
                }
            }
        }
    }


    /**
     * @return the data table itself (not a copy); null if the statistics have not been processed
     */
    @Override
    public Object[][] getData() {
        return data;
    }


    /**
     * @return the display table itself (not a copy); null if the statistics have not been processed
     */
    @Override
    public String[][] getDisplayData() {
        return dataDisplay;
    }


    @Override
    public void incrementNumberOfSNPs() {
        this.numberOfSNPs++;
    }


    @Override
    public void incrementNumberOfShortInsertions() {
        this.numberOfShortInsertions++;
    }


    @Override
    public void incrementNumberOfLongInsertions() {
        this.numberOfLongInsertions++;
    }


    @Override
    public void incrementNumberOfShortDeletions() {
        this.numberOfShortDeletions++;
    }


    @Override
    public void incrementNumberOfLongDeletions() {
        this.numberOfLongDeletions++;
    }


    @Override
    public void incrementNumberOfHomozygoteSNPs() {
        this.numberOfHomozygoteSNPs++;
    }


    @Override
    public void incrementNumberOfHomozygoteInsertions() {
        this.numberOfHomozygoteInsertions++;
    }


    @Override
    public void incrementNumberOfHomozygoteDeletions() {
        this.numberOfHomozygoteDeletions++;
    }


    @Override
    public void incrementNumberOfHeterozygoteSNPs() {
        this.numberOfHeterozygoteSNPs++;
    }


    @Override
    public void incrementNumberOfHeterozygoteInsertions() {
        this.numberOfHeterozygoteInsertions++;
    }


    @Override
    public void incrementNumberOfHeterozygoteDeletions() {
        this.numberOfHeterozygoteDeletions++;
    }


    @Override
    public void incrementNumberOfHemizygoteSNPs() {
        this.numberOfHemizygoteSNPs++;
    }


    @Override
    public void incrementNumberOfHemizygoteInsertions() {
        this.numberOfHemizygoteInsertions++;
    }


    @Override
    public void incrementNumberOfHemizygoteDeletions() {
        this.numberOfHemizygoteDeletions++;
    }


    /**
     * Prints the tab-separated table (header line, then one line per data row) on the standard output.
     * If the statistics have not been processed yet, only the header line is printed.
     */
    @Override
    public void show() {
        System.out.println(buildTable(true));
    }


    /**
     * @return the tab-separated table (header line, then one line per data row, no line
     *         break after the last row); only the header line if the statistics have not
     *         been processed yet
     */
    @Override
    public String getString() {
        return buildTable(false);
    }


    /**
     * Builds the tab-separated text version of the data table.
     * @param terminateLastRow true to add a line break after the last data row as well
     * @return the header line followed by the data rows, or the header line alone if the data table does not exist
     */
    private String buildTable(boolean terminateLastRow) {
        StringBuilder info = new StringBuilder();
        info.append(SECTION_NAME).append("\t");
        info.append(NUMBER_NAME).append("\t");
        info.append(PERCENTAGE_SECTION_NAME).append("\t");
        info.append(PERCENTAGE_TOTAL_NAME).append("\n");
        if (data == null) {
            // Nothing processed yet: there are no rows to print.
            return info.toString();
        }
        for (int i = 0; i < LINE_NUMBER; i++) {
            for (int j = 0; j < COLUMN_NUMBER; j++) {
                info.append(data[i][j]);
                if (j < (COLUMN_NUMBER - 1)) {
                    info.append("\t");
                }
            }
            if (terminateLastRow || (i < (LINE_NUMBER - 1))) {
                info.append("\n");
            }
        }
        return info.toString();
    }


    /**
     * @return always the empty string
     */
    @Override
    public String getFullString() {
        return "";
    }
}