/******************************************************************************* * GenPlay, Einstein Genome Analyzer * Copyright (C) 2009, 2014 Albert Einstein College of Medicine * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * Authors: Julien Lajugie <julien.lajugie@einstein.yu.edu> * Nicolas Fourel <nicolas.fourel@einstein.yu.edu> * Eric Bouhassira <eric.bouhassira@einstein.yu.edu> * * Website: <http://genplay.einstein.yu.edu> ******************************************************************************/ package edu.yu.einstein.genplay.core.multiGenome.VCF.VCFStatistics; import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.io.Serializable; import java.util.HashMap; import java.util.Map; import edu.yu.einstein.genplay.util.NumberFormats; /** * @author Nicolas Fourel * @version 0.1 */ public class VCFFileFullStatistic implements Serializable, VCFFileStatistics { /** Default generated serial version ID */ private static final long serialVersionUID = -1037070449560631967L; private static final int SAVED_FORMAT_VERSION_NUMBER = 0; // saved format version // Number of lines and columns private static final int LINE_NUMBER = 8; // Number of lines in the data object private static final int COLUMN_NUMBER = 4; // Number of columns in the data object // Column indexes private static final int SECTION_INDEX = 0; // Index for the section column private static final int NUMBER_INDEX = 1; // Index for the number column private static final int PERCENTAGE_SECTION_INDEX = 2; // Index for the section percentage column private static final int PERCENTAGE_TOTAL_INDEX = 3; // Index for the total percentage column // Line indexes private static final int LINE_INDEX = 0; // Index for the line section private static final int SNP_INDEX = 1; // Index for the SNP section private static final int INSERTION_INDEX = 2; // Index for the Insertion section private static final int INSERTION_INDEL_INDEX = 3; // Index for the Insertion indels sub-section private static final int INSERTION_SV_INDEX = 4; // Index for the Insertion SV sub-section private static final int DELETION_INDEX = 5; // Index for the Deletion section private static final int DELETION_INDEL_INDEX = 6; // Index for the Deletion indels sub-section private static final int DELETION_SV_INDEX = 7; // Index for the Deletion SV sub-section // Column names private static final String SECTION_NAME = "Sections"; // Name for the section column private static final String NUMBER_NAME = "Number"; // Name for the number column private static final String PERCENTAGE_SECTION_NAME = "% on the section type"; // Name for the section percentage column private static final String PERCENTAGE_TOTAL_NAME = "% on the whole genome"; // Name for the total percentage column // Line names private static final String LINE_NAME = "Line"; // Name for the line section private static final String SNP_NAME = "SNP"; // Name for the SNP section private static final String INSERTION_NAME = "Insertion"; // Name for the Insertion section private static final String INSERTION_INDEL_NAME = " Short (indels)"; // Name for the Insertion indels sub-section private static final String INSERTION_SV_NAME = " Long (SV)"; // Name for the Insertion SV sub-section private static final String DELETION_NAME = "Deletion"; // Name for the Deletion section private static final String DELETION_INDEL_NAME = " Short (indels)"; // Name for the Deletion indels sub-section private static final String DELETION_SV_NAME = " Long (SV)"; // Name for the Deletion SV sub-section /** * @param number an int * @return the local formatted value of the given int */ protected static String getNumberFormat (Object o) { Integer number = null; try { number = Integer.parseInt(o.toString()); } catch (Exception e) { return o.toString(); } return NumberFormats.getPositionFormat().format(number); } private Object[][] data; private String[][] dataDisplay; private Map<String, VCFSampleStatistics> genomeStatistics; private int numberOfSNPs; private int numberOfShortInsertions; private int numberOfLongInsertions; private int numberOfShortDeletions; private int numberOfLongDeletions; private int numberOfLines; /** * Constructor of {@link VCFFileFullStatistic} */ public VCFFileFullStatistic () { genomeStatistics = new HashMap<String, VCFSampleStatistics>(); numberOfSNPs = 0; numberOfShortInsertions = 0; numberOfLongInsertions = 0; numberOfShortDeletions = 0; numberOfLongDeletions = 0; numberOfLines = 0; data = null; dataDisplay = null; } @Override public void addGenomeName (String genomeName) { if (!genomeStatistics.containsKey(genomeName)) { genomeStatistics.put(genomeName, new VCFSampleFullStatistic()); } } /** * Format the data for display purposes to the dataDisplay attribute. */ private void formatData () { if (data != null) { dataDisplay = new String[LINE_NUMBER][COLUMN_NUMBER]; for (int row = 0; row < LINE_NUMBER; row++) { for (int col = 0; col < COLUMN_NUMBER; col++) { if (col == SECTION_INDEX) { dataDisplay[row][col] = data[row][col].toString(); } else { dataDisplay[row][col] = getNumberFormat(data[row][col]); } } } } } @Override public String[] getColumnNamesForData () { String[] columnNames = {SECTION_NAME, NUMBER_NAME, PERCENTAGE_SECTION_NAME, PERCENTAGE_TOTAL_NAME}; return columnNames; } @Override public Object[][] getData() { return data; } @Override public int getDataInt (int indexLine) { return getDataInt(indexLine, NUMBER_INDEX); } /** * @param indexLine index of a line * @param indexColumn index of a column * @return the associated integer, -1 otherwise */ private int getDataInt (int indexLine, int indexColumn) { int result = -1; try { result = Integer.parseInt(data[indexLine][indexColumn].toString()); } catch (Exception e) {} return result; } @Override public String[][] getDisplayData() { return dataDisplay; } @Override public String getFullString () { String info = getString(); for (String sample: genomeStatistics.keySet()) { info += "\nGenome Statistics \"" + sample + "\":\n" + genomeStatistics.get(sample).getString(); } return info; } @Override public Map<String, VCFSampleStatistics> getGenomeStatistics() { return genomeStatistics; } /** * @param value the value * @param total the total * @return the percentage between the value and its total, 0 otherwise */ private int getPercentage (int value, int total) { int result = 0; if ((total == 0) && (value == total)) { result = 100; } else { try { result = (value * 100) / total; } catch (Exception e) {} } return result; } @Override public VCFSampleStatistics getSampleStatistics (String sample) { return genomeStatistics.get(sample); } @Override public String getString() { String info = ""; info += "File Statistics:\n"; info += SECTION_NAME + "\t" + NUMBER_NAME + "\t" + PERCENTAGE_SECTION_NAME + "\t" + PERCENTAGE_TOTAL_NAME + "\n"; for (int i = 0; i < LINE_NUMBER; i++) { for (int j = 0; j < COLUMN_NUMBER; j++) { info += data[i][j]; if (j < (COLUMN_NUMBER - 1)) { info += "\t"; } } if (i < (LINE_NUMBER - 1)) { info += "\n"; } } return info; } @Override public void incrementNumberOfLines() { numberOfLines++; } @Override public void incrementNumberOfLongDeletions() { numberOfLongDeletions++; } @Override public void incrementNumberOfLongInsertions() { numberOfLongInsertions++; } @Override public void incrementNumberOfShortDeletions() { numberOfShortDeletions++; } @Override public void incrementNumberOfShortInsertions() { numberOfShortInsertions++; } @Override public void incrementNumberOfSNPs() { numberOfSNPs++; } @Override public void processStatistics () { if (data == null) { data = new Object[LINE_NUMBER][COLUMN_NUMBER]; data[LINE_INDEX][SECTION_INDEX] = LINE_NAME; data[SNP_INDEX][SECTION_INDEX] = SNP_NAME; data[INSERTION_INDEX][SECTION_INDEX] = INSERTION_NAME; data[INSERTION_INDEL_INDEX][SECTION_INDEX] = INSERTION_INDEL_NAME; data[INSERTION_SV_INDEX][SECTION_INDEX] = INSERTION_SV_NAME; data[DELETION_INDEX][SECTION_INDEX] = DELETION_NAME; data[DELETION_INDEL_INDEX][SECTION_INDEX] = DELETION_INDEL_NAME; data[DELETION_SV_INDEX][SECTION_INDEX] = DELETION_SV_NAME; data[LINE_INDEX][NUMBER_INDEX] = numberOfLines; data[SNP_INDEX][NUMBER_INDEX] = numberOfSNPs; data[INSERTION_INDEX][NUMBER_INDEX] = numberOfShortInsertions + numberOfLongInsertions; data[INSERTION_INDEL_INDEX][NUMBER_INDEX] = numberOfShortInsertions; data[INSERTION_SV_INDEX][NUMBER_INDEX] = numberOfLongInsertions; data[DELETION_INDEX][NUMBER_INDEX] = numberOfShortDeletions + numberOfLongDeletions; data[DELETION_INDEL_INDEX][NUMBER_INDEX] = numberOfShortDeletions; data[DELETION_SV_INDEX][NUMBER_INDEX] = numberOfLongDeletions; data[LINE_INDEX][PERCENTAGE_SECTION_INDEX] = "-"; data[SNP_INDEX][PERCENTAGE_SECTION_INDEX] = "100"; data[INSERTION_INDEX][PERCENTAGE_SECTION_INDEX] = "100"; data[INSERTION_INDEL_INDEX][PERCENTAGE_SECTION_INDEX] = getPercentage(getDataInt(INSERTION_INDEL_INDEX), getDataInt(INSERTION_INDEX)); data[INSERTION_SV_INDEX][PERCENTAGE_SECTION_INDEX] = getPercentage(getDataInt(INSERTION_SV_INDEX), getDataInt(INSERTION_INDEX)); data[DELETION_INDEX][PERCENTAGE_SECTION_INDEX] = "100"; data[DELETION_INDEL_INDEX][PERCENTAGE_SECTION_INDEX] = getPercentage(getDataInt(DELETION_INDEL_INDEX), getDataInt(DELETION_INDEX)); data[DELETION_SV_INDEX][PERCENTAGE_SECTION_INDEX] = getPercentage(getDataInt(DELETION_SV_INDEX), getDataInt(DELETION_INDEX)); int totalVariation = getDataInt(SNP_INDEX) + getDataInt(INSERTION_INDEX) + getDataInt(DELETION_INDEX); data[LINE_INDEX][PERCENTAGE_TOTAL_INDEX] = "-"; data[SNP_INDEX][PERCENTAGE_TOTAL_INDEX] = getPercentage(getDataInt(SNP_INDEX), totalVariation); data[INSERTION_INDEX][PERCENTAGE_TOTAL_INDEX] = getPercentage(getDataInt(INSERTION_INDEX), totalVariation); data[INSERTION_INDEL_INDEX][PERCENTAGE_TOTAL_INDEX] = getPercentage(getDataInt(INSERTION_INDEL_INDEX), totalVariation); data[INSERTION_SV_INDEX][PERCENTAGE_TOTAL_INDEX] = getPercentage(getDataInt(INSERTION_SV_INDEX), totalVariation); data[DELETION_INDEX][PERCENTAGE_TOTAL_INDEX] = getPercentage(getDataInt(DELETION_INDEX), totalVariation); data[DELETION_INDEL_INDEX][PERCENTAGE_TOTAL_INDEX] = getPercentage(getDataInt(DELETION_INDEL_INDEX), totalVariation); data[DELETION_SV_INDEX][PERCENTAGE_TOTAL_INDEX] = getPercentage(getDataInt(DELETION_SV_INDEX), totalVariation); } formatData(); for (VCFSampleStatistics sampleStatistics: genomeStatistics.values()) { sampleStatistics.processStatistics(); } } /** * Method used for unserialization * @param in * @throws IOException * @throws ClassNotFoundException */ @SuppressWarnings("unchecked") private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { in.readInt(); data = (Object[][]) in.readObject(); dataDisplay = (String[][]) in.readObject(); genomeStatistics = (Map<String, VCFSampleStatistics>) in.readObject(); } @Override public void show () { String info = ""; info += "File Statistics\n"; info += SECTION_NAME + "\t" + NUMBER_NAME + "\t" + PERCENTAGE_SECTION_NAME + "\t" + PERCENTAGE_TOTAL_NAME + "\n"; for (int i = 0; i < LINE_NUMBER; i++) { for (int j = 0; j < COLUMN_NUMBER; j++) { info += data[i][j]; if (j < (COLUMN_NUMBER - 1)) { info += "\t"; } } info += "\n"; } System.out.println("===== FILE Statistics"); System.out.println(info); for (String sample: genomeStatistics.keySet()) { System.out.println("===== " + sample + " Statistics"); genomeStatistics.get(sample).show(); } } /** * Method used for serialization * @param out * @throws IOException */ private void writeObject(ObjectOutputStream out) throws IOException { out.writeInt(SAVED_FORMAT_VERSION_NUMBER); out.writeObject(data); out.writeObject(dataDisplay); out.writeObject(genomeStatistics); } }