/*******************************************************************************
 * GenPlay, Einstein Genome Analyzer
 * Copyright (C) 2009, 2014 Albert Einstein College of Medicine
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 * Authors: Julien Lajugie <julien.lajugie@einstein.yu.edu>
 * Nicolas Fourel <nicolas.fourel@einstein.yu.edu>
 * Eric Bouhassira <eric.bouhassira@einstein.yu.edu>
 *
 * Website: <http://genplay.einstein.yu.edu>
 ******************************************************************************/
package edu.yu.einstein.genplay.core.IO.writer.geneListWriter;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.text.NumberFormat;

import edu.yu.einstein.genplay.core.IO.utils.TrackLineHeader;
import edu.yu.einstein.genplay.core.manager.project.ProjectManager;
import edu.yu.einstein.genplay.core.multiGenome.utils.FormattedMultiGenomeName;
import edu.yu.einstein.genplay.core.multiGenome.utils.ShiftCompute;
import edu.yu.einstein.genplay.dataStructure.chromosome.Chromosome;
import edu.yu.einstein.genplay.dataStructure.gene.Gene;
import edu.yu.einstein.genplay.dataStructure.list.genomeWideList.geneList.GeneList;
import edu.yu.einstein.genplay.dataStructure.list.listView.ListView;
import edu.yu.einstein.genplay.dataStructure.scoredChromosomeWindow.ScoredChromosomeWindow;
import edu.yu.einstein.genplay.gui.statusBar.Stoppable;
import edu.yu.einstein.genplay.util.NumberFormats;


/**
 * Writes {@link GeneList} data into BED files.
 * @author Julien Lajugie
 */
public final class GeneListAsBedWriter extends GeneListWriter implements Stoppable {

    /**
     * @param gene a Gene
     * @return true if at least one exon of the gene has a score that is not NaN,
     * false otherwise (including when the gene has no exon list)
     */
    private static boolean areExonsScored(Gene gene) {
        if (gene.getExons() == null) {
            return false;
        }
        for (ScoredChromosomeWindow currentExon: gene.getExons()) {
            if (!Float.isNaN(currentExon.getScore())) {
                return true;
            }
        }
        return false;
    }


    /**
     * Formats a gene using its own (untranslated) start, stop and UTR bounds.
     * The gene is always treated as belonging to a scored list.
     * @param gene a Gene
     * @param chromosome the chromosome of the gene
     * @return a bed format string representing the specified gene
     */
    public static final String geneToString(Gene gene, Chromosome chromosome) {
        return geneToString(gene, chromosome, gene.getStart(), gene.getStop(), gene.getUTR5Bound(), gene.getUTR3Bound(), true);
    }


    /**
     * Builds one tab-separated BED line for a gene. The columns are: chromosome, start, stop,
     * name, score, strand, UTR5 bound, UTR3 bound, "-" (itemRgb placeholder), exon count,
     * comma-separated exon lengths, comma-separated exon starts relative to the gene start and,
     * only when at least one exon is scored, comma-separated exon scores.
     * When the gene has no exon (null or empty exon list) the three exon columns are written as "-".
     * @param gene a gene
     * @param chromosome the chromosome of the gene
     * @param start the start position of the gene after translation on appropriate genome (for multigenome)
     * @param stop the stop position of the gene after translation on appropriate genome (for multigenome)
     * @param UTR5Bound the UTR5 bound position of the gene after translation on appropriate genome (for multigenome)
     * @param UTR3Bound the UTR3 bound position of the gene after translation on appropriate genome (for multigenome)
     * @param isGeneListScored true if the gene list is scored
     * @return a bed format string representing the specified gene
     */
    private static String geneToString(Gene gene, Chromosome chromosome, int start, int stop, int UTR5Bound, int UTR3Bound, boolean isGeneListScored) {
        // retrieve the number format for the scores
        NumberFormat numberFormat = NumberFormats.getWriterScoreFormat();
        StringBuilder line = new StringBuilder();

        line.append(chromosome.toString()).append('\t');
        line.append(start).append('\t');
        line.append(stop).append('\t');
        line.append(gene.getName()).append('\t');

        // score column: constant 1 for an unscored list, otherwise the gene score
        if (!isGeneListScored) {
            line.append("1");
        } else {
            float score = gene.getScore();
            if (Float.isNaN(score)) {
                // if there is no score for the gene we put a default 0
                line.append("0");
            } else {
                line.append(numberFormat.format(score));
            }
        }
        line.append('\t');
        line.append(gene.getStrand().toString()).append('\t');
        line.append(UTR5Bound).append('\t');
        line.append(UTR3Bound);
        // add "-" for itemRgb
        line.append("\t-\t");

        // an empty exon list is handled like a missing one: blindly stripping a "last comma"
        // from an empty list would remove the column separators instead
        if ((gene.getExons() == null) || (gene.getExons().size() == 0)) {
            line.append("-\t-\t-");
            return line.toString();
        }

        // exon count
        int exonCount = gene.getExons().size();
        line.append(exonCount).append('\t');

        // exon lengths
        for (int i = 0; i < exonCount; i++) {
            if (i > 0) {
                line.append(',');
            }
            line.append(gene.getExons().get(i).getStop() - gene.getExons().get(i).getStart());
        }
        line.append('\t');

        // exon starts, relative to the (untranslated) start of the gene
        boolean isFirst = true;
        for (ScoredChromosomeWindow currentExon: gene.getExons()) {
            if (!isFirst) {
                line.append(',');
            }
            line.append(currentExon.getStart() - gene.getStart());
            isFirst = false;
        }

        // exon scores, only written when at least one exon has a score
        if (areExonsScored(gene)) {
            line.append('\t');
            isFirst = true;
            for (ScoredChromosomeWindow currentExon: gene.getExons()) {
                if (!isFirst) {
                    line.append(',');
                }
                float currentScore = currentExon.getScore();
                if (Float.isNaN(currentScore)) {
                    // unscored exons are written as 0
                    currentScore = 0;
                }
                line.append(numberFormat.format(currentScore));
                isFirst = false;
            }
        }
        return line.toString();
    }


    // set by stop() and polled by write(); volatile so that a stop request issued
    // from another thread is seen by the writing loop
    private volatile boolean needsToBeStopped = false;  // true if the writer needs to be stopped
    private boolean isGeneListScored = false;           // true if the gene list is scored


    /**
     * Creates an instance of {@link GeneListAsBedWriter}.
     * @param outputFile output {@link File}
     * @param data {@link GeneList} to write
     * @param name a name for the {@link GeneList}
     */
    public GeneListAsBedWriter(File outputFile, GeneList data, String name) {
        super(outputFile, data, name);
    }


    /**
     * @return true if at least one gene of the list has a score different from 0, false otherwise
     */
    private boolean isGeneListScored() {
        for (ListView<Gene> currentList : data) {
            if (currentList != null) {
                for (Gene currentGene : currentList) {
                    // NOTE(review): a NaN score is different from 0 and therefore counts as "scored" here,
                    // while geneToString treats NaN as "no score" -- confirm this is intended
                    Float score = currentGene.getScore();
                    if ((score != null) && (score != 0)) {
                        return true;
                    }
                }
            }
        }
        return false;
    }


    /**
     * Stops the writer while it's writing a file
     */
    @Override
    public void stop() {
        needsToBeStopped = true;
    }


    /**
     * Writes the track line header followed by one BED line per gene, chromosome by chromosome.
     * Positions are translated for multi-genome projects and converted to 0-based coordinates.
     * Genes whose converted start or stop is negative are skipped.
     * @throws IOException if the output file cannot be written
     * @throws InterruptedException if {@link #stop()} was called while writing; the output file is deleted in that case
     */
    @Override
    public void write() throws IOException, InterruptedException {
        isGeneListScored = isGeneListScored();
        BufferedWriter writer = null;
        try {
            boolean isMultiGenome = ProjectManager.getInstance().isMultiGenomeProject() && (fullGenomeName != null) && (allele != null);

            // try to create a output file
            writer = new BufferedWriter(new FileWriter(outputFile));

            // print the "track" header of the file
            TrackLineHeader trackLineHeader = new TrackLineHeader();
            trackLineHeader.setName(name);
            trackLineHeader.setGeneDBURL(data.getGeneDBURL());
            trackLineHeader.setGeneScoreType(data.getGeneScoreType());
            String trackLine = trackLineHeader.generateTrackLine();
            // guard on the generated line (not on the header object, which is never null here):
            // writing a null String would throw a NullPointerException
            if (trackLine != null) {
                writer.write(trackLine);
                writer.newLine();
            }

            // print the data
            for (Chromosome currentChromosome: projectChromosomes) {
                ListView<Gene> currentList = data.get(currentChromosome);
                if (currentList != null) {
                    for (Gene currentGene : currentList) {
                        // if the operation need to be stopped we close the writer and delete the file
                        if (needsToBeStopped) {
                            // close before deleting; the second close in the finally block has no effect
                            writer.close();
                            outputFile.delete();
                            throw new InterruptedException();
                        }

                        int start = currentGene.getStart();
                        int stop = currentGene.getStop();
                        int UTR5Bound = currentGene.getUTR5Bound();
                        int UTR3Bound = currentGene.getUTR3Bound();

                        if (isMultiGenome) {
                            start = ShiftCompute.getPosition(FormattedMultiGenomeName.META_GENOME_NAME, allele, start, currentChromosome, fullGenomeName);
                            stop = ShiftCompute.getPosition(FormattedMultiGenomeName.META_GENOME_NAME, allele, stop, currentChromosome, fullGenomeName);
                            UTR5Bound = ShiftCompute.getPosition(FormattedMultiGenomeName.META_GENOME_NAME, allele, UTR5Bound, currentChromosome, fullGenomeName);
                            UTR3Bound = ShiftCompute.getPosition(FormattedMultiGenomeName.META_GENOME_NAME, allele, UTR3Bound, currentChromosome, fullGenomeName);
                        }

                        // we subtract 1 because positions in bed files are 0 based and GenPlay positions are 1-based
                        start--;
                        stop--;
                        UTR5Bound--;
                        UTR3Bound--;

                        if ((start > -1) && (stop > -1)) {
                            String lineToPrint = geneToString(currentGene, currentChromosome, start, stop, UTR5Bound, UTR3Bound, isGeneListScored);
                            writer.write(lineToPrint);
                            writer.newLine();
                        }
                    }
                }
            }
        } finally {
            if (writer != null) {
                writer.close();
            }
        }
    }
}