/******************************************************************************* * GenPlay, Einstein Genome Analyzer * Copyright (C) 2009, 2014 Albert Einstein College of Medicine * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * Authors: Julien Lajugie <julien.lajugie@einstein.yu.edu> * Nicolas Fourel <nicolas.fourel@einstein.yu.edu> * Eric Bouhassira <eric.bouhassira@einstein.yu.edu> * * Website: <http://genplay.einstein.yu.edu> ******************************************************************************/ package edu.yu.einstein.genplay.core.multiGenome.operation.VCF; import java.io.IOException; import java.util.Collections; import java.util.List; import edu.yu.einstein.genplay.core.multiGenome.VCF.BGZIPReader; import edu.yu.einstein.genplay.core.multiGenome.VCF.VCFLine; import edu.yu.einstein.genplay.core.multiGenome.VCF.VCFFile.VCFFile; import edu.yu.einstein.genplay.core.multiGenome.operation.fileScanner.FileScannerInterface; import edu.yu.einstein.genplay.core.multiGenome.operation.fileScanner.ManualVCFReader; import edu.yu.einstein.genplay.core.multiGenome.operation.fileScanner.SingleFileScanner; import edu.yu.einstein.genplay.core.multiGenome.utils.FormattedMultiGenomeName; import edu.yu.einstein.genplay.util.Utils; /** * This method exports a VCF track into a VCF file. * * @author Nicolas Fourel * @version 0.1 */ public class MGOVCFExportSingleFile extends ExportVCFEngine { @Override protected boolean canStart() throws Exception { List<VCFFile> fileList = getFileList(); if (fileList.size() == 1) { fileScanner = new SingleFileScanner(this); return true; } System.err.println("VCFExportEngineSingleFile.canStart() number of files invalid: " + fileList.size()); return false; } @Override public void processLine(FileScannerInterface fileAlgorithm) throws IOException { ManualVCFReader vcfReader = fileAlgorithm.getCurrentVCFReader(); VCFLine currentLine = fileAlgorithm.getCurrentLine(); data.writeObject(buildLine(vcfReader.getReader(), vcfReader.getAllValidIndex(), fileAlgorithm.getGenomeList(), vcfReader.getAllValidGenome()) + "\n"); // We have to add the line headerHandler.processLine(fileAlgorithm.getCurrentVCFFile(), currentLine.getALT(), currentLine.getFILTER(), currentLine.getINFO(), currentLine.getFORMAT()); // Checks the line in order to update IDs for the header } @Override protected void createHeader() throws IOException { // Initialize the reader BGZIPReader reader = new BGZIPReader(getFileList().get(0)); // Initialize the list of genomes List<String> genomeList = getGenomeList(); // Gets the meta data header = reader.getMetaDataHeader() + "\n"; // Gets the fields data header += headerHandler.getFieldHeader() + "\n"; // Gets the column names line header += reader.getFixedColumns(); for (int i = 0; i < genomeList.size(); i++) { header += FormattedMultiGenomeName.getRawName(genomeList.get(i)); if (i < (genomeList.size() - 1)) { header += "\t"; } } } /** * Builds the line to insert in the output. * The ALT field contains information only about the required variations. * The genome fields are native fields. Genomes that don't define any required variation will have a empty field: ./. * All other fields are the ones from the native line. * @param reader the file reader * @param indexes the indexes referring to the correct alternatives * @param fullGenomesList the list of all required genomes * @param genomes the list of genomes that have information matching requirements * @return the line to insert */ private String buildLine (BGZIPReader reader, List<Integer> indexes, List<String> fullGenomesList, List<String> genomes) { String result = ""; VCFLine line = reader.getCurrentLine(); result += line.getCHROM() + "\t"; result += line.getPOS() + "\t"; result += line.getID() + "\t"; result += line.getREF() + "\t"; result += buildALTField(indexes, line.getALT()) + "\t"; result += line.getQUAL() + "\t"; result += line.getFILTER() + "\t"; result += line.getINFO() + "\t"; result += line.getFORMAT() + "\t"; for (int i = 0; i < fullGenomesList.size(); i++) { String currentGenome = fullGenomesList.get(i); if (genomes.contains(currentGenome)) { result += line.getField(reader.getIndexFromGenome(genomes.get(i))); } else { result += "./."; } if (i < (fullGenomesList.size() - 1)) { result += "\t"; } } return result; } /** * Build the ALT field. * It contains only alternatives matching the requirements * @param indexes the list of indexes referring to the alternatives * @param alt the native ALT field * @return the ALT field to insert */ private String buildALTField (List<Integer> indexes, String alt) { Collections.sort(indexes); String[] alternatives = Utils.split(alt, ','); String result = ""; for (int i = 0; i < indexes.size(); i++) { int altIndex = indexes.get(i); if (altIndex > -1) { result += alternatives[altIndex]; if (i < (indexes.size() - 1)) { result += ","; } } } return result; } @Override public void processLine(VCFLine src, VCFLine dest) throws IOException {} }