/*******************************************************************************
 * GenPlay, Einstein Genome Analyzer
 * Copyright (C) 2009, 2014 Albert Einstein College of Medicine
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 * Authors: Julien Lajugie <julien.lajugie@einstein.yu.edu>
 *          Nicolas Fourel <nicolas.fourel@einstein.yu.edu>
 *          Eric Bouhassira <eric.bouhassira@einstein.yu.edu>
 *
 * Website: <http://genplay.einstein.yu.edu>
 ******************************************************************************/
package edu.yu.einstein.genplay.core.multiGenome.operation.BED;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import edu.yu.einstein.genplay.core.multiGenome.VCF.VCFLine;
import edu.yu.einstein.genplay.core.multiGenome.VCF.VCFFile.VCFFile;
import edu.yu.einstein.genplay.core.multiGenome.VCF.VCFHeaderType.VCFHeaderAdvancedType;
import edu.yu.einstein.genplay.core.multiGenome.VCF.VCFHeaderType.VCFHeaderType;
import edu.yu.einstein.genplay.core.multiGenome.operation.ExportEngine;
import edu.yu.einstein.genplay.core.multiGenome.operation.fileScanner.FileScannerInterface;
import edu.yu.einstein.genplay.core.multiGenome.operation.fileScanner.SingleFileScanner;
import edu.yu.einstein.genplay.core.multiGenome.utils.VCFLineUtility;
import edu.yu.einstein.genplay.dataStructure.chromosome.Chromosome;
import edu.yu.einstein.genplay.dataStructure.enums.AlleleType;
import edu.yu.einstein.genplay.dataStructure.enums.CoordinateSystemType;
import edu.yu.einstein.genplay.dataStructure.enums.VCFColumnName;
import edu.yu.einstein.genplay.util.Utils;


/**
 * Operation to export a VCF track as a BED file.
 * This will create new file(s): one BED file per exported allele.
 * The export only starts when exactly one VCF file is involved (see {@link #canStart()}).
 *
 * @author Nicolas Fourel
 * @version 0.1
 */
public class MGOBedExportSingleFile extends ExportEngine {

	private final String 					fullGenomeName;		// The genome to export.
	private final AlleleType 				allele;				// The allele(s) to export.
	private final VCFHeaderType 			header;				// The header field to use as a score.
	private final CoordinateSystemType 		coordinateSystem;	// The coordinate system to export the positions.

	private List<AlleleSettingsBedExport> 	fullAlleleList;		// The full list of allele settings helper (both alleles, always).
	private List<AlleleSettingsBedExport> 	alleleListToExport;	// The list of allele settings helper to use (subset selected by the allele type).
	private int 							genomeIndex;		// The index of the genome in the file to export.


	/**
	 * Constructor of {@link MGOBedExportSingleFile}
	 * @param fullGenomeName	the full genome name of the genome to export
	 * @param allele			the allele type to export
	 * @param header			the header to use as a score
	 * @param coordinateSystem	the coordinate system of the position to export the data
	 */
	public MGOBedExportSingleFile (String fullGenomeName, AlleleType allele, VCFHeaderType header, CoordinateSystemType coordinateSystem) {
		this.fullGenomeName = fullGenomeName;
		this.allele = allele;
		this.header = header;
		this.coordinateSystem = coordinateSystem;
	}


	/**
	 * Checks that exactly one VCF file is involved, and if so creates the
	 * file scanner and the allele helpers.
	 * @return true if the export can start, false otherwise (an error is printed on stderr)
	 */
	@Override
	protected boolean canStart() throws Exception {
		List<VCFFile> fileList = getFileList();
		if (fileList.size() == 1) {
			fileScanner = new SingleFileScanner(this);
			initializeAlleleList();
			return true;
		}
		System.err.println("BedExportEngineSingleFile.canStart() number of files invalid: " + fileList.size());
		return false;
	}


	/**
	 * Initialize the list of allele helpers.
	 * Both allele helpers are always created (they are both needed to process a line),
	 * but only the one(s) matching the requested allele type will be written.
	 */
	private void initializeAlleleList () {
		AlleleSettingsBedExport firstAllele = new AlleleSettingsBedExport(path, AlleleType.ALLELE01, coordinateSystem);
		AlleleSettingsBedExport secondAllele = new AlleleSettingsBedExport(path, AlleleType.ALLELE02, coordinateSystem);

		fullAlleleList = new ArrayList<AlleleSettingsBedExport>();
		fullAlleleList.add(firstAllele);
		fullAlleleList.add(secondAllele);

		// The allele selection applies whatever the coordinate system is
		// (an earlier version restricted it to the CURRENT_GENOME coordinate system).
		alleleListToExport = new ArrayList<AlleleSettingsBedExport>();
		if (allele == AlleleType.BOTH) {
			alleleListToExport.add(firstAllele);
			alleleListToExport.add(secondAllele);
		} else if (allele == AlleleType.ALLELE01) {
			alleleListToExport.add(firstAllele);
		} else if (allele == AlleleType.ALLELE02) {
			alleleListToExport.add(secondAllele);
		}
	}


	/**
	 * Runs the export: opens the output stream(s), writes the BED header(s),
	 * scans the VCF file and finally closes the stream(s).
	 * The streams that have been opened are closed even if the scan fails.
	 */
	@Override
	protected void process() throws Exception {
		// Retrieve the index of the column of the genome in the VCF
		genomeIndex = fileScanner.getCurrentVCFReader().getReader().getIndexFromGenome(fullGenomeName);

		// Keep track of the helpers whose streams have been opened so that only those get closed
		List<AlleleSettingsBedExport> openedExports = new ArrayList<AlleleSettingsBedExport>(alleleListToExport.size());
		try {
			// Open the file streams
			for (AlleleSettingsBedExport alleleExport: alleleListToExport) {
				alleleExport.openStreams();
				openedExports.add(alleleExport);
				alleleExport.write(getFileHeader(alleleExport));
			}

			// Compute the file scan algorithm
			fileScanner.compute();
		} finally {
			// Close the file streams
			for (AlleleSettingsBedExport alleleExport: openedExports) {
				alleleExport.closeStreams();
			}
		}
	}


	/**
	 * Builds the BED "track" line: track name, description and the useScore flag.
	 * @param alleleExport the allele helper the header is made for
	 * @return the header for the bed
	 */
	private String getFileHeader (AlleleSettingsBed alleleExport) {
		// Track name: genome name, then a suffix depending on the coordinate system
		String track = fullGenomeName.replace(" ", "_") + "_";
		if (alleleExport.getCoordinateSystem() == CoordinateSystemType.CURRENT_GENOME) {
			track += alleleExport.getAllele().toString().toLowerCase();
		} else if (alleleExport.getCoordinateSystem() == CoordinateSystemType.METAGENOME) {
			track += "meta_genome";
		} else if (alleleExport.getCoordinateSystem() == CoordinateSystemType.REFERENCE) {
			track += "reference_genome";
		}
		track += "_genplay_export";

		// Some header types have no ID: the name of their column category is used instead
		String id = header.getId();
		if (id == null) {
			id = header.getColumnCategory().toString();
		}
		String description = id + " is used as score to extract positions of: " + getVariantDescription() + " from the file " + getFileList().get(0).getFile().getName();

		String fileHeader = "";
		fileHeader += "track name=" + track + " ";
		fileHeader += "description=\"" + description + "\" ";
		fileHeader += "useScore=1";
		return fileHeader;
	}


	/**
	 * Processes the current line of the scanner: computes the position of both alleles
	 * and writes one BED line per writable allele to export.
	 * Lines whose genotype is not exactly 3 characters long are skipped.
	 * @param fileAlgorithm the file scanner providing the current line
	 */
	@Override
	public void processLine(FileScannerInterface fileAlgorithm) throws IOException {
		VCFLine currentLine = fileAlgorithm.getCurrentLine();
		currentLine.processForAnalyse();
		String gt = currentLine.getFormatField(genomeIndex, 0).toString();

		// Only genotypes made of 3 characters are handled (presumably two single digit alleles and their separator)
		if (gt.length() != 3) {
			return;
		}

		Chromosome chromosome = currentLine.getChromosome();
		int[] lengths = VCFLineUtility.getVariantLengths(currentLine.getREF(), Utils.split(currentLine.getALT(), ','), currentLine.getINFO());

		// Both alleles are initialized, even when only one is exported, because each one is updated according to the other one
		for (AlleleSettingsBedExport alleleExport: fullAlleleList) {
			int altIndex = VCFLineUtility.getAlleleIndex(gt.charAt(alleleExport.getCharIndex()));
			alleleExport.initializeCurrentInformation(lengths, currentLine, altIndex);
		}

		AlleleSettingsBedExport firstAllele = fullAlleleList.get(0);
		AlleleSettingsBedExport secondAllele = fullAlleleList.get(1);

		firstAllele.updateCurrentInformation(secondAllele, chromosome);
		secondAllele.updateCurrentInformation(firstAllele, chromosome);

		firstAllele.finalizePosition();
		secondAllele.finalizePosition();

		for (AlleleSettingsBedExport alleleExport: alleleListToExport) {
			if (!alleleExport.isWritable()) {
				continue;
			}
			Object score = getScore(currentLine, alleleExport);
			if (score != null) {
				String name = alleleExport.getName(currentLine);
				String line = buildLine(chromosome, alleleExport.getCurrentStart(), alleleExport.getCurrentStop(), name, score);
				alleleExport.write(line);
			} else {
				System.err.println("The line could not be exported. It seems the ID '" + header.getId() + "' has not been found in the line: " + currentLine.toString());
			}
		}
	}


	/**
	 * Retrieves the score of the line for the given allele.
	 * The value of the header field is split on commas; when it holds several values
	 * and the header does not declare exactly one value, the value matching the
	 * current alternative of the allele is used.
	 * @param currentLine	the current line in process
	 * @param alleleExport	the allele setting helper to use
	 * @return				the score to use, null otherwise
	 */
	private Object getScore (VCFLine currentLine, AlleleSettingsBedExport alleleExport) {
		Object value = currentLine.getHeaderField(header, genomeIndex);
		if (value == null) {
			return null;
		}

		Object[] values = Utils.split(value.toString(), ',');
		int valueIndex = 0;
		if ((values.length > 1) && (header instanceof VCFHeaderAdvancedType)) {
			VCFHeaderAdvancedType advanced = (VCFHeaderAdvancedType) header;
			if (!advanced.getNumber().equals("1")) {
				valueIndex = alleleExport.getCurrentAltIndex();
				// The FORMAT field "AD" is shifted by one
				// NOTE(review): presumably because AD lists the reference allele first -- confirm against the VCF specification
				if ((advanced.getColumnCategory() == VCFColumnName.FORMAT) && "AD".equals(header.getId())) {
					valueIndex++;
				}
			}
		}

		// The index can be out of range (e.g. negative alternative index): no score in that case
		if ((valueIndex > -1) && (valueIndex < values.length)) {
			return values[valueIndex];
		}
		return null;
	}


	/**
	 * Builds the BED line: chromosome, start, stop, name and score separated by tabulations.
	 * @param chromosome	the chromosome of the line
	 * @param start			the start position
	 * @param stop			the stop position
	 * @param name			the name of the feature
	 * @param value			the score
	 * @return				the BED line
	 */
	private String buildLine (Chromosome chromosome, int start, int stop, String name, Object value) {
		return chromosome.getName() + "\t" + start + "\t" + stop + "\t" + name + "\t" + value;
	}


	/**
	 * @return the list of generated files, an empty list if the export has not been initialized
	 */
	public List<File> getExportedFiles () {
		List<File> list = new ArrayList<File>();
		// The allele list only exists once canStart() has succeeded
		if (alleleListToExport != null) {
			for (AlleleSettingsBedExport alleleExport: alleleListToExport) {
				list.add(alleleExport.getBedFile());
			}
		}
		return list;
	}


	/**
	 * Not used by this export: intentionally does nothing.
	 */
	@Override
	public void processLine(VCFLine src, VCFLine dest) throws IOException {}
}