/******************************************************************************* * GenPlay, Einstein Genome Analyzer * Copyright (C) 2009, 2014 Albert Einstein College of Medicine * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * Authors: Julien Lajugie <julien.lajugie@einstein.yu.edu> * Nicolas Fourel <nicolas.fourel@einstein.yu.edu> * Eric Bouhassira <eric.bouhassira@einstein.yu.edu> * * Website: <http://genplay.einstein.yu.edu> ******************************************************************************/ package edu.yu.einstein.genplay.core.multiGenome.VCF; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.util.HashMap; import java.util.List; import java.util.Map; import net.sf.samtools.util.BlockCompressedInputStream; import edu.yu.einstein.genplay.core.multiGenome.VCF.VCFFile.VCFFile; import edu.yu.einstein.genplay.core.multiGenome.utils.FormattedMultiGenomeName; import edu.yu.einstein.genplay.core.multiGenome.utils.VCFGenomeIndexer; import edu.yu.einstein.genplay.dataStructure.enums.VCFColumnName; import edu.yu.einstein.genplay.exception.ExceptionManager; import edu.yu.einstein.genplay.util.Utils; /** * @author Nicolas Fourel * @version 0.1 */ public class BGZIPReader implements VCFGenomeIndexer { private final VCFFile vcfFile; // file to read private final BlockCompressedInputStream bcis; // stream for the file private final InputStreamReader isr; private final BufferedReader reader; private String metaDataHeader; // meta data of the file header private String fieldDataHeader; // field data of the file header private String fullHeader; // the full header private VCFLine currentLine; // current line in the file //private String columns; private final Map<String, Integer> genomeMap; // map between genome names and their related index according to their location on the column line /** * Constructor of {@link BGZIPReader} * @param vcfFile the VCF file * @throws IOException */ public BGZIPReader (VCFFile vcfFile) throws IOException { this.vcfFile = vcfFile; this.bcis = new BlockCompressedInputStream(vcfFile.getFile()); this.isr = new InputStreamReader(bcis); this.reader = new BufferedReader(isr); this.currentLine = null; this.metaDataHeader = ""; this.fieldDataHeader = ""; this.fullHeader = ""; this.genomeMap = new HashMap<String, Integer>(); initialize(); } /** * Initializes the reader. * It reads the header, the first line of data and then stop. * @throws IOException */ private void initialize () throws IOException { boolean isData = false; while (!isData) { String line = readLine(reader); if ((line != null) && (line.length() > 2)) { if (line.substring(0, 2).equals("##")) { if (!fullHeader.isEmpty()) { fullHeader += "\n"; } fullHeader += line; if (isMetaDataLine(line)) { if (!metaDataHeader.isEmpty()) { metaDataHeader += "\n"; } metaDataHeader += line; } else { if (!fieldDataHeader.isEmpty()) { fieldDataHeader += "\n"; } fieldDataHeader += line; } } else if (line.substring(0, 1).equals("#")) { if (!fullHeader.isEmpty()) { fullHeader += "\n"; } fullHeader += line; //this.columns = line; String[] columns = Utils.splitWithTab(line); for (int i = 9; i < columns.length; i++) { genomeMap.put(columns[i], i); } } else { currentLine = new VCFLine(line, this); isData = true; } } } } private boolean isMetaDataLine (String line) { if (line.length() > 8) { if (line.substring(2, 2 + VCFColumnName.ALT.toString().length()).equals(VCFColumnName.ALT.toString())) { return false; } else if (line.substring(2, 2 + VCFColumnName.FILTER.toString().length()).equals(VCFColumnName.FILTER.toString())) { return false; } else if (line.substring(2, 2 + VCFColumnName.INFO.toString().length()).equals(VCFColumnName.INFO.toString())) { return false; } else if (line.substring(2, 2 + VCFColumnName.FORMAT.toString().length()).equals(VCFColumnName.FORMAT.toString())) { return false; } } return true; } /** * @return the header without the columns */ public String getMetaDataHeader () { return metaDataHeader; } /** * Creates the string of fixed columns of a VCF files. * It goes from CHROM to FORMAT included. * It includes tabs (even after the FORMAT field). * @return the formated string of the fixed columns */ public String getFixedColumns () { String columns = "#"; columns += VCFColumnName.CHROM.toString() + "\t"; columns += VCFColumnName.POS.toString() + "\t"; columns += VCFColumnName.ID.toString() + "\t"; columns += VCFColumnName.REF.toString() + "\t"; columns += VCFColumnName.ALT.toString() + "\t"; columns += VCFColumnName.QUAL.toString() + "\t"; columns += VCFColumnName.FILTER.toString() + "\t"; columns += VCFColumnName.INFO.toString() + "\t"; columns += VCFColumnName.FORMAT.toString() + "\t"; return columns; } /** * Close the streams */ public void closeStreams () { try { reader.close(); } catch (IOException e) { ExceptionManager.getInstance().caughtException(e); } try { isr.close(); } catch (IOException e) { ExceptionManager.getInstance().caughtException(e); } try { bcis.close(); } catch (IOException e) { ExceptionManager.getInstance().caughtException(e); } } /** * Reads a line from the input stream * @return the line (or null) * @throws IOException */ private String readLine(BufferedReader reader) throws IOException { return reader.readLine(); } /** * Go to the next line in the file and read it. * Updates the current line. * @throws IOException */ public void goNextLine () throws IOException { currentLine = new VCFLine(readLine(reader), this); } /** * @return the currentLine */ public VCFLine getCurrentLine() { return currentLine; } /** * @return the file */ public VCFFile getVCFFile() { return vcfFile; } /** * @param genomeName the full genome name * @return the column index of the genome */ public int getIndexFromGenome (String genomeName) { return genomeMap.get(FormattedMultiGenomeName.getRawName(genomeName)); } /** * @param index the index of a genome * @return the genome raw name associated to the index */ public String getGenomeFromIndex (int index) { for (String genome: genomeMap.keySet()) { if (genomeMap.get(genome) == index) { return genome; } } return null; } /** * @return the vcfFile */ public VCFFile getVcfFile() { return vcfFile; } /** * Print the all the lines of the file * @throws IOException */ public void printAllFile () throws IOException { BlockCompressedInputStream bcis = new BlockCompressedInputStream(vcfFile.getFile()); BufferedReader reader = new BufferedReader(new InputStreamReader(bcis)); System.out.println("Content of the file " + vcfFile.getFile().getName() + ":"); boolean end = false; while (!end) { String currentLine = readLine(reader); if (currentLine == null) { end = true; } else if (!currentLine.isEmpty()) { System.out.println(currentLine); } } reader.close(); } /** * Print the all the lines of the file with the detail of each line * @throws IOException */ public void printFileAsElements () throws IOException { BlockCompressedInputStream bcis = new BlockCompressedInputStream(vcfFile.getFile()); BufferedReader reader = new BufferedReader(new InputStreamReader(bcis)); System.out.println("Content of the file " + vcfFile.getFile().getName() + ":"); boolean end = false; while (!end) { VCFLine currentLine = new VCFLine(readLine(reader), this); if (currentLine.isLastLine()) { end = true; } else if (currentLine.isValid()) { currentLine.showElements(); } } reader.close(); } @Override public int getIndexFromRawGenomeName(String genomeRawName) { try { return genomeMap.get(genomeRawName); } catch (Exception e) { return -1; } } @Override public int getIndexFromFullGenomeName(String genomeFullName) { return getIndexFromRawGenomeName(FormattedMultiGenomeName.getRawName(genomeFullName)); } @Override public String getGenomeRawName(int index) { for (String rawName: genomeMap.keySet()) { if (genomeMap.get(rawName) == index) { return rawName; } } return null; } @Override public List<String> getGenomeRawNames() { return vcfFile.getHeader().getGenomeRawNames(); } /** * @return the fullHeader */ public String getFullHeader() { return fullHeader; } }