/******************************************************************************* * GenPlay, Einstein Genome Analyzer * Copyright (C) 2009, 2014 Albert Einstein College of Medicine * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * Authors: Julien Lajugie <julien.lajugie@einstein.yu.edu> * Nicolas Fourel <nicolas.fourel@einstein.yu.edu> * Eric Bouhassira <eric.bouhassira@einstein.yu.edu> * * Website: <http://genplay.einstein.yu.edu> ******************************************************************************/ package edu.yu.einstein.genplay.core.multiGenome.VCF.VCFFile; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import net.sf.jannot.tabix.Iterator; import net.sf.jannot.tabix.TabixReader; import edu.yu.einstein.genplay.dataStructure.chromosome.Chromosome; import edu.yu.einstein.genplay.dataStructure.enums.VCFColumnName; import edu.yu.einstein.genplay.util.Utils; /** * @author Nicolas Fourel * @version 0.1 */ public class VCFReader { private transient TabixReader vcfParser; // Tabix object for the VCF file (Tabix Java API) private List<String> columnNames; // All column header names /** * Constructor of {@link VCFReader} */ protected VCFReader () { vcfParser = null; } /** * Indexes the file creating using the Tabix API * @param file * @throws IOException */ protected void indexVCFFile (File file) throws IOException { this.vcfParser = new TabixReader(file.getPath()); } /** * Performs a query on the indexed VCF file for a whole chromosome. * @param chromosome the chromosome * @return the results list * @throws IOException */ public List<String> query (Chromosome chromosome) throws IOException { return query(chromosome.getName(), 0, chromosome.getLength()); } /** * Performs a query on the indexed VCF file. * @param chr chromosome * @param start start position * @param stop stop position * @return query results list * @throws IOException */ public List<String> query (String chr, int start, int stop) throws IOException { Iterator iter = vcfParser.query(chr + ":" + start + "-" + stop); List<String> result = new ArrayList<String>(); String line; while ((iter != null) && ((line = iter.next()) != null)){ result.add(line); } return result; } /** * Performs a query on the first chromosome of the indexed VCF file and return the 10 first results. * @return query results list * @throws IOException */ public List<Map<String, Object>> shortQuery () throws IOException { Iterator iter = vcfParser.shortQuery(0); List<String> fields = new ArrayList<String>(); fields.add(VCFColumnName.REF.toString()); fields.add(VCFColumnName.ALT.toString()); List<Map<String, Object>> result = new ArrayList<Map<String, Object>>(); int cpt = 0; String line; while ((iter != null) && ((line = iter.next()) != null) && (cpt < 10)){ String[] info = Utils.splitWithTab(line); Map<String, Object> row = new HashMap<String, Object>(); for (String columnName: columnNames) { if (fields.indexOf(columnName) != -1) { row.put(columnName, info[columnNames.indexOf(columnName)]); } } result.add(row); cpt++; } return result; } /** * @return the vcfParser */ public TabixReader getVCFParser() { return vcfParser; } /** * @param columnNames the columnNames to set */ public void setColumnNames(List<String> columnNames) { this.columnNames = columnNames; } /** * Shows the result of a query. * A query can have more than one result. * @param result the list of result */ public static void showQueryResults (List<Map<String, Object>> result) { System.out.println("===== Query results"); String line; if (result.size() == 0) { System.out.println("no result"); } else { for (int i = 0; i < result.size(); i++) { line = "Result " + (i + 1) + ": "; for (String name: result.get(i).keySet()) { line = line + name + " = " + result.get(i).get(name) + ", "; } System.out.println(line); } } } /** * Shows the result of several query. * @param result the list of result */ public static void showQueriesFileResults (List<List<Map<String, Object>>> result) { System.out.println("===== Queries file results"); for (List<Map<String, Object>> queryResults: result) { showQueryResults(queryResults); } } }