// Copyright (C) 2011-2012 CRS4.
//
// This file is part of Seal.
//
// Seal is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// Seal is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with Seal. If not, see <http://www.gnu.org/licenses/>.

package it.crs4.seal.recab;

import it.crs4.seal.common.FormatException;
import it.crs4.seal.common.CutString;

import java.io.IOException;
import java.io.LineNumberReader;
import java.io.Reader;

/**
 * Read SNPs from a Rod file.
 *
 * <p>The input is consumed one line at a time.  Only rows whose molecule type
 * is "genomic", whose class is "single", whose locType is "exact" and whose
 * end/start coordinates differ by exactly one are reported; every other row
 * is silently skipped.</p>
 */
public class RodFileVariantReader implements VariantReader
{
	/** Line-counting wrapper around the input; the line number is used in error messages. */
	private final LineNumberReader reader;

	/** Extracts the columns of interest from each tab-separated row. */
	private final CutString cutter;

	/* Sample format:
	585 1 10259 10260 rs72477211 0 + C C A/G genomic single unknown 0 0 unknown exact 1
	*/

	// required columns (0-based indices):            Cut index
	//  1: chr                                         0
	//  2: start                                       1
	//  3: end                                         2
	// 10: molecule type (must be == "genomic")        3
	// 11: class (must be == "single")                 4
	// 16: locType (must be == "exact")                5

	/**
	 * Creates a reader that pulls variant rows from the given character stream.
	 *
	 * @param in source of the Rod table text
	 * @throws IOException declared by the original signature
	 */
	public RodFileVariantReader(Reader in) throws IOException
	{
		reader = new LineNumberReader(in);
		// The selected indices correspond to the columns listed in the table above.
		cutter = new CutString("\t", 1, 2, 3, 10, 11, 16);
	}

	/**
	 * Advances to the next row that qualifies as a SNP and copies it into {@code dest}.
	 *
	 * @param dest receives the contig name, position (the row's start) and length of the SNP
	 * @return {@code true} if {@code dest} was filled in; {@code false} once the
	 *         input is exhausted without finding another qualifying row
	 * @throws FormatException if the input contains no lines at all, if a row
	 *         cannot be split by the cutter, or if a coordinate is not a valid number
	 * @throws IOException if reading from the underlying reader fails
	 */
	public boolean nextEntry(VariantRegion dest) throws FormatException, IOException
	{
		try
		{
			String row;
			while ((row = reader.readLine()) != null)
			{
				cutter.loadRecord(row);

				// cut fields 3, 4, 5 == file columns 10, 11, 16
				final boolean exactGenomicSingle =
					cutter.getField(3).equals("genomic")
					&& cutter.getField(4).equals("single")
					&& cutter.getField(5).equals("exact");
				if (!exactGenomicSingle)
					continue;

				// cut fields 1, 2 == file columns 2 (start), 3 (end)
				final long first = Long.parseLong(cutter.getField(1));
				final long last = Long.parseLong(cutter.getField(2));
				if (last - first != 1)
					continue; // only regions of length 1 are accepted

				// XXX: safety check.  If this fails we have to move up to long values
				if (last > Integer.MAX_VALUE)
					throw new RuntimeException("end bigger than expected! File a bug!!");

				// The row satisfies every SNP requirement: hand it to the caller.
				dest.setContigName(cutter.getField(0)); // file column 1
				// XXX: remove the casts if we move up to long values
				dest.setPosition((int)first);
				dest.setLength((int)(last - first));
				return true;
			}

			// End of input.  If not a single line was ever read the table was empty.
			if (reader.getLineNumber() == 0)
				throw new FormatException("empty Variant table file");

			return false;
		}
		catch (CutString.FormatException e)
		{
			throw new FormatException("Invalid table format at line " + reader.getLineNumber() + ": " + e);
		}
		catch (NumberFormatException e)
		{
			throw new FormatException("Invalid coordinate at line " + reader.getLineNumber() + ": " + e);
		}
	}
}