/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2007-2015 Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

package org.broad.igv.feature.genome;

import org.apache.log4j.Logger;
import org.broad.igv.Globals;
import org.broad.igv.util.ParsingUtils;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.Set;

/**
 * Representation of a fasta (.fai) index. This is a modified version of a similar class in Picard, but extended
 * to handle loading by URLs.
 * <p/>
 * The index is parsed once, in the constructor; no method of this class modifies it afterwards.
 *
 * @author jrobinso
 * @since 2011 Aug 7
 */
public class FastaIndex {

    static Logger log = Logger.getLogger(FastaIndex.class);

    /**
     * Store the entries, keyed by contig name. Use a LinkedHashMap for consistent iteration in insertion order.
     */
    private final LinkedHashMap<String, FastaSequenceIndexEntry> sequenceEntries =
            new LinkedHashMap<String, FastaSequenceIndexEntry>();

    /**
     * Build an index by parsing the given .fai file.
     *
     * @param indexPath Path of the index file, handed to {@code ParsingUtils.openBufferedReader}.
     * @throws IOException      If the index cannot be opened or read.
     * @throws RuntimeException If a line does not have exactly 5 tokens, or a contig name occurs more than once.
     */
    public FastaIndex(String indexPath) throws IOException {
        parseIndexFile(indexPath);
    }

    /**
     * @return The contig names, in the order they appear in the index file. The returned set is a live view
     *         of the internal map (not a copy), so callers should treat it as read-only.
     */
    public Set<String> getSequenceNames() {
        return sequenceEntries.keySet();
    }

    /**
     * @param name Contig name.
     * @return The index entry for the contig, or {@code null} if the index has no such contig.
     */
    public FastaSequenceIndexEntry getIndexEntry(String name) {
        return sequenceEntries.get(name);
    }

    /**
     * Look up the length of a contig.
     *
     * @param name Contig name.
     * @return The number of bases in the contig, or -1 if the index has no such contig. Sizes larger than
     *         {@link Integer#MAX_VALUE} are reported as {@link Integer#MAX_VALUE}.
     */
    public int getSequenceSize(String name) {
        FastaSequenceIndexEntry entry = sequenceEntries.get(name);
        if (entry == null) {
            return -1;
        }
        // The index stores sizes as long. A bare (int) cast would wrap for very large contigs and could
        // yield a negative number that is indistinguishable from the "not found" sentinel, so clamp instead.
        long size = entry.getSize();
        return size > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) size;
    }

    /**
     * Parse the contents of an index file
     * <p/>
     * Example index file
     * <p/>
     * sequenceName size locationInFile basesPerLine bytesPerLine
     * chr01p 6220112 8 50 51
     * chr02q 8059593 6344531 50 51
     * chr03q 5803340 14565324 50 51
     *
     * @param indexFile File to parse.
     * @throws IOException           Thrown if the file could not be opened or read.
     * @throws RuntimeException      Thrown if a line does not have exactly 5 tokens, or a contig is repeated.
     * @throws NumberFormatException Thrown if a numeric column cannot be parsed.
     */
    private void parseIndexFile(String indexFile) throws IOException {
        BufferedReader reader = null;
        try {
            reader = ParsingUtils.openBufferedReader(indexFile);
            String nextLine;
            while ((nextLine = reader.readLine()) != null) {

                // Tokenize and validate the index line.
                String[] tokens = Globals.singleTabMultiSpacePattern.split(nextLine);
                int nTokens = tokens.length;
                if (nTokens != 5) {
                    throw new RuntimeException("Error. Unexpected number of tokens parsing: " + indexFile);
                }

                // Parse the index line. Only the first piece of the name column, as cut by
                // SEQUENCE_NAME_SPLITTER, is used as the contig key
                // (NOTE(review): presumably this drops a trailing description -- confirm against GenomeImporter).
                String contig = tokens[0];
                contig = GenomeImporter.SEQUENCE_NAME_SPLITTER.split(contig, 2)[0];
                long size = Long.parseLong(tokens[1]);
                long location = Long.parseLong(tokens[2]);
                int basesPerLine = Integer.parseInt(tokens[3]);
                int bytesPerLine = Integer.parseInt(tokens[4]);

                // Build sequence structure
                add(new FastaSequenceIndexEntry(contig, location, size, basesPerLine, bytesPerLine));
            }
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }

    /**
     * Register an entry under its contig name.
     *
     * @param indexEntry Entry to add.
     * @throws RuntimeException If an entry for the same contig is already present; the existing entry is
     *                          left untouched in that case.
     */
    private void add(FastaSequenceIndexEntry indexEntry) {
        final String contig = indexEntry.getContig();
        // Check before inserting so that a duplicate never overwrites the entry that was stored first.
        if (sequenceEntries.containsKey(contig)) {
            throw new RuntimeException("Contig '" + contig + "' already exists in fasta index.");
        }
        sequenceEntries.put(contig, indexEntry);
    }

    /**
     * Hold an individual entry in a fasta sequence index file. Instances are immutable.
     */
    static class FastaSequenceIndexEntry {
        private final String contig;
        private final long position;
        private final long size;
        private final int basesPerLine;
        private final int bytesPerLine;

        /**
         * Create a new entry with the given parameters.
         *
         * @param contig       Contig this entry represents.
         * @param position     Location (byte coordinate) in the fasta file.
         * @param size         The number of bases in the contig.
         * @param basesPerLine How many bases are on each line.
         * @param bytesPerLine How many bytes are on each line (includes newline characters).
         */
        public FastaSequenceIndexEntry(String contig,
                                       long position,
                                       long size,
                                       int basesPerLine,
                                       int bytesPerLine) {
            this.contig = contig;
            this.position = position;
            this.size = size;
            this.basesPerLine = basesPerLine;
            this.bytesPerLine = bytesPerLine;
        }

        /**
         * @return The contig.
         */
        public String getContig() {
            return contig;
        }

        /**
         * @return seek position within the fasta, in bytes
         */
        public long getPosition() {
            return position;
        }

        /**
         * @return The number of bases in the contig.
         */
        public long getSize() {
            return size;
        }

        /**
         * @return Number of bases in a given fasta line
         */
        public int getBasesPerLine() {
            return basesPerLine;
        }

        /**
         * @return Number of bytes (bases + whitespace) in a line.
         */
        public int getBytesPerLine() {
            return bytesPerLine;
        }

        /**
         * @return A string representation of the contig line.
         */
        @Override
        public String toString() {
            return String.format("contig %s; position %d; size %d; basesPerLine %d; bytesPerLine %d",
                    contig, position, size, basesPerLine, bytesPerLine);
        }
    }
}