// Copyright (C) 2011-2012 CRS4. // // This file is part of Seal. // // Seal is free software: you can redistribute it and/or modify it // under the terms of the GNU General Public License as published by the Free // Software Foundation, either version 3 of the License, or (at your option) // any later version. // // Seal is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License // for more details. // // You should have received a copy of the GNU General Public License along // with Seal. If not, see <http://www.gnu.org/licenses/>. package it.crs4.seal.recab; import java.io.File; import java.io.RandomAccessFile; import java.nio.channels.*; import java.nio.MappedByteBuffer; import java.nio.ByteBuffer; /** * Access a packed sequence file. * This class implements memory-mapped access to a packed sequence * as in the .pac file produced by "bwa index". */ public class PackedSequence { protected File path; protected FileChannel channel; protected MappedByteBuffer mmap; // 0 1 2 3 4 public static final char[] BASE_INDEX = { 'A', 'C', 'G', 'T', 'N' }; public PackedSequence() { } /** * Load a packed sequence file. * This method actually creates the memory map for the sequence file to * be accessed. */ public void load(File pac) throws java.io.FileNotFoundException, java.io.IOException { path = pac; RandomAccessFile fileObject = null; try { fileObject = new RandomAccessFile(path, "r"); channel = fileObject.getChannel(); // XXX: can we do prefetching? mmap = channel.map(FileChannel.MapMode.READ_ONLY, 0, fileObject.length()); } catch (NonReadableChannelException e) { System.err.println("BUG!! used unreadable channel to create mmap. Message: " + e); throw e; } catch (NonWritableChannelException e) { System.err.println("BUG!! Tried to create a writable mmap. Message: " + e); throw e; } catch (IllegalArgumentException e) { System.err.println("BUG!! Tried to create mmap with bad parameters. Message: " + e); throw e; } finally { if (fileObject != null) fileObject.close(); } } /** * Try to load the packed sequence fully into memory. */ public void preload() { mmap.load(); } /** * Close the sequence file. */ public void close() throws java.io.IOException { if (channel != null) channel.close(); } /** * Read and unpack a section of sequence len bases long and starting at start (0-based). * The unpacked sequence (in byte values 0-3). */ public void readSection(ByteBuffer dest, long start, int len) { if (mmap == null) throw new IllegalStateException("PacReference.readSection called before loading a reference"); if (start < 0) throw new IllegalArgumentException("start value must be positive"); if (len <= 0) throw new IllegalArgumentException("len must be > 0"); // get file position mmap.position((int)(start / 4)); int offset = (int)(start % 4); // offset within the byte if (offset > 0) { // read the first partial byte BytePacking.unpackByte(dest, mmap.get(), offset, 3); len -= (4 - offset); } // read the whole bytes int bytesToRead = len / 4; for (int i = bytesToRead; i > 0; --i) BytePacking.unpackByte(dest, mmap.get(), 0, 3); len -= bytesToRead * 4; if (len > 0) { // we still have to mop up a last some bits from the last partial byte. BytePacking.unpackByte(dest, mmap.get(), 0, len - 1); } } /** * Convert a ByteBuffer of DNA base bytes (in the range 0-4) to an ASCII string. * Reads from bytes.position() to bytes.limit() (not inclusive) */ public static String bytesToBases(ByteBuffer bytes) { StringBuilder builder = new StringBuilder(bytes.limit()); for (int i = bytes.limit(); i > 0; --i) builder.append( BASE_INDEX[bytes.get()] ); return builder.toString(); } }