/*
* The MIT License (MIT)
*
* Copyright (c) 2007-2015 Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package org.broad.igv.feature.genome;
import org.apache.log4j.Logger;
import org.broad.igv.feature.Range;
import org.broad.igv.ui.panel.ReferenceFrame;
import org.broad.igv.util.ObjectCache;
import java.util.Hashtable;
import java.util.List;
/**
* A wrapper class that provides caching for on-disk, queried, and web-service Sequence implementations.
*
* @author jrobinso
*/
public class SequenceWrapper implements Sequence {
private static Logger log = Logger.getLogger(SequenceWrapper.class);
private static boolean cacheSequences = true;
private static int tileSize = 1000000;
private Sequence sequence;
private ObjectCache<String, SequenceTile> sequenceCache = new ObjectCache<String, SequenceTile>(50);
public SequenceWrapper(Sequence sequence) {
this.sequence = sequence;
}
public byte getBase(String chr, int position) {
if (cacheSequences) {
int tileNo = position / tileSize;
// Get first chunk
SequenceTile tile = getSequenceTile(chr, tileNo);
int offset = position - tile.getStart();
byte[] bytes = tile.bytes;
if (bytes == null) {
return 0;
} else if (offset > 0 && offset < bytes.length) {
return bytes[offset];
} else {
return 0;
}
} else {
// TODO -- implement or disable
return sequence.getBase(chr, position);
}
}
@Override
public List<String> getChromosomeNames() {
return sequence.getChromosomeNames();
}
@Override
public int getChromosomeLength(String chrname) {
return sequence.getChromosomeLength(chrname);
}
@Override
public boolean isLoaded(ReferenceFrame frame) {
if(!cacheSequences) return false;
int startTile = (int) frame.getOrigin() / tileSize;
int endTile = (int) frame.getEnd() / tileSize;
String chr = frame.getChrName();
for(int i=startTile; i <= endTile; i++) {
String key = getKey(chr, i);
if (!sequenceCache.containsKey(key)) return false;
}
return true;
}
/**
* Return the reference dna sequence for the exact interval specified.
*
* @param chr
* @param start
* @param end
* @return
*/
public byte[] getSequence(String chr, int start, int end) {
if (cacheSequences) {
byte[] seqbytes = new byte[end - start];
int startTile = start / tileSize;
int endTile = end / tileSize;
SequenceTile[] tiles = getSequenceTiles(chr, startTile, endTile);
// Get first chunk
SequenceTile tile = tiles[0];
if (tile == null) {
return null; // Can this ever happen?
}
byte[] tileBytes = tile.getBytes();
if (tileBytes == null) {
return null;
}
int fromOffset = start - tile.getStart();
int toOffset = 0;
// A negative offset means the requested start is < the the first tile start. This situation can arise at the
// left end of chromosomes. In this case we want to copy the first tile to some offset location in the
// destination sequence array.
if (fromOffset < 0) {
toOffset = -fromOffset;
fromOffset = 0;
}
// # of bytes to copy. Note that only one of fromOffset or toOffset is non-zero.
int nBytes = Math.min(tileBytes.length - Math.abs(fromOffset), seqbytes.length - Math.abs(toOffset));
// Copy first chunk
System.arraycopy(tileBytes, fromOffset, seqbytes, toOffset, nBytes);
// If multiple chunks ...
for (int t = startTile + 1; t <= endTile; t++) {
tile = tiles[t - startTile];
int nNext = Math.min(seqbytes.length - nBytes, tile.getSize());
System.arraycopy(tile.getBytes(), 0, seqbytes, nBytes, nNext);
nBytes += nNext;
}
return seqbytes;
} else {
return sequence.getSequence(chr, start, end);
}
}
private SequenceTile getSequenceTile(String chr, int tileNo) {
String key = getKey(chr, tileNo);
SequenceTile tile = sequenceCache.get(key);
if (tile == null) {
int start = tileNo * tileSize;
int end = start + tileSize; // <= UCSC coordinate conventions (end base not inclusive)
if (end <= start) {
return null;
}
byte[] seq = sequence.getSequence(chr, start, end);
tile = new SequenceTile(start, seq);
sequenceCache.put(key, tile);
}
return tile;
}
private SequenceTile[] getSequenceTiles(String chr, int startTile, int endTile) {
SequenceTile[] tiles = new SequenceTile[endTile - startTile + 1];
TileRange toLoad = null;
for (int tileNo = startTile; tileNo <= endTile; tileNo++) {
String key = getKey(chr, tileNo);
SequenceTile tile = sequenceCache.get(key);
if (tile == null) {
if(toLoad == null) {
toLoad = new TileRange(tileNo, tileNo);
}
else {
toLoad.endTile = tileNo;
}
} else { // tile != null
tiles[tileNo-startTile] = tile;
if (toLoad != null) {
loadTiles(chr, startTile, tiles, toLoad);
toLoad = null;
}
}
}
if (toLoad != null) {
loadTiles(chr, startTile, tiles, toLoad);
}
return tiles;
}
private void loadTiles(String chr, int startTile, SequenceTile[] tiles, TileRange toLoad) {
int start = toLoad.startTile * tileSize;
int end = (toLoad.endTile + 1) * tileSize;
byte[] seq = sequence.getSequence(chr, start, end);
int offset = 0;
for(int t = toLoad.startTile; t <= toLoad.endTile; t++) {
int nBytes = Math.min(tileSize, seq.length - offset);
byte [] tileSeq = new byte[nBytes];
int tileStart = t * tileSize;
System.arraycopy(seq, offset, tileSeq, 0, nBytes);
SequenceTile t2 = new SequenceTile(tileStart, tileSeq);
String k = getKey(chr, t);
sequenceCache.put(k, t2);
tiles[t-startTile] = t2;
offset += tileSize;
}
}
private static class TileRange {
int startTile;
int endTile;
public TileRange(int startTile, int endTile) {
this.startTile = startTile;
this.endTile = startTile;
}
}
/**
* Generate unique key to be used to store/retrieve
* tiles. We combined the chr and tileNo, with a
* delimiter in between to ensure that
* chr1 12 doesn't clash with chr11 2
*
* @param chr
* @param tileNo
* @return
*/
static String getKey(String chr, int tileNo) {
return chr + "/" + tileNo;
}
/**
* This accessor provided to support unit tests.
*
* @param aChunkSize
*/
static void setTileSize(int aChunkSize) {
tileSize = aChunkSize;
}
/**
* Accessor to support unit tests.
*
* @param aCacheSequences
*/
static void setCacheSequences(boolean aCacheSequences) {
cacheSequences = aCacheSequences;
}
public void clearCache() {
sequenceCache.clear();
}
static class SequenceTile {
private int start;
private byte[] bytes;
SequenceTile(int start, byte[] bytes) {
this.start = start;
this.bytes = bytes;
}
public int getStart() {
return start;
}
public int getSize() {
return bytes == null ? 0 : bytes.length;
}
public byte[] getBytes() {
return bytes;
}
}
/**
* Translates sequence URLs that might be cached on client machines. This method should be retired eventually,
* as caches expire.
* <p>
* Also modifies URLs to Broad hosted sequences that will use byte range requests if byte-range requests are
* disabled. This hack is neccessary for the Partners network, which does not forward the byte-range header.
* <p>
* Older "sequence servlet" request URLs
* http://www.broad.mit.edu/igv/SequenceServlet/
* http://www.broadinstitute.org/igv/sequence
* <p>
* Direct URLS (uses byte range requests)
* https://data.broadinstitute.org/igvdata/annotations/seq/
* http://igvdata.broadinstitute.org/genomes/seq
*
* @param url
* @return
*/
private static Hashtable<String, String> sequenceUrlCache = new Hashtable();
/**
* Some rather ugly code to maintain backward compatibility. Does 2 things
* (1) domain swap (mit -> broadinstitute)
* (2) removes references to SequenceServlet, there are 2 forms
* <p>
* This method can be removed when its verified that references to the MIT domain and sequence servlet have
* been removed from all genomes.
*
* @param url
* @return
*/
public static String checkSequenceURL(String url) {
String key = url;
String convertedURL = sequenceUrlCache.get(key);
if (convertedURL == null) {
convertedURL = url;
// Legacy URLs -- this code can be removed when all .genome files are updated.
convertedURL = convertedURL.replace("broad.mit.edu", "broadinstitute.org");
// Replace all references to the old SequenceServlet with direct references to a sequence directory.
convertedURL = convertedURL.replace(
"http://www.broadinstitute.org/igv/SequenceServlet",
"http://igvdata.broadinstitute.org/genomes/seq");
convertedURL = convertedURL.replace(
"http://www.broadinstitute.org/igv/sequence",
"http://igvdata.broadinstitute.org/genomes/seq");
if (!url.equals(convertedURL)) {
log.info("Converting sequence URL: " + url + " -> " + convertedURL);
}
sequenceUrlCache.put(key, convertedURL);
}
return convertedURL;
}
}