/*
* The MIT License (MIT)
*
* Copyright (c) 2007-2015 Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.broad.igv.bbfile;
import htsjdk.samtools.seekablestream.SeekableStream;
import org.apache.log4j.Logger;
import org.broad.igv.util.CompressionUtils;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
/**
* Created by IntelliJ IDEA.
* User: martind
* Date: Apr 13, 2010
* Time: 12:34:16 PM
* To change this template use File | Settings | File Templates.
*/
public class BigWigIterator implements Iterator<WigItem> {
private static Logger log = Logger.getLogger(BigWigIterator.class);
boolean empty = false;
//specification of chromosome selection region
private RPChromosomeRegion selectionRegion; // selection region for iterator
private boolean isContained; // if true, features must be fully contained by selection region
// File access variables for reading Bed data block
private SeekableStream fis; // file input stream handle
private BPTree chromIDTree; // B+ chromosome index tree
private RPTree chromDataTree; // R+ chromosome data location tree
// chromosome region extraction items
private ArrayList<RPTreeLeafNodeItem> leafHitList; // array of leaf hits for selection region items
private HashMap<Integer, String> chromosomeMap; // map of chromosome ID's and corresponding names
private int leafItemIndex; // index of current leaf item being processed from leaf hit list
RPTreeLeafNodeItem leafHitItem; // leaf item being processed by next
private RPChromosomeRegion hitRegion; // hit selection region for iterator
// current data block processing members
private BigWigDataBlock wigDataBlock; // Wig data block with Wig records decompressed
private boolean dataBlockRead; // indicates successful read of data block
private ArrayList<WigItem> wigItemList; // array of selected Wig values
private int wigItemIndex; // index of next Wig data item from the list
CompressionUtils compressionUtils;
/**
* Constructor for a BigWig iterator over the specified chromosome region
* <p/>
* Parameters:
* fis - file input stream handle
* chromIDTree - B+ chromosome index tree provides chromosome ID's for chromosome names
* chromDataTree - R+ chromosome data locations tree
* selectionRegion - chromosome region for selection of Wig feature extraction
* consists of:
* startChromID - ID of start chromosome
* startBase - starting base position for values
* endChromID - ID of end chromosome
* endBase - ending base position for values
* contained - specifies wig values must be contained by region, if true;
* else return any intersecting region values
*/
public BigWigIterator(SeekableStream fis, BPTree chromIDTree, RPTree chromDataTree,
RPChromosomeRegion selectionRegion, boolean contained) {
// check for valid selection region
if (selectionRegion == null)
throw new RuntimeException("Error: BigWigIterator selection region is null\n");
this.fis = fis;
this.chromIDTree = chromIDTree;
this.chromDataTree = chromDataTree;
this.selectionRegion = new RPChromosomeRegion(selectionRegion);
isContained = contained;
// set up hit list and first data block read
int hitCount = getHitRegion(selectionRegion, contained);
if (hitCount == 0) {
empty = true;
}
// Ready for next() data extraction
}
/**
* Constructor for an "empty" iterator
*/
public BigWigIterator() {
empty = true;
}
/*
* Method returns status on a "next item" being available.
*
* Return:
* True if a "next item" exists; else false.
*
* Note: If "next" method is called for a false condition,
* an NoSuchElementException will be thrown.
* */
public boolean hasNext() {
if (empty) return false;
// first check if current segment can be read for next Wig item
if (wigItemIndex < wigItemList.size())
return true;
// need to fetch next data block
else if (leafItemIndex < leafHitList.size())
return true;
else return false;
}
/**
* Method returns the current Wig item and advances to the next Wig record.
* <p/>
* Returns:
* Wig item for current BigWig data record.
* <p/>
* Note: If "next" method is called when a "next item" does not exist,
* an NoSuchElementException will be thrown.
*/
public WigItem next() {
// return next Wig item in list
if (wigItemIndex < wigItemList.size())
return (wigItemList.get(wigItemIndex++));
// attempt to get next leaf item data block
else {
int nHits = getHitRegion(selectionRegion, isContained);
if (nHits > 0) {
// Note: getDataBlock initializes bed feature index to 0
return (wigItemList.get(wigItemIndex++)); // return 1st Data Block item
} else {
String result = String.format("Failed to find data for wig region (%d,%d,%d,%d)\n",
hitRegion.getStartChromID(), hitRegion.getStartBase(),
hitRegion.getEndChromID(), hitRegion.getEndBase());
log.error(result);
return null;
//throw new NoSuchElementException(result);
}
}
}
public void remove() {
throw new UnsupportedOperationException("Remove iterator item is not supported yet.");
}
// ************ BigBedIterator specific methods *******************
/*
* Method returns the iterator selection region.
* */
public RPChromosomeRegion getSelectionRegion() {
return selectionRegion;
}
/*
* Method provides the iterator with a new selection region.
*
* Parameters:
* selectionRegion - chromosome region for selection of Bed feature extraction
* consists of:
* startChromID - ID of start chromosome
* startBase - starting base position for features
* endChromID - ID of end chromosome
* endBase - starting base position for features
* contained - specifies bed features must be contained by region, if true;
* else return any intersecting region features
*
* Returns:
* number of chromosome regions found in the selection region
* */
public int setSelectionRegion(RPChromosomeRegion selectionRegion,
boolean contained) {
this.selectionRegion = selectionRegion;
isContained = contained;
// set up hit list and first data block read
leafHitList = null; // Must nullify existing hit list first!
int hitCount = getHitRegion(selectionRegion, contained);
if (hitCount == 0) // no hits - no point in fetching data
throw new RuntimeException("No wig data found in the selection region");
// Ready for next() data extraction
return hitCount;
}
/*
* Method returns if bed items must be completely contained in
* the selection region.
*
* Returns:
* Boolean indicates items must be contained in selection region if true,
* else may intersect the selection region if false
* */
public boolean isContained() {
return isContained;
}
/*
* Method returns the BigBed file input stream handle.
*
* Returns:
* File input stream handle
* */
public SeekableStream getBBFis() {
return fis;
}
/*
* Method returns the B+ chromosome index tree used for identifying
* chromosome ID's used to specify R+ chromosome data locations.
*
* Returns:
* B+ chromosome index tree
* */
public BPTree getChromosomeIDTree() {
return chromIDTree;
}
/*
* Method returns the R+ chromosome data tree used for identifying
* chromosome data locations for the selection region.
*
* Returns:
* R+ chromosome data locations tree
* */
public RPTree getChromosomeDataTree() {
return chromDataTree;
}
/*
* Method finds the chromosome data hit items for the current hit selection region,
* and loads first hit data.
*
* Parameters:
* hitRegion - selection region for extracting hit items
* contained - indicates hit items must contained in selection region if true;
* and if false, may intersect selection region
* Note: The selection region will be limited to accommodate mMaxLeafHits; which terminates
* selection at the leaf node at which maxLeafHits is reached. Total number of selected
* items may exceed maxLeafHits, but only by the number of leaves in the cutoff leaf node.
*
* Returns:
* number of R+ chromosome data hits
* */
private int getHitRegion(RPChromosomeRegion hitRegion, boolean contained) {
int hitCount = 0;
// check if new hit list is needed
if (leafHitList == null) {
hitCount = getHitList(hitRegion, contained);
if (hitCount == 0)
return 0; // no hit data found
} else {
hitCount = leafHitList.size() - leafItemIndex;
if (hitCount == 0)
return 0; // hit list exhausted
}
// Perform a block read for starting base of selection region - use first leaf hit
dataBlockRead = getDataBlock(leafItemIndex++);
// try next item - probably intersection issue
// Note: recursive call until a block is valid or hit list exhuasted
if (!dataBlockRead)
hitCount = getHitRegion(hitRegion, contained);
return hitCount;
}
/*
* Method finds the chromosome data tree hit items for the current hit selection region.
*
* Parameters:
* hitRegion - selection region for extracting hit items
* contained - indicates hit items must contained in selection region if true;
* and if false, may intersect selection region
*
* Note: The selection region will be limited to accommodate mMaxLeafHits; which terminates
* selection at the leaf node at which maxLeafHits is reached. Total number of selected
* items may exceed maxLeafHits, but only by the number of leaves in the cutoff leaf node.
*
* Returns:
* number of R+ chromosome data hits
* */
private int getHitList(RPChromosomeRegion hitRegion, boolean contained) {
// hit list for hit region; subject to mMaxLeafHits limitation
leafHitList = chromDataTree.getChromosomeDataHits(hitRegion, contained);
// check if any leaf items were selected
int nHits = leafHitList.size();
if (nHits == 0)
return 0;
else
leafItemIndex = 0; // reset hit item index to start of list
// find hit bounds from first and last hit items
int startChromID = leafHitList.get(0).getChromosomeBounds().getStartChromID();
int startBase = leafHitList.get(0).getChromosomeBounds().getStartBase();
int endChromID = leafHitList.get(nHits - 1).getChromosomeBounds().getEndChromID();
int endBase = leafHitList.get(nHits - 1).getChromosomeBounds().getEndBase();
// save hit region; not currently used but useful for debug
this.hitRegion = new RPChromosomeRegion(startChromID, startBase, endChromID, endBase);
return nHits;
}
/*
* Method sets up a decompressed data block of big bed features for iteration.
*
* Parameters:
* leafIteIndex - leaf item index in the hit list referencing the data block
*
* Returns:
* Successful Bed feature data block set up: true or false.
* */
private boolean getDataBlock(int leafItemIndex) {
// check for valid data block
if (leafItemIndex >= leafHitList.size())
return false;
// Perform a block read for indexed leaf item
leafHitItem = leafHitList.get(leafItemIndex);
// get the chromosome names associated with the hit region ID's
int startChromID = leafHitItem.getChromosomeBounds().getStartChromID();
int endChromID = leafHitItem.getChromosomeBounds().getEndChromID();
chromosomeMap = chromIDTree.getChromosomeIDMap(startChromID, endChromID);
boolean isLowToHigh = chromDataTree.isIsLowToHigh();
int uncompressBufSize = chromDataTree.getUncompressBuffSize();
// decompress leaf item data block for feature extraction
wigDataBlock = new BigWigDataBlock(fis, leafHitItem, chromosomeMap, isLowToHigh, uncompressBufSize);
// get section Wig item list and set next index to first item
wigItemList = wigDataBlock.getWigData(selectionRegion, isContained);
wigItemIndex = 0;
// data block items available for iterator
if (wigItemList.size() > 0)
return true;
else
return false;
}
}