/*
* Copyright (c) 2007-2010 by The Broad Institute, Inc. and the Massachusetts Institute of Technology.
* All Rights Reserved.
*
* This software is licensed under the terms of the GNU Lesser General Public License (LGPL), Version 2.1 which
* is available at http://www.opensource.org/licenses/lgpl-2.1.php.
*
* THE SOFTWARE IS PROVIDED "AS IS." THE BROAD AND MIT MAKE NO REPRESENTATIONS OR WARRANTIES OF
* ANY KIND CONCERNING THE SOFTWARE, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT
* OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. IN NO EVENT SHALL THE BROAD OR MIT, OR THEIR
* RESPECTIVE TRUSTEES, DIRECTORS, OFFICERS, EMPLOYEES, AND AFFILIATES BE LIABLE FOR ANY DAMAGES OF
* ANY KIND, INCLUDING, WITHOUT LIMITATION, INCIDENTAL OR CONSEQUENTIAL DAMAGES, ECONOMIC
* DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER THE BROAD OR MIT SHALL
* BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE
* FOREGOING.
*/
package htsjdk.tribble.index.interval;
import htsjdk.tribble.Feature;
import htsjdk.tribble.index.Block;
import htsjdk.tribble.index.Index;
import htsjdk.tribble.index.TribbleIndexCreator;
import htsjdk.tribble.index.interval.IntervalTreeIndex.ChrIndex;
import java.io.File;
import java.util.ArrayList;
import java.util.LinkedList;
/**
 * Builds an interval tree index from features that are handed over one at a time through
 * {@link #addFeature(Feature, long)}.
 * <p>
 * Consecutive features sharing a contig name are grouped into intervals; a new interval is
 * started once the current one has received {@code featuresPerInterval} features. The intervals
 * collected for a contig are inserted into that contig's {@link ChrIndex} when a feature with a
 * different contig name arrives, or when {@link #finalizeIndex(long)} is called.
 *
 * @author jrobinso
 */
public class IntervalIndexCreator extends TribbleIndexCreator {

    public static int DEFAULT_FEATURE_COUNT = 600;

    /**
     * Number of features an interval may receive before a new interval is opened.
     * @see #DEFAULT_FEATURE_COUNT
     */
    private int featuresPerInterval = DEFAULT_FEATURE_COUNT;

    /**
     * Per-contig indexes, appended to each time the contig name of an incoming feature differs
     * from the name of the most recently added entry.
     */
    private final LinkedList<ChrIndex> chrList = new LinkedList<ChrIndex>();

    /**
     * Number of features recorded so far in the interval currently being filled.
     */
    private int featureCount = 0;

    /**
     * Intervals accumulated for the contig at the tail of {@link #chrList}; they have not yet
     * been inserted into that contig's index.
     */
    private final ArrayList<MutableInterval> intervals = new ArrayList<MutableInterval>();

    /** File being indexed; its absolute path is handed to the resulting index. */
    File inputFile;

    /**
     * @param inputFile           the file being indexed
     * @param featuresPerInterval number of features after which a new interval is started
     */
    public IntervalIndexCreator(final File inputFile, final int featuresPerInterval) {
        this.inputFile = inputFile;
        this.featuresPerInterval = featuresPerInterval;
    }

    /**
     * Creates an index creator that uses {@link #DEFAULT_FEATURE_COUNT} features per interval.
     *
     * @param inputFile the file being indexed
     */
    public IntervalIndexCreator(final File inputFile) {
        this(inputFile, DEFAULT_FEATURE_COUNT);
    }

    /**
     * Records one feature in the index under construction.
     *
     * @param feature      the feature to add
     * @param filePosition the file position associated with the feature; it becomes the start
     *                     file position of any interval opened by this call and the end file
     *                     position of any interval closed by it
     */
    public void addFeature(final Feature feature, final long filePosition) {
        // A contig index is needed for the very first feature and whenever the contig name changes.
        final boolean noContigYet = chrList.isEmpty();
        if (noContigYet || !chrList.getLast().getName().equals(feature.getChr())) {
            // Before switching contigs, hand the pending intervals to the contig being left behind.
            if (!noContigYet) {
                addIntervalsToLastChr(filePosition);
            }
            chrList.add(new ChrIndex(feature.getChr()));
            intervals.clear();
        }

        // Open a new interval when none exists yet for this contig or the current one is full.
        if (intervals.isEmpty() || featureCount >= featuresPerInterval) {
            final MutableInterval opened = new MutableInterval();
            opened.setStart(feature.getStart());
            opened.setStartFilePosition(filePosition);
            // The interval being replaced ends where the new one begins in the file.
            if (!intervals.isEmpty()) {
                intervals.get(intervals.size() - 1).setEndFilePosition(filePosition);
            }
            featureCount = 0;
            intervals.add(opened);
        }

        // Stretch the current interval so that it covers this feature's end.
        final MutableInterval current = intervals.get(intervals.size() - 1);
        current.setStop(Math.max(feature.getEnd(), current.getStop()));
        featureCount++;
    }

    /**
     * Inserts every pending interval into the index of the contig at the tail of the contig list.
     *
     * @param currentPos the file position used as the end file position of the final pending interval
     */
    private void addIntervalsToLastChr(final long currentPos) {
        final int tail = intervals.size() - 1;
        for (int idx = 0; idx <= tail; idx++) {
            final MutableInterval pending = intervals.get(idx);
            // Only the final interval is still open; earlier ones were closed when their successor was opened.
            if (idx == tail) {
                pending.setEndFilePosition(currentPos);
            }
            chrList.getLast().insert(pending.toInterval());
        }
    }

    /**
     * Completes the index: inserts the intervals still pending for the last contig, then builds
     * an interval tree index from all contigs seen so far.
     *
     * @param finalFilePosition the file position used to close the last pending interval
     * @return the finished interval tree index
     */
    public Index finalizeIndex(final long finalFilePosition) {
        final String indexedPath = inputFile.getAbsolutePath();
        final IntervalTreeIndex treeIndex = new IntervalTreeIndex(indexedPath);

        // Flush whatever intervals are still waiting for the last contig.
        addIntervalsToLastChr(finalFilePosition);

        treeIndex.setChrIndex(chrList);
        treeIndex.addProperties(properties);
        treeIndex.finalizeIndex();
        return treeIndex;
    }

    /**
     * @return the number of features after which a new interval is started
     */
    public int getFeaturesPerInterval() {
        return featuresPerInterval;
    }
}
/**
 * Mutable scratch holder for the data that makes up an {@link Interval}. Values are accumulated
 * here while features are being read and converted with {@link #toInterval()} once the interval
 * is complete. The class is package-private.
 */
class MutableInterval {
    // first and last coordinate covered by the interval
    private int start;
    private int stop;
    // file positions at which the interval's data begins and ends
    private long startFilePosition;
    private long endFilePosition;

    /**
     * @param start the first coordinate covered by the interval; must not be negative
     * @throws IllegalArgumentException if {@code start} is negative
     */
    public void setStart(final int start) {
        if (start < 0) {
            throw new IllegalArgumentException("Start must be greater than or equal to 0, but was " + start);
        }
        this.start = start;
    }

    /**
     * @param stop the last coordinate covered by the interval; must not be negative
     * @throws IllegalArgumentException if {@code stop} is negative
     */
    public void setStop(final int stop) {
        // The message names "Stop": the previous text was copied from setStart and blamed the wrong field.
        if (stop < 0) {
            throw new IllegalArgumentException("Stop must be greater than or equal to 0, but was " + stop);
        }
        this.stop = stop;
    }

    /**
     * @param startFilePosition the file position at which this interval's data begins
     */
    public void setStartFilePosition(final long startFilePosition) {
        this.startFilePosition = startFilePosition;
    }

    /**
     * @param endFilePosition the file position at which this interval's data ends
     */
    public void setEndFilePosition(final long endFilePosition) {
        this.endFilePosition = endFilePosition;
    }

    /**
     * Converts the accumulated values into an {@link Interval}.
     *
     * @return an interval spanning {@code start} to {@code stop} whose block is built from the
     *         start file position and the difference between the end and start file positions
     *         (presumably the block's size; confirm against {@code Block})
     */
    public Interval toInterval() {
        return new Interval(start, stop, new Block(startFilePosition, endFilePosition - startFilePosition));
    }

    /**
     * @return the last coordinate currently covered by the interval
     */
    public int getStop() {
        return stop;
    }
}