package org.seqcode.gseutils;
import java.util.List;
import java.util.ArrayList;
/**
 * Performs calculations for the UCSC bin indexing scheme.
 * See http://genomewiki.ucsc.edu/index.php/Bin_indexing_system
 *
 * <p>The scheme has five levels of bins: one 512M bin, then 64M, 8M, 1M and
 * 128k bins. Bin numbers are assigned level by level, coarsest first, so the
 * single 512M bin is 0, the 64M bins start at 1, the 8M bins at 9, the 1M
 * bins at 73 and the 128k bins at 585.
 *
 * <p>All members are static; the class holds no state of its own.
 */
public class UCSCBins {

    /**
     * First bin number of each level, ordered from the FINEST level (128k)
     * to the COARSEST (512M). Note this is the opposite order of
     * {@link #binSizes}.
     */
    public static final int[] binOffsets = {512+64+8+1, 64+8+1, 8+1, 1, 0};

    /**
     * Bin width in bases for each level, ordered from the COARSEST level
     * (512M) to the FINEST (128k). Note this is the opposite order of
     * {@link #binOffsets}.
     */
    public static final int[] binSizes = {512*1024*1024, 64*1024*1024, 8*1024*1024, 1024*1024, 128*1024};

    public static final int binFirstShift = 17; /* How much to shift to get to finest bin. */
    public static final int binNextShift = 3; /* How much to shift to get to next larger bin. */

    /**
     * Validates that both coordinates lie in [0, 512M].
     *
     * @throws IllegalArgumentException if either coordinate is negative or
     *         greater than 512M
     */
    private static void checkRange(int start, int end) {
        if (start < 0) { throw new IllegalArgumentException("Start < 0"); }
        if (end < 0) { throw new IllegalArgumentException("End < 0"); }
        if (start > binSizes[0]) { throw new IllegalArgumentException("Start > 512M"); }
        if (end > binSizes[0]) { throw new IllegalArgumentException("End > 512M"); }
    }

    /**
     * Given start,end in chromosome coordinates assign it
     * a bin. There's a bin for each 128k segment, for each
     * 1M segment, for each 8M segment, for each 64M segment,
     * and for each chromosome (which is assumed to be less than
     * 512M.) A range goes into the smallest bin it will fit in.
     *
     * <p>{@code end} is treated as exclusive: the last base considered is
     * {@code end - 1}.
     *
     * @param start first position of the range (0-based)
     * @param end   position one past the last base of the range
     * @return the number of the smallest bin that contains the whole range
     * @throws IllegalArgumentException if a coordinate is negative or above
     *         512M, or if no level holds the range (for example
     *         {@code start == end == 0}, where {@code end - 1} is negative)
     */
    public static int rangeToBin(int start, int end) throws IllegalArgumentException {
        checkRange(start, end);
        // Index of the 128k bin holding the first and the last base.
        int startBin = start >> binFirstShift;
        int endBin = (end - 1) >> binFirstShift;
        // Walk from the finest level to the coarsest until both ends share a bin.
        for (int i = 0; i < binOffsets.length; ++i) {
            if (startBin == endBin) {
                return binOffsets[i] + startBin;
            }
            startBin >>= binNextShift;
            endBin >>= binNextShift;
        }
        throw new IllegalArgumentException(String.format("start %d, end %d out of range in findBin (max is 512M)", start, end));
    }

    /**
     * Returns the set of bins that cover a specified range: bin 0 (the
     * whole-chromosome bin) followed, for each finer level from 64M down to
     * 128k, by every bin between the one containing {@code start} and the one
     * containing {@code end}.
     *
     * <p>Unlike {@link #rangeToBin(int, int)}, {@code end} is treated as
     * inclusive here, so a range whose end falls exactly on a bin boundary
     * also includes the bin that begins at that boundary.
     *
     * @param start first position of the range
     * @param end   last position of the range
     * @return bin numbers in ascending order, without duplicates
     * @throws IllegalArgumentException if a coordinate is negative or above 512M
     */
    public static List<Integer> rangeToBins(int start, int end) throws IllegalArgumentException {
        checkRange(start, end);
        List<Integer> output = new ArrayList<Integer>();
        output.add(0);
        for (int level = 1; level < binSizes.length; level++) {
            int binSize = binSizes[level];
            // binOffsets is stored finest-first, binSizes coarsest-first.
            int offset = binOffsets[binOffsets.length - level - 1];
            // Highest valid bin index at this level. A coordinate of exactly
            // 512M would otherwise index one past the level, which after
            // adding the offset is a bin of the next level (or, at the 128k
            // level, a bin that does not exist).
            int maxIndex = binSizes[0] / binSize - 1;
            int first = start / binSize;
            int last = Math.min(end / binSize, maxIndex);
            for (int i = first; i <= last; i++) {
                output.add(i + offset);
            }
        }
        return output;
    }

    /**
     * Joins the values with commas and no surrounding whitespace.
     *
     * @param ints values to join
     * @return the joined string, or the empty string when {@code ints} is empty
     */
    public static String commaJoin(List<Integer> ints) {
        if (ints.isEmpty()) {
            return "";
        }
        StringBuilder sb = new StringBuilder();
        sb.append(ints.get(0));
        for (int i = 1; i < ints.size(); i++) {
            sb.append(',').append(ints.get(i));
        }
        return sb.toString();
    }
}