/* * Created on April 1, 2009 * */ package org.seqcode.gseutils; import java.util.*; import org.seqcode.gseutils.Interval; /** * (Adapted from RunningOverlapSum) * * This is a data structure originally designed to analyze the overlapping * reads of a ChIP-Seq experiment, but since adapted to several more uses. * * Its purpose is to track a series of intervals over a region (in this case, * a region identified with a particular chromosome and genome), and to then * be able to answer queries about the number of times that locations within * that region have been "covered" by on or more intervals. * * The method .collectRegions(int) is the most-used interface -- it collects * a series of (disjoint) regions that are each continuously covered by * *at least* 'int' intervals. * * @author Tim & Alex */ public class OverlapSum { private TreeMap<Integer,Integer> changes; public OverlapSum() { changes = new TreeMap<Integer,Integer>(); } public OverlapSum(Collection<Interval> rs) { changes = new TreeMap<Integer,Integer>(); if(rs.isEmpty()) { throw new IllegalArgumentException(); } for(Interval r : rs) { addInterval(r.start, r.end); } } public void clear() { changes.clear(); } public void combineWith(OverlapSum s) { for(Integer pt : s.changes.keySet()) { if(!changes.containsKey(pt)) { changes.put(pt, 0); } changes.put(pt, changes.get(pt) + s.changes.get(pt)); } } public void addInterval(int start, int end) { if(changes.containsKey(start)) { changes.put(start, changes.get(start) + 1); } else { changes.put(start, 1); } /** * I'm assuming that intervals, like regions, are inclusive of their * end points... * This needs to put the changepoint at one past the position where the * interval ends to account for intervals being inclusive. -Bob */ if(changes.containsKey(end + 1)) { changes.put(end + 1, changes.get(end + 1) - 1); } else { changes.put(end + 1, -1); } } public void addInterval(Interval intv) { addInterval(intv.start, intv.end); } public int[][] getChangePoints() { int[][] array = new int[changes.size()][]; int i = 0; for(int change : changes.keySet()) { int[] pair = new int[2]; pair[0] = change; pair[1] = changes.get(change); array[i++] = pair; } return array; } /* public void addRegion(Region r) { if(!genome.equals(r.getGenome())) { throw new IllegalArgumentException(r.getGenome().toString()); } if(!chrom.equals(r.getChrom())) { throw new IllegalArgumentException(r.getChrom()); } int start = r.getStart(); int end = r.getEnd(); if(changes.containsKey(start)) { changes.put(start, changes.get(start) + 1); } else { changes.put(start, 1); } // This needs to put the changepoint at one past the position where the // region ends to account for regions being inclusive. -Bob if(changes.containsKey(end + 1)) { changes.put(end + 1, changes.get(end + 1) - 1); } else { changes.put(end + 1, -1); } } */ public int getMaxOverlap() { int max = 0; int running = 0; for(int pc : changes.keySet()) { int delta = changes.get(pc); running += delta; max = Math.max(max, running); } return max; } public int getMaxOverlap(int start, int end) { int max = 0; int running = 0; Map<Integer,Integer> map = changes.headMap(end); for(int pc : map.keySet()) { int delta = map.get(pc); running += delta; if(pc >= start && pc <= end) { max = Math.max(max, running); } } return max; } public int countOverlapping(int start, int end) { int count = 0; Map<Integer,Integer> map = changes.headMap(end); int running = 0; for(int pc : map.keySet()) { int delta = map.get(pc); running += delta; if(pc >= start && delta == -1) { count += 1; } } count += running; return count; } public boolean hasOverlap(OverlapSum sum, int thresh) { Iterator<Integer> chs = new MergingIterator<Integer>(changes.keySet().iterator(), sum.changes.keySet().iterator()); int thisCount = 0, thatCount = 0; while(chs.hasNext()) { Integer change = chs.next(); if(changes.containsKey(change)) { thisCount += changes.get(change); } if(sum.changes.containsKey(change)) { thatCount += sum.changes.get(change); } if(thisCount >= thresh && thatCount >= thresh) { return true; } } return false; } public Collection<Interval> collect(int threshold) { if(threshold < 1) { throw new IllegalArgumentException(String.valueOf(threshold)); } LinkedList<Interval> regions = new LinkedList<Interval>(); int runningSum = 0; int rstart = -1, rend = -1; for(int pc : changes.keySet()) { int delta = changes.get(pc); if(runningSum < threshold && runningSum + delta >= threshold) { rstart = pc; } if(runningSum >= threshold && runningSum + delta < threshold) { //subtract 1 from the endpoint because regions are inclusive rend = pc - 1; Interval intv = new Interval(rstart, rend); regions.addLast(intv); rstart = rend = -1; } runningSum += delta; if(runningSum < 0) { throw new IllegalStateException(String.valueOf(runningSum)); } } if(runningSum > 0) { throw new IllegalStateException(String.valueOf(runningSum)); } return regions; } public TreeMap<Integer, Integer> getChangeMap() { return changes; } }