package org.seqcode.gsebricks.verbs; import java.util.*; import org.seqcode.genome.location.Region; /** * DistanceGenerator takes two Expander<Region,Region>, a source and a * sink. When execute() is presented with a Region, DistanceGenerator * applies the two expanders to that region to generate a set of * sources and a set of sinks. It then tries to map each source to * the nearest sink without reusing any sink. There is a maximum * distance (distanceLimit) for the matching. * * execute() doesn't allow you to determine the distance for any given * source-sink; instead, it returns an iterator over all the distances * with negative values indicating a source that couldn't be mapped * (no sinks left) or was too far. * * This class is useful, for example, for mapping binding events to * motifs. */ public class DistanceGenerator implements Expander<Region,Integer> { private Expander<Region,Region> sourcemapper, sinkmapper; /* how far is "toofar" */ private int distanceLimit = 1000; /* how much should we expand the input Region when presenting it to the sinkmapper */ private int expandInputRegion; public DistanceGenerator(Expander<Region,Region> sourcemapper, Expander<Region,Region> sinkmapper) { this.sourcemapper = sourcemapper; this.sinkmapper = sinkmapper; expandInputRegion = 1000; distanceLimit = 1000; } public DistanceGenerator(Expander<Region,Region> sourcemapper, Expander<Region,Region> sinkmapper, int expandInputRegion, int distanceLimit) { this.sourcemapper = sourcemapper; this.sinkmapper = sinkmapper; this.expandInputRegion = expandInputRegion; this.distanceLimit = distanceLimit; } /** * returns a distance of -1 for any source that can't be * matched to a sink and -2 for anything that's too far */ public Iterator<Integer> execute(Region region) { ArrayList<Integer> sourcelist, sinklist, distances; Integer[] sources, sinks; sourcelist = new ArrayList<Integer>(); sinklist = new ArrayList<Integer>(); Iterator<Region> sourceIter = sourcemapper.execute(region); Region sinkRegion = region.expand(-1*expandInputRegion,expandInputRegion); Iterator<Region> sinkIter = sinkmapper.execute(sinkRegion); while (sourceIter.hasNext()) { Region r = sourceIter.next(); sourcelist.add((r.getStart() + r.getEnd())/2); } while (sinkIter.hasNext()) { Region r = sinkIter.next(); sinklist.add((r.getStart() + r.getEnd())/2); } // System.err.println("Regin is " + region); // System.err.println("Sources are " + sourcelist); // System.err.println("Sinks are " + sinklist); sources = sourcelist.toArray(new Integer[0]); sinks = sinklist.toArray(new Integer[0]); Arrays.sort(sources); Arrays.sort(sinks); distances = new ArrayList<Integer>(); int todo = sources.length; int snkleft = sinks.length; for (int i = 0; i < sources.length - 1; i++) { if (sources[i] == sources[i+1]) { sources[i] = -1; todo--; } } int before = 0, after = 0; while ((todo > 0) && (snkleft > 0)) { boolean anything = false; for (int i = 0; i < sources.length; i++) { if (sources[i] == -1) {continue;} while ((after <= sinks.length - 1) && ((sinks[after] < sources[i]) || (sinks[after] == -1))) {after++;} before = after - 1; while ((before > 0) && (sinks[before] == -1)) {before--;} int d1 = Integer.MAX_VALUE, d2 = Integer.MAX_VALUE, d3 = Integer.MAX_VALUE, d4 = Integer.MAX_VALUE; int prevsrc = i - 1, nextsrc = i + 1; while ((prevsrc > 0) && (sources[prevsrc] == -1)) {prevsrc--;} while ((nextsrc < sources.length - 1) && (sources[nextsrc] == -1)) {nextsrc++;} if ((before >= 0) && (prevsrc >= 0) && (prevsrc != i) && (sources[prevsrc] != -1) && (sinks[before] != -1)) { d1 = Math.abs(sources[prevsrc] - sinks[before]); } if ((before >= 0) && (sinks[before] != -1)) { d2 = Math.abs(sources[i] - sinks[before]); } if ((after < sinks.length) && (sinks[after] != -1)) { d3 = Math.abs(sources[i] - sinks[after]); } if ((after < sinks.length) && (sinks[after] != -1) && (nextsrc < sources.length) && (nextsrc != i) && (sources[nextsrc] != -1)) { d4 = Math.abs(sources[nextsrc] - sinks[after]); } // System.err.println("sources[i] = " + sources[i] + " sinks are " + Arrays.toString(sinks)); // System.err.println("d1=" + d1 + " d2=" + d2 + " d3="+ d3 + " d4=" + d4); if (d2 < d3) { if (d2 < d1) { if (d2 < distanceLimit) { // System.out.println(d2 + "\t" + region.getChrom() + "\t" + sources[i] + "\t" + sinks[before]); distances.add(d2); } else { distances.add(-2); } sources[i] = -1; sinks[before] = -1; todo--; snkleft--; anything = true; } } else { if (d3 < d4) { if (d3 < distanceLimit) { distances.add(d3); // System.out.println(d3 + "\t" + region.getChrom() + "\t" + sources[i] + "\t" + sinks[after]); } else { distances.add(-2); } sources[i] = -1; sinks[after] = -1; todo--; snkleft--; anything = true; } } } while (todo > 0) { distances.add(-1); todo--; } if (!anything) { System.err.println("Didn't do anything"); for (int i = 0; i < sources.length; i++) { System.err.print(sources[i] + " "); } System.err.println(""); for (int i = 0; i < sinks.length; i++) { System.err.print(sinks[i] + " "); } System.err.println(""); throw new RuntimeException("Stuck!"); } } // System.err.println("DG is returning " + distances.size()); return distances.iterator(); } }