package org.seqcode.gsebricks.verbs.location;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.seqcode.genome.location.Gene;
import org.seqcode.gsebricks.verbs.Filter;
import org.seqcode.gseutils.NotFoundException;
import org.seqcode.gseutils.Pair;
import org.seqcode.gseutils.SetTools;
public class UniqueishGeneFilter<X extends Gene> implements Filter<X, X> {
private int startBuffer;
private int endBuffer;
private Map<Pair<String,Integer>,Set<X>> startSeen;
private Map<Pair<String,Integer>,Set<X>> endSeen;
private SetTools<X> st;
public UniqueishGeneFilter(int startBuffer, int endBuffer) {
this.startBuffer = startBuffer;
this.endBuffer = endBuffer;
startSeen = new HashMap<Pair<String,Integer>,Set<X>>();
endSeen = new HashMap<Pair<String,Integer>,Set<X>>();
this.st = new SetTools<X>();
}
public X execute(X a) {
int start, end;
switch(a.getStrand()) {
default:
case '+':
start = a.getStart();
end = a.getEnd();
case '-':
start = a.getEnd();
end = a.getStart();
}
Set<X> startSet = new HashSet<X>();
if (startBuffer!=-1) {
for (int l=start-startBuffer; l<=start+startBuffer; l++) {
Pair<String,Integer> tmpPair = new Pair<String,Integer>(a.getChrom(),l);
if (startSeen.containsKey(tmpPair)) {
startSet.addAll(startSeen.get(tmpPair));
}
if (l==3621296) {
if (startSeen.containsKey(tmpPair)) {
//System.err.println(startSeen.get(tmpList).size());
} else {
//System.err.println("empty");
}
}
}
}
Set<X> endSet = new HashSet<X>();
if (endBuffer!=-1) {
for (int r=end-endBuffer; r<=end+endBuffer; r++) {
Pair<String,Integer> tmpPair = new Pair<String,Integer>(a.getChrom(),r);
if (endSeen.containsKey(tmpPair)) {
endSet.addAll(endSeen.get(tmpPair));
}
}
}
if ((startBuffer==-1 && endSet.size()>0) ||
(endBuffer==-1 && startSet.size()>0) ||
st.intersects(startSet,endSet)) {
//System.err.println(a);
return null;
}
Pair<String,Integer> tmpPair = new Pair<String,Integer>(a.getChrom(),start);
if (!startSeen.containsKey(tmpPair)) {
startSeen.put(tmpPair, new HashSet<X>());
}
startSeen.get(tmpPair).add(a);
tmpPair = new Pair<String,Integer>(a.getChrom(),end);
if (!endSeen.containsKey(tmpPair)) {
endSeen.put(tmpPair, new HashSet<X>());
}
endSeen.get(tmpPair).add(a);
return a;
}
}