package operonClustering;
import genomeObjects.GenomicElement;
import genomeObjects.GenomicElementAndQueryMatch;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
/**
 * Dice dissimilarity between two operons, where genes are compared by their
 * cluster ID.
 *
 * <p>The operons are treated as multisets of cluster IDs. The result is
 * {@code 1 - 2*|O1 ∩ O2| / (|O1| + |O2|)}, where the intersection counts each
 * cluster ID as many times as it occurs in both operons (the minimum of the
 * two occurrence counts) and the sizes are the full element counts of the
 * input lists.
 *
 * <p>Elements whose cluster ID is {@code 0} are regarded as unclustered: each
 * one is unique and therefore never matches any other element, although it
 * still counts towards the size of its operon.
 */
public class DicebyClusterID implements OperonDissimilarityMeasure{

    /** Cluster ID carried by elements that have not been assigned to a cluster. */
    private static final int UNCLUSTERED_ID = 0;

    /**
     * Computes the Dice dissimilarity of two operons by cluster ID.
     *
     * @param O1 elements of the first operon
     * @param O2 elements of the second operon
     * @return a value in {@code [0, 1]}: {@code 0} when the cluster-ID multisets
     *         are identical (or both operons are empty), {@code 1} when no
     *         cluster ID is shared
     */
    @Override
    public double computeDissimilarity(LinkedList<GenomicElementAndQueryMatch> O1, LinkedList<GenomicElementAndQueryMatch> O2) {

        double combinedSize = (double) O1.size() + (double) O2.size();

        //two empty operons: avoid dividing by zero and report them as identical
        if (combinedSize == 0) {
            return 0;
        }

        //count how often each real cluster ID occurs in the first operon.
        //unclustered elements are left out: they can never be matched, and
        //skipping them (instead of inventing placeholder IDs) guarantees they
        //cannot accidentally coincide with a genuine cluster ID.
        Map<Integer, Integer> unmatchedInO1 = new HashMap<Integer, Integer>();
        for (GenomicElementAndQueryMatch E : O1) {
            int clusterID = E.getE().getClusterID();
            if (clusterID == UNCLUSTERED_ID) {
                continue;
            }
            Integer seen = unmatchedInO1.get(clusterID);
            unmatchedInO1.put(clusterID, seen == null ? 1 : seen + 1);
        }

        //pair every element of the second operon with a still-unmatched element
        //of the first operon carrying the same cluster ID. The number of pairs
        //equals the sum over cluster IDs of min(count in O1, count in O2).
        int numIntersecting = 0;
        for (GenomicElementAndQueryMatch E : O2) {
            int clusterID = E.getE().getClusterID();
            if (clusterID == UNCLUSTERED_ID) {
                continue;
            }
            Integer remaining = unmatchedInO1.get(clusterID);
            if (remaining != null && remaining > 0) {
                unmatchedInO1.put(clusterID, remaining - 1);
                numIntersecting++;
            }
        }

        //Dice measure
        return 1 - (2.0 * numIntersecting) / combinedSize;
    }
}