package operonClustering;
import genomeObjects.GenomicElementAndQueryMatch;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Locale;
import java.util.Map;
/**
 * Operon dissimilarity measure based on a multiset Jaccard index over the
 * annotations of the elements in each operon.
 *
 * <p>Annotations are compared case-insensitively. An annotation that occurs
 * several times in an operon is counted that many times, so the measure is
 * sensitive to duplicated annotations and not only to their presence.
 */
public class JaccardbyAnnotation implements OperonDissimilarityMeasure {

    /**
     * Computes {@code 1 - |intersection| / |union|} for two operons.
     *
     * <p>The intersection size is the sum, over every annotation present in
     * both operons, of the smaller of its two occurrence counts. The union
     * size is {@code O1.size() + O2.size() - intersection}.
     *
     * @param O1 first operon
     * @param O2 second operon
     * @return a value between 0 (identical annotation multisets) and 1 (no
     *         shared annotation); 0 when both operons are empty
     * @throws NullPointerException if either list is null, or if any entry,
     *         its genomic element, or that element's annotation is null
     */
    @Override
    public double computeDissimilarity(LinkedList<GenomicElementAndQueryMatch> O1, LinkedList<GenomicElementAndQueryMatch> O2) {
        Map<String, Integer> countsO1 = countAnnotations(O1);
        Map<String, Integer> countsO2 = countAnnotations(O2);

        // Multiset intersection: each shared annotation contributes the
        // smaller of its two occurrence counts.
        int numIntersecting = 0;
        for (Map.Entry<String, Integer> entry : countsO1.entrySet()) {
            Integer countInO2 = countsO2.get(entry.getKey());
            if (countInO2 != null) {
                numIntersecting += Math.min(entry.getValue().intValue(), countInO2.intValue());
            }
        }

        int sizeUnion = O1.size() + O2.size() - numIntersecting;
        if (sizeUnion == 0) {
            // Both operons are empty: avoid dividing by zero and report them
            // as not dissimilar.
            return 0;
        }
        return 1 - ((double) numIntersecting / sizeUnion);
    }

    /**
     * Counts how many times each upper-cased annotation occurs in an operon.
     *
     * @param operon entries whose element annotations are counted
     * @return map from upper-cased annotation to its number of occurrences
     */
    private static Map<String, Integer> countAnnotations(LinkedList<GenomicElementAndQueryMatch> operon) {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        for (GenomicElementAndQueryMatch match : operon) {
            // Locale.ROOT keeps the case folding independent of the JVM's
            // default locale (e.g. Turkish dotted/dotless i).
            String annotation = match.getE().getAnnotation().toUpperCase(Locale.ROOT);
            Integer seen = counts.get(annotation);
            counts.put(annotation, seen == null ? 1 : seen + 1);
        }
        return counts;
    }
}