package operonClustering;
import genomeObjects.GenomicElement;
import genomeObjects.GenomicElementAndQueryMatch;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
public class DicebyAnnotation implements OperonDissimilarityMeasure{
@Override
public double computeDissimilarity(LinkedList<GenomicElementAndQueryMatch> O1, LinkedList<GenomicElementAndQueryMatch> O2) {
//initialize lists
ArrayList<String> O1Annotations = new ArrayList<String>();
ArrayList<String> O2Annotations = new ArrayList<String>();
//add elements
for (GenomicElementAndQueryMatch E: O1){
O1Annotations.add(E.getE().getAnnotation().toUpperCase());
}
for (GenomicElementAndQueryMatch E: O2){
O2Annotations.add(E.getE().getAnnotation().toUpperCase());
}
//Hash Sets
HashSet<Object> O1Hash = new HashSet<Object>(O1Annotations);
HashSet<Object> O2Hash = new HashSet<Object>(O2Annotations);
HashSet<Object> IntersectionHash = new HashSet<Object>(O1Annotations);
HashSet<Object> UnionHash = new HashSet<Object>(O1Annotations);
IntersectionHash.retainAll(O2Hash);
UnionHash.addAll(O2Hash);
//Initialize values
double Dissimilarity = 0;
double NumIntersecting = 0;
double SizeO1;
double SizeO2;
double SizeUnion = 0;
SizeO1 = O1.size();
SizeO2 = O2.size();
//Find all intersecting types, and find the number that intersect.
for (Object O : IntersectionHash){
NumIntersecting = NumIntersecting + Math.min(Collections.frequency(O1Annotations, O), Collections.frequency(O2Annotations, O));
}
//compute union
SizeUnion = SizeO1 + SizeO2 - NumIntersecting;
if (!((SizeO1 == 0) && (SizeO2 == 0))){
Dissimilarity = 1-(2*NumIntersecting)/(SizeO1+SizeO2);
} else { //divide by zero case
Dissimilarity = 0;
}
//Dice Measure
return Dissimilarity;
}
}