package operonClustering;
import genomeObjects.GenomicElement;
import genomeObjects.GenomicElementAndQueryMatch;
import java.awt.Point;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import org.biojava3.core.sequence.Strand;
import moduls.frm.children.GapPoint;
import moduls.frm.children.GapPointMapping;
public class CustomDissimilarity implements Serializable {
/** Serialization version (class is persisted via {@link Serializable}). */
private static final long serialVersionUID = 629102024437887278L;
//Fields
//General
private String Name; // display name of this dissimilarity measure
private String AmalgamationType; // how the per-factor dissimilarities are combined — used outside this chunk, semantics not visible here
private LinkedList<String> Factors; // names of the factors that are enabled
private Double ImportanceFraction; // NOTE(review): not read in this chunk — presumably scales factor importances; confirm in amalgamation code
//Factor 1: Presence/absence of common genes
private String CGCompareType; // "Dice" or anything else = Jaccard (see GeneralizedDiceOrJaccard)
private boolean CGDuplicatesUnique; // true: count duplicates individually (multiset); false: set semantics
private double CGWeight; // factor weight used when amalgamating (outside this chunk)
private int CGImportance; // factor importance rank (used outside this chunk)
//Factor 2: Presence/absence of common motifs
private LinkedList<String> CMMotifNames; // motif names passed to getAssociatedMotifsAsObjects(..)
private String CMCompareType; // "Dice" or Jaccard, as for factor 1
private boolean CMDuplicatesUnique; // duplicate handling for motif lists
private double CMWeight;
private int CMImportance;
public boolean CMExcludeOperonHead; // exclude first gene's motifs from comparison (used outside this chunk)
public boolean CMExcludeOperonTail; // exclude last gene's motifs from comparison
//Factor 3: Gene order
private String GOCompareType; // NOTE(review): not read in this chunk; GODissimilarity takes Type as a parameter instead
private boolean HeadPos; // enable head-position sub-factor
private boolean PairOrd; // enable adjacent-pair sub-factor
private boolean LinearOrd; // enable linear-order sub-factor
private double RelWeightHeadPos; // relative weight of head-position sub-factor
private double RelWeightPairOrd; // relative weight of pair-order sub-factor
private double RelWeightLinearOrd; // relative weight of linear-order sub-factor
private double GOWeight;
private int GOImportance;
//Factor 4: Intragenic Gap Sizes
private GapPointMapping GPM; // maps a gap-size difference (bp) to a dissimilarity; has MinGaplimit/MaxGapLimit/MaxDissimilarity/Mapping
private double GGWeight;
private int GGImportance;
//Factor 5: Changes in strandedness
private boolean SSIndividualGenes; // compare strand gene-by-gene (used outside this chunk)
private boolean SSWholeGroup; // compare strand of the whole group
private double SSRelWeightIndGenes;
private double SSRelWeightWholeGroup;
private double SSWeight;
private int SSImportance;
//extra
// Set by GGDissimilarity when a large gap difference is seen; read by UI code elsewhere.
public boolean DisplayMsgAboutInternalMotif = false;
//Constructor
/**
 * Builds a fully-specified custom dissimilarity measure.
 *
 * Parameters arrive grouped by factor, mirroring the field groups:
 * general settings, (1) common genes, (2) common motifs, (3) gene order,
 * (4) intragenic gap sizes, (5) strandedness. Each parameter is copied
 * verbatim into the matching field; the only logic is the head/tail
 * exclusion sanity check below.
 */
public CustomDissimilarity(String name2,String amalgamationType2,LinkedList<String> factors2, double ImpFactor,
String cGCompareType2,boolean cGDuplicatesUnique2,double cGWeight2,int cGImportance2,
LinkedList<String> cMMotifNames2,String cMCompareType2,boolean cMDuplicatesUnique2, boolean cMExcludeOperonHead, boolean cMExcludeOperonTail, double cMWeight2,int cMImportance2,
boolean HeadPos, boolean PairOrd, boolean LinearOrd, double HeadPoswt, double PairOrdwt, double LinearOrdwt, double gOWeight2, int gOImportance2,
GapPointMapping gapSizeDissMapping2, double gGWeight2,int gGImportance2,
boolean individualGenes2, boolean wholeGroup2, double relWeightIndGenes2, double relWeightWholeGroup2, double sSWeight2, int sSImportance2){
//parameters
//general.
this.setName(name2);
this.Factors = factors2;
this.AmalgamationType = amalgamationType2;
this.ImportanceFraction = ImpFactor;
//factor 1: common genes
this.CGCompareType = cGCompareType2;
this.CGDuplicatesUnique = cGDuplicatesUnique2;
this.CGImportance = cGImportance2;
this.CGWeight = cGWeight2;
//factor 2: common motifs
this.CMMotifNames = cMMotifNames2;
this.CMCompareType = cMCompareType2;
this.CMDuplicatesUnique = cMDuplicatesUnique2;
this.CMExcludeOperonHead = cMExcludeOperonHead;
this.CMExcludeOperonTail = cMExcludeOperonTail;
//excluding both the head and tail is meaningless, so ignore this possibility.
//NOTE: the parameters are named cMExcludeOperonHead/Tail, so these unqualified
//names resolve to the FIELDS assigned just above — the fields are reset here.
if (CMExcludeOperonHead && CMExcludeOperonTail){
CMExcludeOperonHead = false;
CMExcludeOperonTail = false;
}
this.CMImportance = cMImportance2;
this.CMWeight = cMWeight2;
//factor 3: gene order
//parameters HeadPos/PairOrd/LinearOrd shadow the fields; "this." disambiguates.
this.HeadPos = HeadPos;
this.PairOrd = PairOrd;
this.LinearOrd = LinearOrd;
this.RelWeightHeadPos = HeadPoswt;
this.RelWeightPairOrd = PairOrdwt;
this.RelWeightLinearOrd = LinearOrdwt;
this.GOImportance = gOImportance2;
this.GOWeight = gOWeight2;
//factor 4: gene gaps
this.GPM = gapSizeDissMapping2;
this.GGImportance = gGImportance2;
this.GGWeight = gGWeight2;
//factor 5: Strandedness
this.SSIndividualGenes = individualGenes2;
this.SSWholeGroup = wholeGroup2;
this.SSRelWeightIndGenes = relWeightIndGenes2;
this.SSRelWeightWholeGroup = relWeightWholeGroup2;
this.SSImportance = sSImportance2;
this.SSWeight = sSWeight2;
}
//dummy constructor
/**
 * No-argument constructor; every field keeps its Java default
 * (null / 0 / false). Presumably needed for deserialization or for
 * callers that populate the object afterwards — confirm at call sites.
 */
public CustomDissimilarity(){
}
// -------- Compute Dissimilarity -------------------------//
// -------- General ----------//
//Generalized Dice/Jaccard
/**
 * Generalized Dice / Jaccard dissimilarity between two collections.
 *
 * @param O1 first collection of comparable values
 * @param O2 second collection of comparable values
 * @param TreatDuplicatesAsUnique true: multiset semantics (duplicates count
 *        individually, intersection uses per-element minimum frequency);
 *        false: plain set semantics
 * @param Type "Dice" selects 1 - 2|A∩B|/(|A|+|B|); any other value selects
 *        Jaccard, 1 - |A∩B|/|A∪B|
 * @return dissimilarity in [0,1]; 0 when both inputs are empty (divide-by-zero case)
 */
public double GeneralizedDiceOrJaccard(LinkedList<Object> O1, LinkedList<Object> O2, boolean TreatDuplicatesAsUnique, String Type){
    //distinct element types common to both collections
    HashSet<Object> O2Hash = new HashSet<Object>(O2);
    HashSet<Object> IntersectionHash = new HashSet<Object>(O1);
    IntersectionHash.retainAll(O2Hash);
    double SizeO1;
    double SizeO2;
    double NumIntersecting = 0;
    double SizeUnion;
    if (TreatDuplicatesAsUnique){
        //multiset sizes
        SizeO1 = O1.size();
        SizeO2 = O2.size();
        //multiset intersection: sum of per-type minimum frequencies
        for (Object O : IntersectionHash){
            NumIntersecting += Math.min(Collections.frequency(O1, O), Collections.frequency(O2, O));
        }
        //inclusion-exclusion for the multiset union
        SizeUnion = SizeO1 + SizeO2 - NumIntersecting;
    } else {
        //set semantics: sizes of the de-duplicated collections
        HashSet<Object> O1Hash = new HashSet<Object>(O1);
        HashSet<Object> UnionHash = new HashSet<Object>(O1Hash);
        UnionHash.addAll(O2Hash);
        SizeO1 = O1Hash.size();
        SizeO2 = O2Hash.size();
        NumIntersecting = IntersectionHash.size();
        SizeUnion = UnionHash.size();
    }
    if (Type.equals("Dice")){
        if (SizeO1 == 0 && SizeO2 == 0){
            return 0; //divide-by-zero case: two empty inputs are identical
        }
        return 1 - (2 * NumIntersecting) / (SizeO1 + SizeO2);
    }
    //Jaccard
    if (SizeUnion == 0){
        return 0; //divide-by-zero case
    }
    return 1 - (NumIntersecting / SizeUnion);
}
//Head position based dissimilarity (GO)
public double HeadPosDiss(LinkedList<Object> O1Values, LinkedList<Object> O2Values, boolean isStrandFlipped){
double HeadPosDissimilarity = 0.0;
/*
* Algorithm:
* (1) determine # common elements (dup. unique)
* (2) Set first as pivot, head = start of first list
* (3) count common positions from second forwards
* (4) count common positions from second reversed
* (5) retain higher count
* (6) diss = 1 - (higher count) / (# common elements [dup unique])
*/
//re-sizing - O1 Values must always be larger
if (O1Values.size() < O2Values.size()){
LinkedList<Object> Temp = O1Values;
O1Values = O2Values;
O2Values = Temp;
}
//(1) common elements
int NumIntersecting = 0;
HashSet<Object> O2Hash = new HashSet<Object>(O2Values);
HashSet<Object> IntersectionHash = new HashSet<Object>(O1Values);
IntersectionHash.retainAll(O2Hash);
//Find all intersecting types, and find the number that intersect.
for (Object O : IntersectionHash){
NumIntersecting = NumIntersecting + Math.min(Collections.frequency(O1Values, O), Collections.frequency(O2Values, O));
}
//(2)-(5) Counts
int FwdCount = 0;
int RevCount = 0;
int MaxCount = 0;
for (int i = 0; i < O2Values.size(); i++){
if (O2Values.get(i).equals(O1Values.get(i))){
FwdCount++;
}
if (O2Values.get(i).equals(O1Values.get(O1Values.size()-1-i))){
RevCount++;
}
}
//old way - benefit of doubt
//MaxCount = Math.max(FwdCount, RevCount);
//new way - based on supplied strand
if (isStrandFlipped){
MaxCount = RevCount;
} else {
MaxCount = FwdCount;
}
//(6) Compute dissimilarity
if (NumIntersecting != 0){
HeadPosDissimilarity = 1 - ((double)MaxCount/(double)NumIntersecting);
} else {
HeadPosDissimilarity = 0;
}
return HeadPosDissimilarity;
}
//Pair ordering based dissimilarity (GO)
public double PairOrdDiss(LinkedList<Object> O1Values, LinkedList<Object> O2Values, boolean isStrandFlipped){
double PairOrdDissimilarity = 0;
/*
* Algorithm:
* (1) Build O1 Adjacencies
* (2) Build O2 Adjacencies + O2 reverse adjacencies
* (3) Count common, and take higher
*/
//Initialize adjacencies
LinkedList<LinkedList<Object>> O1Adjacencies = new LinkedList<LinkedList<Object>>();
LinkedList<LinkedList<Object>> O2AdjacenciesFwd = new LinkedList<LinkedList<Object>>();
LinkedList<LinkedList<Object>> O2AdjacenciesRev = new LinkedList<LinkedList<Object>>();
//Build adjacencies
for (int i = 0; i <O1Values.size()-1; i++){
LinkedList<Object> SingleAdjacency = new LinkedList<Object>();
SingleAdjacency.add(O1Values.get(i));
SingleAdjacency.add(O1Values.get(i+1));
O1Adjacencies.add(SingleAdjacency);
}
for (int i = 0; i <O2Values.size()-1; i++){
LinkedList<Object> FwdAdjacency = new LinkedList<Object>();
FwdAdjacency.add(O2Values.get(i));
FwdAdjacency.add(O2Values.get(i+1));
O2AdjacenciesFwd.add(FwdAdjacency);
}
for (int i = O2Values.size()-2; i >= 0; i--){
LinkedList<Object> RevAdjacency = new LinkedList<Object>();
RevAdjacency.add(O2Values.get(i+1));
RevAdjacency.add(O2Values.get(i));
O2AdjacenciesRev.add(RevAdjacency);
}
//find intersection
O2AdjacenciesFwd.retainAll(O1Adjacencies);
O2AdjacenciesRev.retainAll(O1Adjacencies);
//only consider the
//compute dissimilarity
int SmallerSize = Math.min(O1Values.size(), O2Values.size());
//old way - don't consider pre-determined strandedness
//int MostAdjacencies = Math.max(O2AdjacenciesFwd.size(), O2AdjacenciesRev.size());
//new way: only if the strand is explicitly reported as flipped, consider the reverse
int MostAdjacencies = -1;
if (isStrandFlipped){
MostAdjacencies = O2AdjacenciesRev.size();
} else {
MostAdjacencies = O2AdjacenciesFwd.size();
}
if (SmallerSize > 1){
PairOrdDissimilarity = 1 - ((double)MostAdjacencies/(double)(SmallerSize-1));
} else {
PairOrdDissimilarity = 0;
}
return PairOrdDissimilarity;
}
//Dissimilarity by overall shape (tolerate insertions/deletions) (GO)
public double LinearOrderDiss(LinkedList<Object> O1Values, LinkedList<Object> O2Values, boolean isStrandFlipped){
/*
* Algorithm:
* (1) Build O2 in reverse order
* (2) Determine list of elements common to both sets, in the order they occur
* (3) check for disagreements in ordering
*
* Current state - duplicates pose problems
* if there is an unequal number of duplicates, very hard to compare
* rather, just count up until the maximum number of common duplicates
* this is likely only a big problem when there are a very different number
* of duplicates. Something to think about / worry about for the future.
*/
/*
* Debugging
*
*/
//initialize output
double LinearOrderDissimilarity = 0.0;
boolean MultipleElementsExist = false;
boolean DuplicatesExist = false;
//build reverse list
LinkedList<Object> O2ValuesRev = new LinkedList<Object>();
for (int i = O2Values.size()-1; i >=0; i--){
O2ValuesRev.add(O2Values.get(i));
}
//build hash of counts - determine the number of elements appropriate
LinkedHashMap<Object, Integer> CommonValuesCounts = new LinkedHashMap<Object, Integer>();
for (Object O : O1Values){
if (O2Values.contains(O)){
int Count = 1;
if (CommonValuesCounts.get(O) != null){
Count = CommonValuesCounts.get(O);
Count++;
DuplicatesExist = true;
}
CommonValuesCounts.put(O,Count);
}
}
//a re-ordering can only possibly occur if multiple elements exist.
if (CommonValuesCounts.size() > 1){
MultipleElementsExist = true;
}
//common elements from each set
LinkedList<Object> O1CommonValues = new LinkedList<Object>();
LinkedList<Object> O2CommonValues = new LinkedList<Object>();
LinkedList<Object> O2RevCommonValues = new LinkedList<Object>();
//keep track of the number of common values in each list
LinkedHashMap<Object, Integer> O1CommonCounts = new LinkedHashMap<Object, Integer>();
LinkedHashMap<Object, Integer> O2CommonCounts = new LinkedHashMap<Object, Integer>();
LinkedHashMap<Object, Integer> O2RevCommonCounts = new LinkedHashMap<Object, Integer>();
//build common elements lists
//O1 List
for (Object O : O1Values){
if (O2Values.contains(O)){
//check current count with master list, and update hash.
int Count = 1;
if (O1CommonCounts.get(O) != null){
Count = O1CommonCounts.get(O);
Count++;
}
O1CommonCounts.put(O,Count);
//optionally add this to the list.
if (Count <= CommonValuesCounts.get(O)){
O1CommonValues.add(O);
}
}
}
//O2 List
for (Object O : O2Values){
if (O1Values.contains(O)){
//check current count with master list, and update hash.
int Count = 1;
if (O2CommonCounts.get(O) != null){
Count = O2CommonCounts.get(O);
Count++;
}
O2CommonCounts.put(O,Count);
//optionally add this to the list.
if (Count <= CommonValuesCounts.get(O)){
O2CommonValues.add(O);
}
}
}
//O2Rev List
for (Object O : O2ValuesRev){
if (O1Values.contains(O)){
//check current count with master list, and update hash.
int Count = 1;
if (O2RevCommonCounts.get(O) != null){
Count = O2RevCommonCounts.get(O);
Count++;
}
O2RevCommonCounts.put(O,Count);
//optionally add this to the list.
if (Count <= CommonValuesCounts.get(O)){
O2RevCommonValues.add(O);
}
}
}
// //debugging.
// if (O1Values.size() == 3 && O2Values.size() == 3
// && O1Values.contains(15) && O1Values.contains(309)
// && O2Values.contains(15) && O2Values.contains(309)
// ){
// System.out.println("Breakpoint!");
// }
//if the first list agrees with either of the other lists, record no change in gene order.
//otherwise, maximum dissimilarity.
if (O1CommonValues.equals(O2CommonValues) ||
(O1CommonValues.equals(O2RevCommonValues) && isStrandFlipped)){
LinearOrderDissimilarity = 0.0;
//Tricky balance between 0 transitivity and segregating single-gene commonalities
//
//Solution: need to eliminate 0 transitivity.
//To eliminate single-gene commonalities,
//create a better context set.
// //if only one element exists in common, need to break 0 transitivities.
// if (O1CommonValues.size()==1){
// LinearOrderDissimilarity = 1.0;
// }
//if there is only one kind of common element, no way to talk about re-ordering
if (!MultipleElementsExist){
LinearOrderDissimilarity = 1.0;
}
} else{
LinearOrderDissimilarity = 1.0;
}
//return output
return LinearOrderDissimilarity;
}
// ------- Factors -----------//
//Common Genes
/**
 * Factor 1: presence/absence of common genes.
 *
 * Converts each grouping to comparable values — upper-cased annotations
 * when {@code Type} is "annotation", otherwise cluster IDs (cluster 0 is
 * treated as "probably unique": each such gene gets a fresh negative tag
 * so it never matches anything) — then delegates to
 * {@link #GeneralizedDiceOrJaccard} with the configured duplicate handling
 * and compare type. The result is clamped to [0,1].
 */
public double CGDissimilarity(LinkedList<GenomicElementAndQueryMatch> O1, LinkedList<GenomicElementAndQueryMatch> O2, String Type){
    LinkedList<Object> leftValues = new LinkedList<Object>();
    LinkedList<Object> rightValues = new LinkedList<Object>();
    if (Type.equals("annotation")){
        //compare by (case-insensitive) annotation strings
        for (GenomicElementAndQueryMatch match : O1){
            leftValues.add(match.getE().getAnnotation().toUpperCase());
        }
        for (GenomicElementAndQueryMatch match : O2){
            rightValues.add(match.getE().getAnnotation().toUpperCase());
        }
    } else {
        //compare by cluster ID; cluster 0 means "really probably unique",
        //so every such gene receives a distinct negative stand-in value.
        int uniqueTag = -10;
        for (GenomicElementAndQueryMatch match : O1){
            if (match.getE().getClusterID() == 0){
                uniqueTag--;
                leftValues.add(uniqueTag);
            } else {
                leftValues.add(match.getE().getClusterID());
            }
        }
        for (GenomicElementAndQueryMatch match : O2){
            if (match.getE().getClusterID() == 0){
                uniqueTag--;
                rightValues.add(uniqueTag);
            } else {
                rightValues.add(match.getE().getClusterID());
            }
        }
    }
    //delegate to the configured set/multiset comparison
    double result = GeneralizedDiceOrJaccard(leftValues, rightValues, CGDuplicatesUnique, CGCompareType);
    //clamp to [0,1]
    if (result > 1){
        return 1;
    }
    if (result <= 0){
        return 0;
    }
    return result;
}
//Common Motifs
/**
 * Factor 2: presence/absence of common motifs.
 *
 * For every gene type common to both groupings (matched by annotation or
 * cluster ID according to {@code Type}), the motifs associated with each
 * instance pair are compared with Dice/Jaccard; the Hungarian (Munkres)
 * assignment then picks the instance pairing that minimizes the summed
 * motif dissimilarity. The per-type minima are averaged over all common
 * types and clamped to [0,1].
 */
public double CMDissimilarity(LinkedList<GenomicElementAndQueryMatch> O1, LinkedList<GenomicElementAndQueryMatch> O2, String Type){
double Dissimilarity = 0;
/*
* Approach: Find all common gene pairs, and assess all associated motifs,
* using dice or jaccard. Take average of all individual common gene motif-related
* dissimilarities.
*
* In the case of multiple duplicate common genes existing between gene groupings,
* compute all possible pairwise dissimilarities between genes, and use the
* Munkres-Hungarian algorithm to determine the assignments that minimize the sum
* of all dissimilarities.
*
* If there is only one common element across sets (the most common case), then
* the matrix is not very interesting and the Hungarian algorithm just returns the element.
*/
// //debugging
// boolean Breakit = false;
// for (GenomicElementAndQueryMatch GandE : O1){
// if (GandE.getE().getStart() == 348280 &&
// GandE.getE().getStop() == 349017){
// Breakit = true;
// }
// }
// if (Breakit){
// for (GenomicElementAndQueryMatch GandE2 : O2){
// if (GandE2.getE().getStart() == 65555 &&
// GandE2.getE().getStop() == 66292){
// System.out.println("Breakpoint!");
// }
// }
// }
if (Type.equals("annotation")){
//First, isolate all common types.
//initialize lists
ArrayList<String> O1Values = new ArrayList<String>();
ArrayList<String> O2Values = new ArrayList<String>();
//add elements (case-insensitive: annotations are upper-cased before comparison)
for (GenomicElementAndQueryMatch E: O1){
O1Values.add(E.getE().getAnnotation().toUpperCase());
}
for (GenomicElementAndQueryMatch E: O2){
O2Values.add(E.getE().getAnnotation().toUpperCase());
}
//determine intersection of common genes (by annotation)
HashSet<String> O1Hash = new HashSet<String>(O1Values);
HashSet<String> O2Hash = new HashSet<String>(O2Values);
HashSet<String> Intersection = new HashSet<String>(O1Values);
Intersection.retainAll(O2Hash);
//NOTE(review): redundant — Intersection was built from O1Values, so it is
//already a subset of O1Hash; harmless.
Intersection.retainAll(O1Hash);
//Sum of Dissimilarities
double SumDissimilarity = 0;
//determine sum of dissimilarities, one term per common annotation
for (String s : Intersection){
//find all instances in each set
LinkedList<GenomicElementAndQueryMatch> InstancesIn1 = new LinkedList<GenomicElementAndQueryMatch>();
for (GenomicElementAndQueryMatch E: O1){
if (E.getE().getAnnotation().toUpperCase().equals(s)){
InstancesIn1.add(E);
}
}
LinkedList<GenomicElementAndQueryMatch> InstancesIn2 = new LinkedList<GenomicElementAndQueryMatch>();
for (GenomicElementAndQueryMatch E: O2){
if (E.getE().getAnnotation().toUpperCase().equals(s)){
InstancesIn2.add(E);
}
}
//compute motif dissimilarity of all pairwise, to determine best matching set
//double[][] MotifComparisons = new double[InstancesIn1.size()][InstancesIn2.size()];
//make the matrix square, if it needs to be.
//Padding entries stay 0, so the Hungarian assignment lets dummy rows/columns
//absorb unmatched instances at zero cost.
int MatrixSize = Math.max(InstancesIn1.size(), InstancesIn2.size());
double[][] MotifComparisons = new double[MatrixSize][MatrixSize];
for (int i = 0; i < MatrixSize; i++){
for (int j = 0; j < MatrixSize; j++){
MotifComparisons[i][j] = 0;
}
}
for (int i = 0; i < InstancesIn1.size(); i++){
for (int j = 0; j < InstancesIn2.size(); j++){
//retrieve all motifs associated with this instance pair
LinkedList<Object> MotifsIn1 = InstancesIn1.get(i).getE().getAssociatedMotifsAsObjects(CMMotifNames);
LinkedList<Object> MotifsIn2 = InstancesIn2.get(j).getE().getAssociatedMotifsAsObjects(CMMotifNames);
//fill in array
MotifComparisons[i][j] =
this.GeneralizedDiceOrJaccard(MotifsIn1, MotifsIn2, CMDuplicatesUnique, CMCompareType);
// if (MotifsIn1.contains("BOP") && MotifsIn1.contains("Promoter") &&
// MotifsIn2.contains("BOP") && MotifsIn2.contains("Promoter")){
// System.out.println("Diss: " + MotifComparisons[i][j]);
// }
}
}
//Call Hungarian algorithm, sum dissimilarities, add to running total.
//Normalized by the smaller instance count (>= 1, since s is in the intersection).
SumDissimilarity = SumDissimilarity
+ (HungarianAlgorithm.JCEAssignment(MotifComparisons) / (double) Math.min(InstancesIn1.size(), InstancesIn2.size()));
}
if (Intersection.size() > 0){
//Normalize: average over all common annotation types
Dissimilarity = SumDissimilarity / Intersection.size();
}
} else { //Common Cluster ID
//initialize lists
ArrayList<Integer> O1Values = new ArrayList<Integer>();
ArrayList<Integer> O2Values = new ArrayList<Integer>();
int NegativeCounter = -10;
//add elements
//if clusterID = 0, this is really probably unique, treat all cluster == 0 as unique sets.
//Each cluster-0 gene gets a distinct negative value, so it can never intersect.
for (GenomicElementAndQueryMatch E: O1){
if (E.getE().getClusterID() == 0){
NegativeCounter--;
O1Values.add(NegativeCounter);
} else {
O1Values.add(E.getE().getClusterID());
}
}
for (GenomicElementAndQueryMatch E: O2){
if (E.getE().getClusterID() == 0){
NegativeCounter--;
O2Values.add(NegativeCounter);
} else {
O2Values.add(E.getE().getClusterID());
}
}
//determine intersection of common genes (by cluster ID)
HashSet<Integer> O1Hash = new HashSet<Integer>(O1Values);
HashSet<Integer> O2Hash = new HashSet<Integer>(O2Values);
HashSet<Integer> Intersection = new HashSet<Integer>(O1Values);
Intersection.retainAll(O2Hash);
//NOTE(review): redundant — see annotation branch above.
Intersection.retainAll(O1Hash);
//Sum of Dissimilarities
double SumDissimilarity = 0;
//determine sum of dissimilarities, one term per common cluster ID
for (Integer s : Intersection){
//find all instances in each set (s is always positive here, since the
//negative cluster-0 stand-ins never intersect)
LinkedList<GenomicElementAndQueryMatch> InstancesIn1 = new LinkedList<GenomicElementAndQueryMatch>();
for (GenomicElementAndQueryMatch E: O1){
if (E.getE().getClusterID() == s){
InstancesIn1.add(E);
}
}
LinkedList<GenomicElementAndQueryMatch> InstancesIn2 = new LinkedList<GenomicElementAndQueryMatch>();
for (GenomicElementAndQueryMatch E: O2){
if (E.getE().getClusterID() == s){
InstancesIn2.add(E);
}
}
//compute motif dissimilarity of all pairwise, to determine best matching set
//double[][] MotifComparisons = new double[InstancesIn1.size()][InstancesIn2.size()];
//make the matrix square, if it needs to be.
int MatrixSize = Math.max(InstancesIn1.size(), InstancesIn2.size());
double[][] MotifComparisons = new double[MatrixSize][MatrixSize];
for (int i = 0; i < MatrixSize; i++){
for (int j = 0; j < MatrixSize; j++){
MotifComparisons[i][j] = 0;
}
}
for (int i = 0; i < InstancesIn1.size(); i++){
for (int j = 0; j < InstancesIn2.size(); j++){
//retrieve all motifs
LinkedList<Object> MotifsIn1 = InstancesIn1.get(i).getE().getAssociatedMotifsAsObjects(CMMotifNames);
LinkedList<Object> MotifsIn2 = InstancesIn2.get(j).getE().getAssociatedMotifsAsObjects(CMMotifNames);
//fill in array
MotifComparisons[i][j] =
this.GeneralizedDiceOrJaccard(MotifsIn1, MotifsIn2, CMDuplicatesUnique, CMCompareType);
}
}
// //debugging
// for (int i = 0; i < MatrixSize; i++){
// String Line = "";
// for (int j =0; j <MatrixSize; j++){
// Line = Line + " " + MotifComparisons[i][j];
// }
// Line = Line + ";";
// System.out.println(Line);
// }
//Call Hungarian algorithm, sum dissimilarities, add to running total
SumDissimilarity = SumDissimilarity
+ (HungarianAlgorithm.JCEAssignment(MotifComparisons) / (double) Math.min(InstancesIn1.size(), InstancesIn2.size()));
}
if (Intersection.size() > 0){
//Normalize: average over all common cluster IDs
Dissimilarity = SumDissimilarity / Intersection.size();
}
}
//adjust dissimilarity before returning: clamp to [0,1]
if (Dissimilarity > 1){
Dissimilarity = 1;
} else if (Dissimilarity < 0){
Dissimilarity = 0;
}
return Dissimilarity;
}
//Gene Order
/**
 * Factor 3: gene order. Weighted blend of up to three sub-factors —
 * head position ({@link #HeadPosDiss}), adjacent-pair order
 * ({@link #PairOrdDiss}) and linear order ({@link #LinearOrderDiss}) —
 * each enabled by its flag and weighted by its relative weight.
 *
 * Values are compared by upper-cased annotation when {@code Type} is
 * "annotation", otherwise by cluster ID (cluster 0 genes get distinct
 * negative stand-ins so they never match). Returns 0 when no sub-factor
 * is enabled (previously this divided by zero and returned NaN); the
 * result is clamped to [0,1].
 */
public double GODissimilarity(LinkedList<GenomicElementAndQueryMatch> O1, LinkedList<GenomicElementAndQueryMatch> O2, String Type){
    //Sub-factor dissimilarities (stay 0 when the sub-factor is disabled,
    //so disabled terms contribute nothing to the weighted sum below)
    double HeadPosDissimilarity = 0;
    double PairOrdDissimilarity = 0;
    double LinearOrdDissimilarity = 0;
    //Comparable value lists for the two groupings
    LinkedList<Object> O1Values = new LinkedList<Object>();
    LinkedList<Object> O2Values = new LinkedList<Object>();
    //determine appropriate data types
    if (Type.equals("annotation")){
        for (GenomicElementAndQueryMatch E: O1){
            O1Values.add(E.getE().getAnnotation().toUpperCase());
        }
        for (GenomicElementAndQueryMatch E: O2){
            O2Values.add(E.getE().getAnnotation().toUpperCase());
        }
    } else {
        //if clusterID == 0, this is really probably unique: give each such
        //gene a distinct negative value so it never matches anything.
        int NegativeCounter = -10;
        for (GenomicElementAndQueryMatch E: O1){
            if (E.getE().getClusterID() == 0){
                NegativeCounter--;
                O1Values.add(NegativeCounter);
            } else {
                O1Values.add(E.getE().getClusterID());
            }
        }
        for (GenomicElementAndQueryMatch E: O2){
            if (E.getE().getClusterID() == 0){
                NegativeCounter--;
                O2Values.add(NegativeCounter);
            } else {
                O2Values.add(E.getE().getClusterID());
            }
        }
    }
    //determine if the total strand has flipped between the two groupings
    boolean isStrandFlipped = TotalGroupStrandFlip(O1,O2,Type);
    //Evaluate each enabled sub-factor and accumulate its relative weight
    double TotalRelativeWeights = 0;
    if (HeadPos){
        TotalRelativeWeights = TotalRelativeWeights + RelWeightHeadPos;
        HeadPosDissimilarity = this.HeadPosDiss(O1Values, O2Values, isStrandFlipped);
    }
    if (PairOrd){
        TotalRelativeWeights = TotalRelativeWeights + RelWeightPairOrd;
        PairOrdDissimilarity = this.PairOrdDiss(O1Values, O2Values, isStrandFlipped);
    }
    if (LinearOrd){
        TotalRelativeWeights = TotalRelativeWeights + RelWeightLinearOrd;
        LinearOrdDissimilarity = this.LinearOrderDiss(O1Values, O2Values, isStrandFlipped);
    }
    //Guard: no sub-factor enabled (or all weights zero) — nothing to measure.
    //Without this the division below yielded NaN.
    if (TotalRelativeWeights == 0){
        return 0;
    }
    //Amalgamate into a single weighted dissimilarity
    double Dissimilarity = (RelWeightHeadPos/TotalRelativeWeights) * HeadPosDissimilarity +
            (RelWeightPairOrd/TotalRelativeWeights) * PairOrdDissimilarity +
            (RelWeightLinearOrd/TotalRelativeWeights) * LinearOrdDissimilarity;
    //Maximum dissimilarity is 1, minimum is 0.
    if (Dissimilarity > 1){
        Dissimilarity = 1;
    } else if (Dissimilarity <= 0){
        Dissimilarity = 0;
    }
    return Dissimilarity;
}
//Gene Gaps
public double GGDissimilarity(LinkedList<GenomicElementAndQueryMatch> O1, LinkedList<GenomicElementAndQueryMatch> O2, String Type){
// //breakpoint!
// for (GenomicElementAndQueryMatch GandE : O1){
// if (GandE.getE().getStart() == 192504 &&
// GandE.getE().getStop() == 193139){
// System.out.println("breakpoint!");
// }
// }
//determine if the total strand has flipped
boolean isStrandFlipped = TotalGroupStrandFlip(O1,O2,Type);
//only for customized
DisplayMsgAboutInternalMotif = false;
//re-sizing - O1 Values must always be larger
if (O1.size() < O2.size()){
LinkedList<GenomicElementAndQueryMatch> Temp = O1;
O1 = O2;
O2 = Temp;
}
//Initialize Dissimilarity
double Dissimilarity = 0.0;
//Initialize adjacencies
LinkedList<LinkedList<GenomicElementAndQueryMatch>> O1Adjacencies =
new LinkedList<LinkedList<GenomicElementAndQueryMatch>>();
LinkedList<LinkedList<GenomicElementAndQueryMatch>> O2AdjacenciesFwd =
new LinkedList<LinkedList<GenomicElementAndQueryMatch>>();
LinkedList<LinkedList<GenomicElementAndQueryMatch>> O2AdjacenciesRev =
new LinkedList<LinkedList<GenomicElementAndQueryMatch>>();
//Build adjacencies
for (int i = 0; i < O1.size()-1; i++){
LinkedList<GenomicElementAndQueryMatch> SingleAdjacency =
new LinkedList<GenomicElementAndQueryMatch>();
SingleAdjacency.add(O1.get(i));
SingleAdjacency.add(O1.get(i+1));
O1Adjacencies.add(SingleAdjacency);
}
for (int i = 0; i <O2.size()-1; i++){
LinkedList<GenomicElementAndQueryMatch> FwdAdjacency =
new LinkedList<GenomicElementAndQueryMatch>();
FwdAdjacency.add(O2.get(i));
FwdAdjacency.add(O2.get(i+1));
O2AdjacenciesFwd.add(FwdAdjacency);
}
for (int i = O2.size()-2; i >= 0; i--){
LinkedList<GenomicElementAndQueryMatch> RevAdjacency =
new LinkedList<GenomicElementAndQueryMatch>();
RevAdjacency.add(O2.get(i+1));
RevAdjacency.add(O2.get(i));
O2AdjacenciesRev.add(RevAdjacency);
}
//initialize dissimilarities
double ForwardDissimilarity = 0;
double ReverseDissimilarity = 0;
int FwdMatch = 0;
int RevMatch = 0;
//When multiple homologous pairs present, pick the smallest one.
LinkedHashMap<LinkedList<Object>, Integer> FwdAdjHash
= new LinkedHashMap<LinkedList<Object>, Integer>();
LinkedHashMap<LinkedList<Object>, Integer> RevAdjHash
= new LinkedHashMap<LinkedList<Object>, Integer>();
//Walk along adjacencies.
for (LinkedList<GenomicElementAndQueryMatch> Adjacency : O1Adjacencies){
//check forward
for (LinkedList<GenomicElementAndQueryMatch> FwdAdj : O2AdjacenciesFwd){
boolean EquivalentAdjacency = false;
if (Type.equals("annotation")){
if (Adjacency.get(0).getE().getAnnotation().toUpperCase()
.equals(FwdAdj.get(0).getE().getAnnotation().toUpperCase()) && //annotation match
Adjacency.get(1).getE().getAnnotation().toUpperCase()
.equals(FwdAdj.get(1).getE().getAnnotation().toUpperCase()) && //annotation match
Adjacency.get(0).getE().getContig().equals(Adjacency.get(1).getE().getContig()) && //internal contig match
FwdAdj.get(0).getE().getContig().equals(FwdAdj.get(1).getE().getContig())){ //internal contig match
EquivalentAdjacency = true;
FwdMatch++;
}
} else {
if (Adjacency.get(0).getE().getClusterID() ==
FwdAdj.get(0).getE().getClusterID() && //cluster ID match
Adjacency.get(1).getE().getClusterID() ==
FwdAdj.get(1).getE().getClusterID() && //cluster ID match
Adjacency.get(0).getE().getContig().equals(Adjacency.get(1).getE().getContig()) && //internal contig match
FwdAdj.get(0).getE().getContig().equals(FwdAdj.get(1).getE().getContig())){ //internal contig match
EquivalentAdjacency = true;
FwdMatch++;
}
}
//If the two adjacencies are equivalent, compute a gap penalty
if (EquivalentAdjacency){
//gap computation
int gap1 = Adjacency.get(1).getE().getStart() - Adjacency.get(0).getE().getStop();
int gap2 = FwdAdj.get(1).getE().getStart() - FwdAdj.get(0).getE().getStop();
int gapDiff = Math.abs(gap2-gap1);
double gapDissimilarity = 0;
//determine gap dissimilarity
if (gapDiff > this.GPM.MaxGapLimit){
gapDissimilarity = this.GPM.MaxDissimilarity;
//only for customized
//if (!DisplayMsgAboutInternalMotif)
// DisplayMsgAboutInternalMotif = CheckForInternalPromoter(Adjacency,FwdAdj);
} else if (gapDiff > this.GPM.MinGaplimit){
gapDissimilarity = this.GPM.Mapping.get(gapDiff);
}
//add to running total
ForwardDissimilarity = ForwardDissimilarity + gapDissimilarity;
//reset adjacency
EquivalentAdjacency = false;
//add to hash map
LinkedList<Object> ObjKey;
if (Type.equals("cluster")){
LinkedList<Integer> Key = new LinkedList<Integer>();
Key.add(Adjacency.get(1).getE().getClusterID());
Key.add(Adjacency.get(0).getE().getClusterID());
Collections.sort(Key);
ObjKey = new LinkedList<Object>(Key);
} else {
LinkedList<String> Key = new LinkedList<String>();
Key.add(Adjacency.get(1).getE().getAnnotation().toUpperCase());
Key.add(Adjacency.get(0).getE().getAnnotation().toUpperCase());
Collections.sort(Key);
ObjKey = new LinkedList<Object>(Key);
}
if (FwdAdjHash.get(ObjKey) != null){
int StoredGapDiff = FwdAdjHash.get(ObjKey);
if (gapDiff < StoredGapDiff){
StoredGapDiff = gapDiff;
}
FwdAdjHash.put(ObjKey, StoredGapDiff);
} else{
FwdAdjHash.put(ObjKey, gapDiff);
}
}
}
//check reverse
for (LinkedList<GenomicElementAndQueryMatch> RevAdj : O2AdjacenciesRev){
boolean EquivalentAdjacencyRev = false;
if (Type.equals("annotation")){
if (Adjacency.get(0).getE().getAnnotation().toUpperCase()
.equals(RevAdj.get(0).getE().getAnnotation().toUpperCase()) && //annotation match
Adjacency.get(1).getE().getAnnotation().toUpperCase()
.equals(RevAdj.get(1).getE().getAnnotation().toUpperCase()) && //annotation match
Adjacency.get(0).getE().getContig().equals(Adjacency.get(1).getE().getContig()) && //internal contig match
RevAdj.get(0).getE().getContig().equals(RevAdj.get(1).getE().getContig())){ //internal contig match
EquivalentAdjacencyRev = true;
RevMatch++;
}
} else {
if (Adjacency.get(0).getE().getClusterID() ==
RevAdj.get(0).getE().getClusterID() && //cluster ID match
Adjacency.get(1).getE().getClusterID() ==
RevAdj.get(1).getE().getClusterID() && //cluster ID match
Adjacency.get(0).getE().getContig().equals(Adjacency.get(1).getE().getContig()) && //internal contig match
RevAdj.get(0).getE().getContig().equals(RevAdj.get(1).getE().getContig())){ //internal contig match
EquivalentAdjacencyRev = true;
RevMatch++;
}
}
//If the two adjacencies are equivalent, compute a gap penalty
if (EquivalentAdjacencyRev){
//gap computation
int gap1 = Adjacency.get(1).getE().getStart() - Adjacency.get(0).getE().getStop();
int gap2 = RevAdj.get(0).getE().getStart() - RevAdj.get(1).getE().getStop();
int gapDiff = Math.abs(gap2-gap1);
double gapDissimilarity = 0;
//determine gap dissimilarity
if (gapDiff > this.GPM.MaxGapLimit){
gapDissimilarity = this.GPM.MaxDissimilarity;
//only for customized
//if (!DisplayMsgAboutInternalMotif)
// DisplayMsgAboutInternalMotif = CheckForInternalPromoter(Adjacency,RevAdj);
} else if (gapDiff > this.GPM.MinGaplimit){
gapDissimilarity = this.GPM.Mapping.get(gapDiff);
}
//add to running total
ReverseDissimilarity = ReverseDissimilarity + gapDissimilarity;
//reset adjacency
EquivalentAdjacencyRev = false;
//add to hash map
LinkedList<Object> ObjKey;
if (Type.equals("cluster")){
LinkedList<Integer> Key = new LinkedList<Integer>();
Key.add(Adjacency.get(1).getE().getClusterID());
Key.add(Adjacency.get(0).getE().getClusterID());
Collections.sort(Key);
ObjKey = new LinkedList<Object>(Key);
} else {
LinkedList<String> Key = new LinkedList<String>();
Key.add(Adjacency.get(1).getE().getAnnotation().toUpperCase());
Key.add(Adjacency.get(0).getE().getAnnotation().toUpperCase());
Collections.sort(Key);
ObjKey = new LinkedList<Object>(Key);
}
if (RevAdjHash.get(ObjKey) != null){
int StoredGapDiff = RevAdjHash.get(ObjKey);
if (gapDiff < StoredGapDiff){
StoredGapDiff = gapDiff;
}
RevAdjHash.put(ObjKey, StoredGapDiff);
} else{
RevAdjHash.put(ObjKey,gapDiff);
}
}
}
}
//NEW METHOD Jan 13, 2014
//build dissimilarities from hash maps
//forward
ForwardDissimilarity = 0.0;
for (Integer x : FwdAdjHash.values()){
if (x > this.GPM.MaxGapLimit){
ForwardDissimilarity = ForwardDissimilarity + this.GPM.MaxDissimilarity;
} else {
ForwardDissimilarity = ForwardDissimilarity + this.GPM.Mapping.get(x);
}
}
//reverse
ReverseDissimilarity = 0.0;
for (Integer x : RevAdjHash.values()){
if (x > this.GPM.MaxGapLimit){
ReverseDissimilarity = ReverseDissimilarity + this.GPM.MaxDissimilarity;
} else {
ReverseDissimilarity = ReverseDissimilarity + this.GPM.Mapping.get(x);
}
}
//The proper orientation is the one with more common adjacent pairs with O1.
// if (FwdMatch >= RevMatch){
// Dissimilarity = ForwardDissimilarity;
// } else {
// Dissimilarity = ReverseDissimilarity;
// }
if (!isStrandFlipped){
Dissimilarity = ForwardDissimilarity;
} else {
Dissimilarity = ReverseDissimilarity;
}
//Maximum dissimilarity is 1, minimum is 0.
if (Dissimilarity > 1){
Dissimilarity = 1;
} else if (Dissimilarity <= 0){
Dissimilarity = 0;
}
//debugging
//System.out.println("Dissimilarity: " + Dissimilarity);
return Dissimilarity;
}
//Strandedness
//Scores [0,1] how much the strand (+/-) orientations of the genes common to
//O1 and O2 disagree. "Type" == "annotation" matches genes by case-insensitive
//annotation; any other value matches by cluster ID.
public double SSDissimilarity(LinkedList<GenomicElementAndQueryMatch> O1, LinkedList<GenomicElementAndQueryMatch> O2, String Type){
//Initialize output
double Dissimilarity = 0;
double IndividualDissimilarity = 0;
double WholeGroupDissimilarity = 0;
double TotalRelativeWeights = 0;
//Create special sets for counts
LinkedList<GenomicElementAndQueryMatch> O1Counts = new LinkedList<GenomicElementAndQueryMatch>();
LinkedList<GenomicElementAndQueryMatch> O2Counts = new LinkedList<GenomicElementAndQueryMatch>();
//Add elements found in both sets.
boolean RetainElement;
//from O1
for (GenomicElementAndQueryMatch GandE1 : O1){
RetainElement = false;
for (GenomicElementAndQueryMatch GandE2 : O2){
if (Type.equals("annotation")){
if (GandE1.getE().getAnnotation().toUpperCase().equals(GandE2.getE().getAnnotation().toUpperCase())){
RetainElement = true;
break;
}
} else {
if (GandE1.getE().getClusterID() == GandE2.getE().getClusterID()){
RetainElement = true;
break;
}
}
}
//keep only common elements
if (RetainElement){
O1Counts.add(GandE1);
}
}
//from O2
//BUG FIX: the original loop ignored "Type" here and always tried annotation
//equality first, falling back to cluster IDs only when the annotations
//differed. Now mirrors the O1 loop above.
for (GenomicElementAndQueryMatch GandE2 : O2){
RetainElement = false;
for (GenomicElementAndQueryMatch GandE1 : O1){
if (Type.equals("annotation")){
if (GandE2.getE().getAnnotation().toUpperCase().equals(GandE1.getE().getAnnotation().toUpperCase())){
RetainElement = true;
break;
}
} else {
if (GandE2.getE().getClusterID() == GandE1.getE().getClusterID()){
RetainElement = true;
break;
}
}
}
//keep only common elements
if (RetainElement){
O2Counts.add(GandE2);
}
}
boolean MatchingElement;
int CommonTotal;
int CommonSameStrand = 0;
int CommonDiffStrand = 0;
//Partition the common elements into strand-matched (M) and strand-mismatched (N) lists.
LinkedList<GenomicElementAndQueryMatch> O1CountsM = new LinkedList<GenomicElementAndQueryMatch>();
LinkedList<GenomicElementAndQueryMatch> O2CountsM = new LinkedList<GenomicElementAndQueryMatch>();
LinkedList<GenomicElementAndQueryMatch> O1CountsN = new LinkedList<GenomicElementAndQueryMatch>();
LinkedList<GenomicElementAndQueryMatch> O2CountsN = new LinkedList<GenomicElementAndQueryMatch>();
//sort O1Counts appropriately
for (GenomicElementAndQueryMatch GandE1 : O1Counts){
//find a common element in O2Counts that matches, if possible
MatchingElement = false;
for (GenomicElementAndQueryMatch GandE2 : O2Counts){
if (Type.equals("annotation")){
if (GandE1.getE().getAnnotation().toUpperCase().equals(GandE2.getE().getAnnotation().toUpperCase()) &&
GandE1.getE().getStrand().equals(GandE2.getE().getStrand()) &&
!O1CountsM.contains(GandE1)){
MatchingElement = true;
break;
}
} else {
if (GandE1.getE().getClusterID() == GandE2.getE().getClusterID() &&
GandE1.getE().getStrand().equals(GandE2.getE().getStrand()) &&
!O1CountsM.contains(GandE1)){
MatchingElement = true;
break;
}
}
}
//Update counts
if (MatchingElement){
O1CountsM.add(GandE1);
} else {
O1CountsN.add(GandE1);
}
}
//sort O2Counts appropriately
//(Scanning all of O1 rather than O1Counts is equivalent here: a non-common O1
//element can never satisfy the equality test against a common O2 element.)
for (GenomicElementAndQueryMatch GandE2 : O2Counts){
//find a common element in O1 that matches, if possible
MatchingElement = false;
for (GenomicElementAndQueryMatch GandE1 : O1){
if (Type.equals("annotation")){
if (GandE2.getE().getAnnotation().toUpperCase().equals(GandE1.getE().getAnnotation().toUpperCase()) &&
GandE2.getE().getStrand().equals(GandE1.getE().getStrand()) &&
!O2CountsM.contains(GandE2)){
MatchingElement = true;
break;
}
} else {
if (GandE2.getE().getClusterID() == GandE1.getE().getClusterID() &&
GandE2.getE().getStrand().equals(GandE1.getE().getStrand()) &&
!O2CountsM.contains(GandE2)){
MatchingElement = true;
break;
}
}
}
//Update counts
if (MatchingElement){
O2CountsM.add(GandE2);
} else {
O2CountsN.add(GandE2);
}
}
//Finalize counts
CommonSameStrand = O1CountsM.size();
CommonDiffStrand = O1CountsN.size() + O2CountsN.size();
CommonTotal = CommonSameStrand + CommonDiffStrand;
//BUG FIX: with no common elements the original divided 0/0 and returned NaN.
if (CommonTotal == 0){
return 0;
}
//Dissimilarity for individual changes in strandedness
if (SSIndividualGenes){
IndividualDissimilarity = 1.0 - (Math.max((double) CommonSameStrand, (double) CommonDiffStrand)
/ (double)CommonTotal);
//increment total weights contribution
TotalRelativeWeights = TotalRelativeWeights + SSRelWeightIndGenes;
}
if (SSWholeGroup) {
//whole group is switched
if (CommonDiffStrand > CommonSameStrand ){
WholeGroupDissimilarity = 1.0;
} else {
WholeGroupDissimilarity = 0.0;
}
//increment total weights contribution
TotalRelativeWeights = TotalRelativeWeights + SSRelWeightWholeGroup;
}
//BUG FIX: if neither sub-factor is enabled, the weighted average below would
//divide by zero and return NaN; report no dissimilarity instead.
if (TotalRelativeWeights == 0){
return 0;
}
//compute overall dissimilarity (weighted average of the enabled sub-factors;
//a disabled sub-factor contributes 0)
Dissimilarity = (SSRelWeightIndGenes/TotalRelativeWeights) * IndividualDissimilarity +
(SSRelWeightWholeGroup/TotalRelativeWeights) * WholeGroupDissimilarity;
//clamp to [0,1]
if (Dissimilarity > 1){
Dissimilarity = 1;
} else if (Dissimilarity < 0){
Dissimilarity = 0;
}
return Dissimilarity;
}
//determine if the whole group has flipped or not
//Returns true when, among the genes common to O1 and O2, more are on opposite
//strands than on the same strand. "Type" == "annotation" matches genes by
//case-insensitive annotation; any other value matches by cluster ID.
public boolean TotalGroupStrandFlip(LinkedList<GenomicElementAndQueryMatch> O1, LinkedList<GenomicElementAndQueryMatch> O2, String Type){
//Create special sets for counts
LinkedList<GenomicElementAndQueryMatch> O1Counts = new LinkedList<GenomicElementAndQueryMatch>();
LinkedList<GenomicElementAndQueryMatch> O2Counts = new LinkedList<GenomicElementAndQueryMatch>();
//Add elements found in both sets.
boolean RetainElement;
//from O1
for (GenomicElementAndQueryMatch GandE1 : O1){
RetainElement = false;
for (GenomicElementAndQueryMatch GandE2 : O2){
if (Type.equals("annotation")){
if (GandE1.getE().getAnnotation().toUpperCase().equals(GandE2.getE().getAnnotation().toUpperCase())){
RetainElement = true;
break;
}
} else {
if (GandE1.getE().getClusterID() == GandE2.getE().getClusterID()){
RetainElement = true;
break;
}
}
}
//keep only common elements
if (RetainElement){
O1Counts.add(GandE1);
}
}
//from O2
//BUG FIX: the original loop ignored "Type" here and always tried annotation
//equality first, falling back to cluster IDs only when the annotations
//differed. Now mirrors the O1 loop above (and the matching loops below).
for (GenomicElementAndQueryMatch GandE2 : O2){
RetainElement = false;
for (GenomicElementAndQueryMatch GandE1 : O1){
if (Type.equals("annotation")){
if (GandE2.getE().getAnnotation().toUpperCase().equals(GandE1.getE().getAnnotation().toUpperCase())){
RetainElement = true;
break;
}
} else {
if (GandE2.getE().getClusterID() == GandE1.getE().getClusterID()){
RetainElement = true;
break;
}
}
}
//keep only common elements
if (RetainElement){
O2Counts.add(GandE2);
}
}
boolean MatchingElement;
int CommonSameStrand = 0;
int CommonDiffStrand = 0;
//Partition the common elements into strand-matched (M) and strand-mismatched (N) lists.
LinkedList<GenomicElementAndQueryMatch> O1CountsM = new LinkedList<GenomicElementAndQueryMatch>();
LinkedList<GenomicElementAndQueryMatch> O2CountsM = new LinkedList<GenomicElementAndQueryMatch>();
LinkedList<GenomicElementAndQueryMatch> O1CountsN = new LinkedList<GenomicElementAndQueryMatch>();
LinkedList<GenomicElementAndQueryMatch> O2CountsN = new LinkedList<GenomicElementAndQueryMatch>();
//sort O1Counts appropriately
for (GenomicElementAndQueryMatch GandE1 : O1Counts){
//find a common element in O2Counts that matches, if possible
MatchingElement = false;
for (GenomicElementAndQueryMatch GandE2 : O2Counts){
if (Type.equals("annotation")){
if (GandE1.getE().getAnnotation().toUpperCase().equals(GandE2.getE().getAnnotation().toUpperCase()) &&
GandE1.getE().getStrand().equals(GandE2.getE().getStrand()) &&
!O1CountsM.contains(GandE1)){
MatchingElement = true;
break;
}
} else {
if (GandE1.getE().getClusterID() == GandE2.getE().getClusterID() &&
GandE1.getE().getStrand().equals(GandE2.getE().getStrand()) &&
!O1CountsM.contains(GandE1)){
MatchingElement = true;
break;
}
}
}
//Update counts
if (MatchingElement){
O1CountsM.add(GandE1);
} else {
O1CountsN.add(GandE1);
}
}
//sort O2Counts appropriately
//(Scanning all of O1 rather than O1Counts is equivalent here: a non-common O1
//element can never satisfy the equality test against a common O2 element.)
for (GenomicElementAndQueryMatch GandE2 : O2Counts){
//find a common element in O1 that matches, if possible
MatchingElement = false;
for (GenomicElementAndQueryMatch GandE1 : O1){
if (Type.equals("annotation")){
if (GandE2.getE().getAnnotation().toUpperCase().equals(GandE1.getE().getAnnotation().toUpperCase()) &&
GandE2.getE().getStrand().equals(GandE1.getE().getStrand()) &&
!O2CountsM.contains(GandE2)){
MatchingElement = true;
break;
}
} else {
if (GandE2.getE().getClusterID() == GandE1.getE().getClusterID() &&
GandE2.getE().getStrand().equals(GandE1.getE().getStrand()) &&
!O2CountsM.contains(GandE2)){
MatchingElement = true;
break;
}
}
}
//Update counts
if (MatchingElement){
O2CountsM.add(GandE2);
} else {
O2CountsN.add(GandE2);
}
}
//Finalize counts
CommonSameStrand = O1CountsM.size();
CommonDiffStrand = O1CountsN.size() + O2CountsN.size();
//whole group is switched when mismatches dominate
return CommonDiffStrand > CommonSameStrand;
}
//Total Dissimilarity
//Amalgamates the enabled factor dissimilarities (CG, CM, GO, GG, SS) into a
//single value, either as a weighted average ("Linear") or via the
//importance-ranked scale-hierarchy scheme. Empty-set comparisons override the
//computed value: empty vs empty -> 0, empty vs non-empty -> 1.
public double TotalDissimilarity(LinkedList<GenomicElementAndQueryMatch> G1, LinkedList<GenomicElementAndQueryMatch> G2, String T){
//Linear Scale
if (AmalgamationType.equals("Linear")){
//determine total weight, to scale by.
Double AllProvidedWeights = 0.0;
//initialize values (disabled factors contribute 0)
Double CGContribution = 0.0;
Double CMContribution = 0.0;
Double GOContribution = 0.0;
Double GGContribution = 0.0;
Double SSContribution = 0.0;
//Determine Factors
if (Factors.contains("CG")){
AllProvidedWeights = AllProvidedWeights + CGWeight;
CGContribution = CGDissimilarity(G1,G2,T);
}
if (Factors.contains("CM")){
AllProvidedWeights = AllProvidedWeights + CMWeight;
CMContribution = ComputeCMContribution(G1,G2,T);
}
if (Factors.contains("GO")){
AllProvidedWeights = AllProvidedWeights + GOWeight;
GOContribution = GODissimilarity(G1,G2,T);
}
if (Factors.contains("GG")){
AllProvidedWeights = AllProvidedWeights + GGWeight;
GGContribution = GGDissimilarity(G1,G2,T);
}
if (Factors.contains("SS")){
AllProvidedWeights = AllProvidedWeights + SSWeight;
SSContribution = SSDissimilarity(G1,G2,T);
}
//ROBUSTNESS FIX: with no factors selected, the original divided by zero and
//returned NaN (unless the empty-set override applied).
if (AllProvidedWeights == 0.0){
return AdjustForEmptySets(0.0, G1, G2);
}
//weighted average.
double TotalContribution =
(CGWeight/AllProvidedWeights) * CGContribution +
(CMWeight/AllProvidedWeights) * CMContribution +
(GOWeight/AllProvidedWeights) * GOContribution +
(GGWeight/AllProvidedWeights) * GGContribution +
(SSWeight/AllProvidedWeights) * SSContribution;
return AdjustForEmptySets(TotalContribution, G1, G2);
} else {
/*
* Scale-hierarchy ensures that a dissimilarity contribution of lower importance
* never overtakes a contribution of higher importance.
*
* Individual dissimilarities are computed, and factors are truncated at their maximum
* allowed fraction of the next ranking:
*
* Dissimilarity From Lower Importance
* <= (Dissimilarity from Higher Importance * Importance Fraction)
*/
Double TotalContribution = 0.0;
LinkedList<ImportanceMapping> ImpMapping = new LinkedList<ImportanceMapping>();
//Determine Factors
if (Factors.contains("CG")){
ImportanceMapping IM = new ImportanceMapping();
IM.FactorType = "CG";
IM.Dissimilarity = CGDissimilarity(G1,G2,T);
IM.Importance = CGImportance;
ImpMapping.add(IM);
}
if (Factors.contains("CM")){
ImportanceMapping IM = new ImportanceMapping();
IM.FactorType = "CM";
IM.Dissimilarity = ComputeCMContribution(G1,G2,T);
IM.Importance = CMImportance;
ImpMapping.add(IM);
}
if (Factors.contains("GO")){
ImportanceMapping IM = new ImportanceMapping();
IM.FactorType = "GO";
IM.Dissimilarity = GODissimilarity(G1,G2,T);
IM.Importance = GOImportance;
ImpMapping.add(IM);
}
if (Factors.contains("GG")){
ImportanceMapping IM = new ImportanceMapping();
IM.FactorType = "GG";
IM.Dissimilarity = GGDissimilarity(G1,G2,T);
IM.Importance = GGImportance;
ImpMapping.add(IM);
}
if (Factors.contains("SS")){
ImportanceMapping IM = new ImportanceMapping();
IM.FactorType = "SS";
IM.Dissimilarity = SSDissimilarity(G1,G2,T);
IM.Importance = SSImportance;
ImpMapping.add(IM);
}
//sort importance mapping by ascending Importance value
Collections.sort(ImpMapping, new IMComparator());
//ROBUSTNESS FIX: the original threw NoSuchElementException when no factors
//were selected.
if (ImpMapping.isEmpty()){
return AdjustForEmptySets(0.0, G1, G2);
}
if (ImpMapping.size() > 1){
TotalContribution = ImpMapping.get(0).Dissimilarity;
for (int i = 0; i < ImpMapping.size()-1; i++){
//check for different levels
//NOTE(review): a factor sharing an importance level with its predecessor is
//skipped entirely (never added to the total) — behavior preserved from the
//original; confirm this is intended.
if (ImpMapping.get(i).Importance != ImpMapping.get(i+1).Importance){
if (ImpMapping.get(i).Dissimilarity * ImportanceFraction
< ImpMapping.get(i+1).Dissimilarity){
ImpMapping.get(i+1).Dissimilarity = ImpMapping.get(i).Dissimilarity * ImportanceFraction;
}
TotalContribution = TotalContribution + ImpMapping.get(i+1).Dissimilarity;
}
}
//adjust contribution by size
TotalContribution = TotalContribution / (double)ImpMapping.size();
} else {
TotalContribution = ImpMapping.getFirst().Dissimilarity;
}
return AdjustForEmptySets(TotalContribution, G1, G2);
}
}
//Helper: CM factor dissimilarity, honoring the operon head/tail exclusion flags.
//(This logic was previously duplicated verbatim in both amalgamation branches.)
private double ComputeCMContribution(LinkedList<GenomicElementAndQueryMatch> G1, LinkedList<GenomicElementAndQueryMatch> G2, String T){
if (CMExcludeOperonHead){
return CMDissimilarity(WithoutOperonHead(G1), WithoutOperonHead(G2), T);
} else if (CMExcludeOperonTail){
return CMDissimilarity(OperonHeadOnly(G1), OperonHeadOnly(G2), T);
} else {
return CMDissimilarity(G1, G2, T);
}
}
//Helper: copy of G with the operon head gene removed. The head is the first
//element for a (+)-stranded group, the last element otherwise; single-gene
//groups are returned unchanged.
private LinkedList<GenomicElementAndQueryMatch> WithoutOperonHead(LinkedList<GenomicElementAndQueryMatch> G){
LinkedList<GenomicElementAndQueryMatch> Gmod = new LinkedList<GenomicElementAndQueryMatch>(G);
if (Gmod.size() > 1){
if (Gmod.get(0).getE().getStrand().equals(Strand.POSITIVE)){
Gmod.remove(0);
} else {
Gmod.remove(Gmod.size()-1);
}
}
return Gmod;
}
//Helper: list containing only the operon head gene (tail exclusion).
//NOTE(review): for groups of size <= 1 this returns an EMPTY list, matching
//the original inline code — confirm that is intended for single-gene operons.
private LinkedList<GenomicElementAndQueryMatch> OperonHeadOnly(LinkedList<GenomicElementAndQueryMatch> G){
LinkedList<GenomicElementAndQueryMatch> Gmod = new LinkedList<GenomicElementAndQueryMatch>();
if (G.size() > 1){
if (G.get(0).getE().getStrand().equals(Strand.POSITIVE)){
Gmod.add(G.get(0));
} else {
Gmod.add(G.get(G.size()-1));
}
}
return Gmod;
}
//Helper: empty-set override. Empty vs empty yields maximum similarity (0);
//empty vs non-empty yields maximum dissimilarity (1); otherwise the computed
//value passes through untouched.
private double AdjustForEmptySets(double Value, LinkedList<GenomicElementAndQueryMatch> G1, LinkedList<GenomicElementAndQueryMatch> G2){
if (G1.size() == 0 && G2.size() == 0){
return 0.0;
}
if (G1.size() == 0 || G2.size() == 0){
return 1.0;
}
return Value;
}
//Returns the display name of this dissimilarity measure.
public String getName() {
return this.Name;
}
//Sets the display name of this dissimilarity measure.
public void setName(String name) {
this.Name = name;
}
//Returns the importance fraction used by the scale-hierarchy amalgamation.
public Double getImportanceFraction() {
return this.ImportanceFraction;
}
//Sets the importance fraction used by the scale-hierarchy amalgamation.
public void setImportanceFraction(Double importanceFraction) {
this.ImportanceFraction = importanceFraction;
}
// ------- Extra methods -----------//
//a customized method for use only for 80 halophiles/operon search stuff.
//Returns true when the adjacency with the wider intergenic gap also carries
//strictly more associated motifs on its strand-relevant gene than the other
//adjacency does. The original four strand-combination branches differed only
//in which element's motif list is consulted: index 1 for a (+)-stranded
//adjacency, index 0 for a (-)-stranded one; the gap comparison was identical
//in every branch.
public boolean CheckForInternalPromoter(LinkedList<GenomicElementAndQueryMatch> Adj1, LinkedList<GenomicElementAndQueryMatch> Adj2){
//pick the motif-bearing gene for each adjacency based on its strand
int MotifIdx1 = Adj1.get(0).getE().getStrand().equals(Strand.POSITIVE) ? 1 : 0;
int MotifIdx2 = Adj2.get(0).getE().getStrand().equals(Strand.POSITIVE) ? 1 : 0;
int MotifCount1 = Adj1.get(MotifIdx1).getE().getAssociatedMotifs().size();
int MotifCount2 = Adj2.get(MotifIdx2).getE().getAssociatedMotifs().size();
//gap widths, computed the same way regardless of strand (as in the original)
int Gap1 = Adj1.get(1).getE().getStart() - Adj1.get(0).getE().getStop();
int Gap2 = Adj2.get(1).getE().getStart() - Adj2.get(0).getE().getStop();
//the widened adjacency must have strictly more motifs than the other;
//ties in gap width fall to the "Adj2 widened" comparison, as before
if (Gap1 > Gap2){
return MotifCount1 > MotifCount2;
} else {
return MotifCount1 < MotifCount2;
}
}
//new class
public class ImportanceMapping {
public String FactorType;
public double Dissimilarity;
public int Importance;
}
public class IMComparator implements Comparator<ImportanceMapping>{
@Override
public int compare(ImportanceMapping IM1, ImportanceMapping IM2) {
return IM1.Importance - IM2.Importance;
}
}
}