/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* HierarchicalClusterer.java
* Copyright (C) 2009 University of Waikato, Hamilton, New Zealand
*/
/**
<!-- globalinfo-start -->
* Hierarchical clustering class.
* Implements a number of classic hierarchical clustering methods.
<!-- globalinfo-end -->
*
<!-- options-start -->
* Valid options are: <p/>
*
* <pre> -N
* number of clusters
* </pre>
*
*
* <pre> -L
* Link type (Single, Complete, Average, Mean, Centroid, Ward, Adjusted complete)
* [SINGLE|COMPLETE|AVERAGE|MEAN|CENTROID|WARD|ADJCOMLPETE]
* </pre>
*
* <pre> -A
* Distance function to use. (default: weka.core.EuclideanDistance)
* </pre>
*
* <pre> -P
* Print hierarchy in Newick format, which can be used for display in other programs.
* </pre>
*
*<!-- options-end -->
*
*
* @author Remco Bouckaert (rrb@xm.co.nz, remco@cs.waikato.ac.nz)
* @version $Revision: 1 $
*/
package viz;
import java.util.Arrays;
import java.util.Comparator;
import java.util.PriorityQueue;
import java.util.Vector;
import java.util.List;
import java.util.ArrayList;
public class NodeOrderer {
/** constants for ordering algorithms.
 * None of these three values is referenced by the code of this class
 * (presumably they are interpreted by callers elsewhere - TODO confirm). **/
public final static int GEOINFO = -5;
public final static int MANUAL = -4;
public final static int DEFAULT = -3;
/** the various link types, i.e. the values stored in m_nLinkType */
// CLOSEST_OUTSIDE_FIRST and CLOSEST_FIRST make calcOrder use the greedy
// closestOutsideFirst / closestFirst orderings instead of clustering.
public final static int CLOSEST_OUTSIDE_FIRST = -2;
public final static int CLOSEST_FIRST = -1;
// SINGLE, COMPLETE, AVERAGE, MEAN and ADJCOMLPETE select the cluster distance
// computed by getDistance during hierarchical clustering.
public final static int SINGLE = 0;
public final static int COMPLETE = 1;
public final static int AVERAGE = 2;
public final static int MEAN = 3;
// CENTROID and WARD have no active case in getDistance, which therefore
// returns Double.MAX_VALUE for them.
public final static int CENTROID = 4;
public final static int WARD = 5;
// "adjusted complete" link; the identifier is misspelt (ADJCOMLPETE) but it is
// a public constant, so the spelling has to stay as it is.
public final static int ADJCOMLPETE = 6;
// OPTIMISE: calcOrder takes the leaf order of the root canal tree as is (traverse).
public final static int OPTIMISE = 7;
// SORT_BY_ROOT_CANAL_LENGTH: as OPTIMISE, but at every internal node the child
// with the larger m_fLength is visited first (traverse2).
public final static int SORT_BY_ROOT_CANAL_LENGTH = 8;
/** calc y-coordinate by meta data info.
* ALL = all paths from root to leafs
* SUM = sum over all paths from root to leafs
* AVERAGE = average over all paths
* (not referenced by the code of this class) **/
public final static int META_ALL = 100;
public final static int META_SUM = 101;
public final static int META_AVERAGE = 102;
/**
* Holds the link type used to calculate the distance between clusters;
* calcOrder also uses it to choose the ordering strategy. Set by the constructor.
*/
int m_nLinkType = SINGLE;
/**
 * Creates an orderer for the given strategy.
 *
 * @param nLinkType one of the link type / ordering constants of this class;
 *        stored in m_nLinkType and consulted by calcOrder and getDistance
 */
public NodeOrderer(int nLinkType) {
    this.m_nLinkType = nLinkType;
} // c'tor
/**
 * Calculates a 'good' order of the leaf nodes for a set of trees.
 *
 * A distance matrix between labels is accumulated from the consensus trees
 * (see calcDistance); the strategy selected by m_nLinkType then turns it into
 * an order:
 * <ul>
 * <li>CLOSEST_OUTSIDE_FIRST / CLOSEST_FIRST: greedy orderings of the matrix,</li>
 * <li>OPTIMISE: leaf order of rootCanalTree as found by traverse,</li>
 * <li>SORT_BY_ROOT_CANAL_LENGTH: leaf order of rootCanalTree as found by traverse2,</li>
 * <li>anything else: hierarchical clustering (buildClusterer).</li>
 * </ul>
 * The resulting order and its score are printed to System.out.
 *
 * @param nNrOfLabels total nr of labels in a tree
 * @param trees complete set of trees (not used by this method)
 * @param cTrees consensus trees
 * @param rootCanalTree tree whose leaf order is used for OPTIMISE and
 *        SORT_BY_ROOT_CANAL_LENGTH; not touched for other link types
 * @param fTreeWeight weight of consensus trees, one entry per element of cTrees
 * @param clades not used by this method (only by the disabled optimise step)
 * @param cladeWeights not used by this method (only by the disabled optimise step)
 * @return array mapping each label to its position in [0, ... ,nNrOfLabels-1]
 * @throws Exception if clustering fails
 */
public int [] calcOrder(int nNrOfLabels, Node [] trees, Node [] cTrees, Node rootCanalTree, float [] fTreeWeight,
        List<int[]> clades, List<Double> cladeWeights) throws Exception {
    double [][] fDist = new double[nNrOfLabels][nNrOfLabels];
    for (int i = 0; i < cTrees.length; i++) {
        calcDistance(cTrees[i], fDist, fTreeWeight[i], new Vector<Integer>(), new Vector<Float>());
    }

    // nRevOrder maps position -> label, nOrder maps label -> position
    int [] nOrder = null;
    int [] nRevOrder = null;
    switch (m_nLinkType) {
    case CLOSEST_OUTSIDE_FIRST:
        nRevOrder = closestOutsideFirst(fDist);
        break;
    case CLOSEST_FIRST:
        nRevOrder = closestFirst(fDist);
        break;
    case OPTIMISE:
        // The order comes from the root canal tree alone. Previously the whole
        // clustering was run first and its result thrown away.
        nRevOrder = new int[fDist.length];
        traverse(rootCanalTree, nRevOrder, 0);
        break;
    case SORT_BY_ROOT_CANAL_LENGTH:
        nRevOrder = new int[fDist.length];
        traverse2(rootCanalTree, nRevOrder, 0);
        break;
    default:
        nOrder = buildClusterer(fDist);
        break;
    }

    if (nRevOrder != null) {
        // invert position -> label into label -> position
        nOrder = new int[nNrOfLabels];
        for (int i = 0; i < nNrOfLabels ; i++) {
            nOrder[nRevOrder[i]] = i;
        }
    }

    System.out.print("nOrder: ");
    for (int i = 0; i < nNrOfLabels ; i++) {
        System.out.print(nOrder[i] + " | ");
    }
    System.out.println("\nfinal score = " + score(nOrder, fDist));
    return nOrder;
}
/**
 * Writes the labels (m_iLabel) of the leafs below node into nOrder, left
 * subtree before right subtree, starting at index i.
 *
 * @param node root of the (sub)tree to walk
 * @param nOrder receives the leaf labels in order of appearance
 * @param i first free index in nOrder
 * @return next free index in nOrder
 */
private int traverse(Node node, int[] nOrder, int i) {
    if (!node.isLeaf()) {
        int iAfterLeft = traverse(node.m_left, nOrder, i);
        return traverse(node.m_right, nOrder, iAfterLeft);
    }
    nOrder[i] = node.m_iLabel;
    return i + 1;
}
/**
 * As traverse, but at every internal node the child with the strictly larger
 * m_fLength is visited first; when the left child is not larger (including
 * ties) the right child goes first.
 *
 * @param node root of the (sub)tree to walk
 * @param nOrder receives the leaf labels in order of appearance
 * @param i first free index in nOrder
 * @return next free index in nOrder
 */
private int traverse2(Node node, int[] nOrder, int i) {
    if (node.isLeaf()) {
        nOrder[i] = node.m_iLabel;
        return i + 1;
    }
    final boolean bLeftFirst = node.m_left.m_fLength > node.m_right.m_fLength;
    final Node first = bLeftFirst ? node.m_left : node.m_right;
    final Node second = bLeftFirst ? node.m_right : node.m_left;
    return traverse2(second, nOrder, traverse2(first, nOrder, i));
}
// private void optimise(int[] nOrder, int[] nRevOrder, List<int[]> clades, List<Double> cladeWeights, double [][] fDistance) {
// double fSumWeight = 0;
// int nTaxa = nOrder.length;
// for (int i = nTaxa; i < clades.size(); i++) {
// fSumWeight += cladeWeights.get(i);
// }
// Random rand = new Random(1);//System.currentTimeMillis());
// double fScore = score(nOrder, fDistance);
// int [] nNewOrder = new int[nOrder.length];
// int [] nNewRevOrder = new int[nOrder.length];
// System.out.println("Start score: " + fScore);
// for (int i = 0; i < 10000; i++) {
// double fWeight = fSumWeight * rand.nextDouble();
// int iClade = nTaxa;
// while(fWeight > cladeWeights.get(iClade)) {
// fWeight -= cladeWeights.get(iClade);
// iClade++;
// }
//
// // rotate clade
// System.arraycopy(nOrder, 0, nNewOrder, 0, nTaxa);
// int [] clade = clades.get(iClade);
// boolean [] bClade = new boolean[nTaxa];
// for (int j = 0; j < clade.length; j++) {
// bClade[nRevOrder[j]] = true;
// }
// int k = 0, e = nTaxa - 1;
// while (!bClade[k]) {
// k++;
// }
// while (!bClade[e]) {
// e--;
// }
// while (k < e) {
// int tmp = nNewOrder[k];
// nNewOrder[k] = nNewOrder[e];
// nNewOrder[e] = tmp;
// k++; e--;
// }
// for (int j = 0; j < nRevOrder.length; j++) {
// nNewRevOrder[nNewOrder[j]] = j;
// }
//
// // accept new order if it is doing better
// double fNewScore = score(nOrder, fDistance);
// if (fScore > fNewScore) {
// fScore = fNewScore;
// System.arraycopy(nNewOrder, 0, nOrder, 0, nTaxa);
//// double fScore2 = score(nRevOrder, fDistance);
// for (int j = 0; j < nRevOrder.length; j++) {
// nRevOrder[nOrder[j]] = j;
// }
// }
// }
//
// double fScore2 = score(nOrder, fDistance);
// System.out.println("End score: " + fScore + " " + fScore2);
// }
/**
 * Priority queue entry describing a candidate merge of two clusters: their
 * distance, their indices and the sizes both clusters had when the entry was
 * created. buildClusterer compares the recorded sizes with the current ones to
 * recognise entries that have become outdated.
 */
class Tuple {
    /** distance between the two clusters */
    double m_fDist;
    /** index of the first cluster */
    int m_iCluster1;
    /** index of the second cluster */
    int m_iCluster2;
    /** size of the first cluster when this entry was created */
    int m_nClusterSize1;
    /** size of the second cluster when this entry was created */
    int m_nClusterSize2;

    public Tuple(double fDist, int iCluster1, int iCluster2, int nClusterSize1, int nClusterSize2) {
        this.m_fDist = fDist;
        this.m_iCluster1 = iCluster1;
        this.m_iCluster2 = iCluster2;
        this.m_nClusterSize1 = nClusterSize1;
        this.m_nClusterSize2 = nClusterSize2;
    }
}
/** Orders Tuples by ascending distance, so the priority queue hands out the closest pair first. **/
class TupleComparator implements Comparator<Tuple> {
    @Override
    public int compare(Tuple o1, Tuple o2) {
        final double fFirst = o1.m_fDist;
        final double fSecond = o2.m_fDist;
        // plain < and == on purpose: anything that is neither smaller nor equal
        // (including NaN) counts as larger
        return fFirst < fSecond ? -1 : (fFirst == fSecond ? 0 : 1);
    }
} // class TupleComparator
/** class representing node in cluster hierarchy **/
class TreeNode {
TreeNode m_left;
TreeNode m_right;
TreeNode m_parent;
int m_iLeftInstance;
int m_iRightInstance;
double m_height = 0;
int m_iLabel = -1;
/** fill order array by traversal through tree
* and using leaf id in order of appearance **/
int order(int [] order, int i) {
if (m_left == null) {
order[m_iLeftInstance] = i++;
} else {
i = m_left.order(order, i);
}
if (m_right == null) {
order[m_iRightInstance] = i++;
} else {
i = m_right.order(order, i);
}
return i;
}
int label(int i) {
if (m_left == null) {
return i;
} else {
i = m_left.label(i) + 1;
m_iLabel = i+1;
}
if (m_right == null) {
return i;
} else {
i = m_right.label(i) + 1;
m_iLabel = i+1;
}
return i;
}
} // class TreeNode
/**
 * Orders nodes by starting with the two closest nodes and then repeatedly
 * adding, at the left or the right end, the unplaced node that is closest to
 * the current left-most or right-most node respectively, till all nodes are
 * ordered.
 *
 * For fewer than two nodes the (empty or single element) order is returned
 * directly; previously this case failed with an ArrayIndexOutOfBoundsException.
 *
 * @param fDist square distance matrix between nodes
 * @return array holding, for every position, the index of the node placed there
 */
int [] closestOutsideFirst(double [][] fDist) {
    final int n = fDist.length;
    int [] nOrder = new int[n];
    if (n < 2) {
        // nothing to choose: {} for no nodes, {0} for a single node
        return nOrder;
    }
    boolean [] bDone = new boolean[n];

    // find the closest pair
    int iFirst = 0;
    int iSecond = 1;
    double fMin = fDist[0][1];
    for (int i = 0; i < n; i++) {
        for (int j = i + 1; j < n; j++) {
            if (fDist[i][j] < fMin) {
                fMin = fDist[i][j];
                iFirst = i;
                iSecond = j;
            }
        }
    }
    nOrder[0] = iFirst;
    nOrder[1] = iSecond;
    bDone[iFirst] = true;
    bDone[iSecond] = true;

    // find the order of remaining nodes; positions 0..k-1 are filled
    for (int k = 2; k < n; k++) {
        final int iHead = nOrder[0];
        final int iTail = nOrder[k - 1];
        int iEnd = -1;   // position of the end the best candidate attaches to
        int iBest = -1;  // best unplaced node so far
        fMin = Double.MAX_VALUE;
        for (int i = 0; i < n; i++) {
            if (bDone[i]) {
                continue;
            }
            if (fDist[iTail][i] < fMin) {
                fMin = fDist[iTail][i];
                iEnd = k - 1;
                iBest = i;
            }
            if (fDist[iHead][i] < fMin) {
                fMin = fDist[iHead][i];
                iEnd = 0;
                iBest = i;
            }
        }
        if (iEnd == k - 1) {
            // append on the right
            nOrder[k] = iBest;
            bDone[iBest] = true;
        } else if (iEnd == 0) {
            // make room on the left by shifting the placed nodes one position up
            System.arraycopy(nOrder, 0, nOrder, 1, k);
            nOrder[0] = iBest;
            bDone[iBest] = true;
        } else {
            // no unplaced node had a distance below Double.MAX_VALUE
            System.err.println("Something's wrong");
        }
    }
    return nOrder;
} // closestOutsideFirst
/**
 * As closestOutsideFirst, but trying to match with *all* nodes that are
 * already ordered: the unplaced node closest to any placed node is inserted
 * directly before or directly after that node. This should be more reasonable
 * than closestOutsideFirst but does not appear to be so in practice (bug???)
 *
 * The side is chosen by comparing the summed distances between neighbours for
 * both alternatives; the distance between the new node and its nearest placed
 * node occurs in both sums and is left out of the comparison.
 *
 * For fewer than two nodes the (empty or single element) order is returned
 * directly; previously this case failed with an ArrayIndexOutOfBoundsException.
 *
 * @param fDist square distance matrix between nodes
 * @return array holding, for every position, the index of the node placed there
 */
int [] closestFirst(double [][] fDist) {
    final int n = fDist.length;
    int [] nOrder = new int[n];
    if (n < 2) {
        // nothing to choose: {} for no nodes, {0} for a single node
        return nOrder;
    }
    boolean [] bDone = new boolean[n];

    // find the closest pair
    int iFirst = 0;
    int iSecond = 1;
    double fMin = fDist[0][1];
    for (int i = 0; i < n; i++) {
        for (int j = i + 1; j < n; j++) {
            if (fDist[i][j] < fMin) {
                fMin = fDist[i][j];
                iFirst = i;
                iSecond = j;
            }
        }
    }
    nOrder[0] = iFirst;
    nOrder[1] = iSecond;
    bDone[iFirst] = true;
    bDone[iSecond] = true;

    // find the order of remaining nodes; positions 0..k-1 are filled
    for (int k = 2; k < n; k++) {
        int iNew = -1;  // unplaced node closest to any placed node
        int iPos = -1;  // position of that placed node, later the insert position
        fMin = Double.MAX_VALUE;
        for (int j = 0; j < k; j++) {
            final double [] fRow = fDist[nOrder[j]];
            for (int i = 0; i < n; i++) {
                if (!bDone[i] && fRow[i] < fMin) {
                    fMin = fRow[i];
                    iNew = i;
                    iPos = j;
                }
            }
        }

        // Inserting at iPos puts the new node before its nearest neighbour,
        // inserting at iPos + 1 puts it behind.
        if (iPos == 0) {
            // new, o0, o1  versus  o0, new, o1
            if (fDist[nOrder[0]][nOrder[1]] > fDist[iNew][nOrder[1]]) {
                iPos++;
            }
        } else if (iPos == k - 1) {
            // o(k-2), new, o(k-1)  versus  o(k-2), o(k-1), new
            if (fDist[nOrder[k - 2]][iNew] > fDist[nOrder[k - 2]][nOrder[k - 1]]) {
                iPos++;
            }
        } else if (fDist[nOrder[iPos - 1]][iNew] + fDist[nOrder[iPos]][nOrder[iPos + 1]]
                > fDist[nOrder[iPos - 1]][nOrder[iPos]] + fDist[iNew][nOrder[iPos + 1]]) {
            // o(p-1), new, o(p), o(p+1)  versus  o(p-1), o(p), new, o(p+1)
            iPos++;
        }

        // shift positions iPos..k-1 one up and drop the new node into the gap
        System.arraycopy(nOrder, iPos, nOrder, iPos + 1, k - iPos);
        nOrder[iPos] = iNew;
        bDone[iNew] = true;
    }
    return nOrder;
} // closestFirst
/**
 * Adds the leaf-to-leaf distances found in one consensus tree to the distance
 * matrix, weighted with the relative frequency of the tree.
 *
 * For every internal node, each leaf of the left sub tree is paired with each
 * leaf of the right sub tree; the pair's contribution is fWeight times the sum
 * of both recorded lengths. Every node on the way adds its m_fLength plus 1 to
 * the recorded lengths.
 *
 * @param node current node, a null node is ignored
 * @param fDistMatrix distance matrix to be updated (both [a][b] and [b][a])
 * @param fWeight relative consensus tree frequency
 * @param iLabel used to report the set of leafs (node.getNr()) in the sub tree below node
 * @param fLength used to report the lengths belonging to the leafs in iLabel, in the same order
 */
void calcDistance(Node node, double[][] fDistMatrix, double fWeight, Vector<Integer> iLabel, Vector<Float> fLength) {
    if (node == null) {
        return;
    }
    if (node.isLeaf()) {
        iLabel.add(node.getNr());
        fLength.add(node.m_fLength + 1.0f);
        return;
    }

    Vector<Integer> iLeft = new Vector<Integer>();
    Vector<Float> fLeft = new Vector<Float>();
    calcDistance(node.m_left, fDistMatrix, fWeight, iLeft, fLeft);
    Vector<Integer> iRight = new Vector<Integer>();
    Vector<Float> fRight = new Vector<Float>();
    calcDistance(node.m_right, fDistMatrix, fWeight, iRight, fRight);

    // every left leaf meets every right leaf at this node
    final int nLeft = iLeft.size();
    final int nRight = iRight.size();
    for (int l = 0; l < nLeft; l++) {
        final int iLeftLeaf = iLeft.elementAt(l);
        final double fLeftPart = fWeight * fLeft.elementAt(l);
        for (int r = 0; r < nRight; r++) {
            final int iRightLeaf = iRight.elementAt(r);
            final double fPair = fLeftPart + fWeight * fRight.elementAt(r);
            fDistMatrix[iLeftLeaf][iRightLeaf] += fPair;
            fDistMatrix[iRightLeaf][iLeftLeaf] += fPair;
        }
    }

    // report all leafs below this node, left ones first, with extended lengths
    iLabel.addAll(iLeft);
    for (Float fLeafLength : fLeft) {
        fLength.add(fLeafLength + node.m_fLength + 1.0f);
    }
    iLabel.addAll(iRight);
    for (Float fLeafLength : fRight) {
        fLength.add(fLeafLength + node.m_fLength + 1.0f);
    }
} // calcDistance
/**
 * Performs one of the classical hierarchical clustering methods on a distance
 * matrix. The resulting hierarchy is used to report an ordering on the nodes,
 * which is then improved greedily by flipping the children of single
 * hierarchy nodes as long as that lowers the score.
 *
 * Fewer than two instances are returned as they are (empty array or {0});
 * previously this case failed with an IllegalArgumentException from the
 * PriorityQueue constructor, which rejects an initial capacity below 1.
 *
 * @param fDistance0 square distance matrix between instances
 * @return array holding for every instance its position in the order
 * @throws Exception declared for callers; an IllegalStateException is raised
 *         if no pair of clusters is left to merge
 */
@SuppressWarnings("unchecked")
public int [] buildClusterer(double [][] fDistance0) throws Exception {
    final int nInstances = fDistance0.length;
    if (nInstances < 2) {
        // nothing to merge and nothing to flip
        return new int[nInstances];
    }

    // use array of integer vectors to store cluster indices,
    // starting with one cluster per instance
    Vector<Integer> [] nClusterID = new Vector[nInstances];
    for (int i = 0; i < nInstances; i++) {
        nClusterID[i] = new Vector<Integer>();
        nClusterID[i].add(i);
    }

    // queue of candidate merges, closest pair first
    PriorityQueue<Tuple> queue = new PriorityQueue<Tuple>(nInstances * nInstances / 2, new TupleComparator());
    for (int i = 0; i < nInstances; i++) {
        for (int j = i + 1; j < nInstances; j++) {
            queue.add(new Tuple(fDistance0[i][j], i, j, 1, 1));
        }
    }

    // used for keeping track of hierarchy
    TreeNode [] clusterNodes = new TreeNode[nInstances];
    for (int nClusters = nInstances; nClusters > 1; nClusters--) {
        // Use priority queue to find next best pair to cluster. Entries whose
        // recorded sizes no longer match the clusters refer to a cluster that
        // has been merged since and are skipped.
        Tuple t;
        do {
            t = queue.poll();
        } while (t != null && (nClusterID[t.m_iCluster1].size() != t.m_nClusterSize1
                || nClusterID[t.m_iCluster2].size() != t.m_nClusterSize2));
        if (t == null) {
            throw new IllegalStateException("No pair of clusters left to merge");
        }
        final int iMin1 = t.m_iCluster1;
        final int iMin2 = t.m_iCluster2;

        // merge clusters: the second one is emptied into the first
        nClusterID[iMin1].addAll(nClusterID[iMin2]);
        nClusterID[iMin2].removeAllElements();

        // queue the distances of the merged cluster to all remaining clusters
        for (int i = 0; i < nInstances; i++) {
            if (i != iMin1 && nClusterID[i].size() > 0) {
                int i1 = Math.min(iMin1, i);
                int i2 = Math.max(iMin1, i);
                double fDistance = getDistance(fDistance0, nClusterID[i1], nClusterID[i2]);
                queue.add(new Tuple(fDistance, i1, i2, nClusterID[i1].size(), nClusterID[i2].size()));
            }
        }

        // track hierarchy
        TreeNode node = new TreeNode();
        if (clusterNodes[iMin1] == null) {
            node.m_iLeftInstance = iMin1;
            node.m_height = 1;
        } else {
            node.m_left = clusterNodes[iMin1];
            clusterNodes[iMin1].m_parent = node;
            node.m_height = clusterNodes[iMin1].m_height + 1;
        }
        if (clusterNodes[iMin2] == null) {
            node.m_iRightInstance = iMin2;
            node.m_height = Math.max(1, node.m_height);
        } else {
            node.m_right = clusterNodes[iMin2];
            clusterNodes[iMin2].m_parent = node;
            node.m_height = Math.max(clusterNodes[iMin2].m_height + 1, node.m_height);
        }
        clusterNodes[iMin1] = node;
    }

    // collect hierarchy: the only cluster that is not empty holds the root
    TreeNode cluster = null;
    for (int i = 0; i < nInstances; i++) {
        if (nClusterID[i].size() > 0) {
            cluster = clusterNodes[i];
            break;
        }
    }

    // optimise order
    int [] order = new int[nInstances];
    cluster.label(1);
    cluster.order(order, 0);
    double fScore = score(order, fDistance0);
    boolean bProgress;
    do {
        bProgress = false;
        // orderings.get(0) is the current order, orderings.get(i) for i > 0 is
        // the order after flipping the node with label label.get(i-1)
        List<Integer> label = new ArrayList<Integer>();
        List<List<Integer>> orderings = calcOrderings(cluster, label);

        // find best node to flip, i.e. node that gives the best score
        double fBestScore = fScore;
        int iBestNode = -1;
        for (int i = 1; i < orderings.size(); i++) {
            double fScore2 = score2(orderings.get(i), fDistance0);
            if (fScore2 < fBestScore) {
                fBestScore = fScore2;
                iBestNode = label.get(i - 1);
            }
        }

        // flip left and right on iBestNode
        if (iBestNode >= 0) {
            flip(iBestNode, cluster);
            fScore = fBestScore;
            bProgress = true;
            cluster.order(order, 0);
            double fScore2 = score(order, fDistance0);
            System.out.println(Arrays.toString(order) + " " + fScore + " " + fScore2);
        }
    } while (bProgress);

    cluster.order(order, 0);
    return order;
} // buildClusterer
/**
 * Swaps the left and right side (child reference and instance index) of every
 * node below and including node whose m_iLabel equals iBestNode.
 *
 * @param iBestNode label of the node(s) to flip
 * @param node root of the sub tree to search
 */
private void flip(int iBestNode, TreeNode node) {
    // visit the sub trees first
    if (node.m_left != null) {
        flip(iBestNode, node.m_left);
    }
    if (node.m_right != null) {
        flip(iBestNode, node.m_right);
    }
    if (node.m_iLabel != iBestNode) {
        return;
    }

    final TreeNode oldLeft = node.m_left;
    final int iOldLeftInstance = node.m_iLeftInstance;
    node.m_iLeftInstance = node.m_iRightInstance;
    node.m_iRightInstance = iOldLeftInstance;
    node.m_left = node.m_right;
    node.m_right = oldLeft;
}
/**
 * Lists the leaf sequences that can be obtained from the sub tree below node
 * by flipping at most one of its nodes.
 *
 * Entry 0 of the result is the current sequence. Entry i (i &gt; 0) is the
 * sequence after flipping the node whose label is stored at index i-1 of
 * label: first the nodes of the left sub tree, then those of the right sub
 * tree, and last node itself.
 *
 * @param node root of the sub tree
 * @param label receives the labels (m_iLabel) of the flipped nodes, appended
 *        in the order of the returned alternatives
 * @return list of leaf sequences, the unflipped one first
 */
private List<List<Integer>> calcOrderings(TreeNode node, List<Integer> label) {
    List<Integer> leftLabel = new ArrayList<Integer>();
    List<List<Integer>> leftList;
    if (node.m_left != null) {
        leftList = calcOrderings(node.m_left, leftLabel);
    } else {
        // single instance: exactly one sequence, nothing to flip
        List<Integer> single = new ArrayList<Integer>();
        single.add(node.m_iLeftInstance);
        leftList = new ArrayList<List<Integer>>();
        leftList.add(single);
    }

    List<Integer> rightLabel = new ArrayList<Integer>();
    List<List<Integer>> rightList;
    if (node.m_right != null) {
        rightList = calcOrderings(node.m_right, rightLabel);
    } else {
        List<Integer> single = new ArrayList<Integer>();
        single.add(node.m_iRightInstance);
        rightList = new ArrayList<List<Integer>>();
        rightList.add(single);
    }

    final List<Integer> leftBase = leftList.get(0);
    final List<Integer> rightBase = rightList.get(0);
    List<List<Integer>> result = new ArrayList<List<Integer>>();

    // nothing flipped
    List<Integer> unflipped = new ArrayList<Integer>(leftBase);
    unflipped.addAll(rightBase);
    result.add(unflipped);

    // one node of the left sub tree flipped, right side untouched
    for (List<Integer> leftVariant : leftList.subList(1, leftList.size())) {
        List<Integer> combined = new ArrayList<Integer>(leftVariant);
        combined.addAll(rightBase);
        result.add(combined);
    }

    // one node of the right sub tree flipped, left side untouched
    for (List<Integer> rightVariant : rightList.subList(1, rightList.size())) {
        List<Integer> combined = new ArrayList<Integer>(leftBase);
        combined.addAll(rightVariant);
        result.add(combined);
    }

    // this node flipped
    List<Integer> flipped = new ArrayList<Integer>(rightBase);
    flipped.addAll(leftBase);
    result.add(flipped);

    label.addAll(leftLabel);
    label.addAll(rightLabel);
    label.add(node.m_iLabel);
    return result;
}
/**
 * Scores a leaf sequence given as list of instances in order of appearance,
 * by converting it to the instance-indexed array form and delegating to score.
 *
 * @param list instances in order of appearance
 * @param fDistance0 distance matrix
 * @return result of score for the converted order
 */
private double score2(List<Integer> list, double[][] fDistance0) {
    int [] order = new int[list.size()];
    int iPosition = 0;
    for (Integer iInstance : list) {
        order[iInstance] = iPosition++;
    }
    return score(order, fDistance0);
}
/**
 * Scores an order: for every index i except the last one, the distances
 * fDistance0[order[i]][order[i+j]] to all other indices i+j are summed,
 * each divided by |j|^3. buildClusterer treats lower scores as better.
 *
 * The cube is calculated in double arithmetic. The former int expression
 * j*j*j overflows for |j| &gt; 1290, which corrupted the weights (and could
 * even become 0, i.e. a division by zero) for long order arrays. For smaller
 * |j| the result is bit for bit the same as before.
 *
 * NOTE(review): the outer loop stops before the last index, so pairs involving
 * the last index are counted once while all other pairs are counted twice;
 * left unchanged because it affects which orders are preferred.
 *
 * @param order array of indices into fDistance0
 * @param fDistance0 distance matrix
 * @return weighted sum of distances
 */
private double score(int[] order, double [][] fDistance0) {
    final int n = order.length;
    double fSum = 0;
    for (int i = 0; i < n - 1; i++) {
        final double [] fRow = fDistance0[order[i]];
        // indices before i: j is negative, so is its cube, hence the subtraction
        for (int j = -i; j < 0; j++) {
            fSum -= fRow[order[i + j]] / ((double) j * j * j);
        }
        // indices after i
        for (int j = 1; i + j < n; j++) {
            fSum += fRow[order[i + j]] / ((double) j * j * j);
        }
    }
    return fSum;
}
/**
 * Calculates the distance between two clusters according to m_nLinkType.
 *
 * SINGLE: smallest distance between an item of cluster1 and an item of cluster2.
 * COMPLETE: largest such distance.
 * ADJCOMLPETE: as COMPLETE, minus the largest distance within either cluster.
 * AVERAGE: average distance over all pairs of one item from each cluster.
 * MEAN: average distance over all pairs of items of the merged cluster.
 * Any other link type (CENTROID and WARD are not implemented here) yields
 * Double.MAX_VALUE.
 *
 * @param fDistance distance matrix between instances
 * @param cluster1 list of indices of instances in the first cluster
 * @param cluster2 ditto for second cluster
 * @return distance between clusters based on link type
 */
double getDistance(double [][] fDistance, Vector<Integer> cluster1, Vector<Integer> cluster2) {
    final int nLinkType = m_nLinkType;

    if (nLinkType == SINGLE) {
        // single link aka minimum link
        double fClosest = Double.MAX_VALUE;
        for (int iItem1 : cluster1) {
            for (int iItem2 : cluster2) {
                final double fDist = fDistance[iItem1][iItem2];
                if (fClosest > fDist) {
                    fClosest = fDist;
                }
            }
        }
        return fClosest;
    }

    if (nLinkType == COMPLETE || nLinkType == ADJCOMLPETE) {
        // complete link aka maximum link
        double fFarthest = 0;
        for (int iItem1 : cluster1) {
            for (int iItem2 : cluster2) {
                final double fDist = fDistance[iItem1][iItem2];
                if (fFarthest < fDist) {
                    fFarthest = fDist;
                }
            }
        }
        if (nLinkType == COMPLETE) {
            return fFarthest;
        }
        // adjustment: the largest within cluster distance, first cluster1 then cluster2
        double fWithin = 0;
        for (int c = 0; c < 2; c++) {
            final Vector<Integer> cluster = (c == 0) ? cluster1 : cluster2;
            for (int i = 0; i < cluster.size(); i++) {
                final int iItem1 = cluster.elementAt(i);
                for (int j = i + 1; j < cluster.size(); j++) {
                    final int iItem2 = cluster.elementAt(j);
                    final double fDist = fDistance[iItem1][iItem2];
                    if (fWithin < fDist) {
                        fWithin = fDist;
                    }
                }
            }
        }
        return fFarthest - fWithin;
    }

    if (nLinkType == AVERAGE) {
        // average distance between the elements of the two clusters
        double fTotal = 0;
        for (int iItem1 : cluster1) {
            for (int iItem2 : cluster2) {
                fTotal += fDistance[iItem1][iItem2];
            }
        }
        return fTotal / (cluster1.size() * cluster2.size());
    }

    if (nLinkType == MEAN) {
        // mean distance within the merged cluster (aka Group-average agglomerative clustering)
        Vector<Integer> merged = new Vector<Integer>();
        merged.addAll(cluster1);
        merged.addAll(cluster2);
        final int n = merged.size();
        double fTotal = 0;
        for (int i = 0; i < n; i++) {
            final int iItem1 = merged.elementAt(i);
            for (int j = i + 1; j < n; j++) {
                final int iItem2 = merged.elementAt(j);
                fTotal += fDistance[iItem1][iItem2];
            }
        }
        return fTotal / (n * (n - 1.0) / 2.0);
    }

    // link type without implementation
    return Double.MAX_VALUE;
} // getDistance
// /** calculated error sum-of-squares for instances wrt centroid **/
// double calcESS(Vector<Integer> cluster) {
// double [] fValues1 = new double[m_instances.numAttributes()];
// for (int i = 0; i < cluster.size(); i++) {
// Instance instance = m_instances.instance(cluster.elementAt(i));
// for (int j = 0; j < m_instances.numAttributes(); j++) {
// fValues1[j] += instance.value(j);
// }
// }
// for (int j = 0; j < m_instances.numAttributes(); j++) {
// fValues1[j] /= cluster.size();
// }
// // set up two instances for distance function
// Instance centroid = (Instance) m_instances.instance(cluster.elementAt(0)).copy();
// for (int j = 0; j < m_instances.numAttributes(); j++) {
// centroid.setValue(j, fValues1[j]);
// }
// double fESS = 0;
// for (int i = 0; i < cluster.size(); i++) {
// Instance instance = m_instances.instance(cluster.elementAt(i));
// fESS += m_DistanceFunction.distance(centroid, instance);
// }
// return fESS / cluster.size();
// } // calcESS
} // class NodeOrderer