/*
* #%L
* gitools-core
* %%
* Copyright (C) 2013 Universitat Pompeu Fabra - Biomedical Genomics group
* %%
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program. If not, see
* <http://www.gnu.org/licenses/gpl-3.0.html>.
* #L%
*/
package org.gitools.analysis.clustering.hierarchical;
import com.google.common.base.Joiner;
import org.apache.commons.math3.util.FastMath;
import org.gitools.analysis.clustering.distance.DistanceMeasure;
import org.gitools.analysis.clustering.hierarchical.strategy.LinkageStrategy;
import org.gitools.api.analysis.IAggregator;
import org.gitools.api.analysis.IProgressMonitor;
import org.gitools.api.matrix.*;
import org.gitools.api.matrix.view.IMatrixViewDimension;
import org.gitools.heatmap.header.HierarchicalCluster;
import org.gitools.heatmap.header.HierarchicalClusterNamer;
import java.awt.*;
import java.util.*;
import java.util.List;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ConcurrentSkipListSet;
import static com.google.common.collect.Sets.complementOf;
import static com.google.common.collect.Sets.newHashSet;
public class HierarchicalClusterer {
private LinkageStrategy linkageStrategy;
private DistanceMeasure measure;
private IAggregator aggregator;
public HierarchicalClusterer(LinkageStrategy linkageStrategy, DistanceMeasure measure, IAggregator aggregator) {
this.linkageStrategy = linkageStrategy;
this.measure = measure;
this.aggregator = aggregator;
}
public HierarchicalCluster cluster(IMatrix matrix, IMatrixLayer<Double> layer, IMatrixDimension clusterDimension, IMatrixDimension aggregationDimension, IProgressMonitor monitor) {
Map<String, HierarchicalCluster> clusters = new HashMap<>(clusterDimension.size());
SortedSet<ClusterPair> linkages = new ConcurrentSkipListSet<>();
// Aggregate all the values to sort the clusters by weight
monitor.begin("Aggregating values...", clusterDimension.size());
final Map<String, Double> aggregation = new HashMap<>(clusterDimension.size());
Set<String> allNullValues = new HashSet<>();
IMatrixPosition position = matrix.newPosition();
for (String id : position.iterate(clusterDimension)) {
Double value = aggregator.aggregate(position.iterate(layer, aggregationDimension));
if (value != null) {
aggregation.put(id, value);
} else {
allNullValues.add(id);
}
}
// First sort the clustering dimension to show the clusters ordered by weight at the end
if (clusterDimension instanceof IMatrixViewDimension) {
IMatrixViewDimension sortDimension = (IMatrixViewDimension) clusterDimension;
sortDimension.sort(new Comparator<String>() {
@Override
public int compare(String o1, String o2) {
return SortDirection.ASCENDING.compare(aggregation.get(o1), aggregation.get(o2));
}
});
}
// Calculate all the distances
IMatrixPosition position1 = matrix.newPosition();
IMatrixPosition position2 = matrix.newPosition();
monitor.begin("Calculating distances...", clusterDimension.size());
for (String id1 : position1.iterate(clusterDimension)) {
// Check user cancel action
monitor.worked(1);
if (monitor.isCancelled()) {
throw new CancellationException();
}
// Skip all null values
if (allNullValues.contains(id1)) {
continue;
}
HierarchicalCluster cluster1 = newCluster(clusters, id1);
cluster1.setWeight( aggregation.get(id1) );
for (String id2 : position2.iterate(clusterDimension.from(id1))) {
// Skip equal ids
if (id1.equals(id2)) continue;
// Skip all null columns
if (allNullValues.contains(id2)) {
continue;
}
Double distance = measure.compute(
position1.iterate(layer, aggregationDimension),
position2.iterate(layer, aggregationDimension)
);
HierarchicalCluster cluster2 = newCluster(clusters, id2);
linkages.add(new ClusterPair(distance, cluster1, cluster2));
}
}
// Create the clusters agglomerating nodes by the nearest distances
HierarchyBuilder builder = new HierarchyBuilder(newHashSet(clusters.values()), linkages);
builder.agglomerate(linkageStrategy, monitor, clusterDimension.size());
// Set cluster names ordered by weight
HierarchicalCluster root = builder.getRootCluster();
root.setName("");
root.setSortName("");
Color color = HierarchicalClusterNamer.nameClusters(root, HierarchicalClusterNamer.DEFAULT_PALETTE);
root.setColor(color.getRGB());
root.setName("root");
return root;
}
private static HierarchicalCluster newCluster(Map<String, HierarchicalCluster> clusters, String id) {
if (!clusters.containsKey(id)) {
HierarchicalCluster newCluster = new HierarchicalCluster(id);
clusters.put(id, newCluster);
return newCluster;
}
return clusters.get(id);
}
}