package edu.hawaii.jmotif.text.cluster;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Locale;
import java.util.TreeSet;
import edu.hawaii.jmotif.text.CosineDistanceMatrix;
/**
* Implements a cluster node for SAX terms clustering.
*
* @author psenin
*
*/
public class Cluster {

  /** The left sub-cluster; stays {@code null} until {@link #merge} assigns it. */
  public Cluster left = null;

  /** The right sub-cluster; stays {@code null} until {@link #merge} assigns it. */
  public Cluster right = null;

  /** The level from the root. This class never writes it; callers maintain it. */
  public int level;

  /** The distance between left and right sub-clusters, as passed to {@link #merge}. */
  public double distanceBetween;

  /**
   * The keys. Words which are within this cluster. Never {@code null}: an un-merged cluster built
   * with the no-argument constructor simply holds an empty set.
   */
  private TreeSet<String> keys = new TreeSet<String>();

  /**
   * Constructor. Creates an empty cluster which is expected to be populated through
   * {@link #merge}.
   */
  public Cluster() {
    super();
  }

  /**
   * Constructor. Creates a single-word cluster.
   *
   * @param key The single name of the cluster.
   */
  public Cluster(String key) {
    super();
    this.keys.add(key);
  }

  /**
   * Merging together clusters. This node becomes the parent of both arguments and its key set
   * becomes the union of their key sets.
   *
   * @param left The left cluster.
   * @param right The right cluster.
   * @param distance The distance between the two merged clusters, must not be null.
   * @throws NullPointerException if any of the arguments is null.
   */
  public void merge(Cluster left, Cluster right, Double distance) {
    if (distance == null) {
      throw new NullPointerException("distance must not be null");
    }
    // build the union first, so a failure (e.g. a null argument) leaves this node untouched
    TreeSet<String> merged = new TreeSet<String>();
    merged.addAll(left.keys);
    merged.addAll(right.keys);

    this.left = left;
    this.right = right;
    this.keys = merged;
    this.distanceBetween = distance;
  }

  /**
   * Compute the distance between words clusters.
   *
   * @param otherCluster The other cluster.
   * @param data Not used by this method; kept in the signature for existing callers.
   * @param distanceMatrix The pre-computed distance matrix.
   * @param criterion The linkage criterion.
   * @return The distance between clusters based on the distances and the linkage, or null if the
   * criterion is neither SINGLE nor COMPLETE and at least one cluster holds more than one key.
   */
  public Double distanceTo(Cluster otherCluster, HashMap<String, HashMap<String, Double>> data,
      CosineDistanceMatrix distanceMatrix, LinkageCriterion criterion) {

    // two single-word clusters: the linkage is irrelevant, just look the pair up
    if (otherCluster.keys.size() == 1 && this.keys.size() == 1) {
      return distanceMatrix.distanceBetween(otherCluster.keys.first(), this.keys.first());
    }

    if (criterion.equals(LinkageCriterion.SINGLE)) {
      // single linkage: the smallest pairwise distance
      double minDist = Double.MAX_VALUE;
      for (String keyA : this.keys) {
        for (String keyB : otherCluster.keys) {
          double dist = distanceMatrix.distanceBetween(keyA, keyB);
          if (dist < minDist) {
            minDist = dist;
          }
        }
      }
      return minDist;
    }
    else if (criterion.equals(LinkageCriterion.COMPLETE)) {
      // complete linkage: the largest pairwise distance. The seed has to be the lowest finite
      // double; Double.MIN_VALUE is the smallest POSITIVE value and would win over a zero
      // distance.
      double maxDist = -Double.MAX_VALUE;
      for (String keyA : this.keys) {
        for (String keyB : otherCluster.keys) {
          double dist = distanceMatrix.distanceBetween(keyA, keyB);
          if (dist > maxDist) {
            maxDist = dist;
          }
        }
      }
      return maxDist;
    }

    return null;
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public int hashCode() {
    // same value as the classic "31 * 1 + keys.hashCode()" recipe
    return 31 + keys.hashCode();
  }

  /**
   * {@inheritDoc}
   *
   * Two clusters are equal when they are of the same class and hold the same set of keys; the
   * tree structure and the distance are not compared.
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (obj == null || getClass() != obj.getClass()) {
      return false;
    }
    return keys.equals(((Cluster) obj).keys);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public String toString() {
    return Arrays.toString(this.keys.toArray());
  }

  /**
   * Returns the NEWICK format representation of this node cluster. You still need to take it in
   * parenthesis if this is your ROOT.
   *
   * @return The newick string representation of the cluster.
   */
  public String toNewick() {
    // a node lacking either child is printed as a leaf, i.e. by its first key
    if ((null == this.left) || (null == this.right)) {
      return this.keys.first();
    }

    double height;
    if (left.isTerminal() && !right.isTerminal()) {
      height = Math.abs(this.distanceBetween - right.distanceBetween);
    }
    else if (!left.isTerminal() && right.isTerminal()) {
      height = Math.abs(this.distanceBetween - left.distanceBetween);
    }
    else {
      height = this.distanceBetween;
    }

    // both branches get the same length and the same textual form of it
    String branchLength = formatNumber(height / 2D);

    StringBuilder sb = new StringBuilder();
    appendChild(sb, left, branchLength);
    sb.append(",");
    appendChild(sb, right, branchLength);
    return sb.toString();
  }

  /**
   * Appends a child sub-tree followed by its branch length; a non-terminal child is wrapped in
   * parenthesis.
   *
   * @param sb The buffer to append to.
   * @param child The child node.
   * @param branchLength The formatted branch length.
   */
  private void appendChild(StringBuilder sb, Cluster child, String branchLength) {
    if (child.isTerminal()) {
      sb.append(child.toNewick());
    }
    else {
      sb.append("(").append(child.toNewick()).append(")");
    }
    sb.append(":").append(branchLength);
  }

  /**
   * Formats the double floating-point number as a plain decimal with up to ten fraction digits.
   * The root locale is used so the digits do not depend on the default locale of the JVM.
   *
   * @param number The number to format.
   * @return formatted floating-point number.
   */
  private String formatNumber(double number) {
    DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(Locale.ROOT);
    symbols.setDecimalSeparator('.');
    // a fresh instance per call: DecimalFormat is not safe to share between threads
    DecimalFormat format = new DecimalFormat("#.##########", symbols);
    return format.format(number);
  }

  /**
   * Returns true if left and right sub-nodes of this node are NULLs.
   *
   * @return true if the node is terminal.
   */
  public boolean isTerminal() {
    return (null == this.left) && (null == this.right);
  }
}