/*
* This file is part of Caliph & Emir.
*
* Caliph & Emir is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Caliph & Emir is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Caliph & Emir; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Copyright statement:
* --------------------
* (c) 2002-2005 by Mathias Lux (mathias@juggle.at)
* http://www.juggle.at, http://caliph-emir.sourceforge.net
*/
package at.lux.retrieval.clustering;
import at.lux.retrieval.calculations.SimilarityMatrix;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
/**
* Date: 07.02.2005
* Time: 22:47:15
*
* @author Mathias Lux, mathias@juggle.at
*/
public class HAC {
private SimilarityMatrix matrix;
private int numberOfClusters;
private HashSet<ArrayList<Integer>> clusters;
/**
* Allows to use different types of HAC.
*/
public enum HACType {
COMPLETE_LINK, SINGLE_LINK, AVERAGE_LINK
}
public HAC(SimilarityMatrix matrix, int numberOfClusters) {
this.matrix = matrix;
this.numberOfClusters = numberOfClusters;
init();
}
public HAC(SimilarityMatrix matrix) {
this.matrix = matrix;
numberOfClusters = (int) Math.sqrt(matrix.getDimension());
init();
}
private void init() {
clusters = new HashSet<ArrayList<Integer>>(matrix.getDimension());
for (int i = 0; i < matrix.getDimension(); i++) {
ArrayList<Integer> list = new ArrayList<Integer>();
list.add(i);
clusters.add(list);
}
}
/**
* @return a positive integer as long as there are steps to make.
*/
public int step() {
ArrayList<Integer> elist1 = null, elist2 = null;
float maxSimilarity = 0f;
// find best merge candidates:
for (Iterator<ArrayList<Integer>> iterator = clusters.iterator(); iterator.hasNext();) {
ArrayList<Integer> list1 = iterator.next();
for (Iterator<ArrayList<Integer>> iterator1 = clusters.iterator(); iterator1.hasNext();) {
ArrayList<Integer> list2 = iterator1.next();
if (!list1.equals(list2)) {
float tmp = getSimilarity(list1, list2, HACType.COMPLETE_LINK);
// System.out.print(tmp + ", ");
if (tmp >= maxSimilarity) {
maxSimilarity = tmp;
elist1 = list1;
elist2 = list2;
// System.out.print(maxSimilarity + " ");
}
}
}
}
// merge them:
clusters.remove(elist2);
// System.out.println("Merging {" + printList(elist1) + "} with {" + printList(elist2) + "}");
clusters.remove(elist1);
elist1.addAll(elist2);
clusters.add(elist1);
return clusters.size() - numberOfClusters;
}
/**
* Implements different types of HAC.
* @param list1
* @param list2
* @param type
* @return
*/
private float getSimilarity(ArrayList<Integer> list1, ArrayList<Integer> list2, HACType type) {
float similarity = 1f;
if (type == HACType.COMPLETE_LINK) {
for (Iterator<Integer> iterator = list1.iterator(); iterator.hasNext();) {
int int1 = iterator.next();
for (Iterator<Integer> iterator1 = list2.iterator(); iterator1.hasNext();) {
int int2 = iterator1.next();
float tmp = matrix.getSimilarity(int1, int2);
if (tmp < similarity) similarity = tmp;
}
}
}
else if (type == HACType.AVERAGE_LINK) {
float average = 0f;
int count = 0;
for (Iterator<Integer> iterator = list1.iterator(); iterator.hasNext();) {
int int1 = iterator.next();
for (Iterator<Integer> iterator1 = list2.iterator(); iterator1.hasNext();) {
int int2 = iterator1.next();
average += matrix.getSimilarity(int1, int2);
count++;
// if (tmp < similarity) similarity = tmp;
}
}
similarity = average/((float) count);
}
else if (type == HACType.SINGLE_LINK) {
similarity = 0f;
for (Iterator<Integer> iterator = list1.iterator(); iterator.hasNext();) {
int int1 = iterator.next();
for (Iterator<Integer> iterator1 = list2.iterator(); iterator1.hasNext();) {
int int2 = iterator1.next();
float tmp = matrix.getSimilarity(int1, int2);
if (tmp > similarity) similarity = tmp;
}
}
}
return similarity;
}
public HashSet<ArrayList<Integer>> getClusters() {
return clusters;
}
public SimilarityMatrix getMatrix() {
return matrix;
}
public String toString() {
StringWriter sw = new StringWriter();
int count = 0;
for (Iterator<ArrayList<Integer>> iterator = clusters.iterator(); iterator.hasNext();) {
count++;
ArrayList<Integer> integers = iterator.next();
sw.append("Cluster ");
sw.append(count + ": ");
for (Iterator<Integer> iterator1 = integers.iterator(); iterator1.hasNext();) {
sw.append(iterator1.next().toString());
if (iterator1.hasNext()) sw.append(", ");
}
sw.append("\n");
}
return sw.toString();
}
/**
* For debugging lists ...
* @param list
* @return all elements of the list as strings
*/
private String printList(List list) {
StringWriter sw = new StringWriter(64);
for (Iterator iterator = list.iterator(); iterator.hasNext();) {
Object o = iterator.next();
sw.append(o.toString());
if (iterator.hasNext()) sw.append(", ");
}
return sw.toString();
}
}