package it.unito.geosummly.clustering.subspace; import com.sun.org.apache.xpath.internal.operations.Mod; import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm; import de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.HierarchicalClusteringAlgorithm; import de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.SubspaceClusteringAlgorithm; import de.lmu.ifi.dbs.elki.data.Cluster; import de.lmu.ifi.dbs.elki.data.Clustering; import de.lmu.ifi.dbs.elki.data.DoubleVector; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.data.model.*; import de.lmu.ifi.dbs.elki.database.ids.*; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import org.apache.commons.lang3.ObjectUtils; import javax.validation.constraints.Max; import javax.validation.constraints.Null; import java.util.*; /** * Created by PU Yang on 5/22/16. */ public class AGNES <M extends Model> { //private ArrayList<Clustering<Model>> Clusterings; private Clustering<Model> Clustering; private int IterateNum; public AGNES (Clustering<Model> clustering, int threshold) { this.Clustering = clustering; this.IterateNum= threshold; } //Cluster Pair in Distance Map private class Pair { int Index_1; int Index_2; public Pair(int var1, int var2) { Index_2 = var2; Index_1 = var1; } public void setIndex(int var1, int var2) { Index_1 = var1; Index_2 = var2; } public boolean contains(int var) { if(Index_1 == var) return true; else if(Index_2 == var) return true; else return false; } public boolean equals(Pair obj) { if(Index_1 == obj.Index_1 && Index_2 == obj.Index_2) return true; else if(Index_1 == obj.Index_2 && Index_2 == obj.Index_1) return true; else return false; } public String toStirng() { return Index_1+","+Index_2; } } private class DistanceMatrix { private int MaxRow; private Vector<Vector<Double>> Matrix; public DistanceMatrix() { Matrix = new Vector<>(); } public int getMaxRow() { return MaxRow; } public int getItemNum() { return Matrix.size(); } public Pair findMin(){ double min = 100; int index1 = 0, index2 = 0; for(int i = 0; i < MaxRow; i++) { int length = Matrix.get(i).size(); for(int j = 0; j < length; j++) { if(min > Matrix.get(i).get(j)) { min = Matrix.get(i).get(j); System.out.println("Min Value: " + min); index1 = i; index2 = j; } } } System.out.println("Min Value: " + min); return new Pair(index1, index2); } public void removeOneItem(int index) { Iterator<Vector <Double>> it= Matrix.iterator(); Vector<Double> row = null; while(it.hasNext()) { row = it.next(); if(row.size() > index) row.remove(index); } Matrix.remove(index); MaxRow = MaxRow - 1; } public void addOneItem(Vector<Double> vector) { Matrix.add(vector); MaxRow = MaxRow + 1; } } public Clustering<Model> run() { Clustering<Model> result = new Clustering<>("GEOSUBCLU+AGNES clustering", "geosubclu"); ArrayList<Cluster<Model>> clusterList = new ArrayList<>(); //Keep the clusters for new clustering for(Cluster<Model> c:Clustering.getAllClusters()) { clusterList.add(c); } DistanceMatrix Matrix = new DistanceMatrix(); System.out.println("Begin"); System.out.println(Clustering.getAllClusters().size()); //TODO 1. Build the distance Matrix for all the clusters int index1 = 0, numOfzero = 0; for (Cluster<?> cluster : Clustering.getAllClusters()) { MeanModel model_1 = (MeanModel)cluster.getModel(); DoubleVector v1 = (DoubleVector) model_1.getMean(); int index2 = 0; Vector<Double> row = new Vector<>(); for (Cluster<?> cluster2 : Clustering.getAllClusters()) { MeanModel model_2 = (MeanModel) cluster2.getModel(); DoubleVector v2 = (DoubleVector) model_2.getMean(); //Calculate the Euclidean Distance between two clusters //may have problem BitSet bs = new BitSet(); bs.set(0,2,true); double distance = new FirstSubspaceEuclideanDistanceFunction(bs).doubleDistance(v1, v2); if(index1 == index2 || cluster.getName().compareTo(cluster2.getName()) != 0) { row.add(100.0); } else if (index1 > index2){ if (distance == 0) numOfzero++; row.add(distance); } else if(index1 < index2) break; index2++; } Matrix.addOneItem(row); index1++; } System.out.println("Number of Zero: " + numOfzero); //TODO 2. Merge the two clusters who have the minimum distance over all pairs of clusters in current clustering //TODO 3. Update the Distance Matrix //TODO 4. If number of iteration reaches the threshold, then stop, otherwise go to step 2 System.out.println("Main Part"); for(int i = 0; i < this.IterateNum; i++) { //Find the minimum index and value in DistanceMatrix System.out.println("Round "+ i); Pair pair = Matrix.findMin(); int index_1 = pair.Index_1, index_2 = pair.Index_2; System.out.println("Index_1, Index_2: " + index_1 +" " +index_2); //Merge clusters Cluster<Model> cluster_1= clusterList.get(index_1); Cluster<Model> cluster_2 = clusterList.get(index_2); if (!cluster_1.getName().equals(cluster_2.getName())) { System.out.println("***************Category is different***************"); // continue; } MeanModel m_1 = (MeanModel) cluster_1.getModel(); MeanModel m_2 = (MeanModel) cluster_2.getModel(); //Merge DBIDs ArrayModifiableDBIDs IDs = DBIDUtil.newArray(); IDs.addDBIDs(cluster_1.getIDs()); IDs.addDBIDs(cluster_2.getIDs()); int numOfCells_1 = cluster_1.size(), numOfCells_2 = cluster_2.size(); double weight_1 = numOfCells_2/(numOfCells_1+numOfCells_2), weight_2 = numOfCells_2/(numOfCells_1+numOfCells_2); //Merge FeatureVector double [] v_1 = ((DoubleVector) m_1.getMean()).getValues(); double [] v_2= ((DoubleVector) m_2.getMean()).getValues(); double [] v_3 = new double [v_1.length]; for(int j = 0; j < v_3.length; j++) { v_3[j] = v_1[j]*weight_1 + v_2[j]*weight_2; } DoubleVector v_new = new DoubleVector(v_3); MeanModel<DoubleVector> m_new = new MeanModel<>(v_new); String name = cluster_1.getName(); Cluster<Model> cluster_new = new Cluster<Model>(name, IDs, false, m_new); System.out.println("Cluster's size: " + cluster_new.getIDs().size() + " " + cluster_new.size()); //Update DistanceMatrix //Delete the merged cluster if(index_1 == index_2) System.out.println("WRONG!!!"); clusterList.remove(index_1); clusterList.remove(index_2); Matrix.removeOneItem(index_1); Matrix.removeOneItem(index_2); //Add new cluster Vector<Double> item = new Vector<>(); for(Cluster<?> cluster:clusterList) { MeanModel model_4 = (MeanModel) cluster.getModel(); DoubleVector v_4 = (DoubleVector) model_4.getMean(); BitSet bs = new BitSet(); bs.set(0,2,true); double distance = new FirstSubspaceEuclideanDistanceFunction(bs).doubleDistance(v_new, v_4); //Compute the Distance item.add(distance); //DistanceMap.put(index_new+","+index_old, distance); } item.add(100.0); clusterList.add(cluster_new); Matrix.addOneItem(item); } System.out.println("Finish"); for(Cluster<Model> cluster:clusterList) { result.addToplevelCluster(cluster); } System.out.println("Result Size: "+result.getAllClusters().size()); return result; } }