/*
 * Copyright 2004-2010 Information & Software Engineering Group (188/1)
 *                     Institute of Software Technology and Interactive Systems
 *                     Vienna University of Technology, Austria
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package at.tuwien.ifs.somtoolbox.visualization.clustering;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;

import at.tuwien.ifs.somtoolbox.apps.viewer.GeneralUnitPNode;

/**
 * Class to create Clustering trees with the Ward's Linkage algorithm. This class is not compatible with mnemonic SOMs
 * (and probably also not compatible with hierarchical SOMs).
 * <p>
 * Two update strategies are available: an eager one that rewrites all affected distances after every merge, and a
 * lazy one that only fixes up a distance entry when it reaches the head of the queue.
 *
 * @author Angela Roiger
 * @version $Id: WardsLinkageTreeBuilderAll.java 3938 2010-11-17 15:15:25Z mayer $
 */
public class WardsLinkageTreeBuilderAll extends AbstractWardsLinkageTreeBuilder {

    /** Shared logger; looked up once instead of on every log call. */
    private static final Logger LOG = Logger.getLogger("at.tuwien.ifs.somtoolbox");

    /** lazyUpdate is faster and should return the same results. */
    private final boolean lazyUpdate;

    /**
     * Creates a tree builder using the eager (non-lazy) update. Only use this constructor if you suspect
     * WardsLinkageTreeBuilderAll(true) does not return a correct clustering.
     */
    public WardsLinkageTreeBuilderAll() {
        this(false);
    }

    /**
     * Only use false if you suspect WardsLinkageTreeBuilderAll(true) does not return a correct clustering. The new
     * Update function only recalculates the distance values before merging two clusters. This way a lot less
     * calculations have to be made, reducing the complexity from n^3 to n^2.
     *
     * @param lazyUpdate true to use faster update function
     */
    public WardsLinkageTreeBuilderAll(boolean lazyUpdate) {
        super();
        this.lazyUpdate = lazyUpdate;
    }

    /**
     * Calculation of the Clustering. This code is only compatible with rectangular, non hierarchical SOMs!
     * <p>
     * Computational complexity (n = number of units): n^2 if the tree builder was constructed using
     * lazyUpdate = true; else n^3.
     * <p>
     * To make this code compatible with mnemonic SOMs: set this.level to the number of units used.
     *
     * @param units the GeneralUnitPNode Array containing all the units of the SOM; must contain at least one row,
     *            since the width is read from {@code units[0]}
     * @return the ClusteringTree (i.e. the top node of the tree)
     * @throws ClusteringAbortedException declared by the overridden method; presumably raised by the inherited
     *             abort check when the user cancels the clustering
     */
    @Override
    public ClusteringTree createTree(GeneralUnitPNode[][] units) throws ClusteringAbortedException {
        LOG.info("Start Clustering ");
        // level starts at the number of units and is decremented once per merge
        this.level = units.length * units[0].length;

        // initialize monitor: one step per unit for the initial distances plus (at most) one per merge
        resetMonitor(2 * level);

        TreeSet<NodeDistance> dists = calculateInitialDistances(units);

        LOG.info("Building Tree ");

        ClusterNode newNode = null;

        // maps a merged cluster to the cluster it was merged into; only used for lazyUpdate
        HashMap<ClusterNode, ClusterNode> parents = null;
        if (lazyUpdate) {
            parents = new HashMap<ClusterNode, ClusterNode>();
        }

        while (!dists.isEmpty()) {
            NodeDistance toMerge = dists.first();
            dists.remove(toMerge);

            if (lazyUpdate) {
                // check if this is an old entry that needs to be updated: follow the merge history of both ends
                ClusterNode current1 = currentCluster(parents, toMerge.n1);
                ClusterNode current2 = currentCluster(parents, toMerge.n2);
                boolean dirty = current1 != toMerge.n1 || current2 != toMerge.n2;
                toMerge.n1 = current1;
                toMerge.n2 = current2;

                if (current1 == current2) {
                    // deprecated entry: both ends already live in the same cluster
                    continue;
                }
                if (dirty) {
                    // recalculate distance, reinsert and continue the while loop
                    toMerge.dist = calcESSincrease(toMerge.n1, toMerge.n2);
                    dists.add(toMerge);
                    continue;
                }
            }

            level--;
            incrementMonitor();
            allowAborting();

            newNode = new ClusterNode(toMerge.n1, toMerge.n2, level, toMerge.dist);

            if (lazyUpdate) {
                // just record the merge; stale entries are repaired when they are polled
                parents.put(toMerge.n1, newNode);
                parents.put(toMerge.n2, newNode);
            } else {
                // Eager update: redirect every distance that touches n1 or n2 to newNode, keeping exactly one
                // entry per (newNode, other) pair. The key is a two element list with newNode first.
                HashMap<List<Object>, NodeDistance> duplicateEliminator = new HashMap<List<Object>, NodeDistance>();

                for (Iterator<NodeDistance> i = dists.iterator(); i.hasNext();) {
                    NodeDistance x = i.next();
                    boolean n1Merged = x.n1 == toMerge.n1 || x.n1 == toMerge.n2;
                    boolean n2Merged = x.n2 == toMerge.n1 || x.n2 == toMerge.n2;

                    if ((n1Merged && n2Merged) || x.n1 == x.n2) {
                        throw new AssertionError("this should have been removed by dists.remove(toMerge)");
                    }
                    if (!n1Merged && !n2Merged) {
                        // unaffected by this merge
                        continue;
                    }

                    // take the entry out of the sorted set before touching any of its fields
                    i.remove();

                    List<Object> pair;
                    if (n1Merged) {
                        x.n1 = newNode;
                        // make pair where new node is first
                        pair = Arrays.<Object> asList(x.n1, x.n2);
                    } else {
                        x.n2 = newNode;
                        pair = Arrays.<Object> asList(x.n2, x.n1);
                    }

                    // keep only the first entry per pair; its distance is recalculated for the merged cluster
                    if (!duplicateEliminator.containsKey(pair)) {
                        x.dist = calcESSincrease(x.n1, x.n2);
                        duplicateEliminator.put(pair, x);
                    }
                }
                dists.addAll(duplicateEliminator.values());
            }
        }

        finishMonitor();
        LOG.info("Finished Clustering - Wards Linkage (all)");
        // NOTE(review): newNode is still null here if no merge took place (e.g. a map with a single unit).
        return new ClusteringTree(newNode, units.length);
    }

    /**
     * Follows the recorded merges starting at the given node until a cluster is reached that has not been merged
     * into another one yet.
     *
     * @param parents map from a merged cluster to the cluster that replaced it
     * @param node the node to start from
     * @return the node itself if it has no recorded parent, otherwise the last cluster in its parent chain
     */
    private static ClusterNode currentCluster(HashMap<ClusterNode, ClusterNode> parents, ClusterNode node) {
        ClusterNode current = node;
        for (ClusterNode parent = parents.get(current); parent != null; parent = parents.get(current)) {
            current = parent;
        }
        return current;
    }

    /**
     * Calculates the initial distances from each unit to each other unit where there is not yet a distance
     * calculated. This results in n^2/2 distance calculations and the same amount of created objects. Can take long
     * in case of large maps. This code is only compatible with rectangular, non hierarchical SOMs!
     * <p>
     * To make this code compatible with mnemonic SOMs: take care that only distances between existing units are
     * calculated, and only calculate (store) the distance between 2 clusters once.
     * <p>
     * A failure while computing the distances of one unit is logged and the remaining units are still processed.
     *
     * @param units A GeneralUnitPNode[][] containing the Units of the som
     * @return a TreeSet of NodeDistances containing the distances between the units starting with the smallest.
     * @throws ClusteringAbortedException presumably raised by the inherited abort check when the user cancels
     */
    private TreeSet<NodeDistance> calculateInitialDistances(GeneralUnitPNode[][] units)
            throws ClusteringAbortedException {
        int xdim = units.length;
        int ydim = units[0].length;

        ClusterNode[][] tmp = new ClusterNode[xdim][ydim];
        TreeSet<NodeDistance> dists = new TreeSet<NodeDistance>();

        // create all basic Nodes
        for (int i = 0; i < xdim; i++) {
            for (int j = 0; j < ydim; j++) {
                tmp[i][j] = new ClusterNode(units[i][j], level);
            }
        }

        // calculate initial distances: pair every unit with all units that follow it in row-major order
        for (int i = 0; i < xdim; i++) {
            for (int j = 0; j < ydim; j++) {
                incrementMonitor();
                allowAborting();
                try {
                    for (int k = i; k < xdim; k++) {
                        // start at the beginning of the row, unless it's my own row: then start with the next item
                        int start = k == i ? j + 1 : 0;
                        for (int l = start; l < ydim; l++) {
                            dists.add(new NodeDistance(tmp[i][j], tmp[k][l], calcESSincrease(tmp[i][j], tmp[k][l])));
                        }
                    }
                } catch (Exception e) {
                    // best effort: report through the logger (including the stack trace) and go on
                    LOG.log(Level.SEVERE, "Cannot create clustering: " + e.getMessage(), e);
                }
            }
        }
        return dists;
    }

    @Override
    public String getClusteringAlgName() {
        return "Ward's Linkage (all)";
    }
}