/* * Copyright 2004-2010 Information & Software Engineering Group (188/1) * Institute of Software Technology and Interactive Systems * Vienna University of Technology, Austria * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package at.tuwien.ifs.somtoolbox.visualization.comparison; import java.text.NumberFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Iterator; import java.util.logging.Logger; import at.tuwien.ifs.somtoolbox.SOMToolboxException; import at.tuwien.ifs.somtoolbox.apps.viewer.CommonSOMViewerStateData; import at.tuwien.ifs.somtoolbox.apps.viewer.GeneralUnitPNode; import at.tuwien.ifs.somtoolbox.apps.viewer.MapPNode; import at.tuwien.ifs.somtoolbox.input.SOMLibFormatInputReader; import at.tuwien.ifs.somtoolbox.layers.GrowingLayer; import at.tuwien.ifs.somtoolbox.layers.LayerAccessException; import at.tuwien.ifs.somtoolbox.layers.Unit; import at.tuwien.ifs.somtoolbox.layers.metrics.MetricException; import at.tuwien.ifs.somtoolbox.models.GrowingSOM; import at.tuwien.ifs.somtoolbox.util.CollectionUtils; import at.tuwien.ifs.somtoolbox.visualization.clustering.ClusterEquivalence; import at.tuwien.ifs.somtoolbox.visualization.clustering.ClusteringAbortedException; import at.tuwien.ifs.somtoolbox.visualization.clustering.ClusteringTree; import at.tuwien.ifs.somtoolbox.visualization.clustering.LabelCoordinates; import at.tuwien.ifs.somtoolbox.visualization.clustering.WardsLinkageTreeBuilder; import at.tuwien.ifs.somtoolbox.visualization.clustering.WardsLinkageTreeBuilderAll; /** * @author Doris Baum * @version $Id: SOMComparison.java 3590 2010-05-21 10:43:45Z mayer $ */ public class SOMComparison { private GrowingSOM gsom1 = null; private GrowingSOM gsom2 = null; private LabelCoordinates[] coords1 = null; private LabelCoordinates[] coords2 = null; private String[] labelList1 = null; private String[] labelList2 = null; private double[][] dist1 = null; private double[][] dist2 = null; private double maxDistance = 0; private int maxCount = 0; private double sourceThreshold = 0; private double targetThreshold = 0; private int outlierCountThreshold = 0; private int stableCountThreshold = 0; private double outlierPercentThreshold = 75; private double stablePercentThreshold = 100; private int minAbsoluteCount = 1; private boolean absolute = true; private boolean multiMatch = false; private int clusterNo = 5; public final int MAXCLUSTERNO = 200; public static LabelCoordinates[] getLabelCoordinates(GrowingSOM gsom) { GrowingLayer layer = gsom.getLayer(); String[] vectorLabels = layer.getAllMappedDataNames(true); int xSize = layer.getXSize(); int ySize = layer.getYSize(); LabelCoordinates[] coords = new LabelCoordinates[vectorLabels.length]; for (int i = 0; i < coords.length; i++) { coords[i] = new LabelCoordinates(); } try { // go through all units in the layer... for (int i = 0; i < xSize; i++) { for (int j = 0; j < ySize; j++) { // ... for each unit, get the labels of the // vectors mapped to them String[] unitnames = layer.getUnit(i, j).getMappedInputNames(); if (unitnames != null) { for (String element : unitnames) { // for each label, look up its position in the ArrayList... int index = Arrays.binarySearch(vectorLabels, element); // and save its units coordinates coords[index].x = i; coords[index].y = j; coords[index].label = element; } } } } } catch (Exception e) { Logger.getLogger("at.tuwien.ifs.somtoolbox").warning(e.getMessage()); } return coords; } // private LabelCoordinates[] alternateGetLabelCoordinates(GrowingSOM gsom) { // // GrowingLayer layer = gsom.getLayer(); // // String[] vectorLabels = gsom.getLayer().getAllMappedDataNames(true); // // LabelCoordinates[] coords = new LabelCoordinates[vectorLabels.length]; // for (int i = 0; i < coords.length; i++) { // coords[i] = new LabelCoordinates(); // } // // for (int i = 0; i < vectorLabels.length; i++) { // String label = (String) vectorLabels[i]; // Unit unit = layer.getUnitForDatum(label); // coords[i].x = unit.getXPos(); // coords[i].y = unit.getYPos(); // coords[i].label = label; // } // // return coords; // } public static GrowingSOM loadGSOM(String setname) { String unitDescriptionFileName = setname + SOMLibFormatInputReader.unitFileNameSuffix; String weightVectorFileName = setname + SOMLibFormatInputReader.weightFileNameSuffix; String mapDescriptionFileName = setname + SOMLibFormatInputReader.mapFileNameSuffix; try { return new GrowingSOM(new SOMLibFormatInputReader(weightVectorFileName, unitDescriptionFileName, mapDescriptionFileName)); } catch (Exception e) { Logger.getLogger("at.tuwien.ifs.somtoolbox").severe(e.getMessage()); return null; } } /** Calculate the distance matrix for all mapped vectors from the information where the inputs are mapped */ public static double[][] calculcateIntraSOMDistanceMatrix(LabelCoordinates[] coords) throws MetricException { double[][] distanceMatrix = new double[coords.length][coords.length]; for (int j = 0; j < coords.length; j++) { // label 1 for (int i = j; i < coords.length; i++) { // label 2 double distance = coords[j].distance(coords[i]); distanceMatrix[i][j] = distance; distanceMatrix[j][i] = distance; } } return distanceMatrix; } public static double[][] calculateClusterDistances(int[][] clusterAssig, int clusterNo) { int xSize = clusterAssig.length; int ySize = clusterAssig[0].length; double maxdist = Math.ceil(Math.sqrt(xSize * xSize + ySize * ySize)); double[][] mindist = new double[clusterNo][clusterNo]; for (double[] element : mindist) { Arrays.fill(element, maxdist); } double distance = -1; int cluster1 = -1; int cluster2 = -1; for (int x1 = 0; x1 < xSize; x1++) { for (int y1 = 0; y1 < ySize; y1++) { for (int x2 = x1; x2 < xSize; x2++) { for (int y2 = y1; y2 < ySize; y2++) { distance = Math.sqrt((x1 - x2) * (x1 - x2) + (y1 - y2) * (y1 - y2)); cluster1 = clusterAssig[x1][y1]; cluster2 = clusterAssig[x2][y2]; if (distance < mindist[cluster1][cluster2]) { mindist[cluster1][cluster2] = distance; mindist[cluster2][cluster1] = distance; } } } } } return mindist; } /** Assigns each unit in the grid a number for the cluster it belongs to */ public static int[][] calculateClusterAssignment(GrowingSOM gsom, int level) throws LayerAccessException, ClusteringAbortedException { // generate GUPNodes for the SOM GeneralUnitPNode[][] units = new GeneralUnitPNode[gsom.getLayer().getXSize()][gsom.getLayer().getYSize()]; for (int j = 0; j < gsom.getLayer().getYSize(); j++) { for (int i = 0; i < gsom.getLayer().getXSize(); i++) { if (gsom.getLayer().getUnit(i, j) != null) { // check needed for mnemonic SOMs (might not have all units // != null) units[i][j] = new GeneralUnitPNode(gsom.getLayer().getUnit(i, j), CommonSOMViewerStateData.getInstance(), null, null, 0, 0); } } } // TODO move these lines to Comparison? Even better would be to use tree attribute in MapPNode // create cluster tree builder. WardsLinkageTreeBuilder wards = new WardsLinkageTreeBuilder(); ClusteringTree tree = wards.createTree(units); return tree.getClusterAssignment(level, gsom.getLayer().getXSize(), gsom.getLayer().getYSize()); } public int[] clusterEquivalent(int[][] assignment1, int[][] assignment2, LabelCoordinates[] coords1, LabelCoordinates[] coords2, int numberOfClusters, double[] percentages) { final boolean debug = true; if (debug) { System.out.println("first"); for (int y = 0; y < assignment1[0].length; y++) { for (int[] element : assignment1) { System.out.print(element[y] + " "); } System.out.println(); } System.out.println("second"); for (int y = 0; y < assignment2[0].length; y++) { for (int[] element : assignment2) { System.out.print(element[y] + " "); } System.out.println(); } } // have counters for equivalence assignments int[][] equiv = new int[numberOfClusters][numberOfClusters]; for (int a = 0; a < numberOfClusters; a++) { Arrays.fill(equiv[a], 0); } // have counter for number of data vectors in cluster int[] clusterSize = new int[numberOfClusters]; Arrays.fill(clusterSize, 0); // count how often a data vector from cluster a in SOM 1 is // assigned to cluster b in SOM 2 int cluster1 = -1; int cluster2 = -1; for (int i = 0; i < coords1.length; i++) { cluster1 = assignment1[coords1[i].x][coords1[i].y]; cluster2 = assignment2[coords2[i].x][coords2[i].y]; equiv[cluster1][cluster2]++; clusterSize[cluster1]++; } // make an equivalence table which holds in each element an assignment and the // percentage of vectors from cluster a in SOM 1 that moved to cluster b in SOM 2 int rowcount = 0; ClusterEquivalence[] equivTable = new ClusterEquivalence[numberOfClusters * numberOfClusters]; for (int i = 0; i < numberOfClusters; i++) { for (int j = 0; j < numberOfClusters; j++) { equivTable[rowcount] = new ClusterEquivalence(); if (clusterSize[i] > 0) { equivTable[rowcount].percentage = (double) equiv[i][j] / (double) clusterSize[i]; } else { equivTable[rowcount].percentage = 0.0; } equivTable[rowcount].cluster1 = i; equivTable[rowcount].cluster2 = j; rowcount++; } } // sort the table according to percentages Arrays.sort(equivTable); if (debug) { System.out.println(); System.out.println("1st cluster; 2nd cluster; confidence;"); NumberFormat nf = NumberFormat.getPercentInstance(); for (int i = equivTable.length - 1; i >= 0; i--) { System.out.println(equivTable[i].cluster1 + "; " + equivTable[i].cluster2 + "; " + nf.format(equivTable[i].percentage) + ";"); } } // have markers whether clusters were already assigned // don't assign a cluster twice! boolean[] used1 = new boolean[numberOfClusters]; Arrays.fill(used1, false); boolean[] used2 = new boolean[numberOfClusters]; Arrays.fill(used2, false); // count how many assignments have been made so far int equivCount = 0; int[] equivAssignment = new int[numberOfClusters]; Arrays.fill(equivAssignment, -1); // go through the table and find the best assignments // Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Final cluster equivalence"); breakpoint: for (int i = equivTable.length - 1; i >= 0; i--) { if (!used1[equivTable[i].cluster1] && !used2[equivTable[i].cluster2]) { equivAssignment[equivTable[i].cluster1] = equivTable[i].cluster2; percentages[equivTable[i].cluster1] = equivTable[i].percentage; used1[equivTable[i].cluster1] = true; if (!multiMatch) { used2[equivTable[i].cluster2] = true; } equivCount++; // Logger.getLogger("at.tuwien.ifs.somtoolbox").info(equivTable[i].cluster1 + " -> " + // equivTable[i].cluster2 + " : " + // equivTable[i].percentage); } if (equivCount >= numberOfClusters) { break breakpoint; } } // for all unassigned clusters // may be unnecessary for (int i = 0; i < numberOfClusters - equivCount; i++) { int count1 = 0; int count2 = 0; while (used1[count1] && count1 < used1.length) { count1++; } while (used2[count2] && count2 < used2.length) { count2++; } equivAssignment[count1] = count2; used1[count1] = true; used2[count2] = true; } if (debug) { System.out.println(); System.out.println("1st cluster; 2nd cluster; confidence;"); NumberFormat nf = NumberFormat.getPercentInstance(); for (int i = 0; i < equivAssignment.length; i++) { System.out.println(i + "; " + equivAssignment[i] + "; " + nf.format(percentages[i]) + ";"); } } return equivAssignment; } /** * Calculate the cluster distance matrix for all mapped vectors from the information where the inputs are mapped */ public static double[][] calculcateIntraSOMClusterDistanceMatrix(LabelCoordinates[] coords, int[][] secSOMClusterAssignment, int clusterNo, double[][] distances) { int size = coords.length; double[][] distanceMatrix = new double[size][size]; for (int j = 0; j < size; j++) { // label 1 for (int i = j; i < size; i++) { // label 2 int cluster1 = secSOMClusterAssignment[coords[j].x][coords[j].y]; int cluster2 = secSOMClusterAssignment[coords[i].x][coords[i].y]; double distance = distances[cluster1][cluster2]; distanceMatrix[i][j] = distance; distanceMatrix[j][i] = distance; } } return distanceMatrix; } private int highestCount(int[] neighbourCount) { int highestCount = 0; for (int element : neighbourCount) { if (element > highestCount) { highestCount = element; } } return highestCount; } public ArrayList<Shift> calculateShifts(boolean cumulative) throws SOMToolboxException { // if one of the necessary arrays is not set, throw exception if (coords1 == null || coords2 == null || dist1 == null || dist2 == null) { throw new SOMToolboxException( "You need to call CompareSOMs.calculateMatrices before you can calculate any shifts!"); } int size = coords1.length; // have a counter how many neighbours a vector has in the first SOM int[] oldNeighbourCount = new int[size]; Arrays.fill(oldNeighbourCount, 1); // have a counter how many old neighbours of a vector are in its new neighbourhood // init to 1 because a vector is neighbour to itself int[] neighbourCount = new int[size]; Arrays.fill(neighbourCount, 1); // count how many neighbours stay the same for (int j = 0; j < size; j++) { // label 1 for (int i = j + 1; i < size; i++) { // label 2 // if the vectors are on the same unit in the source SOM or if we're counting cumulative // and the vectors are within a radius of source threshold of each other if (dist1[j][i] == 0 || cumulative && dist1[j][i] <= sourceThreshold) { // count old neighbours oldNeighbourCount[i]++; oldNeighbourCount[j]++; // if the vectors are on the same unit in the target SOM or if we're counting cumulative // and the vectors are within a radius of target threshold of each other if (dist2[j][i] == 0 || cumulative && dist2[j][i] <= targetThreshold) { // count neighbours that stayed the same neighbourCount[i]++; neighbourCount[j]++; } } } } // find overall highest count int highestCount = this.highestCount(neighbourCount); // have marker if the vector does a stable shift boolean[] stableMarker = new boolean[size]; Arrays.fill(stableMarker, false); // list for all shifts ArrayList<Shift> allshifts = new ArrayList<Shift>(); // generate new shift objects for stable shifts and fill them with the data for (int i = 0; i < size; i++) { if (absolute && neighbourCount[i] >= stableCountThreshold || !absolute && (double) neighbourCount[i] / oldNeighbourCount[i] >= stablePercentThreshold / 100 && neighbourCount[i] >= minAbsoluteCount) { Shift shift = new Shift(); shift.setCoords(coords1[i].x, coords1[i].y, coords2[i].x, coords2[i].y); shift.setLabel(labelList1[i]); shift.setCount(neighbourCount[i]); shift.setPercent((double) neighbourCount[i] / oldNeighbourCount[i]); shift.setProportion((double) neighbourCount[i] / highestCount); shift.setType(Shift.STABLE); allshifts.add(shift); stableMarker[i] = true; } } // have marker if the vector is "adjacent" (within target threshold) to stable shift boolean[] adjacentMarker = new boolean[size]; Arrays.fill(adjacentMarker, false); // find vectors "adjacent" to stable vectors for (int j = 0; j < size; j++) { // label 1 for (int i = j + 1; i < size; i++) { // label 2 if (dist1[j][i] <= sourceThreshold) { if (dist2[j][i] <= targetThreshold) { int adjacentIndex = -1; // pick them only if they are not stable themselves if (stableMarker[j] && !stableMarker[i]) { adjacentIndex = i; } if (stableMarker[i] && !stableMarker[j]) { adjacentIndex = j; } if (adjacentIndex != -1) { adjacentMarker[adjacentIndex] = true; } } } } } // generate new shift objects for adjacent shifts and fill them with the data for (int i = 0; i < size; i++) { if (adjacentMarker[i]) { if (absolute || !absolute && neighbourCount[i] >= minAbsoluteCount) { Shift shift = new Shift(); shift.setCoords(coords1[i].x, coords1[i].y, coords2[i].x, coords2[i].y); shift.setLabel(labelList1[i]); shift.setCount(neighbourCount[i]); shift.setPercent((double) neighbourCount[i] / oldNeighbourCount[i]); shift.setProportion((double) neighbourCount[i] / highestCount); shift.setType(Shift.ADJACENT); allshifts.add(shift); } } } // generate new shift objects for outlier shifts and fill them with the data for (int i = 0; i < size; i++) { // must be higher than outlier threshold but not a stable or adjacent shift already if ((absolute && neighbourCount[i] >= outlierCountThreshold || !absolute && (double) neighbourCount[i] / oldNeighbourCount[i] >= outlierPercentThreshold / 100 && neighbourCount[i] >= minAbsoluteCount) && !stableMarker[i] && !adjacentMarker[i]) { Shift shift = new Shift(); shift.setCoords(coords1[i].x, coords1[i].y, coords2[i].x, coords2[i].y); shift.setLabel(labelList1[i]); shift.setCount(neighbourCount[i]); shift.setPercent((double) neighbourCount[i] / oldNeighbourCount[i]); shift.setProportion((double) neighbourCount[i] / highestCount); shift.setType(Shift.OUTLIER); allshifts.add(shift); } } return purgeShifts(allshifts); } // make a new list and throw out those shifts pointing from and to the same units private ArrayList<Shift> purgeShifts(ArrayList<Shift> allshifts) { // sort the shifts according to unit position Collections.sort(allshifts); // make a new list and throw out those shifts pointing from and to the same units ArrayList<Shift> resultShifts = new ArrayList<Shift>(); int oldX1 = -1, oldY1 = -1, oldX2 = -1, oldY2 = -1; int oldType = -1; Iterator<Shift> iter = allshifts.iterator(); Shift curShift = null; Shift lastNewShift = null; while (iter.hasNext()) { curShift = iter.next(); if (!(curShift.getX1() == oldX1 && curShift.getY1() == oldY1 && curShift.getX2() == oldX2 && curShift.getY2() == oldY2)) { // This is a new Shift resultShifts.add(curShift); lastNewShift = curShift; } else { // This Shift is already in the result list. if (curShift.getType() != oldType) { System.out.println("Types don't match: !" + curShift.getType() + " <-> " + oldType); } else { // Sum up the props... if (lastNewShift != null) { // Should not be necessary, but who knows... // lastNewShift.setCount(lastNewShift.getCount() + curShift.getCount()); // lastNewShift.setLabel(lastNewShift.getLabel() + ", " + curShift.getLabel()); // // Should be good... // lastNewShift.setPercent(lastNewShift.getPercent() + curShift.getPercent()); } } } oldX1 = curShift.getX1(); oldY1 = curShift.getY1(); oldX2 = curShift.getX2(); oldY2 = curShift.getY2(); oldType = curShift.getType(); } // TODO: Some stats, do more! long allS = 0, stableS = 0, outS = 0, adjS = 0; double stableP = 0, outP = 0, adjP = 0; for (Shift shift : resultShifts) { allS += shift.getCount(); switch (shift.getType()) { case Shift.STABLE: stableS += shift.getCount(); stableP += shift.getPercent() * shift.getCount(); break; case Shift.ADJACENT: adjS += shift.getCount(); adjP += shift.getPercent() * shift.getCount(); break; case Shift.OUTLIER: outS += shift.getCount(); outP += shift.getPercent() * shift.getCount(); break; } } // outP /= String stat = String.format("(%d); Stable: %d (%5.2f); " + "Adjacent: %d (%5.2f); " + "Outlier: %d (%5.2f)", allS, stableS, 100 * (double) stableS / allS, adjS, 100 * (double) adjS / allS, outS, 100 * (double) outS / allS); Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Stats: " + stat); System.out.println("\n\nStats:\n" + stat.replaceAll("; ", "\n") + "\n\n"); return resultShifts; } public ArrayList<Shift> calculateClusterShifts(MapPNode map1, MapPNode map2) throws ClusteringAbortedException { // build clustering trees for both maps // Angela: WardsLinkageTreeBuilderAll() ist jetzt das "richtige" wards (aber halt langsam) // 2x "if" dazugegeben, damit nicht jedesmal der baum neu berechnet wird, wenn ein anderes level von // clustern angezeigt wird... if (map1.getCurrentClusteringTree() == null) { map1.buildTree(new WardsLinkageTreeBuilderAll()); } ClusteringTree tree1 = map1.getClusteringTree(); int[][] assignment1 = tree1.getClusterAssignment(clusterNo, gsom1.getLayer().getXSize(), gsom1.getLayer().getYSize()); if (map2.getCurrentClusteringTree() == null) { map2.buildTree(new WardsLinkageTreeBuilderAll()); } ClusteringTree tree2 = map2.getClusteringTree(); int[][] assignment2 = tree2.getClusterAssignment(clusterNo, gsom2.getLayer().getXSize(), gsom2.getLayer().getYSize()); // find the leftmost highest units to represent each cluster for both SOMs LabelCoordinates[] topunits1 = new LabelCoordinates[clusterNo]; LabelCoordinates[] topunits2 = new LabelCoordinates[clusterNo]; boolean used[] = new boolean[clusterNo]; Arrays.fill(used, false); for (int y = 0; y < gsom1.getLayer().getYSize(); y++) { for (int x = 0; x < gsom1.getLayer().getXSize(); x++) { if (!used[assignment1[x][y]]) { topunits1[assignment1[x][y]] = getClusterMeanPoint(assignment1, y, x, gsom1); used[assignment1[x][y]] = true; } } } Arrays.fill(used, false); for (int y = 0; y < gsom2.getLayer().getYSize(); y++) { for (int x = 0; x < gsom2.getLayer().getXSize(); x++) { if (!used[assignment2[x][y]]) { topunits2[assignment2[x][y]] = getClusterMeanPoint(assignment2, y, x, gsom2); used[assignment2[x][y]] = true; } } } // find equivalences between the clusters double[] percentages = new double[clusterNo]; int[] equiv = clusterEquivalent(assignment1, assignment2, coords1, coords2, clusterNo, percentages); // list of shifts ArrayList<Shift> allShifts = new ArrayList<Shift>(); int size = coords1.length; // count the vectors that move from the same unit to the same unit int[] neighbourCount = new int[size]; Arrays.fill(neighbourCount, 1); for (int j = 0; j < size; j++) { // label 1 for (int i = j + 1; i < size; i++) { // label 2 if (dist1[j][i] == 0) { if (dist2[j][i] == 0) { neighbourCount[i]++; neighbourCount[j]++; } } } } // find the highes number of moved vectors int highestCount = this.highestCount(neighbourCount); // stable and outlier shifts (that stay in or move out of the cluster) for (int i = 0; i < size; i++) { if (equiv[assignment1[coords1[i].x][coords1[i].y]] == assignment2[coords2[i].x][coords2[i].y]) { Shift shift = new Shift(); shift.setCoords(coords1[i].x, coords1[i].y, coords2[i].x, coords2[i].y); shift.setLabel(labelList1[i]); shift.setCount(neighbourCount[i]); shift.setProportion((double) neighbourCount[i] / highestCount); shift.setType(Shift.STABLE); allShifts.add(shift); } else { Shift shift = new Shift(); shift.setCoords(coords1[i].x, coords1[i].y, coords2[i].x, coords2[i].y); shift.setLabel(labelList1[i]); shift.setCount(neighbourCount[i]); shift.setProportion((double) neighbourCount[i] / highestCount); shift.setType(Shift.OUTLIER); allShifts.add(shift); } } ArrayList<Shift> resultShifts = this.purgeShifts(allShifts); // cluster shifts for (int i = 0; i < equiv.length; i++) { Shift shift = new Shift(); shift.setCoords(topunits1[i].x, topunits1[i].y, topunits2[equiv[i]].x, topunits2[equiv[i]].y); shift.setLabel(topunits1[i].label + " -> " + topunits2[equiv[i]].label); // shift.setCount(0); if (percentages[i] < 0.01) { percentages[i] = 0.01; } shift.setProportion(percentages[i]); // shift.setBreadth(); shift.setType(Shift.CLUSTER); resultShifts.add(shift); } return resultShifts; } /** Try to find a mean-point for a cluster */ private LabelCoordinates getClusterMeanPoint(int[][] assignment, int y, int x, GrowingSOM gsom) { // System.out.println(ArrayUtils.toString(assignment)); int classNumber = assignment[x][y]; double count = 0; int xPos = 0; int yPos = 0; for (int i = 0; i < assignment.length; i++) { for (int j = 0; j < assignment[i].length; j++) { if (classNumber == assignment[i][j]) { count++; xPos += i; yPos += j; // System.out.println("adding point to " + classNumber + ": " + i + ", " + j); } } } // System.out.println("found points: " + count + ", acc: " + xPos + ", " + yPos); xPos = (int) Math.floor(xPos / count + 0.49); yPos = (int) Math.floor(yPos / count + 0.49); try { // if the mean is outside the cluster, or on an empty unit in a small cluster (<3 units) if (assignment[xPos][yPos] != classNumber || gsom.getLayer().getUnit(xPos, yPos).getNumberOfMappedInputs() == 0 && count < 3) { // find the unit with most data mapped on Unit max = null; for (int i = 0; i < assignment.length; i++) { for (int j = 0; j < assignment[i].length; j++) { if (classNumber == assignment[i][j]) { final Unit unit = gsom.getLayer().getUnit(i, j); if (max == null || unit.getNumberOfMappedInputs() > max.getNumberOfMappedInputs()) { max = unit; } } } } xPos = max.getXPos(); yPos = max.getYPos(); } } catch (LayerAccessException e) { // does not happen e.printStackTrace(); } // System.out.println("finally: " + xPos + ", " + yPos); return new LabelCoordinates(xPos, yPos, "cluster " + assignment[x][y]); } public void loadGSOMsFromPrefix(String prefix1, String prefix2) throws SOMToolboxException { loadGSOMs(loadGSOM(prefix1), prefix2); } public void loadGSOMs(GrowingSOM gsom, String prefix) throws SOMToolboxException { gsom1 = gsom; gsom2 = loadGSOM(prefix); labelList1 = gsom1.getLayer().getAllMappedDataNames(true); labelList2 = gsom2.getLayer().getAllMappedDataNames(true); if (!Arrays.equals(labelList1, labelList2)) { printInputDifferenceErrorMesage(labelList1, labelList2); throw new SOMToolboxException( "The input vector sets of the SOMs aren't equal - can't do comparison! See the logs for input vector differences."); } calculateMatrices(); } public void calculateMatrices() { try { maxCount = gsom1.getLayer().getAllMappedDataNames().length; double maxDistance1 = Math.sqrt(gsom1.getLayer().getXSize() * gsom1.getLayer().getXSize() + gsom1.getLayer().getYSize() * gsom1.getLayer().getYSize()); double maxDistance2 = Math.sqrt(gsom2.getLayer().getXSize() * gsom2.getLayer().getXSize() + gsom2.getLayer().getYSize() * gsom2.getLayer().getYSize()); if (maxDistance2 > maxDistance1) { maxDistance = maxDistance2; } else { maxDistance = maxDistance1; } stableCountThreshold = 5; if (stableCountThreshold > maxCount) { stableCountThreshold = maxCount; } outlierCountThreshold = 1; maxDistance = Math.ceil(maxDistance); // get the coordinates of the unit each input vector (and its label have been mapped to coords1 = getLabelCoordinates(gsom1); coords2 = getLabelCoordinates(gsom2); // calculate the distance matrices for each SOM dist1 = calculcateIntraSOMDistanceMatrix(coords1); dist2 = calculcateIntraSOMDistanceMatrix(coords2); } catch (Exception e) { Logger.getLogger("at.tuwien.ifs.somtoolbox").severe(e.getMessage()); } } /** * Compare two SOMs according to where input vectors come to lie on the map */ public int getMaxCount() { return maxCount; } public double getMaxDistance() { return maxDistance; } public double gettargetThreshold() { return targetThreshold; } public void settargetThreshold(double targetThreshold) { this.targetThreshold = targetThreshold; } public int getOutlierCountThreshold() { return outlierCountThreshold; } public int getStableCountThreshold() { return stableCountThreshold; } public double getTargetThreshold() { return targetThreshold; } public void setOutlierCountThreshold(int outlierCountThreshold) { this.outlierCountThreshold = outlierCountThreshold; } public void setStableCountThreshold(int stableCountThreshold) { this.stableCountThreshold = stableCountThreshold; } public void setTargetThreshold(double targetThreshold) { this.targetThreshold = targetThreshold; } public int getClusterNo() { return clusterNo; } public void setClusterNo(int clusterNo) { this.clusterNo = clusterNo; } public double getSourceThreshold() { return sourceThreshold; } public void setSourceThreshold(double sourceThreshold) { this.sourceThreshold = sourceThreshold; } public double getOutlierPercentThreshold() { return outlierPercentThreshold; } public void setOutlierPercentThreshold(double outlierPercentThreshold) { this.outlierPercentThreshold = outlierPercentThreshold; } public double getStablePercentThreshold() { return stablePercentThreshold; } public void setStablePercentThreshold(double stablePercentThreshold) { this.stablePercentThreshold = stablePercentThreshold; } public boolean isAbsolute() { return absolute; } public void setAbsolute(boolean absolute) { this.absolute = absolute; } public int getMinAbsoluteCount() { return minAbsoluteCount; } public void setMinAbsoluteCount(int minAbsoluteCount) { this.minAbsoluteCount = minAbsoluteCount; } public static void printInputDifferenceErrorMesage(String[] labelList, String[] labelList2) { ArrayList<String>[] uniqueElements = CollectionUtils.getUniqueElements(labelList, labelList2); System.out.println("\n=============================================================="); System.out.println("Inputs only in first SOM (" + uniqueElements[0].size() + ")"); for (String s2 : uniqueElements[0]) { System.out.println("\t" + s2); } System.out.println("\n=============================================================="); System.out.println("Inputs only in second SOM (" + uniqueElements[1].size() + ")"); for (String s2 : uniqueElements[1]) { System.out.println("\t" + s2); } } public void setMultiMatch(boolean multiMatch) { this.multiMatch = multiMatch; } public boolean isMultiMatch() { return multiMatch; } }