/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* WeightedFFNeighborhoodInit.java
*
* Initializer that uses weighted farthest first traversal to get
* initial clusters for K-Means
*
* Copyright (C) 2004 Sugato Basu, Misha Bilenko
* */
package weka.clusterers.initializers;
import java.io.*;
import java.util.*;
import weka.core.*;
import weka.core.metrics.*;
import weka.clusterers.*;
/**
 * Initializer for MPC-K-Means that derives initial clusters from the
 * must-link constraint neighborhoods, selecting neighborhoods by a
 * weighted farthest-first traversal.
 */
public class WeightedFFNeighborhoodInit extends MPCKMeansInitializer {

  /** Holds the ([instance pair] -> [type of constraint]) mapping,
      where the hashed value stores the type of link but the instance
      pair does not hold the type of constraint - it holds (instanceIdx1,
      instanceIdx2, DONT_CARE_LINK). This is done to make lookup easier
      in future.
  */
  protected HashMap m_ConstraintsHash;

  /** Stores the ([instanceIdx] -> [ArrayList of constraints])
      mapping, where the arraylist contains the constraints in which
      instanceIdx is involved. Note that the instance pairs stored in
      the ArrayList have the actual link type.
  */
  protected HashMap m_instanceConstraintHash;

  /** Holds the indices of the points involved in any constraint. */
  protected HashSet m_SeedHash;

  /** Distance metric used for farthest-first computations. */
  protected LearnableMetric m_metric;

  /** Is the objective function increasing or decreasing? Depends on type
   * of metric used: for similarity-based metric, increasing, for distance-based - decreasing. */
  protected boolean m_objFunDecreasing;

  /** Seedable or not (true by default); when false, no constraint-based seeding is done. */
  protected boolean m_Seedable = true;

  /** Number of clusters formed so far during initialization. */
  protected int m_NumCurrentClusters = 0;

  /**
   * Holds the random number generator used in various parts of the code.
   */
  protected Random m_RandomNumberGenerator;

  /** Temporary variable holding cluster assignments while iterating (-1 == unassigned). */
  protected int [] m_ClusterAssignments;

  /** Array holding the sum of the instances in each cluster (unnormalized centroid). */
  Instance [] m_SumOfClusterInstances;

  /** Instances without labels. */
  protected Instances m_Instances;

  /** Instances with labels (used only for diagnostic printing of class values). */
  protected Instances m_TotalTrainWithLabels;

  /** Adjacency list of the undirected must-link graph, indexed by instance. */
  protected HashSet[] m_AdjacencyList;

  /** Neighbor list for active learning: points in each cluster neighborhood. */
  protected HashSet[] m_NeighborSets;

  /**
   * Holds the global centroid (used for random perturbation seeding).
   */
  protected Instance m_GlobalCentroid;

  /**
   * Holds the default perturbation value for randomPerturbInit.
   */
  protected double m_DefaultPerturb = 0.7;

  /** Enables verbose diagnostic output. */
  protected boolean m_verbose = false;

  /** Vertex colors for DFS: WHITE = undiscovered, GRAY = discovered, BLACK = finished. */
  final int WHITE = 300;
  final int GRAY = 301;
  final int BLACK = 302;

  /** Number of neighborhood sets found by DFS. */
  protected int m_numNeighborhoods;

  /** Default constructor. */
  public WeightedFFNeighborhoodInit() {
    super();
  }

  /** Initialize with a clusterer whose state (instances, constraints, metric) will be used. */
  public WeightedFFNeighborhoodInit (MPCKMeans clusterer) {
    super(clusterer);
  }
/**
 * The main method for initializing cluster centroids.
 *
 * Builds an undirected must-link graph from the constraint hash, finds its
 * connected components (neighborhoods) via DFS, then either:
 * - selects m_numClusters neighborhoods as initial centroids (randomly for
 *   offline metrics, by weighted farthest-first otherwise) when enough
 *   neighborhoods exist, or
 * - uses all neighborhoods and seeds the remaining clusters by randomly
 *   perturbing the global centroid.
 * Side effects: updates the clusterer's cluster assignments and, when
 * enabled, adds transitively inferred ML/CL constraints.
 *
 * @return the initial cluster centroids
 * @throws Exception if initialization fails
 */
public Instances initialize() throws Exception {
  System.out.println("Num clusters = " + m_numClusters);
  // pull shared state from the underlying clusterer
  m_Instances = m_clusterer.getInstances();
  m_TotalTrainWithLabels = m_clusterer.getTotalTrainWithLabels();
  m_ConstraintsHash = m_clusterer.getConstraintsHash();
  m_instanceConstraintHash = m_clusterer.getInstanceConstraintsHash();
  m_SeedHash = m_clusterer.getSeedHash();
  m_Seedable = m_clusterer.getSeedable();
  m_metric = m_clusterer.getMetric();
  m_RandomNumberGenerator = m_clusterer.getRandomNumberGenerator();
  m_objFunDecreasing = m_clusterer.getMetric().isDistanceBased();
  m_NeighborSets = new HashSet[m_Instances.numInstances()];
  m_AdjacencyList = new HashSet[m_Instances.numInstances()];
  m_ClusterAssignments = new int[m_Instances.numInstances()];
  boolean m_isOfflineMetric = m_clusterer.getIsOfflineMetric();
  Instances m_ClusterCentroids = m_clusterer.getClusterCentroids();
  boolean m_useTransitiveConstraints = m_clusterer.getUseTransitiveConstraints();
  // sparse vs. dense representation is decided by the first instance
  boolean m_isSparseInstance = m_Instances.instance(0) instanceof SparseInstance;
  if (m_isSparseInstance) {
    m_SumOfClusterInstances = new SparseInstance[m_Instances.numInstances()];
  } else {
    m_SumOfClusterInstances = new Instance[m_Instances.numInstances()];
  }
  for (int i = 0; i < m_Instances.numInstances(); i++) {
    m_ClusterAssignments[i] = -1; // -1 == unassigned
  }
  if (m_ConstraintsHash != null) {
    Set pointPairs = (Set) m_ConstraintsHash.keySet();
    Iterator pairItr = pointPairs.iterator();
    System.out.println("In non-active init");
    // build the undirected must-link adjacency list from the constraint pairs
    while (pairItr.hasNext()) {
      InstancePair pair = (InstancePair) pairItr.next();
      int linkType = ((Integer) m_ConstraintsHash.get(pair)).intValue();
      if (m_verbose)
        System.out.println(pair + ": type = " + linkType);
      if (linkType == InstancePair.MUST_LINK) { // mainly concerned with MUST-LINK
        if (m_AdjacencyList[pair.first] == null) {
          m_AdjacencyList[pair.first] = new HashSet();
        }
        if (!m_AdjacencyList[pair.first].contains(new Integer(pair.second))) {
          m_AdjacencyList[pair.first].add(new Integer(pair.second));
        }
        if (m_AdjacencyList[pair.second] == null) {
          m_AdjacencyList[pair.second] = new HashSet();
        }
        if (!m_AdjacencyList[pair.second].contains(new Integer(pair.first))) {
          m_AdjacencyList[pair.second].add(new Integer(pair.first));
        }
      }
    }
    // DFS for finding connected components, updates required stats
    DFS();
  }
  // BUGFIX: the original dereferenced m_ConstraintsHash unconditionally here,
  // although the block above explicitly allows it to be null.
  if (m_ConstraintsHash != null && m_ConstraintsHash.size() > 0) {
    // (re)train metrics that are learned offline from the constraints
    if (m_metric instanceof BarHillelMetric) {
      System.out.println("Starting building BarHillel metric ...\n\n");
      ((BarHillelMetric) m_metric).buildAttributeMatrix(m_Instances, m_ClusterAssignments);
      System.out.println("Finished building BarHillel metric!!\n\n");
    } else if (m_metric instanceof XingMetric) {
      ((XingMetric) m_metric).buildAttributeMatrix(m_Instances, m_ConstraintsHash);
    } else if (m_metric instanceof BarHillelMetricMatlab) {
      System.out.println("Starting building BarHillelMatlab metric ...\n\n");
      ((BarHillelMetricMatlab) m_metric).buildAttributeMatrix(m_Instances, m_ClusterAssignments);
      System.out.println("Finished building BarHillelMatlab metric!!\n\n");
    }
  }
  if (!m_Seedable) { // don't perform any seeding, initialize from random
    m_NumCurrentClusters = 0;
    System.out.println("Not performing any seeding!");
    for (int i = 0; i < m_Instances.numInstances(); i++) {
      m_ClusterAssignments[i] = -1;
    }
  }
  // if the required number of clusters has been obtained, wrap-up
  if (m_NumCurrentClusters >= m_numClusters) {
    System.out.println("Got the required number of clusters ...");
    System.out.println("num clusters: " + m_numClusters + ", num current clusters: " + m_NumCurrentClusters);
    int[] clusterSizes = new int[m_NumCurrentClusters];
    for (int i = 0; i < m_NumCurrentClusters; i++) {
      if (m_verbose) {
        System.out.println("Neighbor set: " + i + " has size: " + m_NeighborSets[i].size());
      }
      clusterSizes[i] = -m_NeighborSets[i].size(); // negated so Utils.sort yields descending sizes
    }
    int[] indices = Utils.sort(clusterSizes);
    System.out.println("Total neighborhoods: " + m_NumCurrentClusters + "; Sorted neighborhood sizes: ");
    // store number of neighborhoods after DFS
    m_numNeighborhoods = m_NumCurrentClusters;
    for (int i = 0; i < m_NumCurrentClusters; i++) {
      System.out.print(m_NeighborSets[indices[i]].size());
      printNeighborhoodLabel(indices[i]);
    }
    Instance[] clusterCentroids = new Instance[m_NumCurrentClusters];
    // build one weighted centroid per neighborhood for the weighted farthest-first selection
    for (int i = 0; i < m_NumCurrentClusters; i++) {
      if (m_isSparseInstance) {
        clusterCentroids[i] = new SparseInstance(m_SumOfClusterInstances[i]);
      } else {
        clusterCentroids[i] = new Instance(m_SumOfClusterInstances[i]);
      }
      clusterCentroids[i].setWeight(m_NeighborSets[i].size()); // setting weight = neighborhood size
      clusterCentroids[i].setDataset(m_Instances);
      if (!m_objFunDecreasing) {
        ClusterUtils.normalize(clusterCentroids[i]);
      } else {
        ClusterUtils.normalizeByWeight(clusterCentroids[i]);
      }
    }
    HashSet selectedNeighborhoods = new HashSet((int) (m_numClusters / 0.75 + 10));
    System.out.println("Initializing " + m_numClusters + " clusters");
    if (m_isOfflineMetric) {
      System.out.println("Offline metric - using random neighborhoods");
      for (int i = 0; i < m_numClusters; i++) {
        // rejection-sample a not-yet-selected neighborhood; terminates because
        // this branch only runs when m_numNeighborhoods >= m_numClusters
        int next = m_RandomNumberGenerator.nextInt(m_numNeighborhoods);
        while (selectedNeighborhoods.contains(new Integer(next))) {
          next = m_RandomNumberGenerator.nextInt(m_numNeighborhoods);
        }
        System.out.print("Neighborhood selected: " + next);
        printNeighborhoodLabel(next);
        selectedNeighborhoods.add(new Integer(next));
      }
    } else {
      System.out.println("Learnable metric - using weighted FF to select neighborhoods");
      selectedNeighborhoods.add(new Integer(indices[0])); // initializing with largest neighborhood
      System.out.print("First neighborhood selected: " + m_NeighborSets[indices[0]].size());
      printNeighborhoodLabel(indices[0]);
      System.out.println("Initializing rest by weightedFarthestFromSetOfPoints");
      for (int i = 1; i < m_numClusters; i++) {
        int next = weightedFarthestFromSetOfPoints(clusterCentroids, selectedNeighborhoods, null);
        selectedNeighborhoods.add(new Integer(next));
        System.out.print("Neighborhood selected: " + m_NeighborSets[next].size());
        printNeighborhoodLabel(next);
      }
    }
    // compute centroids of m_numClusters clusters from selectedNeighborhoods
    m_ClusterCentroids = new Instances(m_Instances, m_numClusters);
    Iterator neighborhoodIter = selectedNeighborhoods.iterator();
    int num = 0; // running cluster number
    while (neighborhoodIter.hasNext()) {
      int i = ((Integer) neighborhoodIter.next()).intValue();
      if (m_SumOfClusterInstances[i] != null) {
        if (m_verbose) {
          System.out.println("Normalizing instance " + i);
        }
        if (!m_objFunDecreasing) {
          ClusterUtils.normalize(m_SumOfClusterInstances[i]);
        } else {
          ClusterUtils.normalizeByWeight(m_SumOfClusterInstances[i]);
        }
      }
      Iterator iter = m_NeighborSets[i].iterator();
      while (iter.hasNext()) { // assign points of new cluster
        int instNumber = ((Integer) iter.next()).intValue();
        if (m_verbose) {
          System.out.println("Assigning " + instNumber + " to cluster: " + num);
        }
        m_ClusterAssignments[instNumber] = num;
      }
      // BUGFIX: set the dataset on the centroid that is actually added below
      // (index i); the original used index num, which need not equal the
      // selected neighborhood index and could reference a null/unrelated slot.
      m_SumOfClusterInstances[i].setDataset(m_Instances);
      m_ClusterCentroids.add(m_SumOfClusterInstances[i]);
      num++;
    }
    // points in neighborhoods that were not selected as centroids go back to unassigned
    for (int j = 0; j < m_NumCurrentClusters; j++) {
      int i = indices[j];
      if (!selectedNeighborhoods.contains(new Integer(i))) { // not assigned as centroid
        Iterator iter = m_NeighborSets[i].iterator();
        while (iter.hasNext()) {
          int instNumber = ((Integer) iter.next()).intValue();
          if (m_verbose) {
            System.out.println("Assigning " + instNumber + " to cluster -1");
          }
          m_ClusterAssignments[instNumber] = -1;
        }
      }
    }
    m_NumCurrentClusters = m_numClusters;
    // adding other inferred ML and CL links
    if (m_useTransitiveConstraints) {
      addMLAndCLTransitiveClosure(indices);
      System.out.println("Adding constraints by transitive closure");
    } else {
      System.out.println("Not adding constraints by transitive closure");
    }
  } else if (m_NumCurrentClusters < m_numClusters) {
    // too few neighborhoods: use all of them and seed the rest at random
    // adding other inferred ML and CL links
    if (m_useTransitiveConstraints) {
      addMLAndCLTransitiveClosure(null);
    }
    System.out.println("Found " + m_NumCurrentClusters + " neighborhoods ...");
    System.out.println("Will have to start " + (m_numClusters - m_NumCurrentClusters) + " clusters at random");
    // compute centroids of m_NumCurrentClusters clusters
    m_ClusterCentroids = new Instances(m_Instances, m_numClusters);
    for (int i = 0; i < m_NumCurrentClusters; i++) {
      if (m_SumOfClusterInstances[i] != null) {
        if (m_verbose) {
          System.out.println("Normalizing cluster center " + i);
        }
        if (!m_objFunDecreasing) {
          ClusterUtils.normalize(m_SumOfClusterInstances[i]);
        } else {
          ClusterUtils.normalizeByWeight(m_SumOfClusterInstances[i]);
        }
      }
      m_SumOfClusterInstances[i].setDataset(m_Instances);
      m_ClusterCentroids.add(m_SumOfClusterInstances[i]);
    }
    // find global centroid
    double[] globalValues = new double[m_Instances.numAttributes()];
    if (m_isSparseInstance) {
      globalValues = ClusterUtils.meanOrMode(m_Instances); // uses fast meanOrMode
    } else {
      for (int j = 0; j < m_Instances.numAttributes(); j++) {
        globalValues[j] = m_Instances.meanOrMode(j); // uses usual meanOrMode
      }
    }
    System.out.println("Done calculating global centroid");
    // global centroid is dense in SPKMeans
    m_GlobalCentroid = new Instance(1.0, globalValues);
    m_GlobalCentroid.setDataset(m_Instances);
    if (!m_objFunDecreasing) {
      ClusterUtils.normalizeInstance(m_GlobalCentroid);
    }
    // the remaining centroids are random perturbations of the global centroid
    for (int i = m_NumCurrentClusters; i < m_numClusters; i++) {
      double[] values = new double[m_Instances.numAttributes()];
      double normalizer = 0;
      for (int j = 0; j < m_Instances.numAttributes(); j++) {
        values[j] = m_GlobalCentroid.value(j) * (1 + m_DefaultPerturb * (m_RandomNumberGenerator.nextFloat() - 0.5));
        normalizer += values[j] * values[j];
      }
      if (!m_objFunDecreasing) {
        // project the perturbed centroid back onto the unit sphere
        normalizer = Math.sqrt(normalizer);
        for (int j = 0; j < m_Instances.numAttributes(); j++) {
          values[j] /= normalizer;
        }
      }
      if (m_isSparseInstance) {
        m_ClusterCentroids.add(new SparseInstance(1.0, values)); // sparse for consistency with other cluster centroids
      } else {
        m_ClusterCentroids.add(new Instance(1.0, values));
      }
    }
    System.out.println("Done calculating random centroids by perturbation");
    m_NumCurrentClusters = m_numClusters;
  }
  m_clusterer.setClusterAssignments(m_ClusterAssignments);
  return m_ClusterCentroids;
}

/**
 * Prints the class value of one member of the given neighborhood when class
 * labels are available, otherwise just a newline (diagnostic output helper).
 */
private void printNeighborhoodLabel(int neighborhood) {
  if (m_TotalTrainWithLabels.classIndex() >= 0) {
    System.out.println("(" + m_TotalTrainWithLabels.instance(((Integer) (m_NeighborSets[neighborhood].iterator().next())).intValue()).classValue() + ")\t");
  } else {
    System.out.println();
  }
}
/**
 * Finds the connected components of the must-link graph with a depth-first
 * search. Each component becomes one neighborhood in m_NeighborSets, and
 * m_NumCurrentClusters ends up holding the number of components found.
 */
protected void DFS() throws Exception {
  int numInstances = m_Instances.numInstances();
  int[] colors = new int[numInstances];
  Arrays.fill(colors, WHITE); // all vertices start undiscovered
  m_NumCurrentClusters = 0;
  for (int vertex = 0; vertex < numInstances; vertex++) {
    // Only vertices participating in at least one must-link seed a
    // neighborhood (remove the adjacency check to enable farthest-first
    // as the default initialization instead of randomPerturbInit).
    if (m_AdjacencyList[vertex] == null || colors[vertex] != WHITE) {
      continue;
    }
    m_NeighborSets[m_NumCurrentClusters] = new HashSet();
    DFS_VISIT(vertex, colors); // collects the whole component containing vertex
    m_NumCurrentClusters++;
  }
}
/**
 * Recursive DFS visit: explores every vertex reachable from u through the
 * must-link graph, then records u into the current neighborhood — updating
 * its assignment, member set, and running instance sum.
 */
protected void DFS_VISIT(int u, int[] vertexColor) throws Exception {
  vertexColor[u] = GRAY; // discovered, exploration in progress
  HashSet neighbors = m_AdjacencyList[u];
  if (neighbors != null) {
    for (Iterator it = neighbors.iterator(); it.hasNext(); ) {
      int neighbor = ((Integer) it.next()).intValue();
      if (vertexColor[neighbor] == WHITE) { // still undiscovered
        DFS_VISIT(neighbor, vertexColor);
      }
    }
  }
  // record u's stats once its whole reachable set has been explored
  m_ClusterAssignments[u] = m_NumCurrentClusters;
  m_NeighborSets[m_NumCurrentClusters].add(new Integer(u));
  m_SumOfClusterInstances[m_NumCurrentClusters] =
      ClusterUtils.sumWithInstance(m_SumOfClusterInstances[m_NumCurrentClusters],
                                   m_Instances.instance(u), m_Instances);
  vertexColor[u] = BLACK; // finished
}
/**
 * Finds the point in setOfPoints which has the weighted max min-distance
 * from the set visitedPoints (for similarity metrics, the weighted
 * min max-similarity).
 *
 * Weighted farthest-first: for each candidate x not in visitedPoints and
 * not in eliminationSet, score(x) = min over f in visitedPoints of the
 * weighted d(x, f); the candidate maximizing the score is selected.
 * Ties are broken by sampling uniformly from ALL remaining candidates
 * (original behavior, deliberately not restricted to the tied set).
 *
 * @param setOfPoints    candidate points (weights used for scaling)
 * @param visitedPoints  indices of already-selected points
 * @param eliminationSet indices excluded from selection (may be null)
 * @return index of the selected point in setOfPoints
 * @throws Exception if no candidate point remains
 */
int weightedFarthestFromSetOfPoints(Instance[] setOfPoints, HashSet visitedPoints, HashSet eliminationSet)
    throws Exception {
  if (visitedPoints.size() == 0) {
    // no reference set yet: every point is equally far, pick at random
    int point = m_RandomNumberGenerator.nextInt(setOfPoints.length);
    if (m_verbose)
      System.out.println("First point selected: " + point);
    return point;
  }
  double minSimilaritySoFar = Double.POSITIVE_INFINITY;
  double maxDistanceSoFar = Double.NEGATIVE_INFINITY;
  ArrayList bestPoints = new ArrayList(); // candidates tied at the current best score
  for (int i = 0; i < setOfPoints.length; i++) {
    // candidates must belong to neither visitedPoints nor eliminationSet
    if (visitedPoints.contains(new Integer(i))) {
      continue;
    }
    if (eliminationSet != null && eliminationSet.contains(new Integer(i))) {
      continue;
    }
    Instance inst = setOfPoints[i];
    double minDistanceFromSet = Double.POSITIVE_INFINITY;
    double maxSimilarityFromSet = Double.NEGATIVE_INFINITY;
    Iterator iter = visitedPoints.iterator();
    while (iter.hasNext()) {
      Instance pointInSet = setOfPoints[((Integer) iter.next()).intValue()];
      if (!m_objFunDecreasing) {
        // similarity is divided by the geometric mean of the weights
        double sim = m_metric.similarity(inst, pointInSet) / Math.sqrt(pointInSet.weight() * inst.weight());
        if (sim > maxSimilarityFromSet) {
          maxSimilarityFromSet = sim;
        }
      } else {
        // distance is multiplied by the geometric mean of the weights
        double dist;
        if (m_metric instanceof KL) {
          dist = ((KL) m_metric).distanceJS(inst, pointInSet) * Math.sqrt(pointInSet.weight() * inst.weight());
        } else {
          dist = m_metric.distance(inst, pointInSet) * Math.sqrt(pointInSet.weight() * inst.weight());
        }
        if (dist < minDistanceFromSet) {
          minDistanceFromSet = dist;
        }
      }
    }
    if (!m_objFunDecreasing) {
      if (maxSimilarityFromSet == minSimilaritySoFar) {
        bestPoints.add(new Integer(i)); // tied with current best
      } else if (maxSimilarityFromSet < minSimilaritySoFar) {
        minSimilaritySoFar = maxSimilarityFromSet;
        bestPoints.clear();
        bestPoints.add(new Integer(i));
      }
    } else {
      if (minDistanceFromSet == maxDistanceSoFar) {
        // BUGFIX: the original performed the reversed (useless) assignment
        // "minDistanceFromSet = maxDistanceSoFar" here; on a tie nothing
        // needs updating — the candidate just joins the tied set.
        bestPoints.add(new Integer(i));
        if (m_verbose) {
          System.out.println("Additional point added: " + i + " with distance: " + maxDistanceSoFar);
        }
      } else if (minDistanceFromSet > maxDistanceSoFar) {
        maxDistanceSoFar = minDistanceFromSet;
        bestPoints.clear();
        bestPoints.add(new Integer(i));
        if (m_verbose) {
          System.out.println("Farthest point from set is: " + i + " with distance: " + maxDistanceSoFar);
        }
      }
    }
  }
  // BUGFIX: when every point is visited or eliminated the original threw an
  // IndexOutOfBoundsException (or could spin forever in the random retry
  // loop below); fail with a meaningful message instead.
  if (bestPoints.isEmpty()) {
    throw new Exception("weightedFarthestFromSetOfPoints: no candidate points remain outside visitedPoints/eliminationSet");
  }
  int bestPoint;
  if (bestPoints.size() > 1) { // multiple points, get random from whole set
    bestPoint = m_RandomNumberGenerator.nextInt(setOfPoints.length);
    while ((visitedPoints != null && visitedPoints.contains(new Integer(bestPoint)))
        || (eliminationSet != null && eliminationSet.contains(new Integer(bestPoint)))) {
      bestPoint = m_RandomNumberGenerator.nextInt(setOfPoints.length);
    }
  } else { // only 1 point, fine
    bestPoint = ((Integer) bestPoints.get(0)).intValue();
  }
  if (m_verbose) {
    if (!m_objFunDecreasing) {
      System.out.println("Selected " + bestPoint + " with similarity: " + minSimilaritySoFar);
    } else {
      System.out.println("Selected " + bestPoint + " with distance: " + maxDistanceSoFar);
    }
  }
  return bestPoint;
}
/**
 * Adds inferred ML and CL links to m_ConstraintsHash based on the computed
 * neighborhoods in m_NeighborSets: a must-link between every pair inside a
 * neighborhood, and — whenever at least one cannot-link already exists
 * between two neighborhoods — a cannot-link between every cross pair.
 * Also updates m_instanceConstraintHash and m_SeedHash accordingly, and
 * pushes the instance-constraint hash back to the clusterer.
 *
 * @param indices optional permutation of neighborhood indices (as produced
 *        by Utils.sort); null means use identity ordering
 * @throws Exception if the same instance appears in two neighborhoods
 */
protected void addMLAndCLTransitiveClosure(int[] indices) throws Exception {
  if (m_verbose) {
    for (int j = 0; j < m_NumCurrentClusters; j++) {
      int i = (indices != null) ? indices[j] : j;
      System.out.println("Neighborhood list " + j + " is:");
      System.out.println(m_NeighborSets[i]);
    }
  }
  // add all ML links within clusters
  for (int j = 0; j < m_NumCurrentClusters; j++) {
    int i = (indices != null) ? indices[j] : j;
    if (m_NeighborSets[i] == null) {
      continue;
    }
    Iterator iter1 = m_NeighborSets[i].iterator();
    while (iter1.hasNext()) {
      int first = ((Integer) iter1.next()).intValue();
      Iterator iter2 = m_NeighborSets[i].iterator();
      while (iter2.hasNext()) {
        int second = ((Integer) iter2.next()).intValue();
        if (first < second) { // each unordered pair is handled exactly once
          InstancePair pair = new InstancePair(first, second, InstancePair.DONT_CARE_LINK);
          if (!m_ConstraintsHash.containsKey(pair)) {
            recordInferredConstraint(pair, first, second, InstancePair.MUST_LINK);
          }
        }
      }
    }
  }
  // add all CL links between clusters
  for (int ii = 0; ii < m_NumCurrentClusters; ii++) {
    int i = (indices != null) ? indices[ii] : ii;
    if (m_NeighborSets[i] == null) {
      continue;
    }
    for (int jj = ii + 1; jj < m_NumCurrentClusters; jj++) {
      int j = (indices != null) ? indices[jj] : jj;
      // only infer CLs when at least one CL already connects the two neighborhoods
      if (existsCannotLink(i, j)) {
        Iterator iter1 = m_NeighborSets[i].iterator();
        while (iter1.hasNext()) {
          int index1 = ((Integer) iter1.next()).intValue();
          if (m_NeighborSets[j] != null) {
            Iterator iter2 = m_NeighborSets[j].iterator();
            while (iter2.hasNext()) {
              int index2 = ((Integer) iter2.next()).intValue();
              int first = (index1 < index2) ? index1 : index2;
              int second = (index1 >= index2) ? index1 : index2;
              if (first == second) {
                throw new Exception(" Same instance " + first + " cannot be in cluster: " + i + " and cluster " + j);
              } else { // first < second: add new constraint if absent
                InstancePair pair = new InstancePair(first, second, InstancePair.DONT_CARE_LINK);
                if (!m_ConstraintsHash.containsKey(pair)) {
                  recordInferredConstraint(pair, first, second, InstancePair.CANNOT_LINK);
                }
              }
            }
          }
        }
      }
    }
  }
  m_clusterer.setInstanceConstraintsHash(m_instanceConstraintHash);
}

/**
 * Returns true iff at least one existing cannot-link constraint connects a
 * member of neighborhood i with a member of neighborhood j.
 *
 * @throws Exception if the same instance appears in both neighborhoods
 */
private boolean existsCannotLink(int i, int j) throws Exception {
  Iterator iter1 = m_NeighborSets[i].iterator();
  while (iter1.hasNext()) {
    int index1 = ((Integer) iter1.next()).intValue();
    if (m_NeighborSets[j] != null) {
      Iterator iter2 = m_NeighborSets[j].iterator();
      while (iter2.hasNext()) {
        int index2 = ((Integer) iter2.next()).intValue();
        int first = (index1 < index2) ? index1 : index2;
        int second = (index1 >= index2) ? index1 : index2;
        if (first == second) {
          throw new Exception(" Same instance " + first + " cannot be in cluster: " + i + " and cluster " + j);
        } else { // first < second: constraints are keyed with first < second
          InstancePair pair = new InstancePair(first, second, InstancePair.DONT_CARE_LINK);
          if (m_ConstraintsHash.containsKey(pair)) {
            return true; // found one CL between the neighborhoods
          }
        }
      }
    }
  }
  return false;
}

/**
 * Records one newly inferred constraint: stores linkType under lookupPair
 * (a DONT_CARE_LINK key) in m_ConstraintsHash, appends the typed pair to
 * both instances' lists in m_instanceConstraintHash, and adds both
 * instances to m_SeedHash. Extracted from the previously duplicated
 * ML/CL bookkeeping blocks.
 */
private void recordInferredConstraint(InstancePair lookupPair, int first, int second, int linkType) {
  m_ConstraintsHash.put(lookupPair, new Integer(linkType));
  String tag = (linkType == InstancePair.MUST_LINK) ? "ML" : "CL";
  if (m_verbose) {
    System.out.println("Adding inferred " + tag + " (" + lookupPair.first + "," + lookupPair.second + ")");
  }
  // hash the constraints for the instances involved
  Integer firstInt = new Integer(first);
  Integer secondInt = new Integer(second);
  InstancePair typedPair = new InstancePair(first, second, linkType);
  Object constraintList1 = m_instanceConstraintHash.get(firstInt);
  if (constraintList1 == null) {
    ArrayList constraintList = new ArrayList();
    constraintList.add(typedPair);
    m_instanceConstraintHash.put(firstInt, constraintList);
  } else {
    ((ArrayList) constraintList1).add(typedPair);
  }
  Object constraintList2 = m_instanceConstraintHash.get(secondInt);
  if (constraintList2 == null) {
    ArrayList constraintList = new ArrayList();
    constraintList.add(typedPair);
    m_instanceConstraintHash.put(secondInt, constraintList);
  } else {
    ((ArrayList) constraintList2).add(typedPair);
  }
  if (m_verbose) {
    System.out.println("Adding inferred " + tag + " link: " + lookupPair);
  }
  if (!m_SeedHash.contains(firstInt)) {
    m_SeedHash.add(firstInt);
  }
  if (!m_SeedHash.contains(secondInt)) {
    m_SeedHash.add(secondInt);
  }
}
/**
 * Parses a given list of options. Currently unimplemented: all options
 * are ignored.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions (String[] options)
throws Exception {
// TODO
}
/**
 * Returns an enumeration describing the available options.
 * Currently unimplemented and always returns null.
 *
 * @return null (no options are described yet)
 */
public Enumeration listOptions () {
// TODO
return null;
}
/**
 * Gets the current option settings as an array of strings: the "-N"
 * flag followed by the number of clusters, padded with empty strings
 * to a fixed length of 10.
 *
 * @return an array of strings suitable for passing to setOptions
 */
public String [] getOptions () {
  String[] options = new String[10];
  options[0] = "-N";
  options[1] = "" + m_numClusters;
  // pad the unused slots with empty strings, as WEKA option arrays expect
  Arrays.fill(options, 2, options.length, "");
  return options;
}
}