/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.clustering.constrained.constraints;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Set;
import java.util.TreeSet;
import com.rapidminer.operator.learner.clustering.Cluster;
/**
* This is a specialized ClusterConstraintList to keep LinkClusterConstraints that includes an additional mapping from object ids to lists of LinkClusterConstraints to allow efficient tests, if an
* object is part of a link constraint.
*
* @author Alexander Daxenberger
* @version $Id: LinkClusterConstraintList.java,v 1.8 2008/09/12 10:31:46 tobiasmalbrecht Exp $
*/
public class LinkClusterConstraintList extends ClusterConstraintList {
private static final long serialVersionUID = 8137809668796376937L;
private static class SetComparator implements Comparator<Set>, Serializable {
private static final long serialVersionUID = 6940643923899884839L;
public int compare(Set s1, Set s2) {
if (s1.size() < s2.size())
return 1;
else if (s1.size() > s2.size())
return -1;
else if (s1 != s2)
return 1;
else
return 0;
}
}
private HashMap<String, ArrayList<LinkClusterConstraint>> idConstraintsMap;
public static final float initialSizeOfIdMapFactor = 1.5f;
public static final int minSizeOfConstraintList = 4;
public LinkClusterConstraintList(String name) {
this(name, 200);
}
public LinkClusterConstraintList(String name, int initialCapacity) {
super(name, initialCapacity);
this.idConstraintsMap = new HashMap<String, ArrayList<LinkClusterConstraint>>((int) ((initialCapacity * initialSizeOfIdMapFactor) / 0.75f), 0.75f);
}
/**
* Returns a list of all LinkClusterConstraints that involve the object with id 'id'
*
* @param id
*/
public ArrayList<LinkClusterConstraint> getLinkConstraintsFor(String id) {
return idConstraintsMap.get(id);
}
/**
* Returns a list of all LinkClusterConstraints that involve objects of the cluster 'c'.
*/
public ArrayList getLinkConstraintsFor(Cluster c) {
HashSet<LinkClusterConstraint> set = new HashSet<LinkClusterConstraint>(this.constraintList.size(), 0.75f);
LinkClusterConstraint lcc;
if (c.getNumberOfObjects() > this.constraintList.size()) {
for (int i = 0; i < this.constraintList.size(); i++) {
lcc = (LinkClusterConstraint) this.constraintList.get(i);
if ((c.contains(lcc.getId0())) || (c.contains(lcc.getId1())))
set.add(lcc);
}
} else {
Iterator<String> clusterIdIterator = c.getObjects();
while (clusterIdIterator.hasNext()) {
String id = clusterIdIterator.next();
ArrayList<LinkClusterConstraint> list = idConstraintsMap.get(id);
if (list != null) {
for (int i = 0; i < list.size(); i++) {
lcc = list.get(i);
set.add(lcc);
}
}
}
}
ArrayList<LinkClusterConstraint> list = new ArrayList<LinkClusterConstraint>(set.size());
Iterator<LinkClusterConstraint> setIterator = set.iterator();
while (setIterator.hasNext()) {
list.add(setIterator.next());
}
return list;
}
/**
* Returns a list of sets of object ids that form the connected components in the graph spanned by the Must-Link-constraints (neighbourhoodsets).
*/
public ArrayList<Set<String>> getNeighbourhoodSets() {
LinkedList<LinkClusterConstraint> queue = new LinkedList<LinkClusterConstraint>();
LinkedList<LinkClusterConstraint> conList = new LinkedList<LinkClusterConstraint>();
HashSet<LinkClusterConstraint> seenConstraint = new HashSet<LinkClusterConstraint>(this.constraintList.size() + 1, 1.0f);
HashSet<String> neighbourhood;
LinkClusterConstraint lcc1;
LinkClusterConstraint lcc2;
TreeSet<Set<String>> sets = new TreeSet<Set<String>>(new SetComparator());
for (int i = 0; i < this.constraintList.size(); i++) {
lcc1 = (LinkClusterConstraint) this.constraintList.get(i);
if ((lcc1.getType() == LinkClusterConstraint.MUST_LINK) && (!seenConstraint.contains(lcc1))) {
conList.clear();
queue.clear();
queue.add(lcc1);
seenConstraint.add(lcc1);
while (queue.size() > 0) {
lcc1 = queue.removeFirst();
conList.add(lcc1);
ArrayList<LinkClusterConstraint> list = idConstraintsMap.get(lcc1.getId0());
if (list != null) {
for (int j = 0; j < list.size(); j++) {
lcc2 = list.get(j);
if ((lcc2.getType() == LinkClusterConstraint.MUST_LINK) && (!seenConstraint.contains(lcc2))) {
queue.add(lcc2);
seenConstraint.add(lcc2);
}
}
}
list = idConstraintsMap.get(lcc1.getId1());
if (list != null) {
for (int j = 0; j < list.size(); j++) {
lcc2 = list.get(j);
if ((lcc2.getType() == LinkClusterConstraint.MUST_LINK) && (!seenConstraint.contains(lcc2))) {
queue.add(lcc2);
seenConstraint.add(lcc2);
}
}
}
}
neighbourhood = new HashSet<String>(2 * conList.size(), 0.75f);
Iterator<LinkClusterConstraint> constraintIterator = conList.iterator();
while (constraintIterator.hasNext()) {
lcc1 = constraintIterator.next();
neighbourhood.add(lcc1.getId0());
neighbourhood.add(lcc1.getId1());
}
sets.add(neighbourhood);
}
}
ArrayList<Set<String>> list = new ArrayList<Set<String>>(sets.size());
Iterator<Set<String>> setsIterator = sets.iterator();
while (setsIterator.hasNext()) {
list.add(setsIterator.next());
}
return list;
}
public boolean addConstraint(ClusterConstraint cc) {
LinkClusterConstraint lcc;
if (cc instanceof LinkClusterConstraint) {
lcc = (LinkClusterConstraint) cc;
if (super.addConstraint(lcc)) {
this.addConstraintForId(lcc.getId0(), lcc);
this.addConstraintForId(lcc.getId1(), lcc);
return true;
}
}
return false;
}
public ClusterConstraint removeConstraint(ClusterConstraint cc) {
LinkClusterConstraint lcc;
if (cc instanceof LinkClusterConstraint) {
lcc = (LinkClusterConstraint) super.removeConstraint(cc);
if (lcc != null) {
this.removeConstraintForId(lcc.getId0(), lcc);
this.removeConstraintForId(lcc.getId1(), lcc);
return lcc;
}
}
return null;
}
/**
* Adds a LinkClusterConstraint for an object id to the list pointed to by the idConstraintsMap.
*
* @param id
* @param lcc
*/
private void addConstraintForId(String id, LinkClusterConstraint lcc) {
ArrayList<LinkClusterConstraint> list = this.idConstraintsMap.get(id);
if (list == null) {
list = new ArrayList<LinkClusterConstraint>(calculateInitialConstraintListSize());
idConstraintsMap.put(id, list);
}
list.add(lcc);
}
/**
* Removes a LinkClusterConstraint for an object id from the list pointed to by the idConstraintsMap.
*
* @param id
* @param lcc
*/
private boolean removeConstraintForId(String id, LinkClusterConstraint lcc) {
ArrayList list;
list = this.idConstraintsMap.get(id);
if (list != null)
return list.remove(lcc);
else
return false;
}
/**
* Should calculate a reasonable value for the initial size of a LinkClusterConstraint list used by the idConstraintsMap to avoid creating too large ArrayLists and save memory.
*/
private int calculateInitialConstraintListSize() {
Iterator iter;
ArrayList list;
int a = 0;
if (this.idConstraintsMap.size() > 0) {
iter = this.idConstraintsMap.values().iterator();
while (iter.hasNext()) {
list = (ArrayList) iter.next();
a += list.size();
}
a = a / this.idConstraintsMap.size();
}
if (a < LinkClusterConstraintList.minSizeOfConstraintList)
return LinkClusterConstraintList.minSizeOfConstraintList;
else
return a;
}
}