/* * RapidMiner * * Copyright (C) 2001-2008 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.learner.clustering.constrained.constraints; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.Iterator; import java.util.List; import com.rapidminer.example.Attribute; import com.rapidminer.example.Example; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.Tools; import com.rapidminer.operator.IOObject; import com.rapidminer.operator.InputDescription; import com.rapidminer.operator.Operator; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeCategory; import com.rapidminer.parameter.ParameterTypeDouble; import com.rapidminer.parameter.ParameterTypeInt; import com.rapidminer.tools.RandomGenerator; /** * Creates a ClusterConstraintList of the specified type from a (possibly partially) labeled ExampleSet. For the type * 'link' you can choose, if you want the LinkClusterConstraints to be created randomly or orderly, always bounded by * the maximal number of constraints to create. Choosing 'random walk' the Must-Link-constraints for each label will * form a connected component. * * @author Alexander Daxenberger * @version $Id: ExampleSet2ClusterConstraintList.java,v 1.10 2008/09/12 10:31:46 tobiasmalbrecht Exp $ */ public class ExampleSet2ClusterConstraintList extends Operator { private RandomGenerator randomGenerator; private int maxmust; private int maxcannot; private double weight; public static final String[] TYPES = { "link" }; public static final int TYPE_LINK = 0; public static final String[] MODE_LINK = { "random", "random walk", "orderly" }; public static final int MODE_LINK_RND = 0; public static final int MODE_LINK_RND_WALK = 1; public static final int MODE_LINK_ORDERLY = 2; /** The parameter name for "Use the given random seed instead of global random numbers (-1: use global)" */ public static final String PARAMETER_LOCAL_RANDOM_SEED = "local_random_seed"; public ExampleSet2ClusterConstraintList(OperatorDescription description) { super(description); } public Class<?>[] getInputClasses() { return new Class[] { ExampleSet.class }; } public Class<?>[] getOutputClasses() { return new Class[] { ClusterConstraintList.class }; } public InputDescription getInputDescription(Class cls) { if (ExampleSet.class.isAssignableFrom(cls)) { return new InputDescription(cls, true, true); } else { return super.getInputDescription(cls); } } public List<ParameterType> getParameterTypes() { List<ParameterType> types = super.getParameterTypes(); types.add(new ParameterTypeCategory("type", "the type of constraints to create", ExampleSet2ClusterConstraintList.TYPES, 0)); types.add(new ParameterTypeCategory("link_mode", "the policy how to choose link constraints", ExampleSet2ClusterConstraintList.MODE_LINK, 0)); types.add(new ParameterTypeInt("link_max_must", "the maximal number of MUST_LINK constraints to create", 0, Integer.MAX_VALUE, 100)); types.add(new ParameterTypeInt("link_max_cannot", "the maximal number of CANNOT_LINK constraints to create", 0, Integer.MAX_VALUE, 100)); types.add(new ParameterTypeDouble("link_weight", "the global weight of the created link constraints", 0.0, Double.POSITIVE_INFINITY, 1.0)); types.add(new ParameterTypeInt(PARAMETER_LOCAL_RANDOM_SEED, "Use the given random seed instead of global random numbers (-1: use global)", -1, Integer.MAX_VALUE, -1)); return types; } public IOObject[] apply() throws OperatorException { // initializing random generator int seed = getParameterAsInt(PARAMETER_LOCAL_RANDOM_SEED); if (seed == -1 ) { randomGenerator = RandomGenerator.getGlobalRandomGenerator(); } else { randomGenerator = RandomGenerator.getRandomGenerator(seed); } LinkClusterConstraintList result = null; HashMap<Double, Collection<Example>> labelMap; ExampleSet es = getInput(ExampleSet.class); // checking and creating attributes if needed Tools.isLabelled(es); Tools.checkAndCreateIds(es); maxmust = this.getParameterAsInt("link_max_must"); maxcannot = this.getParameterAsInt("link_max_cannot"); weight = this.getParameterAsDouble("link_weight"); labelMap = getLabelExamplesMap(es); switch (this.getParameterAsInt("type")) { case TYPE_LINK: switch (this.getParameterAsInt("link_mode")) { case MODE_LINK_RND: result = this.getRandomLinkClusterConstraints(labelMap, es); break; case MODE_LINK_RND_WALK: result = this.getRandomWalkLinkClusterConstraints(labelMap, es); break; case MODE_LINK_ORDERLY: result = this.getOrderlyLinkClusterConstraints(labelMap, es); break; default: throw new OperatorException("Invalid constraint creation mode"); } break; default: throw new OperatorException("Invalid constraint type"); } return new IOObject[] { result }; } /** * Returns a list of LinkClusterConstraints chosen randomly * * @param labelMap * a mapping label -> example * @param es * an ExampleSet * @return * @throws OperatorException */ private LinkClusterConstraintList getRandomLinkClusterConstraints(HashMap<Double, Collection<Example>> labelMap, ExampleSet es) throws OperatorException { LinkClusterConstraintList constraintList; LinkClusterConstraint newConstraint; Attribute id = es.getAttributes().getId(); Example e1; Example e2; boolean added = false; int tries = 0; constraintList = new LinkClusterConstraintList("link cluster constraint set", this.maxmust + this.maxcannot); ArrayList<Collection<Example>> exList = new ArrayList<Collection<Example>>(labelMap.values()); while ((tries < 3) && ((this.maxmust > 0) || (this.maxcannot > 0))) { for (int list1 = 0; list1 < exList.size(); list1++) { for (int list2 = 0; list2 < exList.size(); list2++) { if ((list1 != list2) && (this.maxcannot > 0)) { e1 = getRandomExample(exList.get(list1)); e2 = getRandomExample(exList.get(list2)); while (e1 == e2) { e2 = getRandomExample(exList.get(list2)); } newConstraint = new LinkClusterConstraint(e1.getValueAsString(id), e2.getValueAsString(id), weight, LinkClusterConstraint.CANNOT_LINK); if (constraintList.addConstraint(newConstraint)) { maxcannot--; added = true; } } if (this.maxmust > 0) { e1 = getRandomExample(exList.get(list2)); e2 = getRandomExample(exList.get(list2)); while (e1 == e2) { e2 = getRandomExample(exList.get(list2)); } newConstraint = new LinkClusterConstraint(e1.getValueAsString(id), e2.getValueAsString(id), this.weight, LinkClusterConstraint.MUST_LINK); if (constraintList.addConstraint(newConstraint)) { this.maxmust--; added = true; } } } } if (!added) { tries++; } added = false; } return constraintList; } /** * Returns a list of LinkClusterConstraints chosen randomly. The Must-Link- constraints will form a connected * component * * @param labelMap * a mapping label -> example * @param es * an ExampleSet * @return * @throws OperatorException */ private LinkClusterConstraintList getRandomWalkLinkClusterConstraints(HashMap<Double, Collection<Example>> labelMap, ExampleSet es) throws OperatorException { LinkClusterConstraintList constraintList; LinkClusterConstraint newConstraint; Attribute id = es.getAttributes().getId(); Example[] mustLinkLastExample; Example[][] cannotLinkLastExample; Example e1; Example e2; boolean added = false; int tries = 0; constraintList = new LinkClusterConstraintList("link cluster constraint set", this.maxmust + this.maxcannot); ArrayList<Collection<Example>> exList = new ArrayList<Collection<Example>>(labelMap.values()); mustLinkLastExample = new Example[exList.size()]; cannotLinkLastExample = new Example[exList.size()][exList.size()]; for (int list1 = 0; list1 < exList.size(); list1++) { mustLinkLastExample[list1] = null; for (int list2 = 0; list2 < exList.size(); list2++) { cannotLinkLastExample[list1][list2] = null; } } while ((tries < 3) && ((this.maxmust > 0) || (this.maxcannot > 0))) { for (int list1 = 0; list1 < exList.size(); list1++) { for (int list2 = 0; list2 < exList.size(); list2++) { if ((list1 != list2) && (this.maxcannot > 0)) { if (list1 < list2) { if (cannotLinkLastExample[list1][list2] == null) e2 = this.getRandomExample(exList.get(list2)); else e2 = cannotLinkLastExample[list1][list2]; e1 = this.getRandomExample(exList.get(list1)); while (e1 == e2) e1 = this.getRandomExample(exList.get(list1)); } else { if (cannotLinkLastExample[list1][list2] == null) e1 = this.getRandomExample(exList.get(list1)); else e1 = cannotLinkLastExample[list1][list2]; e2 = this.getRandomExample(exList.get(list2)); while (e1 == e2) e2 = this.getRandomExample(exList.get(list2)); } newConstraint = new LinkClusterConstraint(e1.getValueAsString(id), e2.getValueAsString(id), this.weight, LinkClusterConstraint.CANNOT_LINK); if (constraintList.addConstraint(newConstraint)) { this.maxcannot--; added = true; if (list1 < list2) cannotLinkLastExample[list2][list1] = e1; else cannotLinkLastExample[list2][list1] = e2; } } if (this.maxmust > 0) { if (mustLinkLastExample[list2] == null) e1 = this.getRandomExample(exList.get(list2)); else e1 = mustLinkLastExample[list2]; e2 = this.getRandomExample(exList.get(list2)); while (e1 == e2) e2 = this.getRandomExample(exList.get(list2)); newConstraint = new LinkClusterConstraint(e1.getValueAsString(id), e2.getValueAsString(id), this.weight, LinkClusterConstraint.MUST_LINK); if (constraintList.addConstraint(newConstraint)) { this.maxmust--; added = true; mustLinkLastExample[list2] = e2; } } } } if (!added) { tries++; } added = false; } return constraintList; } /** * Returns a list of LinkClusterConstraints chosen orderly * * @param labelMap * a mapping label -> example * @param es * an ExampleSet * @return * @throws OperatorException */ private LinkClusterConstraintList getOrderlyLinkClusterConstraints(HashMap<Double, Collection<Example>> labelMap, ExampleSet es) throws OperatorException { LinkClusterConstraintList constraintList; LinkClusterConstraint newConstraint; // Collection<Collection<Example>> exLists; Iterator[] mustLinkListIterator; Iterator[][] cannotLinkListIterator; Object[] example; Object examples; Attribute id = es.getAttributes().getId(); boolean hasNext = true; constraintList = new LinkClusterConstraintList("link cluster constraint list", this.maxmust + this.maxcannot); ArrayList<Collection<Example>> exLists = new ArrayList<Collection<Example>>(labelMap.values()); mustLinkListIterator = new Iterator[exLists.size()]; cannotLinkListIterator = new Iterator[exLists.size()][exLists.size()]; for (int list1 = 0; list1 < exLists.size(); list1++) { ArrayList[] newListArray = new ArrayList[2]; newListArray[0] = (ArrayList) exLists.get(list1); newListArray[1] = (ArrayList) exLists.get(list1); mustLinkListIterator[list1] = new ListIteratorsIterator(newListArray, false); for (int list2 = 0; list2 < exLists.size(); list2++) { if (list1 != list2) { newListArray = new ArrayList[2]; newListArray[0] = (ArrayList) exLists.get(list1); newListArray[1] = (ArrayList) exLists.get(list2); cannotLinkListIterator[list1][list2] = new ListIteratorsIterator(newListArray, true); } else cannotLinkListIterator[list1][list2] = null; } } while (hasNext && ((this.maxmust > 0) || (this.maxcannot > 0))) { hasNext = false; for (int list1 = 0; list1 < exLists.size(); list1++) { for (int list2 = 0; list2 < exLists.size(); list2++) { if ((list1 != list2) && (this.maxcannot > 0) && cannotLinkListIterator[list1][list2].hasNext()) { examples = cannotLinkListIterator[list1][list2].next(); example = (Object[]) examples; newConstraint = new LinkClusterConstraint(((Example) example[0]).getValueAsString(id), ((Example) example[1]) .getValueAsString(id), this.weight, LinkClusterConstraint.CANNOT_LINK); if (constraintList.addConstraint(newConstraint)) { this.maxcannot--; } hasNext = true; } if ((this.maxmust > 0) && mustLinkListIterator[list2].hasNext()) { examples = mustLinkListIterator[list2].next(); example = (Object[]) examples; newConstraint = new LinkClusterConstraint(((Example) example[0]).getValueAsString(id), ((Example) example[1]) .getValueAsString(id), this.weight, LinkClusterConstraint.MUST_LINK); if (constraintList.addConstraint(newConstraint)) { this.maxmust--; } hasNext = true; } } } } return constraintList; } /* just for testing purposes */ public Iterator getListIteratorsIterator(List[] list, boolean equalAllowed) { return new ListIteratorsIterator(list, equalAllowed); } private Example getRandomExample(Collection<Example> exampleCollection) { int rnd = randomGenerator.nextIntInRange(0, exampleCollection.size() - 1); ArrayList<Example> examples = new ArrayList<Example>(exampleCollection); return examples.get(rnd); } /** * this method returns a map, mapping the labels on collections of examples with this label. * * @param es * the exampleSet */ private HashMap<Double, Collection<Example>> getLabelExamplesMap(ExampleSet es) { HashMap<Double, Collection<Example>> labelExampleMap = new HashMap<Double, Collection<Example>>(); for (Example example : es) { double currentValue = example.getLabel(); if (!Double.isNaN(currentValue)) { if (labelExampleMap.containsKey(currentValue)) { labelExampleMap.get(currentValue).add(example); } else { ArrayList<Example> newList = new ArrayList<Example>(); newList.add(example); labelExampleMap.put(currentValue, newList); } } } return labelExampleMap; } /** * Iterates over any combination of objects of several lists. Returns these combinations as array of objects. * * @author Alexander Daxenberger * */ private static class ListIteratorsIterator implements Iterator { protected List[] list; protected Iterator[] iter; protected Object[] object; protected boolean allowEqual; protected boolean prepared; protected boolean endReached; public ListIteratorsIterator(List[] list, boolean allowEqual) { this.list = list; this.iter = new Iterator[list.length]; this.object = new Object[list.length]; this.allowEqual = allowEqual; for (int i = 0; i < list.length; i++) { if ((list[i] != null) && (list[i].size() > 0)) this.iter[i] = list[i].iterator(); else this.iter[i] = null; this.object[i] = null; } this.endReached = !this.init(); this.prepared = true; } public boolean hasNext() { if (this.endReached) return false; else { if (!this.prepared) this.prepareNext(); this.prepared = true; return !this.endReached; } } public Object next() { if (this.endReached) return null; else { if (!this.prepared) this.prepareNext(); this.prepared = false; if (this.endReached) return null; else return this.object; } } public void remove() { } private void prepareNext() { int i = 0; while (i > -1) { if (this.iter[i] != null) { if (this.iter[i].hasNext()) { this.object[i] = this.iter[i].next(); if (!this.allowEqual && this.equalObjectFound(this.object[i], i)) continue; do { i--; } while ((i > -1) && (this.iter[i] == null)); } else { this.iter[i] = this.list[i].iterator(); i++; } } else i++; if (i == this.iter.length) { this.endReached = true; return; } } } private boolean init() { boolean hasNext = false; int i = this.list.length - 1; while (i > -1) { if (this.iter[i] != null) { if (this.iter[i].hasNext()) { this.object[i] = this.iter[i].next(); if (!this.allowEqual && this.equalObjectFound(this.object[i], i)) continue; hasNext = true; } else { this.iter[i] = null; } } i--; } return hasNext; } private boolean equalObjectFound(Object o, int oindex) { for (int i = oindex + 1; i < this.object.length; i++) { if (o == this.object[i]) return true; } return false; } } }