/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.clustering.clusterer;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.Tools;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.learner.clustering.Cluster;
import com.rapidminer.operator.learner.clustering.ClusterModel;
import com.rapidminer.operator.learner.clustering.FlatClusterModel;
import com.rapidminer.operator.learner.clustering.FlatCrispClusterModel;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.Kernel;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.KernelNeural;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.KernelPolynomial;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.KernelRadial;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeStringCategory;
import com.rapidminer.tools.ClassNameMapper;
import com.rapidminer.tools.RandomGenerator;
/**
 * Simple implementation of kernel k-means {@rapidminer.cite Dhillon/etal/2004a}.
 *
 * <p>Examples are compared only through a kernel function. For an example
 * <code>x</code> and a cluster <code>c</code> the membership score computed by this
 * class is <code>g[c] - 2 * mean(K(x, y) for y in c)</code>, where <code>g[c]</code>
 * is the mean kernel value over all ordered pairs of members of <code>c</code>. The
 * example is assigned to the cluster with the smallest score.</p>
 *
 * @rapidminer.reference Dhillon/etal/2004a
 * @author Michael Wurst, Ingo Mierswa
 * @version $Id: KernelKMeans.java,v 1.9 2008/09/12 10:31:42 tobiasmalbrecht Exp $
 */
public class KernelKMeans extends AbstractKMethod {

    /** The parameter name for "scale the examples before applying clustering" */
    public static final String PARAMETER_SCALE = "scale";

    /** The parameter name for "the size of the kernel cache (currently not supported)" */
    public static final String PARAMETER_CACHE_SIZE_MB = "cache_size_mb";

    /** The parameter name for "similarity measure to apply" */
    public static final String PARAMETER_KERNEL_TYPE = "kernel_type";

    /** The parameter name for "The SVM kernel parameter gamma (radial)." */
    public static final String PARAMETER_KERNEL_GAMMA = "kernel_gamma";

    /** The parameter name for "The SVM kernel parameter degree (polynomial)." */
    public static final String PARAMETER_KERNEL_DEGREE = "kernel_degree";

    /** The parameter name for "The SVM kernel parameter a (neural)." */
    public static final String PARAMETER_KERNEL_A = "kernel_a";

    /** The parameter name for "The SVM kernel parameter b (neural)." */
    public static final String PARAMETER_KERNEL_B = "kernel_b";

    /** The kernel classes offered as choices for {@link #PARAMETER_KERNEL_TYPE}. */
    public static final String[] DEFAULT_KERNEL_CLASSES = {
        "com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.KernelDot",
        "com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.KernelRadial",
        "com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.KernelPolynomial"
    };

    /**
     * Per-cluster mean of the kernel values over all ordered pairs of cluster members.
     * It is <code>null</code> from {@link #initKMethod(List, int)} until the first call
     * of {@link #recalculateCentroids(FlatCrispClusterModel)}.
     */
    private double[] g = null;

    /** The kernel instance configured in {@link #createClusterModel(ExampleSet)}. */
    private Kernel kernel;

    /** Maps example ids to private copies of the corresponding SVM examples. */
    private Map<String, com.rapidminer.operator.learner.functions.kernel.jmysvm.examples.SVMExample> index;

    /**
     * Mapper between short and complete kernel class names. It is created on demand by
     * {@link #getKernelClassMap()}. The field deliberately has no initializer
     * (NOTE(review): {@link #getParameterTypes()} may be invoked by a superclass
     * constructor before the field initializers of this class run - confirm).
     */
    private ClassNameMapper kernelClassMap;

    public KernelKMeans(OperatorDescription description) {
        super(description);
    }

    /**
     * Resets the cached per-cluster values so that the next call of
     * {@link #bestIndex(String, FlatCrispClusterModel, FlatCrispClusterModel)} assigns
     * examples randomly. Both arguments are ignored.
     */
    protected void initKMethod(List<String> ids, int k) {
        g = null;
    }

    /**
     * Configures the kernel from the operator parameters, builds the id index for the
     * given example set and delegates the actual clustering to <code>kmethod</code>.
     *
     * @param es the example set to cluster; must pass the numerical attribute check
     * @return the cluster model delivered by <code>kmethod</code>
     * @throws OperatorException if a parameter cannot be read, the attribute check fails
     *             or the clustering itself fails
     */
    public ClusterModel createClusterModel(ExampleSet es) throws OperatorException {
        int maxK = getParameterAsInt(PARAMETER_K);
        int maxOptimizationSteps = getParameterAsInt(PARAMETER_MAX_OPTIMIZATION_STEPS);
        int maxRuns = getParameterAsInt(PARAMETER_MAX_RUNS);

        Tools.onlyNumericalAttributes(es, "Kernel KMeans");

        // forget the kernel of a previous invocation before a new one is created
        kernel = null;
        String kernelClassName = getParameterAsString(PARAMETER_KERNEL_TYPE);
        kernel = (Kernel) getKernelClassMap().getInstantiation(kernelClassName);
        if (kernel instanceof KernelRadial) {
            ((KernelRadial) kernel).setGamma(getParameterAsDouble(PARAMETER_KERNEL_GAMMA));
        } else if (kernel instanceof KernelPolynomial) {
            ((KernelPolynomial) kernel).setDegree(getParameterAsInt(PARAMETER_KERNEL_DEGREE));
        } else if (kernel instanceof KernelNeural) {
            ((KernelNeural) kernel).setParameters(getParameterAsDouble(PARAMETER_KERNEL_A), getParameterAsDouble(PARAMETER_KERNEL_B));
        }

        com.rapidminer.operator.learner.functions.kernel.jmysvm.examples.SVMExamples svmEs = new com.rapidminer.operator.learner.functions.kernel.jmysvm.examples.SVMExamples(es, null, getParameterAsBoolean(PARAMETER_SCALE));
        kernel.init(svmEs, getParameterAsInt(PARAMETER_CACHE_SIZE_MB));

        index = new HashMap<String, com.rapidminer.operator.learner.functions.kernel.jmysvm.examples.SVMExample>();
        for (int i = 0; i < svmEs.count_examples(); i++) {
            String id = svmEs.getId(i);
            com.rapidminer.operator.learner.functions.kernel.jmysvm.examples.SVMExample ex = svmEs.get_example(i);
            // store a copy (NOTE(review): presumably get_example hands out a shared, reused instance - confirm)
            index.put(id, new com.rapidminer.operator.learner.functions.kernel.jmysvm.examples.SVMExample(ex));
        }

        return kmethod(es, maxK, maxOptimizationSteps, maxRuns);
    }

    /**
     * Determines the cluster of <code>cm</code> with the smallest membership score for
     * the example with the given id.
     *
     * @param id the id of the example to assign
     * @param cmNew the model under construction; only its number of clusters is used,
     *            and only for the random assignment of the first run
     * @param cm the current model whose clusters are scored
     * @return a random cluster index if no per-cluster values have been calculated yet;
     *         otherwise the index of the best cluster, or -1 if no cluster yields a
     *         score below positive infinity (e.g. all clusters are empty)
     */
    protected int bestIndex(String id, FlatCrispClusterModel cmNew, FlatCrispClusterModel cm) {
        // If the system is not initialized yet (the first run) return random
        // cluster assignments
        if (g == null) {
            return RandomGenerator.getGlobalRandomGenerator().nextInt(cmNew.getNumberOfClusters());
        }

        int best = -1;
        double min = Double.POSITIVE_INFINITY;
        for (int i = 0; i < cm.getNumberOfClusters(); i++) {
            Cluster cl = cm.getClusterAt(i);
            double kernelSum = 0;
            int count = 0;
            for (Iterator<?> it = cl.getObjects(); it.hasNext();) {
                String id2 = (String) it.next();
                kernelSum = kernelSum + calculateK(id, id2);
                count++;
            }
            if (count == 0) {
                // An empty cluster has no mean to compare against. Skip it explicitly
                // instead of relying on 0/0 = NaN failing the comparison below.
                continue;
            }
            double v = g[i] - 2 * (kernelSum / count);
            log("Membership " + id + " to cluster " + i + " is " + v);
            if (v < min) {
                min = v;
                best = i;
            }
        }
        return best;
    }

    /**
     * Recomputes, for each cluster of the given model, the mean kernel value over all
     * ordered pairs of its members (including each member paired with itself) and
     * stores it in {@link #g}. Empty clusters get the value 0.
     */
    private void recalculateG(FlatClusterModel cm) {
        for (int i = 0; i < cm.getNumberOfClusters(); i++) {
            Cluster cl = cm.getClusterAt(i);
            double kernelSum = 0;
            // long, not int: the number of pairs grows quadratically and exceeds
            // Integer.MAX_VALUE for clusters with more than 46340 members
            long pairCount = 0;
            for (Iterator<?> outer = cl.getObjects(); outer.hasNext();) {
                String id1 = (String) outer.next();
                for (Iterator<?> inner = cl.getObjects(); inner.hasNext();) {
                    String id2 = (String) inner.next();
                    kernelSum = kernelSum + calculateK(id1, id2);
                    pairCount++;
                }
            }
            if (pairCount > 0) {
                g[i] = kernelSum / pairCount;
            } else {
                g[i] = 0.0;
            }
        }
    }

    /** Returns the kernel value for the two examples registered under the given ids. */
    private double calculateK(String id1, String id2) {
        com.rapidminer.operator.learner.functions.kernel.jmysvm.examples.SVMExample ex1 = index.get(id1);
        com.rapidminer.operator.learner.functions.kernel.jmysvm.examples.SVMExample ex2 = index.get(id2);
        return kernel.calculate_K(ex1, ex2);
    }

    /**
     * Kernel k-means keeps no explicit centroids; instead the per-cluster mean kernel
     * values are refreshed for the given model.
     */
    protected void recalculateCentroids(FlatCrispClusterModel cl) {
        int numberOfClusters = cl.getNumberOfClusters();
        // (re)allocate when nothing is cached yet or the number of clusters changed
        if (g == null || g.length != numberOfClusters) {
            g = new double[numberOfClusters];
        }
        recalculateG(cl);
    }

    /**
     * Returns the sum of the cached per-cluster mean kernel values. The given model is
     * not inspected; the values of the last
     * {@link #recalculateCentroids(FlatCrispClusterModel)} call are used.
     */
    protected double evaluateClusterModel(FlatCrispClusterModel cl) {
        double v = 0.0;
        for (double clusterValue : g) {
            v = v + clusterValue;
        }
        return v;
    }

    /**
     * Extends the parameter types of the superclass by the scaling flag, the kernel
     * cache size, the kernel type and the kernel specific parameters.
     */
    public List<ParameterType> getParameterTypes() {
        ClassNameMapper classMap = getKernelClassMap();
        List<ParameterType> types = super.getParameterTypes();
        types.add(new ParameterTypeBoolean(PARAMETER_SCALE, "Indicates if the examples are scaled before clustering is applied.", true));
        types.add(new ParameterTypeInt(PARAMETER_CACHE_SIZE_MB, "The size of the kernel cache.", 0, Integer.MAX_VALUE, 50));

        ParameterType type = new ParameterTypeStringCategory(PARAMETER_KERNEL_TYPE, "The kernel type, i.e. the similarity measure which should be applied.", classMap.getShortClassNames());
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeDouble(PARAMETER_KERNEL_GAMMA, "The SVM kernel parameter gamma (radial).", 0.0d, Double.POSITIVE_INFINITY, 1.0d);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeInt(PARAMETER_KERNEL_DEGREE, "The SVM kernel parameter degree (polynomial).", 0, Integer.MAX_VALUE, 2);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeDouble(PARAMETER_KERNEL_A, "The SVM kernel parameter a (neural).", Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 1.0d);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeDouble(PARAMETER_KERNEL_B, "The SVM kernel parameter b (neural).", Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0.0d);
        type.setExpert(false);
        types.add(type);

        return types;
    }

    /**
     * Returns the kernel class name mapper, creating it on first use so that no caller
     * depends on {@link #getParameterTypes()} having been invoked before.
     */
    private ClassNameMapper getKernelClassMap() {
        if (kernelClassMap == null) {
            kernelClassMap = new ClassNameMapper(DEFAULT_KERNEL_CLASSES);
        }
        return kernelClassMap;
    }
}