/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.functions.kernel;
import java.util.List;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.AttributeWeights;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.learner.AbstractLearner;
import com.rapidminer.operator.learner.LearnerCapability;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.Kernel;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.KernelAnova;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.KernelDot;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.KernelEpanechnikov;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.KernelGaussianCombination;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.KernelMultiquadric;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.KernelNeural;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.KernelPolynomial;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.KernelRadial;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.svm.SVMInterface;
import com.rapidminer.operator.performance.EstimatedPerformance;
import com.rapidminer.operator.performance.PerformanceVector;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
/**
* This is the abstract superclass for the support vector machine / KLR
* implementations of Stefan Rüping.
*
* @rapidminer.reference Rueping/2000a
* @rapidminer.reference Vapnik/98a
* @rapidminer.index SVM
*
* @author Ingo Mierswa
* @version $Id: AbstractMySVMLearner.java,v 1.15 2006/03/27 13:22:01
* ingomierswa Exp $
*/
public abstract class AbstractMySVMLearner extends AbstractLearner {

    /** The parameter name for "The SVM kernel parameter gamma (radial, anova)." */
    public static final String PARAMETER_KERNEL_GAMMA = "kernel_gamma";

    /** The parameter name for "The SVM kernel parameter sigma1 (epanechnikov, gaussian combination, multiquadric)." */
    public static final String PARAMETER_KERNEL_SIGMA1 = "kernel_sigma1";

    /** The parameter name for "The SVM kernel parameter sigma2 (gaussian combination)." */
    public static final String PARAMETER_KERNEL_SIGMA2 = "kernel_sigma2";

    /** The parameter name for "The SVM kernel parameter sigma3 (gaussian combination)." */
    public static final String PARAMETER_KERNEL_SIGMA3 = "kernel_sigma3";

    /** The parameter name for "The SVM kernel parameter shift (multiquadric)." */
    public static final String PARAMETER_KERNEL_SHIFT = "kernel_shift";

    /** The parameter name for "The SVM kernel parameter degree (polynomial, anova, epanechnikov)." */
    public static final String PARAMETER_KERNEL_DEGREE = "kernel_degree";

    /** The parameter name for "The SVM kernel parameter a (neural)." */
    public static final String PARAMETER_KERNEL_A = "kernel_a";

    /** The parameter name for "The SVM kernel parameter b (neural)." */
    public static final String PARAMETER_KERNEL_B = "kernel_b";

    /** The parameter name for "Size of the cache for kernel evaluations in MB" */
    public static final String PARAMETER_KERNEL_CACHE = "kernel_cache";

    /** The parameter name for "Precision on the KKT conditions" */
    public static final String PARAMETER_CONVERGENCE_EPSILON = "convergence_epsilon";

    /** The parameter name for "Stop after this many iterations" */
    public static final String PARAMETER_MAX_ITERATIONS = "max_iterations";

    /** The parameter name for "Scale the example values and store the scaling parameters for test set." */
    public static final String PARAMETER_SCALE = "scale";

    /** The parameter name for "Indicates if final optimization fitness should be returned as performance." */
    public static final String PARAMETER_RETURN_OPTIMIZATION_PERFORMANCE = "return_optimization_performance";

    /** The parameter name for the SVM complexity constant C. */
    public static final String PARAMETER_C = "C";

    /** The parameter name for the kernel type selection. */
    public static final String PARAMETER_KERNEL_TYPE = "kernel_type";

    /** The parameter name indicating whether attribute weights should be calculated. */
    public static final String PARAMETER_CALCULATE_WEIGHTS = "calculate_weights";

    /**
     * The kernels which can be used from RapidMiner for the mySVM / myKLR.
     * The array index of each name corresponds to the KERNEL_* constant below.
     * NOTE(review): "epachnenikov" is a typo for "epanechnikov", but the value
     * is deliberately kept as-is because saved processes store this category
     * value as a string; changing it would break backward compatibility.
     */
    public static final String[] KERNEL_TYPES = {
        "dot", "radial", "polynomial", "neural", "anova", "epachnenikov", "gaussian_combination", "multiquadric"
    };

    /** Indicates a linear kernel. */
    public static final int KERNEL_DOT = 0;

    /** Indicates a rbf kernel. */
    public static final int KERNEL_RADIAL = 1;

    /** Indicates a polynomial kernel. */
    public static final int KERNEL_POLYNOMIAL = 2;

    /** Indicates a neural net kernel. */
    public static final int KERNEL_NEURAL = 3;

    /** Indicates an anova kernel. */
    public static final int KERNEL_ANOVA = 4;

    /** Indicates a epanechnikov kernel. */
    public static final int KERNEL_EPANECHNIKOV = 5;

    /** Indicates a gaussian combination kernel. */
    public static final int KERNEL_GAUSSIAN_COMBINATION = 6;

    /** Indicates a multiquadric kernel. */
    public static final int KERNEL_MULTIQUADRIC = 7;

    /** The SVM which is used for learning. Set by {@link #learn(ExampleSet)}; null before training. */
    private SVMInterface svm = null;

    /** The SVM kernel. Created and configured in {@link #learn(ExampleSet)}. */
    private Kernel kernel;

    /** The SVM example set in the internal jmysvm representation. Set by {@link #learn(ExampleSet)}. */
    private com.rapidminer.operator.learner.functions.kernel.jmysvm.examples.SVMExamples svmExamples;

    public AbstractMySVMLearner(OperatorDescription description) {
        super(description);
    }

    /** Creates a new SVM according to the given label. */
    public abstract SVMInterface createSVM(Attribute label, Kernel kernel, com.rapidminer.operator.learner.functions.kernel.jmysvm.examples.SVMExamples svmExamples, ExampleSet rapidMinerExamples) throws OperatorException;

    /** Creates a new SVM model from the given data. */
    public abstract AbstractMySVMModel createSVMModel(ExampleSet exampleSet, com.rapidminer.operator.learner.functions.kernel.jmysvm.examples.SVMExamples svmExamples, Kernel kernel, int kernelType);

    /** Returns the kernel of this SVM. */
    protected Kernel getKernel() {
        return kernel;
    }

    /** Returns the used SVM. */
    protected SVMInterface getSVM() {
        return svm;
    }

    /** Returns the value of the {@link #PARAMETER_RETURN_OPTIMIZATION_PERFORMANCE} parameter. */
    public boolean shouldDeliverOptimizationPerformance() {
        return getParameterAsBoolean(PARAMETER_RETURN_OPTIMIZATION_PERFORMANCE);
    }

    /**
     * Returns the optimization performance of the best result: the final value
     * of the SVM dual objective function and the number of support vectors.
     * This method must be called after training, not before (it reads the
     * trained alphas from the internal example set).
     */
    public PerformanceVector getOptimizationPerformance() {
        double finalFitness = getFitness(svmExamples.get_alphas(), svmExamples.get_ys(), kernel);
        PerformanceVector result = new PerformanceVector();
        // the objective function is maximized, hence not "minimizable" (false)
        result.addCriterion(new EstimatedPerformance("svm_objective_function", finalFitness, 1, false));
        result.addCriterion(new EstimatedPerformance("no_support_vectors", svmExamples.getNumberOfSupportVectors(), 1, true));
        return result;
    }

    /** Returns true if the user has specified that weights should be calculated. */
    public boolean shouldCalculateWeights() {
        return getParameterAsBoolean(PARAMETER_CALCULATE_WEIGHTS);
    }

    /**
     * Returns the weights for all features. Only defined for the linear (dot)
     * kernel; any other kernel type raises a user error. Must be called after
     * training, since the weights are read from the trained SVM.
     *
     * @throws OperatorException if a non-linear kernel was selected
     */
    public AttributeWeights getWeights(ExampleSet exampleSet) throws OperatorException {
        if (getParameterAsInt(PARAMETER_KERNEL_TYPE) != KERNEL_DOT)
            throw new UserError(this, 916, this, "Cannot create weights for nonlinear kernel!");
        double[] weights = svm.getWeights();
        AttributeWeights weightVector = new AttributeWeights();
        int i = 0;
        for (Attribute attribute : exampleSet.getAttributes())
            weightVector.setWeight(attribute.getName(), weights[i++]);
        return weightVector;
    }

    /**
     * Trains the SVM: validates the label (binominal classification or
     * regression only), converts the examples into the internal jmysvm
     * representation, creates and configures the kernel, and delegates the
     * optimization to the concrete SVM created by
     * {@link #createSVM(Attribute, Kernel, com.rapidminer.operator.learner.functions.kernel.jmysvm.examples.SVMExamples, ExampleSet)}.
     */
    public Model learn(ExampleSet exampleSet) throws OperatorException {
        Attribute label = exampleSet.getAttributes().getLabel();
        // nominal labels must be binominal; numerical labels mean regression
        if ((label.isNominal()) && (label.getMapping().size() != 2)) {
            throw new UserError(this, 114, getName(), label.getName());
        }
        this.svmExamples = new com.rapidminer.operator.learner.functions.kernel.jmysvm.examples.SVMExamples(exampleSet, label, getParameterAsBoolean(PARAMETER_SCALE));

        // kernel
        int kernelType = getParameterAsInt(PARAMETER_KERNEL_TYPE);
        int cacheSize = getParameterAsInt(PARAMETER_KERNEL_CACHE);
        kernel = createKernel(kernelType);
        applyKernelParameters(kernelType);
        kernel.init(svmExamples, cacheSize);

        // SVM
        svm = createSVM(label, kernel, svmExamples, exampleSet);
        svm.init(kernel, svmExamples);
        svm.train();
        return createSVMModel(exampleSet, svmExamples, kernel, kernelType);
    }

    /**
     * Transfers the user-specified kernel parameters to the freshly created
     * {@link #kernel}. Kernel types without parameters (dot) need no setup.
     */
    private void applyKernelParameters(int kernelType) throws OperatorException {
        switch (kernelType) {
            case KERNEL_RADIAL:
                ((KernelRadial) kernel).setGamma(getParameterAsDouble(PARAMETER_KERNEL_GAMMA));
                break;
            case KERNEL_POLYNOMIAL:
                ((KernelPolynomial) kernel).setDegree(getParameterAsDouble(PARAMETER_KERNEL_DEGREE));
                break;
            case KERNEL_NEURAL:
                ((KernelNeural) kernel).setParameters(
                    getParameterAsDouble(PARAMETER_KERNEL_A),
                    getParameterAsDouble(PARAMETER_KERNEL_B));
                break;
            case KERNEL_ANOVA:
                ((KernelAnova) kernel).setParameters(
                    getParameterAsDouble(PARAMETER_KERNEL_GAMMA),
                    getParameterAsDouble(PARAMETER_KERNEL_DEGREE));
                break;
            case KERNEL_EPANECHNIKOV:
                ((KernelEpanechnikov) kernel).setParameters(
                    getParameterAsDouble(PARAMETER_KERNEL_SIGMA1),
                    getParameterAsDouble(PARAMETER_KERNEL_DEGREE));
                break;
            case KERNEL_GAUSSIAN_COMBINATION:
                ((KernelGaussianCombination) kernel).setParameters(
                    getParameterAsDouble(PARAMETER_KERNEL_SIGMA1),
                    getParameterAsDouble(PARAMETER_KERNEL_SIGMA2),
                    getParameterAsDouble(PARAMETER_KERNEL_SIGMA3));
                break;
            case KERNEL_MULTIQUADRIC:
                ((KernelMultiquadric) kernel).setParameters(
                    getParameterAsDouble(PARAMETER_KERNEL_SIGMA1),
                    getParameterAsDouble(PARAMETER_KERNEL_SHIFT));
                break;
            default:
                // KERNEL_DOT and unknown types: nothing to configure
                break;
        }
    }

    /**
     * Computes the SVM dual objective value
     * sum_i(alpha_i) - 1/2 * sum_ij(alpha_i * alpha_j * y_i * y_j * K(i, j)),
     * skipping examples with alpha == 0 (non-support-vectors) since they do
     * not contribute to the kernel matrix sum.
     */
    private double getFitness(double[] alphas, double[] ys, Kernel kernel) {
        double sum = 0.0d;
        int numberSV = 0;
        for (int i = 0; i < ys.length; i++) {
            sum += alphas[i];
            if (alphas[i] > 0)
                numberSV++;
        }
        double matrixSum = 0.0d;
        for (int i = 0; i < ys.length; i++) {
            if (alphas[i] == 0.0d)
                continue;
            for (int j = 0; j < ys.length; j++) {
                if (alphas[j] == 0.0d)
                    continue;
                matrixSum += (alphas[i] * alphas[j] * ys[i] * ys[j] * kernel.calculate_K(i, j));
            }
        }
        return sum - 0.5d * matrixSum;
    }

    /**
     * Creates a new kernel of the given type. The kernel type has to be one
     * out of KERNEL_DOT, KERNEL_RADIAL, KERNEL_POLYNOMIAL, KERNEL_NEURAL,
     * KERNEL_ANOVA, KERNEL_EPANECHNIKOV, KERNEL_GAUSSIAN_COMBINATION, or
     * KERNEL_MULTIQUADRIC. Unknown types fall back to a linear (dot) kernel.
     */
    public static Kernel createKernel(int kernelType) {
        switch (kernelType) {
            case KERNEL_DOT:
                return new KernelDot();
            case KERNEL_RADIAL:
                return new KernelRadial();
            case KERNEL_POLYNOMIAL:
                return new KernelPolynomial();
            case KERNEL_NEURAL:
                return new KernelNeural();
            case KERNEL_ANOVA:
                return new KernelAnova();
            case KERNEL_EPANECHNIKOV:
                return new KernelEpanechnikov();
            case KERNEL_GAUSSIAN_COMBINATION:
                return new KernelGaussianCombination();
            case KERNEL_MULTIQUADRIC:
                return new KernelMultiquadric();
            default:
                return new KernelDot();
        }
    }

    /**
     * Supports numerical attributes, binominal and numerical (regression)
     * labels, and weighted examples.
     */
    public boolean supportsCapability(LearnerCapability lc) {
        return (lc == LearnerCapability.NUMERICAL_ATTRIBUTES)
            || (lc == LearnerCapability.BINOMINAL_CLASS)
            || (lc == LearnerCapability.NUMERICAL_CLASS)
            || (lc == LearnerCapability.WEIGHTED_EXAMPLES);
    }

    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        ParameterType type = new ParameterTypeCategory(PARAMETER_KERNEL_TYPE, "The SVM kernel type", KERNEL_TYPES, 0);
        type.setExpert(false);
        types.add(type);
        type = new ParameterTypeDouble(PARAMETER_KERNEL_GAMMA, "The SVM kernel parameter gamma (radial, anova).", 0.0d, Double.POSITIVE_INFINITY, 1.0d);
        type.setExpert(false);
        types.add(type);
        type = new ParameterTypeDouble(PARAMETER_KERNEL_SIGMA1, "The SVM kernel parameter sigma1 (epanechnikov, gaussian combination, multiquadric).", 0.0d, Double.POSITIVE_INFINITY, 1.0d);
        type.setExpert(false);
        types.add(type);
        type = new ParameterTypeDouble(PARAMETER_KERNEL_SIGMA2, "The SVM kernel parameter sigma2 (gaussian combination).", 0.0d, Double.POSITIVE_INFINITY, 0.0d);
        type.setExpert(false);
        types.add(type);
        type = new ParameterTypeDouble(PARAMETER_KERNEL_SIGMA3, "The SVM kernel parameter sigma3 (gaussian combination).", 0.0d, Double.POSITIVE_INFINITY, 2.0d);
        type.setExpert(false);
        types.add(type);
        type = new ParameterTypeDouble(PARAMETER_KERNEL_SHIFT, "The SVM kernel parameter shift (multiquadric).", 0.0d, Double.POSITIVE_INFINITY, 1.0d);
        type.setExpert(false);
        types.add(type);
        type = new ParameterTypeDouble(PARAMETER_KERNEL_DEGREE, "The SVM kernel parameter degree (polynomial, anova, epanechnikov).", 0.0d, Double.POSITIVE_INFINITY, 2);
        type.setExpert(false);
        types.add(type);
        type = new ParameterTypeDouble(PARAMETER_KERNEL_A, "The SVM kernel parameter a (neural).", Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 1.0d);
        type.setExpert(false);
        types.add(type);
        type = new ParameterTypeDouble(PARAMETER_KERNEL_B, "The SVM kernel parameter b (neural).", Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0.0d);
        type.setExpert(false);
        types.add(type);
        types.add(new ParameterTypeInt(PARAMETER_KERNEL_CACHE, "Size of the cache for kernel evaluations in MB", 0, Integer.MAX_VALUE, 200));
        type = new ParameterTypeDouble(PARAMETER_C, "The SVM complexity constant. Use -1 for different C values for positive and negative.", -1, Double.POSITIVE_INFINITY, 0.0d);
        type.setExpert(false);
        types.add(type);
        type = new ParameterTypeDouble(PARAMETER_CONVERGENCE_EPSILON, "Precision on the KKT conditions", 0.0d, Double.POSITIVE_INFINITY, 1e-3);
        types.add(type);
        types.add(new ParameterTypeInt(PARAMETER_MAX_ITERATIONS, "Stop after this many iterations", 1, Integer.MAX_VALUE, 100000));
        types.add(new ParameterTypeBoolean(PARAMETER_SCALE, "Scale the example values and store the scaling parameters for test set.", true));
        types.add(new ParameterTypeBoolean(PARAMETER_CALCULATE_WEIGHTS, "Indicates if attribute weights should be returned.", false));
        types.add(new ParameterTypeBoolean(PARAMETER_RETURN_OPTIMIZATION_PERFORMANCE, "Indicates if final optimization fitness should be returned as performance.", false));
        return types;
    }
}