/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.learner.functions.kernel;

import java.util.List;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.AttributeWeights;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.learner.AbstractLearner;
import com.rapidminer.operator.learner.LearnerCapability;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.Kernel;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.KernelAnova;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.KernelDot;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.KernelEpanechnikov;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.KernelGaussianCombination;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.KernelMultiquadric;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.KernelNeural;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.KernelPolynomial;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.KernelRadial;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.svm.SVMInterface;
import com.rapidminer.operator.performance.EstimatedPerformance;
import com.rapidminer.operator.performance.PerformanceVector;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;

/**
 * This is the abstract superclass for the support vector machine / KLR
 * implementations of Stefan R&uuml;ping.
 *
 * @rapidminer.reference Rueping/2000a
 * @rapidminer.reference Vapnik/98a
 * @rapidminer.index SVM
 *
 * @author Ingo Mierswa
 * @version $Id: AbstractMySVMLearner.java,v 1.15 2006/03/27 13:22:01 ingomierswa Exp $
 */
public abstract class AbstractMySVMLearner extends AbstractLearner {

	/** The parameter name for &quot;The SVM kernel parameter gamma (radial, anova).&quot; */
	public static final String PARAMETER_KERNEL_GAMMA = "kernel_gamma";

	/** The parameter name for &quot;The SVM kernel parameter sigma1 (epanechnikov, gaussian combination, multiquadric).&quot; */
	public static final String PARAMETER_KERNEL_SIGMA1 = "kernel_sigma1";

	/** The parameter name for &quot;The SVM kernel parameter sigma2 (gaussian combination).&quot; */
	public static final String PARAMETER_KERNEL_SIGMA2 = "kernel_sigma2";

	/** The parameter name for &quot;The SVM kernel parameter sigma3 (gaussian combination).&quot; */
	public static final String PARAMETER_KERNEL_SIGMA3 = "kernel_sigma3";

	/** The parameter name for &quot;The SVM kernel parameter shift (multiquadric).&quot; */
	public static final String PARAMETER_KERNEL_SHIFT = "kernel_shift";

	/** The parameter name for &quot;The SVM kernel parameter degree (polynomial, anova, epanechnikov).&quot; */
	public static final String PARAMETER_KERNEL_DEGREE = "kernel_degree";

	/** The parameter name for &quot;The SVM kernel parameter a (neural).&quot; */
	public static final String PARAMETER_KERNEL_A = "kernel_a";

	/** The parameter name for &quot;The SVM kernel parameter b (neural).&quot; */
	public static final String PARAMETER_KERNEL_B = "kernel_b";

	/** The parameter name for &quot;Size of the cache for kernel evaluations in MB&quot; */
	public static final String PARAMETER_KERNEL_CACHE = "kernel_cache";

	/** The parameter name for &quot;Precision on the KKT conditions&quot; */
	public static final String PARAMETER_CONVERGENCE_EPSILON = "convergence_epsilon";

	/** The parameter name for &quot;Stop after this many iterations&quot; */
	public static final String PARAMETER_MAX_ITERATIONS = "max_iterations";

	/** The parameter name for &quot;Scale the example values and store the scaling parameters for test set.&quot; */
	public static final String PARAMETER_SCALE = "scale";

	/** The parameter name for &quot;Indicates if final optimization fitness should be returned as performance.&quot; */
	public static final String PARAMETER_RETURN_OPTIMIZATION_PERFORMANCE = "return_optimization_performance";

	public static final String PARAMETER_C = "C";

	public static final String PARAMETER_KERNEL_TYPE = "kernel_type";

	public static final String PARAMETER_CALCULATE_WEIGHTS = "calculate_weights";

	/**
	 * The kernels which can be used from RapidMiner for the mySVM / myKLR.
	 * NOTE(review): &quot;epachnenikov&quot; is a misspelling of &quot;epanechnikov&quot;, but the
	 * string is a persisted parameter category and is kept as-is for backward
	 * compatibility with existing process files.
	 */
	public static final String[] KERNEL_TYPES = { "dot", "radial", "polynomial", "neural", "anova", "epachnenikov", "gaussian_combination", "multiquadric" };

	/** Indicates a linear kernel. */
	public static final int KERNEL_DOT = 0;

	/** Indicates a rbf kernel. */
	public static final int KERNEL_RADIAL = 1;

	/** Indicates a polynomial kernel. */
	public static final int KERNEL_POLYNOMIAL = 2;

	/** Indicates a neural net kernel. */
	public static final int KERNEL_NEURAL = 3;

	/** Indicates an anova kernel. */
	public static final int KERNEL_ANOVA = 4;

	/** Indicates a epanechnikov kernel. */
	public static final int KERNEL_EPANECHNIKOV = 5;

	/** Indicates a gaussian combination kernel. */
	public static final int KERNEL_GAUSSIAN_COMBINATION = 6;

	/** Indicates a multiquadric kernel. */
	public static final int KERNEL_MULTIQUADRIC = 7;

	/** The SVM which is used for learning; created lazily in {@link #learn(ExampleSet)}. */
	private SVMInterface svm = null;

	/** The SVM kernel; created and parameterized in {@link #learn(ExampleSet)}. */
	private Kernel kernel;

	/** The SVM example set in the internal mySVM representation; set in {@link #learn(ExampleSet)}. */
	private com.rapidminer.operator.learner.functions.kernel.jmysvm.examples.SVMExamples svmExamples;

	public AbstractMySVMLearner(OperatorDescription description) {
		super(description);
	}

	/**
	 * Creates a new SVM according to the given label.
	 *
	 * @param label              the label attribute of the training set
	 * @param kernel             the (already initialized) kernel to use
	 * @param svmExamples        the training data in the internal mySVM representation
	 * @param rapidMinerExamples the original RapidMiner example set
	 * @return the SVM implementation to train
	 * @throws OperatorException if the SVM cannot be created for the given setup
	 */
	public abstract SVMInterface createSVM(Attribute label, Kernel kernel, com.rapidminer.operator.learner.functions.kernel.jmysvm.examples.SVMExamples svmExamples, ExampleSet rapidMinerExamples) throws OperatorException;

	/**
	 * Creates a new SVM model from the given data.
	 *
	 * @param exampleSet  the original training example set
	 * @param svmExamples the trained data in the internal mySVM representation
	 * @param kernel      the kernel used during training
	 * @param kernelType  one of the KERNEL_ constants of this class
	 * @return the resulting prediction model
	 */
	public abstract AbstractMySVMModel createSVMModel(ExampleSet exampleSet, com.rapidminer.operator.learner.functions.kernel.jmysvm.examples.SVMExamples svmExamples, Kernel kernel, int kernelType);

	/** Returns the kernel of this SVM. */
	protected Kernel getKernel() {
		return kernel;
	}

	/** Returns the used SVM. */
	protected SVMInterface getSVM() {
		return svm;
	}

	/** Returns the value of the corresponding parameter. */
	public boolean shouldDeliverOptimizationPerformance() {
		return getParameterAsBoolean(PARAMETER_RETURN_OPTIMIZATION_PERFORMANCE);
	}

	/**
	 * Returns the optimization performance of the best result. This method must
	 * be called after training, not before (it reads the trained alphas from
	 * the internal example set created by {@link #learn(ExampleSet)}).
	 *
	 * @return a performance vector containing the SVM objective function value
	 *         and the number of support vectors
	 */
	public PerformanceVector getOptimizationPerformance() {
		double finalFitness = getFitness(svmExamples.get_alphas(), svmExamples.get_ys(), kernel);
		PerformanceVector result = new PerformanceVector();
		result.addCriterion(new EstimatedPerformance("svm_objective_function", finalFitness, 1, false));
		result.addCriterion(new EstimatedPerformance("no_support_vectors", svmExamples.getNumberOfSupportVectors(), 1, true));
		return result;
	}

	/** Returns true if the user has specified that weights should be calculated. */
	public boolean shouldCalculateWeights() {
		return getParameterAsBoolean(PARAMETER_CALCULATE_WEIGHTS);
	}

	/**
	 * Returns the weights for all features. Only supported for the linear (dot)
	 * kernel; must be called after training.
	 *
	 * @param exampleSet the example set whose regular attributes are weighted
	 * @return the attribute weights derived from the trained SVM
	 * @throws OperatorException if a non-linear kernel was used
	 */
	public AttributeWeights getWeights(ExampleSet exampleSet) throws OperatorException {
		if (getParameterAsInt(PARAMETER_KERNEL_TYPE) != KERNEL_DOT)
			throw new UserError(this, 916, this, "Cannot create weights for nonlinear kernel!");
		double[] weights = svm.getWeights();
		AttributeWeights weightVector = new AttributeWeights();
		int i = 0;
		// assumes svm.getWeights() is ordered like the regular attributes -- TODO confirm
		for (Attribute attribute : exampleSet.getAttributes())
			weightVector.setWeight(attribute.getName(), weights[i++]);
		return weightVector;
	}

	/**
	 * Trains the SVM: converts the data into the internal representation,
	 * creates and parameterizes the kernel from the operator parameters,
	 * trains the SVM, and builds the resulting model.
	 *
	 * @param exampleSet the training data
	 * @return the trained SVM model
	 * @throws OperatorException if the label is nominal but not binominal, or
	 *         if training fails
	 */
	public Model learn(ExampleSet exampleSet) throws OperatorException {
		Attribute label = exampleSet.getAttributes().getLabel();
		// nominal labels must be binominal for this SVM / KLR implementation
		if ((label.isNominal()) && (label.getMapping().size() != 2)) {
			throw new UserError(this, 114, getName(), label.getName());
		}
		this.svmExamples = new com.rapidminer.operator.learner.functions.kernel.jmysvm.examples.SVMExamples(exampleSet, label, getParameterAsBoolean(PARAMETER_SCALE));

		// kernel: create by type and set the type-specific parameters
		int kernelType = getParameterAsInt(PARAMETER_KERNEL_TYPE);
		int cacheSize = getParameterAsInt(PARAMETER_KERNEL_CACHE);
		kernel = createKernel(kernelType);
		if (kernelType == KERNEL_RADIAL)
			((KernelRadial) kernel).setGamma(getParameterAsDouble(PARAMETER_KERNEL_GAMMA));
		else if (kernelType == KERNEL_POLYNOMIAL)
			((KernelPolynomial) kernel).setDegree(getParameterAsDouble(PARAMETER_KERNEL_DEGREE));
		else if (kernelType == KERNEL_NEURAL)
			((KernelNeural) kernel).setParameters(
					getParameterAsDouble(PARAMETER_KERNEL_A),
					getParameterAsDouble(PARAMETER_KERNEL_B));
		else if (kernelType == KERNEL_ANOVA)
			((KernelAnova) kernel).setParameters(
					getParameterAsDouble(PARAMETER_KERNEL_GAMMA),
					getParameterAsDouble(PARAMETER_KERNEL_DEGREE));
		else if (kernelType == KERNEL_EPANECHNIKOV)
			((KernelEpanechnikov) kernel).setParameters(
					getParameterAsDouble(PARAMETER_KERNEL_SIGMA1),
					getParameterAsDouble(PARAMETER_KERNEL_DEGREE));
		else if (kernelType == KERNEL_GAUSSIAN_COMBINATION)
			((KernelGaussianCombination) kernel).setParameters(
					getParameterAsDouble(PARAMETER_KERNEL_SIGMA1),
					getParameterAsDouble(PARAMETER_KERNEL_SIGMA2),
					getParameterAsDouble(PARAMETER_KERNEL_SIGMA3));
		else if (kernelType == KERNEL_MULTIQUADRIC)
			((KernelMultiquadric) kernel).setParameters(
					getParameterAsDouble(PARAMETER_KERNEL_SIGMA1),
					getParameterAsDouble(PARAMETER_KERNEL_SHIFT));
		kernel.init(svmExamples, cacheSize);

		// SVM
		svm = createSVM(label, kernel, svmExamples, exampleSet);
		svm.init(kernel, svmExamples);
		svm.train();

		return createSVMModel(exampleSet, svmExamples, kernel, kernelType);
	}

	/**
	 * Computes the SVM dual objective function value
	 * sum(alpha) - 0.5 * sum_ij(alpha_i * alpha_j * y_i * y_j * K(i,j)),
	 * skipping examples with zero alpha (non support vectors).
	 */
	private double getFitness(double[] alphas, double[] ys, Kernel kernel) {
		double sum = 0.0d;
		int numberSV = 0;
		for (int i = 0; i < ys.length; i++) {
			sum += alphas[i];
			if (alphas[i] > 0)
				numberSV++;
		}
		double matrixSum = 0.0d;
		for (int i = 0; i < ys.length; i++) {
			if (alphas[i] == 0.0d)
				continue;
			for (int j = 0; j < ys.length; j++) {
				if (alphas[j] == 0.0d)
					continue;
				matrixSum += (alphas[i] * alphas[j] * ys[i] * ys[j] * kernel.calculate_K(i, j));
			}
		}
		return sum - 0.5d * matrixSum;
	}

	/**
	 * Creates a new kernel of the given type. The kernel type has to be one out
	 * of KERNEL_DOT, KERNEL_RADIAL, KERNEL_POLYNOMIAL, KERNEL_NEURAL,
	 * KERNEL_EPANECHNIKOV, KERNEL_GAUSSIAN_COMBINATION, or KERNEL_MULTIQUADRIC.
	 * Unknown types fall back to the linear (dot) kernel.
	 */
	public static Kernel createKernel(int kernelType) {
		switch (kernelType) {
			case KERNEL_DOT:
				return new KernelDot();
			case KERNEL_RADIAL:
				return new KernelRadial();
			case KERNEL_POLYNOMIAL:
				return new KernelPolynomial();
			case KERNEL_NEURAL:
				return new KernelNeural();
			case KERNEL_ANOVA:
				return new KernelAnova();
			case KERNEL_EPANECHNIKOV:
				return new KernelEpanechnikov();
			case KERNEL_GAUSSIAN_COMBINATION:
				return new KernelGaussianCombination();
			case KERNEL_MULTIQUADRIC:
				return new KernelMultiquadric();
			default:
				return new KernelDot();
		}
	}

	public boolean supportsCapability(LearnerCapability lc) {
		if (lc == LearnerCapability.NUMERICAL_ATTRIBUTES)
			return true;
		if (lc == LearnerCapability.BINOMINAL_CLASS)
			return true;
		if (lc == LearnerCapability.NUMERICAL_CLASS)
			return true;
		if (lc == LearnerCapability.WEIGHTED_EXAMPLES)
			return true;
		return false;
	}

	public List<ParameterType> getParameterTypes() {
		List<ParameterType> types = super.getParameterTypes();
		ParameterType type = new ParameterTypeCategory(PARAMETER_KERNEL_TYPE, "The SVM kernel type", KERNEL_TYPES, 0);
		type.setExpert(false);
		types.add(type);
		type = new ParameterTypeDouble(PARAMETER_KERNEL_GAMMA, "The SVM kernel parameter gamma (radial, anova).", 0.0d, Double.POSITIVE_INFINITY, 1.0d);
		type.setExpert(false);
		types.add(type);
		type = new ParameterTypeDouble(PARAMETER_KERNEL_SIGMA1, "The SVM kernel parameter sigma1 (epanechnikov, gaussian combination, multiquadric).", 0.0d, Double.POSITIVE_INFINITY, 1.0d);
		type.setExpert(false);
		types.add(type);
		type = new ParameterTypeDouble(PARAMETER_KERNEL_SIGMA2, "The SVM kernel parameter sigma2 (gaussian combination).", 0.0d, Double.POSITIVE_INFINITY, 0.0d);
		type.setExpert(false);
		types.add(type);
		type = new ParameterTypeDouble(PARAMETER_KERNEL_SIGMA3, "The SVM kernel parameter sigma3 (gaussian combination).", 0.0d, Double.POSITIVE_INFINITY, 2.0d);
		type.setExpert(false);
		types.add(type);
		type = new ParameterTypeDouble(PARAMETER_KERNEL_SHIFT, "The SVM kernel parameter shift (multiquadric).", 0.0d, Double.POSITIVE_INFINITY, 1.0d);
		type.setExpert(false);
		types.add(type);
		type = new ParameterTypeDouble(PARAMETER_KERNEL_DEGREE, "The SVM kernel parameter degree (polynomial, anova, epanechnikov).", 0.0d, Double.POSITIVE_INFINITY, 2);
		type.setExpert(false);
		types.add(type);
		type = new ParameterTypeDouble(PARAMETER_KERNEL_A, "The SVM kernel parameter a (neural).", Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 1.0d);
		type.setExpert(false);
		types.add(type);
		type = new ParameterTypeDouble(PARAMETER_KERNEL_B, "The SVM kernel parameter b (neural).", Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0.0d);
		type.setExpert(false);
		types.add(type);
		types.add(new ParameterTypeInt(PARAMETER_KERNEL_CACHE, "Size of the cache for kernel evaluations in MB", 0, Integer.MAX_VALUE, 200));
		type = new ParameterTypeDouble(PARAMETER_C, "The SVM complexity constant. Use -1 for different C values for positive and negative.", -1, Double.POSITIVE_INFINITY, 0.0d);
		type.setExpert(false);
		types.add(type);
		type = new ParameterTypeDouble(PARAMETER_CONVERGENCE_EPSILON, "Precision on the KKT conditions", 0.0d, Double.POSITIVE_INFINITY, 1e-3);
		types.add(type);
		types.add(new ParameterTypeInt(PARAMETER_MAX_ITERATIONS, "Stop after this many iterations", 1, Integer.MAX_VALUE, 100000));
		types.add(new ParameterTypeBoolean(PARAMETER_SCALE, "Scale the example values and store the scaling parameters for test set.", true));
		types.add(new ParameterTypeBoolean(PARAMETER_CALCULATE_WEIGHTS, "Indicates if attribute weights should be returned.", false));
		types.add(new ParameterTypeBoolean(PARAMETER_RETURN_OPTIMIZATION_PERFORMANCE, "Indicates if final optimization fitness should be returned as performance.", false));
		return types;
	}
}