/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.learner.functions.kernel;

import java.util.Iterator;
import java.util.List;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.learner.AbstractLearner;
import com.rapidminer.operator.learner.LearnerCapability;
import com.rapidminer.operator.learner.functions.kernel.rvm.ClassificationProblem;
import com.rapidminer.operator.learner.functions.kernel.rvm.ConstructiveRegression;
import com.rapidminer.operator.learner.functions.kernel.rvm.Parameter;
import com.rapidminer.operator.learner.functions.kernel.rvm.RVMClassification;
import com.rapidminer.operator.learner.functions.kernel.rvm.RVMRegression;
import com.rapidminer.operator.learner.functions.kernel.rvm.RegressionProblem;
import com.rapidminer.operator.learner.functions.kernel.rvm.kernel.KernelBasisFunction;
import com.rapidminer.operator.learner.functions.kernel.rvm.kernel.KernelCauchy;
import com.rapidminer.operator.learner.functions.kernel.rvm.kernel.KernelEpanechnikov;
import com.rapidminer.operator.learner.functions.kernel.rvm.kernel.KernelGaussianCombination;
import com.rapidminer.operator.learner.functions.kernel.rvm.kernel.KernelLaplace;
import com.rapidminer.operator.learner.functions.kernel.rvm.kernel.KernelMultiquadric;
import com.rapidminer.operator.learner.functions.kernel.rvm.kernel.KernelPoly;
import com.rapidminer.operator.learner.functions.kernel.rvm.kernel.KernelRadial;
import com.rapidminer.operator.learner.functions.kernel.rvm.kernel.KernelSigmoid;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;

/**
 * Relevance Vector Machine (RVM) Learner. The RVM is a probabilistic method
 * both for classification and regression. The implementation of the relevance
 * vector machine is based on the original algorithm described by Tipping/2001.
 * The fast version of the marginal likelihood maximization (Tipping/Faul/2003)
 * is also available if the parameter "rvm_type" is set to
 * "Constructive-Regression-RVM".
 *
 * @rapidminer.reference Tipping/2001a
 * @rapidminer.reference Tipping/Faul/2003a
 *
 * @author Piotr Kasprzak, Ingo Mierswa
 * @version $Id: RVMLearner.java,v 1.9 2008/05/09 19:23:01 ingomierswa Exp $
 * @rapidminer.index RVM
 */
public class RVMLearner extends AbstractLearner {

    /** The parameter name for "Regression RVM" */
    public static final String PARAMETER_RVM_TYPE = "rvm_type";

    /** The parameter name for "The type of the kernel functions." */
    public static final String PARAMETER_KERNEL_TYPE = "kernel_type";

    /** The parameter name for "The maximum number of iterations used." */
    public static final String PARAMETER_MAX_ITERATION = "max_iteration";

    /** The parameter name for "Abort iteration if largest log alpha change is smaller than this" */
    public static final String PARAMETER_MIN_DELTA_LOG_ALPHA = "min_delta_log_alpha";

    /** The parameter name for "Prune basis function if its alpha is bigger than this" */
    public static final String PARAMETER_ALPHA_MAX = "alpha_max";

    /** The parameter name for "The lengthscale used in all kernels." */
    public static final String PARAMETER_KERNEL_LENGTHSCALE = "kernel_lengthscale";

    /** The parameter name for "The degree used in the poly kernel." */
    public static final String PARAMETER_KERNEL_DEGREE = "kernel_degree";

    /** The parameter name for "The bias used in the poly kernel." */
    public static final String PARAMETER_KERNEL_BIAS = "kernel_bias";

    /** The parameter name for "The SVM kernel parameter sigma1 (Epanechnikov, Gaussian Combination, Multiquadric)." */
    public static final String PARAMETER_KERNEL_SIGMA1 = "kernel_sigma1";

    /** The parameter name for "The SVM kernel parameter sigma2 (Gaussian Combination)." */
    public static final String PARAMETER_KERNEL_SIGMA2 = "kernel_sigma2";

    /** The parameter name for "The SVM kernel parameter sigma3 (Gaussian Combination)." */
    public static final String PARAMETER_KERNEL_SIGMA3 = "kernel_sigma3";

    /** The parameter name for "The SVM kernel parameter shift (polynomial, Multiquadric)." */
    public static final String PARAMETER_KERNEL_SHIFT = "kernel_shift";

    /** The parameter name for "The SVM kernel parameter a (neural)." */
    public static final String PARAMETER_KERNEL_A = "kernel_a";

    /** The parameter name for "The SVM kernel parameter b (neural)." */
    public static final String PARAMETER_KERNEL_B = "kernel_b";

    /**
     * Display names of the selectable RVM variants. The index into this array
     * is the value of the {@link #PARAMETER_RVM_TYPE} category parameter.
     */
    public static final String[] RVM_TYPES = {
        "Regression-RVM",
        "Classification-RVM",
        "Constructive-Regression-RVM"
    };

    /**
     * Display names of the selectable kernels. The index into this array is
     * the value of the {@link #PARAMETER_KERNEL_TYPE} category parameter and
     * is the value switched on in {@link #createKernels(double[][], int)}.
     */
    public static final String[] KERNEL_TYPES = {
        "rbf",
        "cauchy",
        "laplace",
        "poly",
        "sigmoid",
        "Epanechnikov",
        "gaussian combination",
        "multiquadric"
    };

    /**
     * Creates the operator from its description.
     *
     * @param description the operator description handed to the super class
     */
    public RVMLearner(OperatorDescription description) {
        super(description);
    }

    /**
     * Reports which learner capabilities this operator supports: numerical
     * attributes, a binominal class and a numerical class.
     *
     * @param lc the capability to check
     * @return <code>true</code> for the three capabilities listed above,
     *         <code>false</code> for every other capability
     */
    public boolean supportsCapability(LearnerCapability lc) {
        return lc == LearnerCapability.NUMERICAL_ATTRIBUTES
            || lc == LearnerCapability.BINOMINAL_CLASS
            || lc == LearnerCapability.NUMERICAL_CLASS;
    }

    /**
     * Learns an RVM model from the given example set.
     *
     * <p>The examples are first copied into plain arrays, then one kernel basis
     * function per example is created and finally the RVM variant selected by
     * {@link #PARAMETER_RVM_TYPE} is trained. A nominal label leads to a
     * classification problem, any other label to a regression problem.</p>
     *
     * @param exampleSet the training data
     * @return an {@link RVMModel} wrapping the learned RVM model
     * @throws OperatorException if a parameter cannot be read
     * @throws UserError if a nominal label does not have exactly two values, if
     *         the selected RVM type does not fit the kind of label, or if the
     *         classification training fails with an
     *         {@link ArrayIndexOutOfBoundsException}
     */
    public Model learn(ExampleSet exampleSet) throws OperatorException {
        log("Creating RVM.");

        Parameter parameter = new Parameter();
        int numExamples = exampleSet.size();
        // One basis function per example plus one additional slot (index 0).
        int numBases = numExamples + 1;

        // Get user defined control parameters from RapidMiner.
        // Abort iteration if largest log alpha change is smaller than this
        parameter.min_delta_log_alpha = getParameterAsDouble(PARAMETER_MIN_DELTA_LOG_ALPHA);
        // Prune basis function if its alpha is bigger than this
        parameter.alpha_max = getParameterAsDouble(PARAMETER_ALPHA_MAX);
        // Maximum number of iterations
        parameter.maxIterations = getParameterAsInt(PARAMETER_MAX_ITERATION);

        // Transfer input / target vectors into array form.
        log("=> Creating input / output vectors.");
        double[][] x = new double[numExamples][exampleSet.getAttributes().size()];
        double[][] t = new double[numExamples][1];
        Iterator<Example> reader = exampleSet.iterator();
        int k = 0;
        while (reader.hasNext()) {
            Example e = reader.next();
            t[k][0] = e.getLabel();
            x[k] = RVMModel.makeInputVector(e);
            k++;
        }

        // Init hyperparameters with more or less sensible values (shouldn't be too important).
        Attribute label = exampleSet.getAttributes().getLabel();
        parameter.initAlpha = Math.pow((1.0 / numExamples), 2);
        // parameter.initSigma = Math.sqrt(label.getVariance()) * 0.1;
        parameter.initSigma = 0.1;

        // Create kernel functions.
        log("Creating kernel basis functions [" + KERNEL_TYPES[getParameterAsInt(PARAMETER_KERNEL_TYPE)] + "].");
        KernelBasisFunction[] kernels = createKernels(x, numBases);

        // Create RVM and learn the model.
        String rvmType = RVM_TYPES[getParameterAsInt(PARAMETER_RVM_TYPE)];
        com.rapidminer.operator.learner.functions.kernel.rvm.Model model = null;

        if (label.isNominal()) {
            if (label.getMapping().size() != 2) {
                throw new UserError(this, 114, getName(), label.getName());
            }

            // Classification problem: the label values are passed on as ints.
            int[] c = new int[numExamples];
            for (k = 0; k < numExamples; k++) {
                c[k] = (int) t[k][0];
            }
            ClassificationProblem problem = new ClassificationProblem(x, c, kernels);

            if (rvmType.equals("Classification-RVM")) {
                RVMClassification rvm = new RVMClassification(problem, parameter);
                try {
                    model = rvm.learn();
                } catch (ArrayIndexOutOfBoundsException e) {
                    throw new UserError(this, 924);
                }
            } else {
                throw new UserError(this, 207, new Object[] {
                    rvmType,
                    "rvm_type",
                    "only Classification-RVM can be used for the given two class classification problem"
                });
            }
        } else {
            // Regression problem
            RegressionProblem problem = new RegressionProblem(x, t, kernels);

            if (rvmType.equals("Regression-RVM")) {
                RVMRegression rvm = new RVMRegression(problem, parameter);
                model = rvm.learn();
            } else if (rvmType.equals("Constructive-Regression-RVM")) {
                ConstructiveRegression rvm = new ConstructiveRegression(problem, parameter);
                model = rvm.learn();
            } else {
                throw new UserError(this, 207, new Object[] {
                    rvmType,
                    "rvm_type",
                    "only one of the regression types can be used for the given regression problem"
                });
            }
        }

        return new RVMModel(exampleSet, model);
    }

    /**
     * Create the appropriate kernel functions depending on the ui settings.
     *
     * <p>The returned array has <code>numKernels</code> entries. Entries
     * <code>1 .. numKernels - 1</code> hold one basis function each, centered
     * on <code>x[0] .. x[numKernels - 2]</code>. Entry <code>0</code> is never
     * assigned here and therefore stays <code>null</code>.
     * NOTE(review): presumably index 0 is reserved for a bias term handled by
     * the RVM problem classes; confirm against their implementation.</p>
     *
     * <p>Kernel parameters actually passed to the kernel constructors:</p>
     * <ul>
     * <li>rbf, cauchy, laplace: lengthscale</li>
     * <li>poly: lengthscale, bias, degree</li>
     * <li>sigmoid: a, b</li>
     * <li>Epanechnikov: sigma1, degree</li>
     * <li>gaussian combination: sigma1, sigma2, sigma3</li>
     * <li>multiquadric: sigma1, shift</li>
     * </ul>
     * <p>Any kernel type index outside 0..7 falls back to the rbf kernel.</p>
     *
     * @param x the input vectors; must contain at least
     *          <code>numKernels - 1</code> rows
     * @param numKernels the size of the returned array
     * @return the array of kernel basis functions as described above
     * @throws OperatorException if one of the kernel parameters cannot be read
     */
    public KernelBasisFunction[] createKernels(double[][] x, int numKernels) throws OperatorException {
        KernelBasisFunction[] kernels = new KernelBasisFunction[numKernels];

        double lengthScale = getParameterAsDouble(PARAMETER_KERNEL_LENGTHSCALE);
        double bias = getParameterAsDouble(PARAMETER_KERNEL_BIAS);
        double degree = getParameterAsDouble(PARAMETER_KERNEL_DEGREE);
        double a = getParameterAsDouble(PARAMETER_KERNEL_A);
        double b = getParameterAsDouble(PARAMETER_KERNEL_B);
        double sigma1 = getParameterAsDouble(PARAMETER_KERNEL_SIGMA1);
        double sigma2 = getParameterAsDouble(PARAMETER_KERNEL_SIGMA2);
        double sigma3 = getParameterAsDouble(PARAMETER_KERNEL_SIGMA3);
        double shift = getParameterAsDouble(PARAMETER_KERNEL_SHIFT);

        // The kernel type does not change while the kernels are created, so it
        // is looked up once instead of once per basis function.
        int kernelType = getParameterAsInt(PARAMETER_KERNEL_TYPE);

        for (int j = 0; j < numKernels - 1; j++) {
            double[] input = x[j];
            KernelBasisFunction kernel;
            switch (kernelType) {
                case 1:
                    kernel = new KernelBasisFunction(new KernelCauchy(lengthScale), input);
                    break;
                case 2:
                    kernel = new KernelBasisFunction(new KernelLaplace(lengthScale), input);
                    break;
                case 3:
                    kernel = new KernelBasisFunction(new KernelPoly(lengthScale, bias, degree), input);
                    break;
                case 4:
                    kernel = new KernelBasisFunction(new KernelSigmoid(a, b), input);
                    break;
                case 5:
                    kernel = new KernelBasisFunction(new KernelEpanechnikov(sigma1, degree), input);
                    break;
                case 6:
                    kernel = new KernelBasisFunction(new KernelGaussianCombination(sigma1, sigma2, sigma3), input);
                    break;
                case 7:
                    kernel = new KernelBasisFunction(new KernelMultiquadric(sigma1, shift), input);
                    break;
                case 0:
                default:
                    // rbf is both the first category and the fallback
                    kernel = new KernelBasisFunction(new KernelRadial(lengthScale), input);
                    break;
            }
            // Shifted by one: slot 0 is intentionally left untouched.
            kernels[j + 1] = kernel;
        }

        return kernels;
    }

    /**
     * Returns the parameter types of the super class followed by the RVM
     * specific parameters (RVM type, kernel type, iteration control and the
     * kernel parameters).
     *
     * @return the list obtained from the super class with the additional
     *         parameter types appended
     */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        ParameterType type;

        type = new ParameterTypeCategory(PARAMETER_RVM_TYPE, "Regression RVM", RVM_TYPES, 0);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeCategory(PARAMETER_KERNEL_TYPE, "The type of the kernel functions.", KERNEL_TYPES, 0);
        type.setExpert(false);
        types.add(type);

        // Iteration control: these three keep the default expert setting.
        type = new ParameterTypeInt(PARAMETER_MAX_ITERATION, "The maximum number of iterations used.", 1, Integer.MAX_VALUE, 100);
        types.add(type);

        type = new ParameterTypeDouble(PARAMETER_MIN_DELTA_LOG_ALPHA, "Abort iteration if largest log alpha change is smaller than this", 0, Double.POSITIVE_INFINITY, 1e-3);
        types.add(type);

        type = new ParameterTypeDouble(PARAMETER_ALPHA_MAX, "Prune basis function if its alpha is bigger than this", 0, Double.POSITIVE_INFINITY, 1e12);
        types.add(type);

        // Kernel parameters
        type = new ParameterTypeDouble(PARAMETER_KERNEL_LENGTHSCALE, "The lengthscale used in all kernels.", 0, Double.POSITIVE_INFINITY, 3.0);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeDouble(PARAMETER_KERNEL_DEGREE, "The degree used in the poly kernel.", 0.0d, Double.POSITIVE_INFINITY, 2.0d);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeDouble(PARAMETER_KERNEL_BIAS, "The bias used in the poly kernel.", 0, Double.POSITIVE_INFINITY, 1.0);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeDouble(PARAMETER_KERNEL_SIGMA1, "The SVM kernel parameter sigma1 (Epanechnikov, Gaussian Combination, Multiquadric).", 0.0d, Double.POSITIVE_INFINITY, 1.0d);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeDouble(PARAMETER_KERNEL_SIGMA2, "The SVM kernel parameter sigma2 (Gaussian Combination).", 0.0d, Double.POSITIVE_INFINITY, 0.0d);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeDouble(PARAMETER_KERNEL_SIGMA3, "The SVM kernel parameter sigma3 (Gaussian Combination).", 0.0d, Double.POSITIVE_INFINITY, 2.0d);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeDouble(PARAMETER_KERNEL_SHIFT, "The SVM kernel parameter shift (polynomial, Multiquadric).", Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 1.0d);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeDouble(PARAMETER_KERNEL_A, "The SVM kernel parameter a (neural).", Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 1.0d);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeDouble(PARAMETER_KERNEL_B, "The SVM kernel parameter b (neural).", Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0.0d);
        type.setExpert(false);
        types.add(type);

        return types;
    }
}