EvoSVM.java example

Explorer
rapidminer-studio-master
- doc
  - doc
- src
/**
 * Copyright (C) 2001-2017 by RapidMiner and the contributors
 * 
 * Complete list of developers available at our web site:
 * 
 * http://rapidminer.com
 * 
 * This program is free software: you can redistribute it and/or modify it under the terms of the
 * GNU Affero General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License along with this program.
 * If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.functions.kernel.evosvm;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.SplittedExampleSet;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorCapability;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.annotation.ResourceConsumptionEstimator;
import com.rapidminer.operator.learner.functions.kernel.AbstractKernelBasedLearner;
import com.rapidminer.operator.performance.PerformanceVector;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.tools.OperatorResourceConsumptionHandler;
import com.rapidminer.tools.RandomGenerator;
import com.rapidminer.tools.Tools;
import com.rapidminer.tools.math.kernels.Kernel;
import com.rapidminer.tools.math.optimization.ec.es.ESOptimization;

import java.util.Arrays;
import java.util.List;


/**
 * <p>
 * This is an SVM implementation using an evolutionary algorithm (ES) to solve the dual optimization
 * problem of an SVM. It turns out that on many datasets this simple implementation is as fast and
 * accurate as the usual SVM implementations. In addition, it is also capable of learning with
 * kernels which are not positive semi-definite and can also be used for multi-objective learning
 * which makes the selection of C unnecessary before learning.
 * </p>
 * 
 * <p>
 * Nominal labels must have exactly two values (classification); every other label is handled by
 * the regression variant of the optimization.
 * </p>
 * 
 * <p>
 * Mierswa, Ingo. Evolutionary Learning with Kernels: A Generic Solution for Large Margin Problems.
 * In Proc. of the Genetic and Evolutionary Computation Conference (GECCO 2006), 2006.
 * </p>
 * 
 * @rapidminer.index SVM
 * 
 * @author Ingo Mierswa
 */
public class EvoSVM extends AbstractKernelBasedLearner {

	/**
	 * The parameter name for "The SVM complexity constant (0: calculates probably good
	 * value)."
	 */
	public static final String PARAMETER_C = "C";

	/**
	 * The parameter name for "The width of the regression tube loss function of the regression
	 * SVM"
	 */
	public static final String PARAMETER_EPSILON = "epsilon";

	/** The parameter name for "The type of start population initialization." */
	public static final String PARAMETER_START_POPULATION_TYPE = "start_population_type";

	/** The parameter name for "Stop after this many evaluations" */
	public static final String PARAMETER_MAX_GENERATIONS = "max_generations";

	/**
	 * The parameter name for "Stop after this number of generations without improvement (-1:
	 * optimize until max_generations)."
	 */
	public static final String PARAMETER_GENERATIONS_WITHOUT_IMPROVAL = "generations_without_improval";

	/** The parameter name for "The population size (-1: number of examples)" */
	public static final String PARAMETER_POPULATION_SIZE = "population_size";

	/**
	 * The parameter name for "The fraction of the population used for tournament
	 * selection."
	 */
	public static final String PARAMETER_TOURNAMENT_FRACTION = "tournament_fraction";

	/**
	 * The parameter name for the flag indicating if the best individual should survive (elitist
	 * selection).
	 */
	public static final String PARAMETER_KEEP_BEST = "keep_best";

	/** The parameter name for "The type of the mutation operator." */
	public static final String PARAMETER_MUTATION_TYPE = "mutation_type";

	/** The parameter name for "The type of the selection operator." */
	public static final String PARAMETER_SELECTION_TYPE = "selection_type";

	/** The parameter name for "The probability for crossovers." */
	public static final String PARAMETER_CROSSOVER_PROB = "crossover_prob";

	/**
	 * The parameter name for "Uses this amount as a hold out set to estimate generalization
	 * error after learning (currently only used for multi-objective classification)."
	 */
	public static final String PARAMETER_HOLD_OUT_SET_RATIO = "hold_out_set_ratio";

	/**
	 * The parameter name for "Indicates if a dialog with a convergence plot should be
	 * drawn."
	 */
	public static final String PARAMETER_SHOW_CONVERGENCE_PLOT = "show_convergence_plot";

	/**
	 * The parameter name for "Indicates if the population plot in case of the non-dominated
	 * sorting should be shown."
	 */
	public static final String PARAMETER_SHOW_POPULATION_PLOT = "show_population_plot";

	/**
	 * The parameter name for "Indicates if final optimization fitness should be returned as
	 * performance."
	 */
	public static final String PARAMETER_RETURN_OPTIMIZATION_PERFORMANCE = "return_optimization_performance";

	/** The optimization procedure; only assigned by {@link #learn(ExampleSet)}. */
	private EvoOptimization optimization;

	/**
	 * Creates a new SVM which uses an Evolutionary Strategy approach for optimization.
	 */
	public EvoSVM(OperatorDescription description) {
		super(description);
	}

	/**
	 * Returns the value of the parameter {@link #PARAMETER_RETURN_OPTIMIZATION_PERFORMANCE}.
	 */
	@Override
	public boolean shouldDeliverOptimizationPerformance() {
		return getParameterAsBoolean(PARAMETER_RETURN_OPTIMIZATION_PERFORMANCE);
	}

	/**
	 * Returns the optimization performance of the best result. This method must be called after
	 * training, not before: the underlying optimization object is only created by
	 * {@link #learn(ExampleSet)} and is dereferenced here without a null check.
	 */
	@Override
	public PerformanceVector getOptimizationPerformance() {
		return optimization.getOptimizationPerformance();
	}

	/**
	 * Learns and returns a model. A nominal label triggers the classification optimization, any
	 * other label the regression optimization.
	 * 
	 * @param exampleSet
	 *            the training data
	 * @return the model delivered by the optimization's {@code train()} call
	 * @throws OperatorException
	 *             a {@link UserError} with code 114 if the label is nominal but does not have
	 *             exactly two values; other operator errors are passed through
	 */
	@Override
	public Model learn(ExampleSet exampleSet) throws OperatorException {
		// kernel and random generator are created first, before the label is inspected
		Kernel kernel = Kernel.createKernel(this);
		RandomGenerator random = RandomGenerator.getRandomGenerator(this);

		Attribute label = exampleSet.getAttributes().getLabel();
		if (label.isNominal()) {
			if (label.getMapping().size() != 2) {
				throw new UserError(this, 114, getName(), label.getName());
			}
			optimization = createClassificationOptimization(exampleSet, kernel, random);
		} else {
			optimization = createRegressionOptimization(exampleSet, kernel, random);
		}
		return optimization.train();
	}

	/**
	 * Builds the classification optimization, splitting off a hold-out set first if
	 * {@link #PARAMETER_HOLD_OUT_SET_RATIO} is not zero.
	 */
	private EvoOptimization createClassificationOptimization(ExampleSet exampleSet, Kernel kernel, RandomGenerator random)
			throws OperatorException {
		ExampleSet trainingSet = exampleSet;
		ExampleSet holdOutSet = null;
		double holdOutSetRatio = getParameterAsDouble(PARAMETER_HOLD_OUT_SET_RATIO);
		if (!Tools.isZero(holdOutSetRatio)) {
			double[] ratios = new double[] { 1.0d - holdOutSetRatio, holdOutSetRatio };
			SplittedExampleSet splittedExampleSet = new SplittedExampleSet(exampleSet, ratios,
					SplittedExampleSet.SHUFFLED_SAMPLING,
					getParameterAsBoolean(RandomGenerator.PARAMETER_USE_LOCAL_RANDOM_SEED),
					getParameterAsInt(RandomGenerator.PARAMETER_LOCAL_RANDOM_SEED));
			// subset 0 (ratio 1 - holdOutSetRatio) is used for training ...
			splittedExampleSet.selectSingleSubset(0);
			trainingSet = splittedExampleSet.clone();
			// ... and everything except subset 0 becomes the hold-out set
			splittedExampleSet.selectAllSubsetsBut(0);
			holdOutSet = splittedExampleSet.clone();
		}
		return new ClassificationEvoOptimization(trainingSet, kernel, getParameterAsDouble(PARAMETER_C),
				getParameterAsInt(PARAMETER_START_POPULATION_TYPE), getParameterAsInt(PARAMETER_MAX_GENERATIONS),
				getParameterAsInt(PARAMETER_GENERATIONS_WITHOUT_IMPROVAL),
				getParameterAsInt(PARAMETER_POPULATION_SIZE), getParameterAsInt(PARAMETER_SELECTION_TYPE),
				getParameterAsDouble(PARAMETER_TOURNAMENT_FRACTION), getParameterAsBoolean(PARAMETER_KEEP_BEST),
				getParameterAsInt(PARAMETER_MUTATION_TYPE), getParameterAsDouble(PARAMETER_CROSSOVER_PROB),
				getParameterAsBoolean(PARAMETER_SHOW_CONVERGENCE_PLOT),
				getParameterAsBoolean(PARAMETER_SHOW_POPULATION_PLOT), holdOutSet, random, this, this);
	}

	/** Builds the regression optimization on the complete example set (no hold-out set). */
	private EvoOptimization createRegressionOptimization(ExampleSet exampleSet, Kernel kernel, RandomGenerator random)
			throws OperatorException {
		return new RegressionEvoOptimization(exampleSet, kernel, getParameterAsDouble(PARAMETER_C),
				getParameterAsDouble(PARAMETER_EPSILON), getParameterAsInt(PARAMETER_START_POPULATION_TYPE),
				getParameterAsInt(PARAMETER_MAX_GENERATIONS), getParameterAsInt(PARAMETER_GENERATIONS_WITHOUT_IMPROVAL),
				getParameterAsInt(PARAMETER_POPULATION_SIZE), getParameterAsInt(PARAMETER_SELECTION_TYPE),
				getParameterAsDouble(PARAMETER_TOURNAMENT_FRACTION), getParameterAsBoolean(PARAMETER_KEEP_BEST),
				getParameterAsInt(PARAMETER_MUTATION_TYPE), getParameterAsDouble(PARAMETER_CROSSOVER_PROB),
				getParameterAsBoolean(PARAMETER_SHOW_CONVERGENCE_PLOT),
				getParameterAsBoolean(PARAMETER_SHOW_POPULATION_PLOT), random, this, this);
	}

	/**
	 * Returns true for numerical attributes, binominal labels, numerical labels, weighted examples
	 * and the formula provider capability; false for everything else.
	 */
	@Override
	public boolean supportsCapability(OperatorCapability lc) {
		return lc == OperatorCapability.NUMERICAL_ATTRIBUTES || lc == OperatorCapability.BINOMINAL_LABEL
				|| lc == OperatorCapability.NUMERICAL_LABEL || lc == OperatorCapability.WEIGHTED_EXAMPLES
				|| lc == OperatorCapability.FORMULA_PROVIDER;
	}

	/**
	 * Creates an array of the given size in which every entry is set to the given bound.
	 * 
	 * @param bound
	 *            the value stored in every entry
	 * @param size
	 *            the length of the resulting array
	 * @return a new array of length {@code size} filled with {@code bound}
	 */
	public static double[] createBoundArray(double bound, int size) {
		double[] result = new double[size];
		Arrays.fill(result, bound);
		return result;
	}

	/**
	 * Determines the upper bounds used by the optimization. Every entry starts with the same
	 * global C and, if the example set has a weight attribute, the leading entries are multiplied
	 * by the weights of the examples in iteration order.
	 * 
	 * <p>
	 * The global C is 1000 for non-dominated sorting selection. Otherwise it is {@code _c} if that
	 * is positive, or else the number of examples divided by the sum of
	 * {@code kernel.getDistance(i, i)} over all examples.
	 * </p>
	 * 
	 * @param _c
	 *            the user defined complexity constant; values &lt;= 0 request the automatic value
	 * @param kernel
	 *            the kernel; it is initialized with {@code exampleSet} by this method
	 * @param exampleSet
	 *            the examples used for the automatic C and for the weights
	 * @param selectionType
	 *            the selection type, compared against
	 *            {@link ESOptimization#NON_DOMINATED_SORTING_SELECTION}
	 * @param arraySize
	 *            the length of the returned array
	 * @return the array of upper bounds
	 */
	public static final double[] determineMax(double _c, Kernel kernel, ExampleSet exampleSet, int selectionType,
			int arraySize) {
		double[] max = new double[arraySize];

		// init the kernel !
		kernel.init(exampleSet);

		double globalC = 1000;
		if (selectionType != ESOptimization.NON_DOMINATED_SORTING_SELECTION) {
			if (_c <= 0.0d) {
				double diagonalSum = 0.0d;
				for (int i = 0; i < exampleSet.size(); i++) {
					diagonalSum += kernel.getDistance(i, i);
				}
				// NOTE(review): a zero sum yields Infinity (or NaN for an empty example set) here
				globalC = exampleSet.size() / diagonalSum;
				// report the value that is actually used as C, not the intermediate sum
				exampleSet.getLog().log("Determine probably good value for C: set to " + globalC);
			} else {
				globalC = _c;
			}
		}

		Arrays.fill(max, globalC);

		// apply weights
		// NOTE(review): only the first exampleSet.size() entries are scaled; if arraySize is
		// larger, the remaining entries keep the plain global C -- confirm this is intended
		Attribute weightAttribute = exampleSet.getAttributes().getWeight();
		if (weightAttribute != null) {
			int index = 0;
			for (Example example : exampleSet) {
				max[index++] *= example.getValue(weightAttribute);
			}
		}

		return max;
	}

	/**
	 * Returns the parameter types of the super class followed by the kernel parameters, the SVM
	 * and evolutionary strategy parameters, the random generator parameters, and finally the
	 * hold-out, plotting and performance parameters.
	 */
	@Override
	public List<ParameterType> getParameterTypes() {
		List<ParameterType> types = super.getParameterTypes();

		// adding kernel parameters
		types.addAll(Kernel.getParameters(this));

		// adding SVM parameters (C and epsilon are the only non-expert ones)
		ParameterType cType = new ParameterTypeDouble(PARAMETER_C,
				"The SVM complexity constant (0: calculates probably good value).", 0.0d, Double.POSITIVE_INFINITY, 0.0d);
		cType.setExpert(false);
		types.add(cType);

		ParameterType epsilonType = new ParameterTypeDouble(PARAMETER_EPSILON,
				"The width of the regression tube loss function of the regression SVM", 0.0d, Double.POSITIVE_INFINITY, 0.1d);
		epsilonType.setExpert(false);
		types.add(epsilonType);

		// adding parameters of the evolutionary strategy
		types.add(new ParameterTypeCategory(PARAMETER_START_POPULATION_TYPE, "The type of start population initialization.",
				ESOptimization.POPULATION_INIT_TYPES, ESOptimization.INIT_TYPE_RANDOM));
		types.add(new ParameterTypeInt(PARAMETER_MAX_GENERATIONS, "Stop after this many evaluations", 1, Integer.MAX_VALUE,
				10000));
		types.add(new ParameterTypeInt(PARAMETER_GENERATIONS_WITHOUT_IMPROVAL,
				"Stop after this number of generations without improvement (-1: optimize until max_generations).", -1,
				Integer.MAX_VALUE, 30));
		types.add(new ParameterTypeInt(PARAMETER_POPULATION_SIZE, "The population size (-1: number of examples)", -1,
				Integer.MAX_VALUE, 1));
		types.add(new ParameterTypeDouble(PARAMETER_TOURNAMENT_FRACTION,
				"The fraction of the population used for tournament selection.", 0.0d, Double.POSITIVE_INFINITY, 0.75d));
		types.add(new ParameterTypeBoolean(PARAMETER_KEEP_BEST,
				"Indicates if the best individual should survive (elititst selection).", true));
		types.add(new ParameterTypeCategory(PARAMETER_MUTATION_TYPE, "The type of the mutation operator.",
				ESOptimization.MUTATION_TYPES, ESOptimization.GAUSSIAN_MUTATION));
		types.add(new ParameterTypeCategory(PARAMETER_SELECTION_TYPE, "The type of the selection operator.",
				ESOptimization.SELECTION_TYPES, ESOptimization.TOURNAMENT_SELECTION));
		types.add(new ParameterTypeDouble(PARAMETER_CROSSOVER_PROB, "The probability for crossovers.", 0.0d, 1.0d, 1.0d));

		// adding random generator parameters
		types.addAll(RandomGenerator.getRandomGeneratorParameters(this));

		// adding hold-out, plotting and performance parameters
		types.add(new ParameterTypeDouble(
				PARAMETER_HOLD_OUT_SET_RATIO,
				"Uses this amount as a hold out set to estimate generalization error after learning (currently only used for multi-objective classification).",
				0.0d, 1.0d, 0.0d));
		types.add(new ParameterTypeBoolean(PARAMETER_SHOW_CONVERGENCE_PLOT,
				"Indicates if a dialog with a convergence plot should be drawn.", false));
		types.add(new ParameterTypeBoolean(PARAMETER_SHOW_POPULATION_PLOT,
				"Indicates if the population plot in case of the non-dominated sorting should be shown.", false));
		types.add(new ParameterTypeBoolean(PARAMETER_RETURN_OPTIMIZATION_PERFORMANCE,
				"Indicates if final optimization fitness should be returned as performance.", false));
		return types;
	}

	/**
	 * Returns the resource consumption estimator looked up for this operator class and its example
	 * set input port.
	 */
	@Override
	public ResourceConsumptionEstimator getResourceConsumptionEstimator() {
		return OperatorResourceConsumptionHandler.getResourceConsumptionEstimator(getExampleSetInputPort(), EvoSVM.class,
				null);
	}
}