/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.features.selection;

import java.util.List;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.AttributeWeightedExampleSet;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.features.Individual;
import com.rapidminer.operator.features.Population;
import com.rapidminer.operator.features.PopulationOperator;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.UndefinedParameterError;

/**
 * A genetic algorithm for feature selection (mutation=switch features on and
 * off, crossover=interchange used features). Selection is done by roulette
 * wheel. Genetic algorithms are general purpose optimization / search
 * algorithms that are suitable in case of no or little problem knowledge.
 * <br/>
 *
 * A genetic algorithm works as follows
 * <ol>
 * <li>Generate an initial population consisting of
 * <code>population_size</code> individuals. Each attribute is switched on
 * with probability <code>p_initialize</code></li>
 * <li>For all individuals in the population
 * <ul>
 * <li>Perform mutation, i.e. set used attributes to unused with probability
 * <code>p_mutation</code> and vice versa.</li>
 * <li>Choose two individuals from the population and perform crossover with
 * probability <code>p_crossover</code>. The type of crossover can be
 * selected by <code>crossover_type</code>.</li>
 * </ul>
 * </li>
 * <li>Perform selection, map all individuals to sections on a roulette wheel
 * whose size is proportional to the individual's fitness and draw
 * <code>population_size</code> individuals at random according to their
 * probability.</li>
 * <li>As long as the fitness improves, go to 2</li>
 * </ol>
 *
 * If the example set contains value series attributes with blocknumbers, the
 * whole block will be switched on and off.
 *
 * @author Ingo Mierswa, Simon Fischer
 * @version $Id: GeneticAlgorithm.java,v 1.7 2008/05/09 19:23:18 ingomierswa Exp $
 */
public class GeneticAlgorithm extends AbstractGeneticAlgorithm {

    /** The parameter name for "Initial probability for an attribute to be switched on." */
    public static final String PARAMETER_P_INITIALIZE = "p_initialize";

    /** The parameter name for "Probability for an attribute to be changed (-1: 1 / numberOfAtt)." */
    public static final String PARAMETER_P_MUTATION = "p_mutation";

    /** The parameter name for "Probability for an individual to be selected for crossover." */
    public static final String PARAMETER_P_CROSSOVER = "p_crossover";

    /** The parameter name for "Type of the crossover." */
    public static final String PARAMETER_CROSSOVER_TYPE = "crossover_type";

    /** The parameter name for the maximum number of features of an individual (-1: no upper limit). */
    public static final String PARAMETER_MAX_NUMBER_OF_ATTRIBUTES = "max_number_of_attributes";

    /** The parameter name for the minimum number of features of an individual. */
    public static final String PARAMETER_MIN_NUMBER_OF_ATTRIBUTES = "min_number_of_attributes";

    /** The parameter name for the exact number of features of an individual (-1: use the min/max range). */
    public static final String PARAMETER_EXACT_NUMBER_OF_ATTRIBUTES = "exact_number_of_attributes";

    /**
     * Creates the operator from its description.
     *
     * @param description the operator description handed over to the super class
     */
    public GeneticAlgorithm(OperatorDescription description) {
        super(description);
    }

    /**
     * Sets up a population of the configured size and creates example sets with
     * randomly selected attributes. Candidates are drawn at random and rejected
     * until they satisfy the configured constraint:
     * <ul>
     * <li>if <code>exact_number_of_attributes</code> is positive, each attribute is
     * switched on with probability <code>exactNumber / numberOfAttributes</code> and
     * only candidates with exactly that many used attributes are kept;</li>
     * <li>otherwise each attribute is flipped with probability
     * <code>1 - p_initialize</code> and only candidates whose number of used
     * attributes lies within the min / max range are kept.</li>
     * </ul>
     *
     * @param es the example set the individuals are derived from
     * @return the initial population
     * @throws UserError if a positive maximum is smaller than the minimum
     * @throws OperatorException if the requested exact or minimum number of
     *         attributes exceeds the number of attributes of the example set
     *         (no individual could ever be accepted in that case)
     */
    public Population createInitialPopulation(ExampleSet es) throws OperatorException {
        int minNumber = getParameterAsInt(PARAMETER_MIN_NUMBER_OF_ATTRIBUTES);
        int maxNumber = getParameterAsInt(PARAMETER_MAX_NUMBER_OF_ATTRIBUTES);
        int exactNumber = getParameterAsInt(PARAMETER_EXACT_NUMBER_OF_ATTRIBUTES);
        boolean useExactNumber = exactNumber > 0;

        if (useExactNumber) {
            logNote("Using exact number of features for feature selection (" + exactNumber + "), ignoring possibly defined range for the number of features.");
        } else if ((maxNumber > 0) && (minNumber > maxNumber)) {
            throw new UserError(this, 210, PARAMETER_MAX_NUMBER_OF_ATTRIBUTES, PARAMETER_MIN_NUMBER_OF_ATTRIBUTES);
        }

        // The loops below use rejection sampling. If more attributes are required
        // than the example set provides, no candidate can ever be accepted and the
        // loop would never terminate, so fail early with a clear message instead.
        int numberOfAttributes = es.getAttributes().size();
        if (useExactNumber && (exactNumber > numberOfAttributes)) {
            throw new OperatorException("Cannot create initial population: parameter '"
                    + PARAMETER_EXACT_NUMBER_OF_ATTRIBUTES + "' (" + exactNumber
                    + ") exceeds the number of available attributes (" + numberOfAttributes + ").");
        }
        if (!useExactNumber && (minNumber > numberOfAttributes)) {
            throw new OperatorException("Cannot create initial population: parameter '"
                    + PARAMETER_MIN_NUMBER_OF_ATTRIBUTES + "' (" + minNumber
                    + ") exceeds the number of available attributes (" + numberOfAttributes + ").");
        }

        // parameters do not change while the population is built, so read them once
        int populationSize = getParameterAsInt(PARAMETER_POPULATION_SIZE);
        Population initP = new Population();

        if (useExactNumber) {
            // exact feature number
            double prob = 1.0d / numberOfAttributes * (double) exactNumber;
            while (initP.getNumberOfIndividuals() < populationSize) {
                AttributeWeightedExampleSet nes = new AttributeWeightedExampleSet(es);
                for (Attribute attribute : nes.getAttributes()) {
                    nes.setAttributeUsed(attribute, getRandom().nextDouble() < prob);
                }
                // add result with exact number of features
                if (nes.getNumberOfUsedAttributes() == exactNumber) {
                    initP.add(new Individual(nes));
                }
            }
        } else {
            // within range
            double pSwitch = 1.0d - getParameterAsDouble(PARAMETER_P_INITIALIZE);
            while (initP.getNumberOfIndividuals() < populationSize) {
                AttributeWeightedExampleSet nes = new AttributeWeightedExampleSet(es);
                for (Attribute attribute : nes.getAttributes()) {
                    if (getRandom().nextDouble() < pSwitch) {
                        nes.flipAttributeUsed(attribute);
                    }
                }
                int numberOfFeatures = nes.getNumberOfUsedAttributes();
                boolean belowMaximum = (maxNumber < 1) || (numberOfFeatures <= maxNumber);
                if (belowMaximum && (numberOfFeatures >= minNumber)) {
                    initP.add(new Individual(nes));
                }
            }
        }
        return initP;
    }

    /**
     * Returns an operator that performs the mutation. Can be overridden by
     * subclasses.
     *
     * @param eSet the example set (not used by this implementation)
     * @return a {@link SelectionMutation} configured from the mutation
     *         probability and the min / max / exact feature number parameters
     * @throws UndefinedParameterError if one of the parameters is not defined
     */
    protected PopulationOperator getMutationPopulationOperator(ExampleSet eSet) throws UndefinedParameterError {
        double pMutation = getParameterAsDouble(PARAMETER_P_MUTATION);
        int minNumber = getParameterAsInt(PARAMETER_MIN_NUMBER_OF_ATTRIBUTES);
        int maxNumber = getParameterAsInt(PARAMETER_MAX_NUMBER_OF_ATTRIBUTES);
        int exactNumber = getParameterAsInt(PARAMETER_EXACT_NUMBER_OF_ATTRIBUTES);
        return new SelectionMutation(pMutation, getRandom(), minNumber, maxNumber, exactNumber);
    }

    /**
     * Returns an operator that performs crossover. Can be overridden by
     * subclasses.
     *
     * @param eSet the example set (not used by this implementation)
     * @return a {@link SelectionCrossover} configured from the crossover type and
     *         probability and the min / max / exact feature number parameters
     * @throws UndefinedParameterError if one of the parameters is not defined
     */
    protected PopulationOperator getCrossoverPopulationOperator(ExampleSet eSet) throws UndefinedParameterError {
        double pCrossover = getParameterAsDouble(PARAMETER_P_CROSSOVER);
        int crossoverType = getParameterAsInt(PARAMETER_CROSSOVER_TYPE);
        int minNumber = getParameterAsInt(PARAMETER_MIN_NUMBER_OF_ATTRIBUTES);
        int maxNumber = getParameterAsInt(PARAMETER_MAX_NUMBER_OF_ATTRIBUTES);
        int exactNumber = getParameterAsInt(PARAMETER_EXACT_NUMBER_OF_ATTRIBUTES);
        return new SelectionCrossover(crossoverType, pCrossover, getRandom(), minNumber, maxNumber, exactNumber);
    }

    /**
     * Returns the parameter types of the super class followed by the parameters
     * of this operator: the min / max / exact number of attributes, the
     * initialization, mutation and crossover probabilities and the crossover type.
     *
     * @return the list of parameter types
     */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();

        ParameterType type = new ParameterTypeInt(PARAMETER_MIN_NUMBER_OF_ATTRIBUTES, "Determines the minimum number of features used for the combinations.", 1, Integer.MAX_VALUE, 1);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeInt(PARAMETER_MAX_NUMBER_OF_ATTRIBUTES, "Determines the maximum number of features used for the combinations (-1: try all combinations up to possible maximum)", -1, Integer.MAX_VALUE, -1);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeInt(PARAMETER_EXACT_NUMBER_OF_ATTRIBUTES, "Determines the exact number of features used for the combinations (-1: use the feature range defined by min and max).", -1, Integer.MAX_VALUE, -1);
        type.setExpert(false);
        types.add(type);

        types.add(new ParameterTypeDouble(PARAMETER_P_INITIALIZE, "Initial probability for an attribute to be switched on.", 0, 1, 0.5));

        type = new ParameterTypeDouble(PARAMETER_P_MUTATION, "Probability for an attribute to be changed (-1: 1 / numberOfAtt).", -1.0d, 1.0d, -1.0d);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeDouble(PARAMETER_P_CROSSOVER, "Probability for an individual to be selected for crossover.", 0.0d, 1.0d, 0.5d);
        type.setExpert(false);
        types.add(type);

        types.add(new ParameterTypeCategory(PARAMETER_CROSSOVER_TYPE, "Type of the crossover.", SelectionCrossover.CROSSOVER_TYPES, SelectionCrossover.UNIFORM));
        return types;
    }
}