/* * RapidMiner * * Copyright (C) 2001-2008 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.features.construction; import java.util.List; import com.rapidminer.example.Attribute; import com.rapidminer.example.ExampleSet; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.features.PopulationOperator; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeInt; import com.rapidminer.parameter.ParameterTypeString; /** * DirectedGGA is an acronym for a Generating Genetic Algorithm which uses * probability directed search heuristics to select attributes for generation or * removing. Its approach to generating new attributes differs from the original * one and is the same as the one of {@link YAGGA}. <br/> * * The (generating) mutation can do one of the following things with different * probabilities: * <ul> * <li>Probability {@rapidminer.math p/4}: Add a newly generated attribute to the * feature vector</li> * <li>Probability {@rapidminer.math p/4}: Add a randomly chosen original attribute * to the feature vector</li> * <li>Probability {@rapidminer.math p/2}: Remove a randomly chosen attribute from * the feature vector</li> * </ul> * Thus it is guaranteed that the length of the feature vector can both grow and * shrink. On average it will keep its original length, unless longer or shorter * individuals prove to have a better fitness.<br/> * * In addition to these mutation heuristics probablilities based on the weights * of the attributes are calculated. It is more likely for attributes with a * great weight to be selected for generating new attributes. On the other hand * the probability for removing an attribute from the example set will decrease * for attributes with great weights. This decreases the amount of needed * generations drastically. <br/> * * Another enhancement in comparison to the original GGA is the addition of * several generators like the ones for trigonometric or exponential functions. * In this way a sinple linear working learning scheme which can deliver weights * can be used as inner operator. If this learner can also estimate its * performance it is not longer necessary to use a inner cross-validation which * also decreases learning time. Such a learner is for example the * {@link com.rapidminer.operator.learner.functions.kernel.JMySVMLearner} which delivers * the xi-alpha performance estimation at least for classification tasks. <br/>. * * Summarized the advantages of this feature construction algorithm are smaller * runtimes and smaller attribute sets as result. These attribute sets increase * performance and can be used to explain the models of more complex learning * schemes like SVMs. The additional generators allow the construction of * features which are not possible by the known kernel functions. <br/> * * Since this operator does not contain algorithms to extract features from * value series, it is restricted to example sets with only single attributes. * For (automatic) feature extraction from values series the value series plugin * for RapidMiner written by Ingo Mierswa should be used. It is available at * <code>http://rapid-i.com</code>. * * @author Ingo Mierswa * @version $Id: DirectedGGA.java,v 1.5 2008/05/09 19:22:54 ingomierswa Exp $ */ public class DirectedGGA extends YAGGA2 { /** The parameter name for "The maximum number of generated attributes per generation." */ public static final String PARAMETER_MAX_GENERATED = "max_generated"; /** The parameter name for "The maximum number of original attributes added per generation." */ public static final String PARAMETER_MAX_ORIGINAL = "max_original"; public DirectedGGA(OperatorDescription description) { super(description); } /** Returns the {@link DirectedGeneratingMutation}. */ protected PopulationOperator getMutationPopulationOperator(ExampleSet eSet) throws OperatorException { List generators = getGenerators(); if (generators.size() == 0) logWarning("No FeatureGenerators specified for " + getName() + "."); Attribute[] attributes = eSet.getAttributes().createRegularAttributeArray(); return new DirectedGeneratingMutation(attributes, getParameterAsDouble(PARAMETER_P_MUTATION), generators, getParameterAsInt(PARAMETER_MAX_GENERATED), getParameterAsInt(PARAMETER_MAX_ORIGINAL), getParameterAsInt(PARAMETER_MAX_CONSTRUCTION_DEPTH), getParameterAsString(PARAMETER_UNUSED_FUNCTIONS).split(" "), getRandom()); } public List<ParameterType> getParameterTypes() { List<ParameterType> types = super.getParameterTypes(); types.add(new ParameterTypeInt(PARAMETER_MAX_GENERATED, "The maximum number of generated attributes per generation.", 1, Integer.MAX_VALUE, 2)); types.add(new ParameterTypeInt(PARAMETER_MAX_ORIGINAL, "The maximum number of original attributes added per generation.", 1, Integer.MAX_VALUE, 2)); types.add(new ParameterTypeInt(PARAMETER_MAX_CONSTRUCTION_DEPTH, "The maximum depth for the argument attributes used for attribute construction (-a: allow all depths).", -1, Integer.MAX_VALUE, -1)); types.add(new ParameterTypeString(PARAMETER_UNUSED_FUNCTIONS, "Space separated list of functions which are not allowed in arguments for attribute construction.")); return types; } }