/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.features.construction;
import java.util.List;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.generator.FeatureGenerator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeString;
/**
* DirectedGGA is an acronym for a Generating Genetic Algorithm which uses probability directed
* search heuristics to select attributes for generation or removing. Its approach to generating new
* attributes differs from the original one and is the same as the one of {@link YAGGA}. <br/>
*
* The (generating) mutation can do one of the following things with different probabilities:
* <ul>
* <li>Probability {@rapidminer.math p/4}: Add a newly generated attribute to the feature vector
* </li>
* <li>Probability {@rapidminer.math p/4}: Add a randomly chosen original attribute to the feature
* vector</li>
* <li>Probability {@rapidminer.math p/2}: Remove a randomly chosen attribute from the feature
* vector</li>
* </ul>
* Thus it is guaranteed that the length of the feature vector can both grow and shrink. On average
* it will keep its original length, unless longer or shorter individuals prove to have a better
* fitness.<br/>
*
 * In addition to these mutation heuristics, probabilities based on the weights of the attributes
* are calculated. It is more likely for attributes with a great weight to be selected for
* generating new attributes. On the other hand the probability for removing an attribute from the
* example set will decrease for attributes with great weights. This decreases the amount of needed
* generations drastically. <br/>
*
* Another enhancement in comparison to the original GGA is the addition of several generators like
 * the ones for trigonometric or exponential functions. In this way a simple linear working learning
* scheme which can deliver weights can be used as inner operator. If this learner can also estimate
 * its performance it is no longer necessary to use an inner cross-validation which also decreases
* learning time. Such a learner is for example the
* {@link com.rapidminer.operator.learner.functions.kernel.JMySVMLearner} which delivers the
* xi-alpha performance estimation at least for classification tasks. <br/>
 *
*
* Summarized the advantages of this feature construction algorithm are smaller runtimes and smaller
* attribute sets as result. These attribute sets increase performance and can be used to explain
* the models of more complex learning schemes like SVMs. The additional generators allow the
* construction of features which are not possible by the known kernel functions. <br/>
*
* Since this operator does not contain algorithms to extract features from value series, it is
* restricted to example sets with only single attributes. For (automatic) feature extraction from
 * value series the value series plugin for RapidMiner written by Ingo Mierswa should be used. It
* is available at <code>http://rapidminer.com</code>.
*
* @author Ingo Mierswa
*/
public class DirectedGGA extends YAGGA2 {

	/**
	 * The parameter name for "The maximum number of generated attributes per generation."
	 */
	public static final String PARAMETER_MAX_GENERATED = "max_generated";

	/**
	 * The parameter name for "The maximum number of original attributes added per
	 * generation."
	 */
	public static final String PARAMETER_MAX_ORIGINAL = "max_original";

	/**
	 * Creates this operator.
	 *
	 * @param description the operator description supplied by the framework
	 */
	public DirectedGGA(OperatorDescription description) {
		super(description);
	}

	/**
	 * Returns the {@link DirectedGeneratingMutation} configured from this operator's parameters
	 * and the regular attributes of the given example set.
	 *
	 * @param eSet the example set whose regular attributes are subject to mutation
	 * @return the mutation population operator for this generating GA
	 * @throws OperatorException if a required parameter cannot be retrieved
	 */
	@Override
	protected ExampleSetBasedPopulationOperator getMutationPopulationOperator(ExampleSet eSet) throws OperatorException {
		List<FeatureGenerator> generators = getGenerators();
		// Without generators the mutation can still add/remove original attributes, but no new
		// features can be constructed; warn instead of failing.
		if (generators.isEmpty()) {
			logWarning("No FeatureGenerators specified for " + getName() + ".");
		}
		Attribute[] attributes = eSet.getAttributes().createRegularAttributeArray();
		return new DirectedGeneratingMutation(attributes, getParameterAsDouble(PARAMETER_P_MUTATION), generators,
				getParameterAsInt(PARAMETER_MAX_GENERATED), getParameterAsInt(PARAMETER_MAX_ORIGINAL),
				getParameterAsString(PARAMETER_UNUSED_FUNCTIONS).split(" "), getRandom());
	}

	/**
	 * Adds the max-generated, max-original and unused-functions parameters to the
	 * parameter types inherited from {@link YAGGA2}.
	 */
	@Override
	public List<ParameterType> getParameterTypes() {
		List<ParameterType> types = super.getParameterTypes();
		types.add(new ParameterTypeInt(PARAMETER_MAX_GENERATED, "The maximum number of generated attributes per generation.",
				1, Integer.MAX_VALUE, 2));
		types.add(new ParameterTypeInt(PARAMETER_MAX_ORIGINAL,
				"The maximum number of original attributes added per generation.", 1, Integer.MAX_VALUE, 2));
		types.add(new ParameterTypeString(PARAMETER_UNUSED_FUNCTIONS,
				"Space separated list of functions which are not allowed in arguments for attribute construction."));
		return types;
	}
}