/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.features.construction;

import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.AttributeWeightedExampleSet;
import com.rapidminer.generator.BasicArithmeticOperationGenerator;
import com.rapidminer.generator.FeatureGenerator;
import com.rapidminer.generator.GenerationException;
import com.rapidminer.generator.MinMaxGenerator;
import com.rapidminer.generator.ReciprocalValueGenerator;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ValueDouble;
import com.rapidminer.operator.features.Individual;
import com.rapidminer.operator.features.KeepBest;
import com.rapidminer.operator.features.Population;
import com.rapidminer.operator.features.PopulationOperator;
import com.rapidminer.operator.features.RedundanceRemoval;
import com.rapidminer.operator.features.selection.FeatureSelectionOperator;
import com.rapidminer.operator.features.selection.SwitchingForwardSelection;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.tools.Tools;

/**
 * This operator is a kind of nested forward selection and thus is (in contrast
 * to a genetic algorithm) a directed search.
 * <ol>
 * <li>use forward selection in order to determine the best attributes</li>
 * <li>create new attributes by applying the enabled feature generators: binary
 * generators combine the original attributes with attributes of the last
 * forward selection result, unary generators are applied to the attributes of
 * that result</li>
 * <li>loop as long as performance increases</li>
 * </ol>
 *
 * @author Simon Fischer, Ingo Mierswa
 * @version $Id: GeneratingForwardSelection.java,v 1.1 2006/04/14 11:42:27
 *          ingomierswa Exp $
 */
public class GeneratingForwardSelection extends FeatureSelectionOperator {

    /** The parameter name for "Generate reciprocal values." */
    public static final String PARAMETER_RECIPROCAL_VALUE = "reciprocal_value";

    /** The parameter name for "Generate sums." */
    public static final String PARAMETER_USE_PLUS = "use_plus";

    /** The parameter name for "Generate differences." */
    public static final String PARAMETER_USE_DIFF = "use_diff";

    /** The parameter name for "Generate products." */
    public static final String PARAMETER_USE_MULT = "use_mult";

    /** The parameter name for "Generate quotients." */
    public static final String PARAMETER_USE_DIV = "use_div";

    /** The parameter name for "Generate maximum." */
    public static final String PARAMETER_USE_MAX = "use_max";

    /** The parameter name for "Use restrictive generator selection (faster)." */
    public static final String PARAMETER_RESTRICTIVE_SELECTION = "restrictive_selection";

    /**
     * The regular attributes of the input example set, remembered when the
     * initial population is created.
     */
    private Attribute[] originalAttributes;

    /**
     * The best forward selection result of the last improving turn. It is only
     * consulted for its performance when checking whether a later turn
     * improved.
     */
    private Individual bestIndividual;

    /** Prototypes of the feature generators enabled by the parameters. */
    private List<FeatureGenerator> useGenerators;

    /**
     * Index into the regular attributes of the current forward selection
     * result at which the binary generators start combining attributes. After
     * each turn it is set to the attribute count of the cleaned result, taken
     * before the newly generated attributes are added.
     */
    private int newAttributeStart;

    /** Number of improving forward selection turns finished so far. */
    private int turn;

    /** Creates the operator and registers the logging value "turn". */
    public GeneratingForwardSelection(OperatorDescription description) {
        super(description);
        addValue(new ValueDouble("turn", "The number of the current turn.") {
            public double getDoubleValue() {
                return turn;
            }
        });
    }

    /**
     * Resets the per-run state, builds the generator prototypes from the
     * parameters, sets the global generator selection mode and then delegates
     * to the superclass.
     *
     * @return the result of the superclass' <tt>apply()</tt>
     * @throws OperatorException if the superclass' <tt>apply()</tt> fails
     */
    public IOObject[] apply() throws OperatorException {
        // forget everything from a previous run
        newAttributeStart = 0;
        turn = 0;
        bestIndividual = null;
        originalAttributes = null;

        useGenerators = createConfiguredGenerators();
        if (useGenerators.isEmpty()) {
            logWarning("No FeatureGenerators specified for " + getName() + ".");
        }

        // NOTE: the selection mode is static state of FeatureGenerator
        if (getParameterAsBoolean(PARAMETER_RESTRICTIVE_SELECTION)) {
            FeatureGenerator.setSelectionMode(FeatureGenerator.SELECTION_MODE_RESTRICTIVE);
        } else {
            FeatureGenerator.setSelectionMode(FeatureGenerator.SELECTION_MODE_ALL);
        }

        return super.apply();
    }

    /** Creates one generator prototype for each generator parameter that is switched on. */
    private List<FeatureGenerator> createConfiguredGenerators() throws OperatorException {
        List<FeatureGenerator> configured = new LinkedList<FeatureGenerator>();
        if (getParameterAsBoolean(PARAMETER_RECIPROCAL_VALUE)) {
            configured.add(new ReciprocalValueGenerator());
        }
        if (getParameterAsBoolean(PARAMETER_USE_PLUS)) {
            configured.add(new BasicArithmeticOperationGenerator(BasicArithmeticOperationGenerator.SUM));
        }
        if (getParameterAsBoolean(PARAMETER_USE_DIFF)) {
            configured.add(new BasicArithmeticOperationGenerator(BasicArithmeticOperationGenerator.DIFFERENCE));
        }
        if (getParameterAsBoolean(PARAMETER_USE_MULT)) {
            configured.add(new BasicArithmeticOperationGenerator(BasicArithmeticOperationGenerator.PRODUCT));
        }
        if (getParameterAsBoolean(PARAMETER_USE_DIV)) {
            configured.add(new BasicArithmeticOperationGenerator(BasicArithmeticOperationGenerator.QUOTIENT));
        }
        if (getParameterAsBoolean(PARAMETER_USE_MAX)) {
            configured.add(new MinMaxGenerator(MinMaxGenerator.MAX));
        }
        return configured;
    }

    /**
     * Creates the initial population: one individual for each attribute of
     * <tt>es</tt>, in which exactly this attribute is switched on and all
     * others are switched off. The regular attributes of <tt>es</tt> are
     * remembered as the original attributes.
     *
     * @param es the input example set
     * @return the initial population
     */
    public Population createInitialPopulation(ExampleSet es) {
        // remember the original attributes
        originalAttributes = es.getAttributes().createRegularAttributeArray();

        // template example set with every attribute switched off
        AttributeWeightedExampleSet nes = new AttributeWeightedExampleSet((ExampleSet) es.clone());
        for (Attribute attribute : es.getAttributes()) {
            nes.setAttributeUsed(attribute, false);
        }

        Population initP = new Population();
        for (Attribute attribute : es.getAttributes()) {
            AttributeWeightedExampleSet forwardES = (AttributeWeightedExampleSet) nes.clone();
            forwardES.setAttributeUsed(attribute, true);
            initP.add(new Individual(forwardES));
        }
        return initP;
    }

    /**
     * Returns the population operators applied before each evaluation, in
     * this order:
     * <ol>
     * <li>{@link KeepBest}, configured with the keep-best parameter</li>
     * <li>{@link SwitchingForwardSelection}</li>
     * <li>{@link RedundanceRemoval}</li>
     * </ol>
     *
     * @param input the input example set (not used by this implementation)
     * @return the list of pre-evaluation population operators
     * @throws OperatorException if the keep-best parameter cannot be read
     */
    public List<PopulationOperator> getPreEvaluationPopulationOperators(ExampleSet input) throws OperatorException {
        List<PopulationOperator> preOp = new LinkedList<PopulationOperator>();
        preOp.add(new KeepBest(getParameterAsInt(PARAMETER_KEEP_BEST)));
        preOp.add(new SwitchingForwardSelection());
        preOp.add(new RedundanceRemoval());
        return preOp;
    }

    /**
     * Decides whether the search is finished. While the superclass criterion
     * is not met the current forward selection simply continues. Once it is
     * met the turn is over: if the population is empty, or the best individual
     * ever found is not better than the best result of the previous improving
     * turn, the search stops. Otherwise new attributes are generated from the
     * best result, the population is replaced by a single individual holding
     * them (switched off), and the next forward selection turn starts.
     *
     * @param pop the current population; cleared and refilled when a new turn
     *            is started
     * @return <tt>true</tt> if the search should stop
     * @throws OperatorException if the superclass check fails, or as a
     *             {@link UserError} (code 108) if the attribute generation
     *             fails
     */
    public boolean solutionGoodEnough(Population pop) throws OperatorException {
        if (!super.solutionGoodEnough(pop)) {
            // go on with the forward selection
            return false;
        }
        if (pop.getNumberOfIndividuals() <= 0) {
            return true;
        }

        // The forward selection is finished
        Individual fsBest = pop.getBestIndividualEver();

        // Check whether the performance was improved by this turn
        boolean improved = (bestIndividual == null)
                || (bestIndividual.getPerformance() == null)
                || (fsBest.getPerformance().compareTo(bestIndividual.getPerformance()) > 0);
        if (!improved) {
            // otherwise quit
            return true;
        }

        turn++;
        // Remember the evaluated individual itself. A freshly constructed
        // Individual carries no performance, which would make the improvement
        // check above succeed on every turn so that the search never stops.
        bestIndividual = fsBest;

        // work on a cleaned copy so that the best individual stays untouched
        AttributeWeightedExampleSet fsBestCopy = (AttributeWeightedExampleSet) fsBest.getExampleSet().clone();
        AttributeWeightedExampleSet fsBestExampleSet = new AttributeWeightedExampleSet(fsBestCopy.createCleanClone());
        Attribute[] fsBestAttributes = fsBestExampleSet.getAttributes().createRegularAttributeArray();
        log(Tools.ordinalNumber(turn) + " turn's FS result: " + fsBest);

        // and generate all new attributes using the generators
        int numberOfAttributes = fsBestExampleSet.getAttributes().size();
        List<FeatureGenerator> generators = createGenerators(fsBestAttributes, numberOfAttributes);
        log("Generating " + generators.size() + " new attributes.");
        newAttributeStart = fsBestExampleSet.getAttributes().size();

        addGeneratedAttributes(fsBestExampleSet, generators);

        // clear the population, add the generated set
        pop.clear();
        pop.add(new Individual(fsBestExampleSet));
        return false;
    }

    /**
     * Instantiates the enabled generators with their arguments: binary
     * generators for each pair of an original attribute and a selected
     * attribute with index in [newAttributeStart, numberOfAttributes), unary
     * generators for every selected attribute.
     */
    private List<FeatureGenerator> createGenerators(Attribute[] fsBestAttributes, int numberOfAttributes) {
        List<FeatureGenerator> generators = new LinkedList<FeatureGenerator>();
        // for all generator types
        for (FeatureGenerator prototype : useGenerators) {
            int arity = prototype.getInputAttributes().length;
            if (arity == 2) {
                // for all new arguments
                for (int a = newAttributeStart; a < numberOfAttributes; a++) {
                    // for all original attributes
                    for (int o = 0; o < originalAttributes.length; o++) {
                        FeatureGenerator g = prototype.newInstance();
                        g.setArguments(new Attribute[] { originalAttributes[o], fsBestAttributes[a] });
                        generators.add(g);
                    }
                }
            } else if (arity == 1) {
                for (int a = 0; a < fsBestAttributes.length; a++) {
                    FeatureGenerator g = prototype.newInstance();
                    g.setArguments(new Attribute[] { fsBestAttributes[a] });
                    generators.add(g);
                }
            } else {
                logWarning("Functions with arity " + arity + " not supported: " + prototype);
            }
        }
        return generators;
    }

    /**
     * Runs the generators on the example table of the given example set and
     * adds every generated attribute as a regular, switched-off attribute.
     */
    private void addGeneratedAttributes(AttributeWeightedExampleSet exampleSet, List<FeatureGenerator> generators) throws OperatorException {
        try {
            List<Attribute> attributes = FeatureGenerator.generateAll(exampleSet.getExampleTable(), generators);
            for (Attribute attr : attributes) {
                try {
                    exampleSet.getAttributes().addRegular(attr);
                    exampleSet.setAttributeUsed(attr, false);
                } catch (Exception e) {
                    // best effort: an attribute that cannot be added is
                    // reported and skipped, the remaining ones are still added
                    logWarning(e.getMessage());
                }
            }
        } catch (GenerationException e) {
            throw new UserError(this, e, 108, e.getMessage());
        }
    }

    /**
     * Returns the parameter types of the superclass followed by the boolean
     * switches for the individual generators and for the restrictive
     * generator selection. All of them default to <tt>true</tt>.
     */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        types.add(new ParameterTypeBoolean(PARAMETER_RECIPROCAL_VALUE, "Generate reciprocal values.", true));
        types.add(new ParameterTypeBoolean(PARAMETER_USE_PLUS, "Generate sums.", true));
        types.add(new ParameterTypeBoolean(PARAMETER_USE_DIFF, "Generate differences.", true));
        types.add(new ParameterTypeBoolean(PARAMETER_USE_MULT, "Generate products.", true));
        types.add(new ParameterTypeBoolean(PARAMETER_USE_DIV, "Generate quotients.", true));
        types.add(new ParameterTypeBoolean(PARAMETER_USE_MAX, "Generate maximum.", true));
        types.add(new ParameterTypeBoolean(PARAMETER_RESTRICTIVE_SELECTION, "Use restrictive generator selection (faster).", true));
        return types;
    }
}