/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.features.aggregation;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Random;

import com.rapidminer.datatable.SimpleDataTable;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.generator.AlgebraicOrGenerator;
import com.rapidminer.generator.FeatureGenerator;
import com.rapidminer.generator.MinMaxGenerator;
import com.rapidminer.operator.IOContainer;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.OperatorChain;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.condition.InnerOperatorCondition;
import com.rapidminer.operator.condition.LastInnerOperatorCondition;
import com.rapidminer.operator.performance.PerformanceVector;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeFile;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.tools.RandomGenerator;

/**
 * Performs an evolutionary feature aggregation. Each base feature is only
 * allowed to be used as base feature, in one merged feature, or it may not be
 * used at all.
 *
 * <p>
 * The operator evolves a population of {@link AggregationIndividual}s by
 * repeatedly applying crossover, mutation, evaluation (through the inner
 * operators, which must deliver a {@link PerformanceVector}) and selection.
 * After the configured number of generations the individual with the best
 * performance is delivered together with its performance.
 * </p>
 *
 * @author Ingo Mierswa
 * @version $Id: EvolutionaryFeatureAggregation.java,v 1.6 2006/04/05 08:57:23
 *          ingomierswa Exp $
 */
public class EvolutionaryFeatureAggregation extends OperatorChain {

    public static final String PARAMETER_POPULATION_CRITERIA_DATA_FILE = "population_criteria_data_file";

    public static final String PARAMETER_AGGREGATION_FUNCTION = "aggregation_function";

    public static final String PARAMETER_POPULATION_SIZE = "population_size";

    public static final String PARAMETER_MAXIMUM_NUMBER_OF_GENERATIONS = "maximum_number_of_generations";

    public static final String PARAMETER_SELECTION_TYPE = "selection_type";

    public static final String PARAMETER_TOURNAMENT_FRACTION = "tournament_fraction";

    public static final String PARAMETER_CROSSOVER_TYPE = "crossover_type";

    public static final String PARAMETER_P_CROSSOVER = "p_crossover";

    public static final String PARAMETER_LOCAL_RANDOM_SEED = "local_random_seed";

    /** The names for the selection types. */
    private static final String[] SELECTION_TYPES = { "tournament", "non-dominated" };

    /** Indicates tournament selection. */
    private static final int SELECTION_TOURNAMENT = 0;

    /** Indicates NSGA-II selection. */
    private static final int SELECTION_MO = 1;

    /** The names for the aggregation functions. */
    private static final String[] AGGREGATION_FUNCTIONS = { "maximum", "algebraic_or" };

    /** Indicates the maximum aggregation function. */
    private static final int AGGREGATION_MAX = 0;

    /** Indicates the algebraic OR aggregation function. */
    private static final int AGGREGATION_ALGEBRAIC = 1;

    /** The original attributes. */
    private Attribute[] allAttributes;

    /** The used feature generator. */
    private FeatureGenerator generator = new MinMaxGenerator(MinMaxGenerator.MAX);

    /** The number of generations performed so far in the current run. */
    private int generation = 0;

    /** The number of generations after which the optimization stops. */
    private int maxGeneration = 100;

    /** Creates a new evolutionary feature aggregation algorithm. */
    public EvolutionaryFeatureAggregation(OperatorDescription description) {
        super(description);
    }

    // ================================================================================

    /**
     * Runs the evolutionary optimization on the input example set.
     *
     * @return an array holding the example set created from the best
     *         individual of the final population followed by the performance
     *         of that individual
     * @throws OperatorException
     *             if a parameter cannot be read, an inner operator fails, the
     *             criteria data file cannot be written, or no individual with
     *             at least one attribute is left in the population
     */
    public IOObject[] apply() throws OperatorException {
        // init
        ExampleSet exampleSet = getInput(ExampleSet.class);
        int popSize = getParameterAsInt(PARAMETER_POPULATION_SIZE);
        this.generation = 0;
        this.maxGeneration = getParameterAsInt(PARAMETER_MAXIMUM_NUMBER_OF_GENERATIONS);

        // An unknown function index leaves the current generator untouched,
        // exactly as before.
        switch (getParameterAsInt(PARAMETER_AGGREGATION_FUNCTION)) {
            case AGGREGATION_MAX:
                this.generator = new MinMaxGenerator(MinMaxGenerator.MAX);
                break;
            case AGGREGATION_ALGEBRAIC:
                this.generator = new AlgebraicOrGenerator();
                break;
        }

        RandomGenerator random = RandomGenerator.getRandomGenerator(getParameterAsInt(PARAMETER_LOCAL_RANDOM_SEED));

        // remember the original attributes in iteration order
        this.allAttributes = new Attribute[exampleSet.getAttributes().size()];
        int index = 0;
        for (Attribute attribute : exampleSet.getAttributes()) {
            allAttributes[index++] = attribute;
        }

        // plotter
        AggregationPopulationPlotter plotter = new AggregationPopulationPlotter(exampleSet, allAttributes, this.generator);

        // crossover
        AggregationCrossover crossover = new AggregationCrossover(getParameterAsInt(PARAMETER_CROSSOVER_TYPE),
                getParameterAsDouble(PARAMETER_P_CROSSOVER), random);

        // mutation
        AggregationMutation mutation = new AggregationMutation(random);

        // selection
        AggregationSelection selection = createSelection(popSize, random);

        // initial population
        List<AggregationIndividual> population = createInitialPopulation(popSize, allAttributes.length, random);

        // start optimization loop
        while (!solutionGoodEnough()) {
            generation++;
            crossover.crossover(population);
            mutation.mutate(population);
            evaluate(population, exampleSet);
            // evaluate() drops individuals without attributes; fail with a
            // clear message instead of running selection on nothing
            checkPopulationNotEmpty(population);
            selection.performSelection(population);
            plotter.operate(population);
            inApplyLoop();
        }

        // write criteria data of the final population into a file
        if (isParameterSet(PARAMETER_POPULATION_CRITERIA_DATA_FILE)) {
            writeCriteriaData(plotter, population);
        }

        // return result
        evaluate(population, exampleSet);
        checkPopulationNotEmpty(population);

        AggregationIndividual bestEver = null;
        PerformanceVector bestPerformance = null;
        for (AggregationIndividual current : population) {
            PerformanceVector currentPerf = current.getPerformance();
            if ((bestPerformance == null) || (currentPerf.compareTo(bestPerformance) > 0)) {
                bestPerformance = currentPerf;
                bestEver = current;
            }
        }

        return new IOObject[] { bestEver.createExampleSet(exampleSet, allAttributes, generator), bestPerformance };
    }

    // ================================================================================

    /**
     * Creates the selection scheme chosen by the selection type parameter.
     *
     * @param popSize
     *            the population size handed to the selection scheme
     * @param random
     *            the random generator used by tournament selection
     * @return the selection scheme, never <code>null</code>
     * @throws OperatorException
     *             if a parameter cannot be read or the selection type index is
     *             unknown
     */
    private AggregationSelection createSelection(int popSize, RandomGenerator random) throws OperatorException {
        int selectionType = getParameterAsInt(PARAMETER_SELECTION_TYPE);
        switch (selectionType) {
            case SELECTION_TOURNAMENT:
                return new AggregationTournamentSelection(popSize, getParameterAsDouble(PARAMETER_TOURNAMENT_FRACTION), random);
            case SELECTION_MO:
                return new AggregationNonDominatedSortingSelection(popSize);
            default:
                // previously this case left the selection null and failed
                // later with a NullPointerException
                throw new OperatorException("Unknown selection type: " + selectionType);
        }
    }

    /**
     * Writes the criteria data of the given population into the file defined
     * by the criteria data file parameter.
     *
     * @throws OperatorException
     *             if the file parameter cannot be read or the file cannot be
     *             written (reported as user error 303)
     */
    private void writeCriteriaData(AggregationPopulationPlotter plotter, List<AggregationIndividual> population)
            throws OperatorException {
        File outFile = getParameterAsFile(PARAMETER_POPULATION_CRITERIA_DATA_FILE);
        SimpleDataTable finalStatistics = plotter.createDataTable(population);
        plotter.fillDataTable(finalStatistics, population);
        PrintWriter out = null;
        try {
            out = new PrintWriter(new FileWriter(outFile));
            finalStatistics.write(out);
        } catch (IOException e) {
            throw new UserError(this, e, 303, new Object[] { outFile, e.getMessage() });
        } finally {
            if (out != null) {
                out.close();
            }
        }
    }

    /**
     * Ensures that at least one individual survived the evaluation.
     *
     * @throws OperatorException
     *             if the population is empty
     */
    private void checkPopulationNotEmpty(List<AggregationIndividual> population) throws OperatorException {
        if (population.isEmpty()) {
            throw new OperatorException(
                    "Evolutionary feature aggregation: all individuals were removed from the population because their example sets did not contain any attributes.");
        }
    }

    /**
     * Creates <code>popSize</code> random individuals. Every position of an
     * individual is set to 0 or -1 with equal probability.
     * NOTE(review): presumably 0 means "used as base feature" and -1 "not
     * used" - confirm against AggregationIndividual.
     */
    private List<AggregationIndividual> createInitialPopulation(int popSize, int individualSize, Random random) {
        List<AggregationIndividual> population = new ArrayList<AggregationIndividual>();
        for (int i = 0; i < popSize; i++) {
            int[] individual = new int[individualSize];
            for (int a = 0; a < individual.length; a++) {
                individual[a] = random.nextBoolean() ? 0 : -1;
            }
            population.add(new AggregationIndividual(individual));
        }
        return population;
    }

    /**
     * Returns true if the maximum number of generations was reached, i.e. as
     * soon as <code>maxGeneration</code> generations have been performed.
     */
    private boolean solutionGoodEnough() {
        // ">=" instead of ">": the counter is incremented after this check,
        // so ">" performed one generation more than configured.
        return generation >= maxGeneration;
    }

    /**
     * Creates example sets from all individuals and invoke the inner operators
     * in order to estimate the performance. Individuals which already have a
     * performance are skipped. Individuals whose example set does not contain
     * any attribute are removed from the given population.
     *
     * @param population
     *            the list of {@link AggregationIndividual}s; it is modified in
     *            place
     * @param originalExampleSet
     *            the example set the individuals are applied on
     * @throws OperatorException
     *             if one of the inner operators fails
     */
    public void evaluate(List population, ExampleSet originalExampleSet) throws OperatorException {
        // explicit iterator: individuals are removed while iterating
        Iterator<?> i = population.iterator();
        while (i.hasNext()) {
            AggregationIndividual individual = (AggregationIndividual) i.next();
            if (individual.getPerformance() != null) {
                continue; // already evaluated
            }
            ExampleSet exampleSet = individual.createExampleSet(originalExampleSet, allAttributes, generator);
            if (exampleSet.getAttributes().size() == 0) {
                i.remove();
                continue;
            }
            IOObject[] operatorChainInput = new IOObject[] { exampleSet };
            IOContainer innerResult = getInput().prepend(operatorChainInput);
            for (int j = 0; j < getNumberOfOperators(); j++) {
                innerResult = getOperator(j).apply(innerResult);
            }
            PerformanceVector performanceVector = innerResult.remove(PerformanceVector.class);
            individual.setPerformance(performanceVector);
        }
    }

    // ================================================================================

    /** This operator consumes an example set. */
    public Class<?>[] getInputClasses() {
        return new Class[] { ExampleSet.class };
    }

    /** This operator delivers an example set and a performance vector. */
    public Class<?>[] getOutputClasses() {
        return new Class[] { ExampleSet.class, PerformanceVector.class };
    }

    /** At least one inner operator is needed. */
    public int getMinNumberOfInnerOperators() {
        return 1;
    }

    /** The number of inner operators is not limited. */
    public int getMaxNumberOfInnerOperators() {
        return Integer.MAX_VALUE;
    }

    /**
     * The inner operators get an example set and the last one must deliver a
     * performance vector.
     */
    public InnerOperatorCondition getInnerOperatorCondition() {
        return new LastInnerOperatorCondition(new Class[] { ExampleSet.class }, new Class[] { PerformanceVector.class });
    }

    /** Returns the parameters of the super class followed by the parameters of this operator. */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        types.add(new ParameterTypeFile(PARAMETER_POPULATION_CRITERIA_DATA_FILE,
                "The path to the file in which the criteria data of the final population should be saved.", "crit", true));

        ParameterType type = new ParameterTypeCategory(PARAMETER_AGGREGATION_FUNCTION,
                "The aggregation function which is used for feature aggregations.", AGGREGATION_FUNCTIONS, AGGREGATION_MAX);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeInt(PARAMETER_POPULATION_SIZE, "Number of individuals per generation.", 1, Integer.MAX_VALUE, 10);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeInt(PARAMETER_MAXIMUM_NUMBER_OF_GENERATIONS,
                "Number of generations after which to terminate the algorithm.", 1, Integer.MAX_VALUE, 100);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeCategory(PARAMETER_SELECTION_TYPE, "The type of selection.", SELECTION_TYPES, SELECTION_TOURNAMENT);
        type.setExpert(false);
        types.add(type);

        types.add(new ParameterTypeDouble(PARAMETER_TOURNAMENT_FRACTION,
                "The fraction of the population which will participate in each tournament.", 0.0d, 1.0d, 0.2d));
        types.add(new ParameterTypeCategory(PARAMETER_CROSSOVER_TYPE, "The type of crossover.",
                AggregationCrossover.CROSSOVER_TYPES, AggregationCrossover.CROSSOVER_UNIFORM));
        types.add(new ParameterTypeDouble(PARAMETER_P_CROSSOVER,
                "Probability for an individual to be selected for crossover.", 0.0d, 1.0d, 0.9d));
        types.add(new ParameterTypeInt(PARAMETER_LOCAL_RANDOM_SEED,
                "Use the given random seed instead of global random numbers (-1: use global).", -1, Integer.MAX_VALUE, -1));
        return types;
    }
}