/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.features;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;

import com.rapidminer.datatable.SimpleDataTable;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.AttributeWeights;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.AttributeWeightedExampleSet;
import com.rapidminer.gui.dialog.IndividualSelector;
import com.rapidminer.gui.dialog.StopDialog;
import com.rapidminer.operator.IOContainer;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.OperatorChain;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ValueDouble;
import com.rapidminer.operator.condition.InnerOperatorCondition;
import com.rapidminer.operator.condition.LastInnerOperatorCondition;
import com.rapidminer.operator.performance.PerformanceVector;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeFile;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.tools.RandomGenerator;
import com.rapidminer.tools.Tools;

/**
 * This class is the superclass of all feature selection and generation
 * operators. It provides an easy to use plug-in interface for operators that
 * modify populations. Subclasses just have to supply lists of
 * <tt>PopulationOperator</tt>s by overriding
 * <tt>getPreEvaluationPopulationOperators()</tt> and
 * <tt>getPostEvaluationPopulationOperators()</tt>. These operators are applied
 * during a loop which terminates as soon as <tt>solutionGoodEnough()</tt>
 * returns true, the maximal fitness is reached, or the user presses the stop
 * button of the optional stop dialog.
 *
 * @author Simon Fischer, Ingo Mierswa
 * @version $Id: FeatureOperator.java,v 1.16 2008/07/07 07:06:46 ingomierswa Exp $
 *          <br>
 */
public abstract class FeatureOperator extends OperatorChain {

    public static final String PARAMETER_LOCAL_RANDOM_SEED = "local_random_seed";

    public static final String PARAMETER_SHOW_STOP_DIALOG = "show_stop_dialog";

    public static final String PARAMETER_USER_RESULT_INDIVIDUAL_SELECTION = "user_result_individual_selection";

    public static final String PARAMETER_SHOW_POPULATION_PLOTTER = "show_population_plotter";

    public static final String PARAMETER_PLOT_GENERATIONS = "plot_generations";

    public static final String PARAMETER_CONSTRAINT_DRAW_RANGE = "constraint_draw_range";

    public static final String PARAMETER_DRAW_DOMINATED_POINTS = "draw_dominated_points";

    public static final String PARAMETER_POPULATION_CRITERIA_DATA_FILE = "population_criteria_data_file";

    public static final String PARAMETER_MAXIMAL_FITNESS = "maximal_fitness";

    private static final Class<?>[] OUTPUT_CLASSES = { ExampleSet.class, AttributeWeights.class, PerformanceVector.class };

    private static final Class<?>[] INPUT_CLASSES = { ExampleSet.class };

    /** The population which is currently optimized; null until {@link #apply()} created it. */
    private Population population;

    /** The optimization stops if this maximal fitness was reached. */
    private double maximalFitness = Double.POSITIVE_INFINITY;

    /** Indicates if the loop should stop as soon as the maximal fitness is reached. */
    private boolean checkForMaximalFitness = true;

    /** Number of individuals which actually had to be evaluated by the inner operators. */
    private int evaluationCounter = 0;

    /** Number of all evaluation requests, including those answered by an already known performance. */
    private int totalEvaluations = 0;

    private RandomGenerator random;

    /**
     * Creates the operator and registers the logging values <tt>generation</tt>,
     * <tt>performance</tt>, <tt>best</tt>, <tt>average_length</tt>, and
     * <tt>best_length</tt>. All values except <tt>generation</tt> deliver NaN as
     * long as no population or no performance is available.
     */
    public FeatureOperator(OperatorDescription description) {
        super(description);
        addValue(new ValueDouble("generation", "The number of the current generation.") {
            public double getDoubleValue() {
                if (population == null) {
                    return 0;
                }
                return population.getGeneration();
            }
        });
        addValue(new ValueDouble("performance", "The performance of the current generation (main criterion).") {
            public double getDoubleValue() {
                if (population == null) {
                    return Double.NaN;
                }
                PerformanceVector pv = population.getCurrentBestPerformance();
                if (pv == null) {
                    return Double.NaN;
                }
                return pv.getMainCriterion().getAverage();
            }
        });
        addValue(new ValueDouble("best", "The performance of the best individual ever (main criterion).") {
            public double getDoubleValue() {
                if (population == null) {
                    return Double.NaN;
                }
                PerformanceVector pv = population.getBestPerformanceEver();
                if (pv == null) {
                    return Double.NaN;
                }
                return pv.getMainCriterion().getAverage();
            }
        });
        addValue(new ValueDouble("average_length", "The average number of attributes.") {
            public double getDoubleValue() {
                if (population == null) {
                    return Double.NaN;
                }
                double lengthSum = 0.0d;
                for (int i = 0; i < population.getNumberOfIndividuals(); i++) {
                    lengthSum += population.get(i).getExampleSet().getNumberOfUsedAttributes();
                }
                // floating point division: an empty population yields NaN instead of an exception
                return lengthSum / population.getNumberOfIndividuals();
            }
        });
        addValue(new ValueDouble("best_length", "The number of attributes of the best example set.") {
            public double getDoubleValue() {
                if (population == null) {
                    return Double.NaN;
                }
                Individual individual = population.getBestIndividualEver();
                if (individual == null) {
                    return Double.NaN;
                }
                AttributeWeightedExampleSet eSet = individual.getExampleSet();
                if (eSet == null) {
                    return Double.NaN;
                }
                return eSet.getNumberOfUsedAttributes();
            }
        });
    }

    /**
     * Create an initial population. The example set will be cloned before the
     * method is invoked. This method is invoked after the pre- and
     * postevaluation population operators were collected.
     */
    public abstract Population createInitialPopulation(ExampleSet es) throws OperatorException;

    /**
     * Must return a list of <tt>PopulationOperator</tt>s. All operators are
     * applied to the population in their order within the list before the
     * population is evaluated. Since this method is invoked only once the list
     * cannot be dynamically changed during runtime.
     */
    public abstract List<PopulationOperator> getPreEvaluationPopulationOperators(ExampleSet input) throws OperatorException;

    /**
     * Must return a list of <tt>PopulationOperator</tt>s. All operators are
     * applied to the population in their order within the list after the
     * population is evaluated. Since this method is invoked only once the list
     * cannot be dynamically changed during runtime.
     */
    public abstract List<PopulationOperator> getPostEvaluationPopulationOperators(ExampleSet input) throws OperatorException;

    /**
     * Has to return true if the main loop can be stopped because a solution is
     * considered to be good enough according to some criterion.
     */
    public abstract boolean solutionGoodEnough(Population pop) throws OperatorException;

    public Class<?>[] getOutputClasses() {
        return OUTPUT_CLASSES;
    }

    public Class<?>[] getInputClasses() {
        return INPUT_CLASSES;
    }

    /**
     * The inner operators receive an example set and the last inner operator
     * must deliver a performance vector.
     */
    public InnerOperatorCondition getInnerOperatorCondition() {
        return new LastInnerOperatorCondition(new Class[] { ExampleSet.class }, new Class[] { PerformanceVector.class });
    }

    /** Returns the random generator created at the beginning of {@link #apply()}. */
    protected RandomGenerator getRandom() {
        return random;
    }

    /** Returns the current population (null before {@link #apply()} created one). */
    protected Population getPopulation() {
        return population;
    }

    /**
     * Applies the feature operator:
     * <ol>
     * <li>collects the pre- and postevaluation operators
     * <li>create an initial population
     * <li>evaluate the initial population
     * <li>loop as long as solution is not good enough
     * <ol>
     * <li>apply all pre evaluation operators
     * <li>evaluate the population
     * <li>update the population's best individual
     * <li>apply all post evaluation operators
     * </ol>
     * <li>return the best individual of all generations (or the individual
     * selected by the user)
     * </ol>
     *
     * @return the clean clone of the resulting example set, the normalized
     *         attribute weights, and the performance of the result individual
     * @throws OperatorException
     *             if the input has no attributes (user error 125), if a
     *             population operator fails, if the criteria data file cannot
     *             be written (user error 303), or if an inner operator fails
     */
    public IOObject[] apply() throws OperatorException {
        // init
        this.random = RandomGenerator.getRandomGenerator(getParameterAsInt(PARAMETER_LOCAL_RANDOM_SEED));
        this.evaluationCounter = 0;
        this.totalEvaluations = 0;
        this.maximalFitness = getParameterAsDouble(PARAMETER_MAXIMAL_FITNESS);

        ExampleSet es = getInput(ExampleSet.class);
        if (es.getAttributes().size() == 0) {
            throw new UserError(this, 125, 0, 1);
        }
        List<PopulationOperator> preOps = getPreEvaluationPopulationOperators(es);
        List<PopulationOperator> postOps = getPostEvaluationPopulationOperators(es);

        // stop dialog
        StopDialog stopDialog = null;
        if (getParameterAsBoolean(PARAMETER_SHOW_STOP_DIALOG)) {
            stopDialog = new StopDialog("Stop Dialog", "<html>Press the stop button to abort the search for best feature space.<br>" + "The best individual found so far is returned.</html>");
            stopDialog.setVisible(true);
        }

        try {
            // create initial population
            population = createInitialPopulation(es);
            log("Initial population has " + population.getNumberOfIndividuals() + " individuals.");
            evaluate(population);

            // population plotter
            PopulationPlotter popPlotter = null;
            population.updateEvaluation();
            if (getParameterAsBoolean(PARAMETER_SHOW_POPULATION_PLOTTER)) {
                popPlotter = new PopulationPlotter(getParameterAsInt(PARAMETER_PLOT_GENERATIONS), getParameterAsBoolean(PARAMETER_CONSTRAINT_DRAW_RANGE), getParameterAsBoolean(PARAMETER_DRAW_DOMINATED_POINTS));
                popPlotter.operate(population);
            }
            inApplyLoop();

            // optimization loop
            boolean userDialogOk = true;
            while (userDialogOk && !solutionGoodEnough(population) && !isMaximumReached()) {
                population.nextGeneration();

                applyOpList(preOps, population);

                log(Tools.ordinalNumber(population.getGeneration()) + " generation has " + population.getNumberOfIndividuals() + " individuals.");
                log("Evaluating " + Tools.ordinalNumber(population.getGeneration()) + " population.");

                evaluate(population);
                population.updateEvaluation();
                applyOpList(postOps, population);
                if (popPlotter != null) {
                    popPlotter.operate(population);
                }

                userDialogOk = (stopDialog == null) || stopDialog.isStillRunning();
                inApplyLoop();
            }
        } finally {
            // always remove the dialog, also if the optimization was aborted by an
            // exception; otherwise it would stay on the screen forever
            if (stopDialog != null) {
                stopDialog.setVisible(false);
                stopDialog.dispose();
            }
        }

        // optimization finished
        applyOpList(postOps, population);

        log("Optimization finished. " + evaluationCounter + " / " + totalEvaluations + " evaluations performed.");

        // write criteria data of the final population into a file
        if (isParameterSet(PARAMETER_POPULATION_CRITERIA_DATA_FILE)) {
            writePopulationCriteriaData();
        }

        // create result example set
        Individual bestEver = selectResultIndividual();

        // create resulting weights: attributes without a weight are regarded as fully used
        AttributeWeightedExampleSet weightedResultSet = bestEver.getExampleSet();
        for (Attribute attribute : weightedResultSet.getAttributes()) {
            if (Double.isNaN(weightedResultSet.getWeight(attribute))) {
                weightedResultSet.setWeight(attribute, 1.0d);
            }
        }

        // names known to the weights which are no longer part of the result set get weight 0
        AttributeWeights weights = weightedResultSet.getAttributeWeights();
        for (String name : weights.getAttributeNames()) {
            if (weightedResultSet.getAttributes().get(name) == null) {
                weights.setWeight(name, 0.0d);
            }
        }

        // normalize weights
        weights.normalize();

        return new IOObject[] { weightedResultSet.createCleanClone(), weights, bestEver.getPerformance() };
    }

    /**
     * Writes the criteria data of the current (final) population into the file
     * defined by the parameter <tt>population_criteria_data_file</tt>.
     *
     * @throws OperatorException
     *             a user error 303 if the file cannot be written
     */
    private void writePopulationCriteriaData() throws OperatorException {
        SimpleDataTable finalStatistics = PopulationPlotter.createDataTable(population);
        PopulationPlotter.fillDataTable(finalStatistics, new HashMap<String, ExampleSet>(), population, getParameterAsBoolean(PARAMETER_DRAW_DOMINATED_POINTS));
        File outFile = getParameterAsFile(PARAMETER_POPULATION_CRITERIA_DATA_FILE);
        PrintWriter out = null;
        try {
            out = new PrintWriter(new FileWriter(outFile));
            finalStatistics.write(out);
        } catch (IOException e) {
            throw new UserError(this, e, 303, new Object[] { outFile, e.getMessage() });
        } finally {
            if (out != null) {
                out.close();
            }
        }
    }

    /**
     * Determines the individual which is delivered as result. If the parameter
     * <tt>user_result_individual_selection</tt> is set, the user is asked to
     * select one; if nothing was selected or the parameter is not set, the best
     * individual ever found by the population is used.
     */
    private Individual selectResultIndividual() throws OperatorException {
        Individual result = null;
        if (getParameterAsBoolean(PARAMETER_USER_RESULT_INDIVIDUAL_SELECTION)) {
            IndividualSelector selector = new IndividualSelector(population);
            selector.setVisible(true);
            result = selector.getSelectedIndividual();
            if (result == null) {
                logWarning("No individual selected. Using individual with highest fitness for main criterion...");
            }
        }
        if (result == null) {
            result = population.getBestIndividualEver();
        }
        return result;
    }

    /**
     * Applies all PopulationOperators in opList to the population. Operators
     * are only applied if their <tt>performOperation</tt> method accepts the
     * current generation. An error is logged for each individual which is left
     * without any used attribute.
     *
     * @throws OperatorException
     *             a user error 108 wrapping any exception thrown while the
     *             population operator was applied
     */
    void applyOpList(List<? extends PopulationOperator> opList, Population population) throws OperatorException {
        for (PopulationOperator op : opList) {
            if (!op.performOperation(population.getGeneration())) {
                continue;
            }
            try {
                op.operate(population);
                for (int k = 0; k < population.getNumberOfIndividuals(); k++) {
                    if (population.get(k).getExampleSet().getNumberOfUsedAttributes() <= 0) {
                        logError("Population operator " + op + " has produced an example set without attributes!");
                    }
                }
            } catch (Exception e) {
                // every failure of a population operator is reported as user error 108
                throw new UserError(this, e, 108, e.toString());
            }
        }
    }

    /**
     * Evaluates all individuals in the population by applying the inner
     * operators.
     */
    protected void evaluate(Population population) throws OperatorException {
        for (int i = 0; i < population.getNumberOfIndividuals(); i++) {
            evaluate(population.get(i));
        }
    }

    /**
     * Evaluates the given individual. The performance is stored in the
     * individual and also returned by this method. Individuals which already
     * have a performance are not evaluated again; they only increase the
     * counter of total evaluations.
     */
    protected PerformanceVector evaluate(Individual individual) throws OperatorException {
        totalEvaluations++;
        PerformanceVector knownPerformance = individual.getPerformance();
        if (knownPerformance != null) {
            return knownPerformance;
        }

        evaluationCounter++;
        AttributeWeightedExampleSet clone = individual.getExampleSet().createCleanClone();
        IOObject[] operatorChainInput = new IOObject[] { clone };
        IOContainer innerResult = getInput().prepend(operatorChainInput);
        // feed the output of each inner operator into the next one
        for (int i = 0; i < getNumberOfOperators(); i++) {
            innerResult = getOperator(i).apply(innerResult);
        }

        PerformanceVector performanceVector = innerResult.remove(PerformanceVector.class);
        individual.setPerformance(performanceVector);
        return performanceVector;
    }

    /**
     * This method checks if the maximum was reached for the main criterion.
     * This is the case if the fitness of the best performance ever is positive
     * infinity, equal to the maximal fitness of the main criterion, or at least
     * as high as the value of the parameter <tt>maximal_fitness</tt>. Always
     * returns false if the check was disabled or no performance is available.
     */
    private boolean isMaximumReached() {
        if (!checkForMaximalFitness) {
            return false;
        }
        PerformanceVector pv = population.getBestPerformanceEver();
        if (pv == null) {
            return false;
        }
        double fitness = pv.getMainCriterion().getFitness();
        if (fitness == Double.POSITIVE_INFINITY) {
            return true;
        }
        if (pv.getMainCriterion().getMaxFitness() == fitness) {
            return true;
        }
        return fitness >= maximalFitness;
    }

    /**
     * Sets if the operator should check if the maximum was reached for the main
     * criterion. Subclasses may want to set this to false, e.g. for
     * multiobjective optimization.
     */
    protected void setCheckForMaximum(boolean checkForMaximalFitness) {
        this.checkForMaximalFitness = checkForMaximalFitness;
    }

    /**
     * Returns if the operator should check if the maximum was reached for the
     * main criterion. Subclasses may want to set this to false, e.g. for
     * multiobjective optimization.
     */
    protected boolean getCheckForMaximum() {
        return this.checkForMaximalFitness;
    }

    /**
     * Returns the highest possible value for the maximum number of inner
     * operators.
     */
    public int getMaxNumberOfInnerOperators() {
        return Integer.MAX_VALUE;
    }

    /** Returns 1 for the minimum number of inner operators. */
    public int getMinNumberOfInnerOperators() {
        return 1;
    }

    /**
     * Adds the parameters of this operator (random seed, stop dialog, user
     * result selection, population plotter settings, criteria data file, and
     * maximal fitness) to the parameters of the super class.
     */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        types.add(new ParameterTypeInt(PARAMETER_LOCAL_RANDOM_SEED, "Use the given random seed instead of global random numbers (-1: use global).", -1, Integer.MAX_VALUE, -1));

        // the stop dialog is the only parameter which is also shown in non-expert mode
        ParameterType type = new ParameterTypeBoolean(PARAMETER_SHOW_STOP_DIALOG, "Determines if a dialog with a button should be displayed which stops the run: the best individual is returned.", false);
        type.setExpert(false);
        types.add(type);

        types.add(new ParameterTypeBoolean(PARAMETER_USER_RESULT_INDIVIDUAL_SELECTION, "Determines if the user wants to select the final result individual from the last population.", false));
        types.add(new ParameterTypeBoolean(PARAMETER_SHOW_POPULATION_PLOTTER, "Determines if the current population should be displayed in performance space.", false));
        types.add(new ParameterTypeInt(PARAMETER_PLOT_GENERATIONS, "Update the population plotter in these generations.", 1, Integer.MAX_VALUE, 10));
        types.add(new ParameterTypeBoolean(PARAMETER_CONSTRAINT_DRAW_RANGE, "Determines if the draw range of the population plotter should be constrained between 0 and 1.", false));
        types.add(new ParameterTypeBoolean(PARAMETER_DRAW_DOMINATED_POINTS, "Determines if only points which are not Pareto dominated should be painted.", true));
        types.add(new ParameterTypeFile(PARAMETER_POPULATION_CRITERIA_DATA_FILE, "The path to the file in which the criteria data of the final population should be saved.", "cri", true));
        types.add(new ParameterTypeDouble(PARAMETER_MAXIMAL_FITNESS, "The optimization will stop if the fitness reaches the defined maximum.", 0.0d, Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY));
        return types;
    }
}