/* * RapidMiner * * Copyright (C) 2001-2011 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.features; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.util.HashMap; import java.util.Iterator; import java.util.List; import com.rapidminer.datatable.SimpleDataTable; import com.rapidminer.example.Attribute; import com.rapidminer.example.AttributeWeights; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.set.AttributeWeightedExampleSet; import com.rapidminer.gui.dialog.IndividualSelector; import com.rapidminer.gui.dialog.StopDialog; import com.rapidminer.operator.OperatorChain; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.UserError; import com.rapidminer.operator.ValueDouble; import com.rapidminer.operator.ValueString; import com.rapidminer.operator.performance.PerformanceVector; import com.rapidminer.operator.ports.InputPort; import com.rapidminer.operator.ports.OutputPort; import com.rapidminer.operator.ports.PortPairExtender; import com.rapidminer.operator.ports.metadata.ExampleSetMetaData; import com.rapidminer.operator.ports.metadata.ExampleSetPassThroughRule; import com.rapidminer.operator.ports.metadata.GenerateNewMDRule; import com.rapidminer.operator.ports.metadata.MetaData; import com.rapidminer.operator.ports.metadata.PassThroughRule; import com.rapidminer.operator.ports.metadata.SetRelation; import com.rapidminer.operator.ports.metadata.SimplePrecondition; import com.rapidminer.operator.ports.metadata.SubprocessTransformRule; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeBoolean; import com.rapidminer.parameter.ParameterTypeDouble; import com.rapidminer.parameter.ParameterTypeFile; import com.rapidminer.parameter.ParameterTypeInt; import com.rapidminer.parameter.conditions.BooleanParameterCondition; import com.rapidminer.tools.RandomGenerator; import com.rapidminer.tools.Tools; /** * This class is the superclass of all feature selection and generation * operators. It provides an easy to use plug-in interface for operators that * modify populations. Subclasses just have to supply lists of * <tt>PopulationOperators</tt> by overriding * <tt>getPreEvalutaionPopulationOperators()</tt> and * <tt>getPostEvalutaionPopulationOperators()</tt> during a loop which will * terminate if <tt>solutionGoodEnough()</tt> returns true. * * @author Simon Fischer, Ingo Mierswa * <br> */ public abstract class FeatureOperator extends OperatorChain { public static final String PARAMETER_NORMALIZE_WEIGHTS = "normalize_weights"; public static final String PARAMETER_SHOW_STOP_DIALOG = "show_stop_dialog"; public static final String PARAMETER_USER_RESULT_INDIVIDUAL_SELECTION = "user_result_individual_selection"; public static final String PARAMETER_SHOW_POPULATION_PLOTTER = "show_population_plotter"; public static final String PARAMETER_PLOT_GENERATIONS = "plot_generations"; public static final String PARAMETER_CONSTRAINT_DRAW_RANGE = "constraint_draw_range"; public static final String PARAMETER_DRAW_DOMINATED_POINTS = "draw_dominated_points"; public static final String PARAMETER_POPULATION_CRITERIA_DATA_FILE = "population_criteria_data_file"; public static final String PARAMETER_MAXIMAL_FITNESS = "maximal_fitness"; private final InputPort exampleSetInput = getInputPorts().createPort("example set in"); private final OutputPort exampleSetOutput = getOutputPorts().createPort("example set out"); private final OutputPort attributeWeightsOutput = getOutputPorts().createPort("weights"); private final OutputPort performanceOutput = getOutputPorts().createPort("performance"); private final OutputPort subprocessExampleOutput = getSubprocess(0).getInnerSources().createPort("example set"); private final InputPort subprocessPerformanceInput = getSubprocess(0).getInnerSinks().createPort("performance"); private final PortPairExtender throughExtender = new PortPairExtender("through", getInputPorts(), getSubprocess(0).getInnerSources()); private ExampleSet exampleSet; private Population population; private PopulationEvaluator populationEvaluator; /** The optimization stops if this maximal fitness was reached. */ private double maximalFitness = Double.POSITIVE_INFINITY; private boolean checkForMaximalFitness = true; private RandomGenerator random; public FeatureOperator(OperatorDescription description) { super(description, "Evaluation Process"); throughExtender.start(); exampleSetInput.addPrecondition(new SimplePrecondition(exampleSetInput, new ExampleSetMetaData())); subprocessPerformanceInput.addPrecondition(new SimplePrecondition(subprocessPerformanceInput, new MetaData(PerformanceVector.class))); getTransformer().addRule(new ExampleSetPassThroughRule(exampleSetInput, subprocessExampleOutput, SetRelation.EQUAL) { @Override public ExampleSetMetaData modifyExampleSet(ExampleSetMetaData metaData) { return modifyInnerOutputExampleSet(metaData); } }); getTransformer().addRule(new ExampleSetPassThroughRule(exampleSetInput, exampleSetOutput, SetRelation.EQUAL) { @Override public ExampleSetMetaData modifyExampleSet(ExampleSetMetaData metaData) { return modifyOutputExampleSet(metaData); } }); getTransformer().addRule(throughExtender.makePassThroughRule()); getTransformer().addRule(new SubprocessTransformRule(getSubprocess(0))); getTransformer().addRule(new GenerateNewMDRule(attributeWeightsOutput, new MetaData(AttributeWeights.class))); getTransformer().addRule(new PassThroughRule(subprocessPerformanceInput, performanceOutput, false)); addValue(new ValueDouble("generation", "The number of the current generation.") { @Override public double getDoubleValue() { if (population == null) return 0; return population.getGeneration(); } }); addValue(new ValueDouble("performance", "The performance of the current generation (main criterion).") { @Override public double getDoubleValue() { if (population == null) return Double.NaN; if (population.getCurrentBestPerformance() == null) return Double.NaN; PerformanceVector pv = population.getCurrentBestPerformance(); if (pv == null) return Double.NaN; return pv.getMainCriterion().getAverage(); } }); addValue(new ValueDouble("best", "The performance of the best individual ever (main criterion).") { @Override public double getDoubleValue() { if (population == null) return Double.NaN; PerformanceVector pv = population.getBestPerformanceEver(); if (pv == null) return Double.NaN; return pv.getMainCriterion().getAverage(); } }); addValue(new ValueDouble("average_length", "The average number of attributes.") { @Override public double getDoubleValue() { if (population == null) return Double.NaN; else { double lengthSum = 0.0d; for (int i = 0; i < population.getNumberOfIndividuals(); i++) lengthSum += population.get(i).getNumberOfUsedAttributes(); return lengthSum / population.getNumberOfIndividuals(); } } }); addValue(new ValueDouble("best_length", "The number of attributes of the best example set.") { @Override public double getDoubleValue() { if (population == null) return Double.NaN; Individual individual = population.getBestIndividualEver(); if (individual != null) { return individual.getNumberOfUsedAttributes(); } else { return Double.NaN; } } }); addValue(new ValueString("feature_names", "The names of the features of the best individual so far.") { @Override public String getStringValue() { if (population == null) return "?"; Individual individual = population.getBestIndividualEver(); if (individual != null) { double[] weights = individual.getWeights(); String[] names = com.rapidminer.example.Tools.getRegularAttributeNames(exampleSet); StringBuffer result = new StringBuffer(); boolean first = true; for (int i = 0; i < weights.length; i++) { if (weights[i] > 0) { if (!first) { result.append(", "); } result.append(names[i]); first = false; } } return result.toString(); } else { return "?"; } } }); } /** * Subclasses might override this method in order to change the meta data delivered * to the inner operators. */ protected ExampleSetMetaData modifyInnerOutputExampleSet(ExampleSetMetaData metaData) { return metaData; } /** * Subclasses might override this method in order to change the final outputed meta data */ protected ExampleSetMetaData modifyOutputExampleSet(ExampleSetMetaData metaData) { return metaData; } /** * Create an initial population. The example set will be cloned before the * method is invoked. This method is invoked after the pre- and * postevaluation population operators were collected. */ public abstract Population createInitialPopulation(ExampleSet es) throws OperatorException; /** * Must return a list of <tt>PopulationOperator</tt>s. All operators are * applied to the population in their order within the list before the * population is evaluated. Since this methode is invoked only once the list * cannot by dynamically changed during runtime. */ public abstract List<PopulationOperator> getPreEvaluationPopulationOperators(ExampleSet input) throws OperatorException; /** * Must return a list of <tt>PopulationOperator</tt>s. All operators are * applied to the population in their order within the list after the * population is evaluated. Since this methode is invoked only once the list * cannot by dynamically changed during runtime. */ public abstract List<PopulationOperator> getPostEvaluationPopulationOperators(ExampleSet input) throws OperatorException; /** * Has to return true if the main loop can be stopped because a solution is * considered to be good enough according to some criterion. */ public abstract boolean solutionGoodEnough(Population pop) throws OperatorException; protected RandomGenerator getRandom() { return random; } protected Population getPopulation() { return population; } /** * Applies the feature operator: * <ol> * <li>collects the pre- and postevaluation operators * <li>create an initial population * <li>evaluate the initial population * <li>loop as long as solution is not good enough * <ol> * <li>apply all pre evaluation operators * <li>evaluate the population * <li>update the population's best individual * <li>apply all post evaluation operators * </ol> * <li>return all generation's best individual * </ol> */ @Override public void doWork() throws OperatorException { // init this.random = RandomGenerator.getRandomGenerator(this); this.maximalFitness = getParameterAsDouble(PARAMETER_MAXIMAL_FITNESS); ExampleSet es = exampleSetInput.getData(ExampleSet.class); // creating population evaluator this.populationEvaluator = getPopulationEvaluator(es); if (es.getAttributes().size() == 0) { throw new UserError(this, 125, 0, 1); } this.exampleSet = es; List preOps = getPreEvaluationPopulationOperators(es); List postOps = getPostEvaluationPopulationOperators(es); // stop dialog boolean userDialogOk = true; StopDialog stopDialog = null; if (getParameterAsBoolean(PARAMETER_SHOW_STOP_DIALOG)) { stopDialog = new StopDialog("Stop Dialog", "<html>Press the stop button to abort the search for best feature space.<br>" + "The best individual found so far is returned.</html>"); stopDialog.setVisible(true); } // create initial population population = createInitialPopulation(es); log("Initial population has " + population.getNumberOfIndividuals() + " individuals."); evaluate(population, exampleSet); // population plotter PopulationPlotter popPlotter = null; // Check must be made to ensure inner operators did not exite prematurely leaving null performance checkForStop(); population.updateEvaluation(); if (getParameterAsBoolean(PARAMETER_SHOW_POPULATION_PLOTTER)) { popPlotter = new PopulationPlotter(exampleSet, getParameterAsInt(PARAMETER_PLOT_GENERATIONS), getParameterAsBoolean(PARAMETER_CONSTRAINT_DRAW_RANGE), getParameterAsBoolean(PARAMETER_DRAW_DOMINATED_POINTS)); popPlotter.operate(population); } inApplyLoop(); // optimization loop while (userDialogOk && !solutionGoodEnough(population) && !isMaximumReached()) { population.nextGeneration(); applyOpList(preOps, population); log(Tools.ordinalNumber(population.getGeneration()) + " generation has " + population.getNumberOfIndividuals() + " individuals."); log("Evaluating " + Tools.ordinalNumber(population.getGeneration()) + " population."); evaluate(population, exampleSet); checkForStop(); population.updateEvaluation(); applyOpList(postOps, population); if (popPlotter != null) { popPlotter.operate(population); } userDialogOk = stopDialog == null ? true : stopDialog.isStillRunning(); inApplyLoop(); } if (stopDialog != null) { stopDialog.setVisible(false); stopDialog.dispose(); } // optimization finished: Check must be made to ensure inner operators did not exite prematurely leaving null performance checkForStop(); applyOpList(postOps, population); // write criteria data of the final population into a file if (isParameterSet(PARAMETER_POPULATION_CRITERIA_DATA_FILE)) { SimpleDataTable finalStatistics = PopulationPlotter.createDataTable(population); PopulationPlotter.fillDataTable(finalStatistics, new HashMap<String, double[]>(), population, getParameterAsBoolean(PARAMETER_DRAW_DOMINATED_POINTS)); File outFile = getParameterAsFile(PARAMETER_POPULATION_CRITERIA_DATA_FILE, true); PrintWriter out = null; try { out = new PrintWriter(new FileWriter(outFile)); finalStatistics.write(out); } catch (IOException e) { throw new UserError(this, e, 303, new Object[] { outFile, e.getMessage() }); } finally { if (out != null) { out.close(); } } } // create result example set Individual bestEver = null; if (getParameterAsBoolean(PARAMETER_USER_RESULT_INDIVIDUAL_SELECTION)) { IndividualSelector selector = new IndividualSelector(exampleSet, population); selector.setVisible(true); bestEver = selector.getSelectedIndividual(); if (bestEver == null) logWarning("No individual selected. Using individual with highest fitness for main criterion..."); } if (bestEver == null) { bestEver = population.getBestIndividualEver(); } // create resulting weights double[] weights = bestEver.getWeights(); int a = 0; AttributeWeights attributeWeights = new AttributeWeights(); for (Attribute attribute : exampleSet.getAttributes()) { double weight = weights[a]; if (Double.isNaN(weight)) weight = 1.0d; attributeWeights.setWeight(attribute.getName(), weight); a++; } // normalize weights if (getParameterAsBoolean(PARAMETER_NORMALIZE_WEIGHTS)) attributeWeights.normalize(); // clean up exampleSetOutput.deliver(createCleanClone(exampleSet, weights)); attributeWeightsOutput.deliver(attributeWeights); performanceOutput.deliver(bestEver.getPerformance()); this.population.clear(); this.population = null; this.exampleSet = null; } public static ExampleSet createCleanClone(ExampleSet exampleSet, double[] weights) { AttributeWeightedExampleSet clone = new AttributeWeightedExampleSet(exampleSet, null); int a = 0; for (Attribute attribute : clone.getAttributes()) { clone.setWeight(attribute, weights[a++]); } return clone.createCleanClone(); } /** Applies all PopulationOperators in opList to the population. */ void applyOpList(List opList, Population population) throws OperatorException { Iterator i = opList.listIterator(); while (i.hasNext()) { PopulationOperator op = (PopulationOperator) i.next(); if (op.performOperation(population.getGeneration())) { try { op.operate(population); for (int k = 0; k < population.getNumberOfIndividuals(); k++) { if (population.get(k).getNumberOfUsedAttributes() <= 0) { logError("Population operator " + op + " has produced an example set without attributes!"); } } } catch (Exception e) { throw new UserError(this, e, 108, e.toString()); } } } } /** * Evaluates all individuals in the population by applying the inner * operators. */ private void evaluate(Population population, ExampleSet originalExampleSet) throws OperatorException { populationEvaluator.evaluate(population); } /** * This method gives access to the subprocess for evaluating an example set * @param exampleSet a weighted exampleSet * @return * @throws OperatorException */ public final PerformanceVector executeEvaluationProcess(ExampleSet exampleSet) throws OperatorException { subprocessExampleOutput.deliver(exampleSet); throughExtender.passDataThrough(); runEvaluationProcess(); return subprocessPerformanceInput.getData(); } protected void runEvaluationProcess() throws OperatorException { getSubprocess(0).execute(); } /** This method checks if the maximum was reached for the main criterion. */ private boolean isMaximumReached() { if (checkForMaximalFitness) { PerformanceVector pv = population.getBestPerformanceEver(); if (pv == null) { return false; } else { if (pv.getMainCriterion().getFitness() == Double.POSITIVE_INFINITY) return true; else if (pv.getMainCriterion().getMaxFitness() == pv.getMainCriterion().getFitness()) return true; else return pv.getMainCriterion().getFitness() >= maximalFitness; } } else { return false; } } /** * Sets if the operator should check if the maximum was reached for the main * criterion. Subclasses may want to set this to false, e.g. for * multiobjective optimization. */ protected void setCheckForMaximum(boolean checkForMaximalFitness) { this.checkForMaximalFitness = checkForMaximalFitness; } /** * Returns if the operator should check if the maximum was reached for the * main criterion. Subclasses may want to set this to false, e.g. for * multiobjective optimization. */ protected boolean getCheckForMaximum() { return this.checkForMaximalFitness; } public InputPort getExampleSetInput() { return exampleSetInput; } @Override public List<ParameterType> getParameterTypes() { List<ParameterType> types = super.getParameterTypes(); types.add(new ParameterTypeBoolean(PARAMETER_NORMALIZE_WEIGHTS, "Indicates if the final weights should be normalized.", true)); types.addAll(RandomGenerator.getRandomGeneratorParameters(this)); ParameterType type = new ParameterTypeBoolean(PARAMETER_SHOW_STOP_DIALOG, "Determines if a dialog with a button should be displayed which stops the run: the best individual is returned.", false); type.setExpert(false); types.add(type); types.add(new ParameterTypeBoolean(PARAMETER_USER_RESULT_INDIVIDUAL_SELECTION, "Determines if the user wants to select the final result individual from the last population.", false)); // generation plot parameters types.add(new ParameterTypeBoolean(PARAMETER_SHOW_POPULATION_PLOTTER, "Determines if the current population should be displayed in performance space.", false)); type = new ParameterTypeInt(PARAMETER_PLOT_GENERATIONS, "Update the population plotter in these generations.", 1, Integer.MAX_VALUE, 10); type.registerDependencyCondition(new BooleanParameterCondition(this, PARAMETER_SHOW_POPULATION_PLOTTER, false, true)); types.add(type); type = new ParameterTypeBoolean(PARAMETER_CONSTRAINT_DRAW_RANGE, "Determines if the draw range of the population plotter should be constrained between 0 and 1.", false); type.registerDependencyCondition(new BooleanParameterCondition(this, PARAMETER_SHOW_POPULATION_PLOTTER, false, true)); types.add(type); type = new ParameterTypeBoolean(PARAMETER_DRAW_DOMINATED_POINTS, "Determines if only points which are not Pareto dominated should be painted.", true); type.registerDependencyCondition(new BooleanParameterCondition(this, PARAMETER_SHOW_POPULATION_PLOTTER, false, true)); types.add(type); types.add(new ParameterTypeFile(PARAMETER_POPULATION_CRITERIA_DATA_FILE, "The path to the file in which the criteria data of the final population should be saved.", "cri", true)); types.add(new ParameterTypeDouble(PARAMETER_MAXIMAL_FITNESS, "The optimization will stop if the fitness reaches the defined maximum.", 0.0d, Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY)); return types; } protected PopulationEvaluator getPopulationEvaluator(ExampleSet exampleSet) throws OperatorException { return new SimplePopulationEvaluator(this, exampleSet); } }