/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2011 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program. If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.features.construction;

import java.util.Iterator;
import java.util.List;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.AttributeWeights;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.AttributeWeightedExampleSet;
import com.rapidminer.gui.dialog.StopDialog;
import com.rapidminer.operator.OperatorChain;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ValueDouble;
import com.rapidminer.operator.performance.PerformanceVector;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.ExampleSetPassThroughRule;
import com.rapidminer.operator.ports.metadata.SetRelation;
import com.rapidminer.operator.ports.metadata.SubprocessTransformRule;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.tools.RandomGenerator;
import com.rapidminer.tools.Tools;

/**
 * This class is the superclass of all feature selection and generation
 * operators. It provides an easy to use plug-in interface for operators that
 * modify populations. Subclasses just have to supply lists of
 * <tt>PopulationOperators</tt> by overriding
 * <tt>getPreEvaluationPopulationOperators()</tt> and
 * <tt>getPostEvaluationPopulationOperators()</tt>. These operators are applied
 * during a loop which terminates if <tt>solutionGoodEnough()</tt> returns
 * true, if the maximal fitness was reached (unless this check was disabled),
 * or if the optional stop dialog reports that it is no longer running.
 *
 * @author Ingo Mierswa
 */
public abstract class ExampleSetBasedFeatureOperator extends OperatorChain {

    /** Parameter key: whether a dialog that allows stopping the run is shown. */
    public static final String PARAMETER_SHOW_STOP_DIALOG = "show_stop_dialog";

    /** Parameter key: the fitness at which the optimization stops. */
    public static final String PARAMETER_MAXIMAL_FITNESS = "maximal_fitness";

    /** The current population; <code>null</code> until {@link #doWork()} created the initial one. */
    private ExampleSetBasedPopulation population;

    /** The optimization stops if this maximal fitness was reached. */
    private double maximalFitness = Double.POSITIVE_INFINITY;

    /** If false, {@link #isMaximumReached()} always returns false. */
    private boolean checkForMaximalFitness = true;

    /** Number of evaluations for which the inner subprocess was actually executed. */
    private int evaluationCounter = 0;

    /** Number of evaluation requests, including those answered by an already set performance. */
    private int totalEvaluations = 0;

    /** Random generator of this operator, (re-)initialized at the start of {@link #doWork()}. */
    private RandomGenerator random;

    private final InputPort exampleSetInput = getInputPorts().createPort("example set in", ExampleSet.class);
    private final OutputPort innerExampleSetSource = getSubprocess(0).getInnerSources().createPort("example set source");
    private final InputPort innerPerformanceSink = getSubprocess(0).getInnerSinks().createPort("performance sink", PerformanceVector.class);
    private final OutputPort exampleSetOutput = getOutputPorts().createPort("example set out");
    private final OutputPort attributeWeightsOutput = getOutputPorts().createPort("attribute weights out");
    private final OutputPort performanceOutput = getOutputPorts().createPort("performance out");

    /**
     * Creates the operator with a single subprocess named "Evaluation Process",
     * registers the meta data transformation rules for all ports and adds the
     * loggable values "generation", "performance", "best", "average_length"
     * and "best_length".
     */
    public ExampleSetBasedFeatureOperator(OperatorDescription description) {
        super(description, "Evaluation Process");

        getTransformer().addRule(new ExampleSetPassThroughRule(exampleSetInput, innerExampleSetSource, SetRelation.SUBSET));
        getTransformer().addRule(new SubprocessTransformRule(getSubprocess(0)));
        getTransformer().addPassThroughRule(innerPerformanceSink, performanceOutput);
        getTransformer().addRule(new ExampleSetPassThroughRule(exampleSetInput, exampleSetOutput, SetRelation.SUBSET));
        getTransformer().addGenerationRule(attributeWeightsOutput, AttributeWeights.class);

        addValue(new ValueDouble("generation", "The number of the current generation.") {
            @Override
            public double getDoubleValue() {
                if (population == null) {
                    return 0;
                }
                return population.getGeneration();
            }
        });

        addValue(new ValueDouble("performance", "The performance of the current generation (main criterion).") {
            @Override
            public double getDoubleValue() {
                if (population == null) {
                    return Double.NaN;
                }
                // read the performance only once so that check and use refer to the same object
                PerformanceVector pv = population.getCurrentBestPerformance();
                if (pv == null) {
                    return Double.NaN;
                }
                return pv.getMainCriterion().getAverage();
            }
        });

        addValue(new ValueDouble("best", "The performance of the best individual ever (main criterion).") {
            @Override
            public double getDoubleValue() {
                if (population == null) {
                    return Double.NaN;
                }
                PerformanceVector pv = population.getBestPerformanceEver();
                if (pv == null) {
                    return Double.NaN;
                }
                return pv.getMainCriterion().getAverage();
            }
        });

        addValue(new ValueDouble("average_length", "The average number of attributes.") {
            @Override
            public double getDoubleValue() {
                if (population == null) {
                    return Double.NaN;
                }
                // floating point sum: an empty population yields 0.0 / 0, i.e. NaN
                double lengthSum = 0.0d;
                for (int i = 0; i < population.getNumberOfIndividuals(); i++) {
                    lengthSum += population.get(i).getExampleSet().getNumberOfUsedAttributes();
                }
                return lengthSum / population.getNumberOfIndividuals();
            }
        });

        addValue(new ValueDouble("best_length", "The number of attributes of the best example set.") {
            @Override
            public double getDoubleValue() {
                if (population == null) {
                    return Double.NaN;
                }
                ExampleSetBasedIndividual individual = population.getBestIndividualEver();
                if (individual == null) {
                    return Double.NaN;
                }
                AttributeWeightedExampleSet eSet = individual.getExampleSet();
                if (eSet == null) {
                    return Double.NaN;
                }
                return eSet.getNumberOfUsedAttributes();
            }
        });
    }

    /**
     * Create an initial population. This method is invoked after the pre- and
     * post-evaluation population operators were collected.
     */
    public abstract ExampleSetBasedPopulation createInitialPopulation(ExampleSet es) throws OperatorException;

    /**
     * Must return a list of <tt>PopulationOperator</tt>s. All operators are
     * applied to the population in their order within the list before the
     * population is evaluated. Since this method is invoked only once the list
     * cannot be dynamically changed during runtime.
     */
    public abstract List<ExampleSetBasedPopulationOperator> getPreEvaluationPopulationOperators(ExampleSet input) throws OperatorException;

    /**
     * Must return a list of <tt>PopulationOperator</tt>s. All operators are
     * applied to the population in their order within the list after the
     * population is evaluated. Since this method is invoked only once the list
     * cannot be dynamically changed during runtime.
     */
    public abstract List<ExampleSetBasedPopulationOperator> getPostEvaluationPopulationOperators(ExampleSet input) throws OperatorException;

    /**
     * Has to return true if the main loop can be stopped because a solution is
     * considered to be good enough according to some criterion.
     */
    public abstract boolean solutionGoodEnough(ExampleSetBasedPopulation pop) throws OperatorException;

    /** Returns the random generator which was initialized at the start of {@link #doWork()}. */
    protected RandomGenerator getRandom() {
        return random;
    }

    /** Returns the current population (<code>null</code> before the initial population was created). */
    protected ExampleSetBasedPopulation getPopulation() {
        return population;
    }

    /**
     * Applies the feature operator:
     * <ol>
     * <li>collects the pre- and post-evaluation operators
     * <li>shows the stop dialog if requested by the parameter
     * <li>creates an initial population
     * <li>evaluates the initial population
     * <li>loops as long as the stop dialog is still running, the solution is
     * not good enough and the maximum was not reached
     * <ol>
     * <li>switch to the next generation
     * <li>apply all pre evaluation operators
     * <li>evaluate the population
     * <li>update the population's evaluation
     * <li>apply all post evaluation operators
     * </ol>
     * <li>closes the stop dialog (also if the optimization failed)
     * <li>applies all post evaluation operators once more
     * <li>delivers the best individual ever, its attribute weights and its
     * performance
     * </ol>
     *
     * @throws OperatorException
     *             if the input has no attributes or if creating, modifying or
     *             evaluating the population fails
     */
    @Override
    public void doWork() throws OperatorException {
        // init
        this.random = RandomGenerator.getRandomGenerator(this);
        this.evaluationCounter = 0;
        this.totalEvaluations = 0;
        this.maximalFitness = getParameterAsDouble(PARAMETER_MAXIMAL_FITNESS);

        ExampleSet es = exampleSetInput.getData();
        if (es.getAttributes().size() == 0) {
            throw new UserError(this, 125, 0, 1);
        }

        List<ExampleSetBasedPopulationOperator> preOps = getPreEvaluationPopulationOperators(es);
        List<ExampleSetBasedPopulationOperator> postOps = getPostEvaluationPopulationOperators(es);

        // stop dialog
        StopDialog stopDialog = null;
        if (getParameterAsBoolean(PARAMETER_SHOW_STOP_DIALOG)) {
            stopDialog = new StopDialog("Stop Dialog",
                    "<html>Press the stop button to abort the search for best feature space.<br>"
                            + "The best individual found so far is returned.</html>");
            stopDialog.setVisible(true);
        }

        try {
            // create initial population
            population = createInitialPopulation(es);
            log("Initial population has " + population.getNumberOfIndividuals() + " individuals.");
            evaluate(population);

            // optimization loop
            boolean userDialogOk = true;
            while (userDialogOk && !solutionGoodEnough(population) && !isMaximumReached()) {
                population.nextGeneration();

                applyOpList(preOps, population);

                log(Tools.ordinalNumber(population.getGeneration()) + " generation has " + population.getNumberOfIndividuals() + " individuals.");
                log("Evaluating " + Tools.ordinalNumber(population.getGeneration()) + " population.");

                evaluate(population);
                population.updateEvaluation();
                applyOpList(postOps, population);

                userDialogOk = stopDialog == null || stopDialog.isStillRunning();
                inApplyLoop();
            }
        } finally {
            // close the dialog in any case, otherwise it stays open if the
            // optimization is aborted by an exception
            if (stopDialog != null) {
                stopDialog.setVisible(false);
                stopDialog.dispose();
            }
        }

        // optimization finished
        applyOpList(postOps, population);
        log("Optimization finished. " + evaluationCounter + " / " + totalEvaluations + " evaluations performed.");

        // NOTE(review): updateEvaluation() is only invoked inside the loop above;
        // confirm that getBestIndividualEver() cannot be null if the loop body
        // was never entered.
        deliverResults(population.getBestIndividualEver());
    }

    /**
     * Delivers the example set of the given individual, its attribute weights
     * and its performance at the output ports. Attributes of the result set
     * whose weight is NaN get the weight 1, names in the weights object which
     * are not attributes of the result set get the weight 0, and the weights
     * are normalized before delivery.
     */
    private void deliverResults(ExampleSetBasedIndividual bestEver) throws OperatorException {
        // create resulting weights
        AttributeWeightedExampleSet weightedResultSet = bestEver.getExampleSet();
        for (Attribute attribute : weightedResultSet.getAttributes()) {
            if (Double.isNaN(weightedResultSet.getWeight(attribute))) {
                weightedResultSet.setWeight(attribute, 1.0d);
            }
        }

        AttributeWeights weights = weightedResultSet.getAttributeWeights();
        Iterator<String> names = weights.getAttributeNames().iterator();
        while (names.hasNext()) {
            String name = names.next();
            if (weightedResultSet.getAttributes().get(name) == null) {
                weights.setWeight(name, 0.0d);
            }
        }

        // normalize weights
        weights.normalize();

        exampleSetOutput.deliver(weightedResultSet.createCleanClone());
        attributeWeightsOutput.deliver(weights);
        performanceOutput.deliver(bestEver.getPerformance());
    }

    /**
     * Applies all PopulationOperators in opList to the population, in list
     * order. An operator is skipped if its <code>performOperation</code>
     * method returns false for the current generation. After each applied
     * operator a warning is logged for every individual whose example set has
     * no used attributes.
     *
     * @throws OperatorException
     *             a {@link UserError} with code 108 wrapping any exception
     *             thrown while an operator is applied
     */
    void applyOpList(List<? extends ExampleSetBasedPopulationOperator> opList, ExampleSetBasedPopulation population) throws OperatorException {
        for (ExampleSetBasedPopulationOperator op : opList) {
            if (!op.performOperation(population.getGeneration())) {
                continue;
            }
            try {
                op.operate(population);
                for (int k = 0; k < population.getNumberOfIndividuals(); k++) {
                    if (population.get(k).getExampleSet().getNumberOfUsedAttributes() <= 0) {
                        getLogger().warning("Population operator " + op + " has produced an example set without attributes!");
                    }
                }
            } catch (Exception e) {
                throw new UserError(this, e, 108, e.toString());
            }
        }
    }

    /**
     * Evaluates all individuals in the population by applying the inner
     * operators.
     */
    protected void evaluate(ExampleSetBasedPopulation population) throws OperatorException {
        for (int i = 0; i < population.getNumberOfIndividuals(); i++) {
            evaluate(population.get(i));
        }
    }

    /**
     * Evaluates the given individual. If the individual already has a
     * performance, this performance is returned without executing the inner
     * subprocess. Otherwise a clean clone of the individual's example set is
     * delivered to the inner subprocess, the subprocess is executed, and the
     * resulting performance is set at the individual and returned.
     */
    protected PerformanceVector evaluate(ExampleSetBasedIndividual individual) throws OperatorException {
        totalEvaluations++;

        PerformanceVector knownPerformance = individual.getPerformance();
        if (knownPerformance != null) {
            return knownPerformance;
        }

        evaluationCounter++;
        AttributeWeightedExampleSet clone = individual.getExampleSet().createCleanClone();
        innerExampleSetSource.deliver(clone);
        getSubprocess(0).execute();

        PerformanceVector performanceVector = innerPerformanceSink.getData();
        individual.setPerformance(performanceVector);
        return performanceVector;
    }

    /**
     * This method checks if the maximum was reached for the main criterion of
     * the best performance ever. It returns false if the check is disabled or
     * if there is no best performance yet. Otherwise it returns true if the
     * fitness is positive infinity, equals the maximal fitness of the
     * criterion, or is at least the value of the parameter
     * {@link #PARAMETER_MAXIMAL_FITNESS}.
     */
    private boolean isMaximumReached() {
        if (!checkForMaximalFitness) {
            return false;
        }
        PerformanceVector pv = population.getBestPerformanceEver();
        if (pv == null) {
            return false;
        }
        double fitness = pv.getMainCriterion().getFitness();
        if (fitness == Double.POSITIVE_INFINITY) {
            return true;
        }
        if (pv.getMainCriterion().getMaxFitness() == fitness) {
            return true;
        }
        return fitness >= maximalFitness;
    }

    /**
     * Sets if the operator should check if the maximum was reached for the main
     * criterion. Subclasses may want to set this to false, e.g. for
     * multiobjective optimization.
     */
    protected void setCheckForMaximum(boolean checkForMaximalFitness) {
        this.checkForMaximalFitness = checkForMaximalFitness;
    }

    /**
     * Returns if the operator should check if the maximum was reached for the
     * main criterion. Subclasses may want to set this to false, e.g. for
     * multiobjective optimization.
     */
    protected boolean getCheckForMaximum() {
        return this.checkForMaximalFitness;
    }

    /**
     * Returns the parameter types of the superclass followed by the random
     * generator parameters, the stop dialog switch and the maximal fitness.
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        types.addAll(RandomGenerator.getRandomGeneratorParameters(this));

        ParameterType type = new ParameterTypeBoolean(PARAMETER_SHOW_STOP_DIALOG,
                "Determines if a dialog with a button should be displayed which stops the run: the best individual is returned.", false);
        types.add(type);

        types.add(new ParameterTypeDouble(PARAMETER_MAXIMAL_FITNESS,
                "The optimization will stop if the fitness reaches the defined maximum.", 0.0d, Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY));
        return types;
    }
}