/**
 * Copyright (C) 2001-2017 by RapidMiner and the contributors
 *
 * Complete list of developers available at our web site:
 *
 * http://rapidminer.com
 *
 * This program is free software: you can redistribute it and/or modify it under the terms of the
 * GNU Affero General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License along with this program.
 * If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.features.construction;

import java.util.Iterator;
import java.util.List;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.AttributeWeights;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.AttributeWeightedExampleSet;
import com.rapidminer.operator.OperatorChain;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ProcessStoppedException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ValueDouble;
import com.rapidminer.operator.performance.PerformanceVector;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.ExampleSetPassThroughRule;
import com.rapidminer.operator.ports.metadata.SetRelation;
import com.rapidminer.operator.ports.metadata.SubprocessTransformRule;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.tools.RandomGenerator;
import com.rapidminer.tools.Tools;


/**
 * This class is the superclass of all feature selection and generation operators. It provides an
 * easy to use plug-in interface for operators that modify populations. Subclasses just have to
 * supply lists of <tt>PopulationOperators</tt> by overriding
 * <tt>getPreEvaluationPopulationOperators()</tt> and
 * <tt>getPostEvaluationPopulationOperators()</tt>. These operators are applied during a loop which
 * will terminate if <tt>solutionGoodEnough()</tt> returns true (or, if enabled, the maximal fitness
 * was reached).
 *
 * @author Ingo Mierswa <br>
 */
public abstract class ExampleSetBasedFeatureOperator extends OperatorChain {

    public static final String PARAMETER_MAXIMAL_FITNESS = "maximal_fitness";

    /** The population currently being optimized; {@code null} until {@link #doWork()} ran. */
    private ExampleSetBasedPopulation population;

    /** The optimization stops if this maximal fitness was reached. */
    private double maximalFitness = Double.POSITIVE_INFINITY;

    /** Whether {@link #isMaximumReached()} may terminate the optimization loop. */
    private boolean checkForMaximalFitness = true;

    /** Number of individuals for which the inner process was actually executed. */
    private int evaluationCounter = 0;

    /** Number of evaluation requests, including those answered by an already known performance. */
    private int totalEvaluations = 0;

    private RandomGenerator random;

    private final InputPort exampleSetInput = getInputPorts().createPort("example set in", ExampleSet.class);
    private final OutputPort innerExampleSetSource = getSubprocess(0).getInnerSources()
            .createPort("example set source");
    private final InputPort innerPerformanceSink = getSubprocess(0).getInnerSinks().createPort("performance sink",
            PerformanceVector.class);
    private final OutputPort exampleSetOutput = getOutputPorts().createPort("example set out");
    private final OutputPort attributeWeightsOutput = getOutputPorts().createPort("attribute weights out");
    private final OutputPort performanceOutput = getOutputPorts().createPort("performance out");

    /**
     * Creates the operator with a single subprocess ("Evaluation Process"), registers the meta
     * data transformation rules and the loggable values <tt>generation</tt>,
     * <tt>performance</tt>, <tt>best</tt>, <tt>average_length</tt> and <tt>best_length</tt>.
     */
    public ExampleSetBasedFeatureOperator(OperatorDescription description) {
        super(description, "Evaluation Process");

        getTransformer()
                .addRule(new ExampleSetPassThroughRule(exampleSetInput, innerExampleSetSource, SetRelation.SUBSET));
        getTransformer().addRule(new SubprocessTransformRule(getSubprocess(0)));
        getTransformer().addPassThroughRule(innerPerformanceSink, performanceOutput);
        getTransformer().addRule(new ExampleSetPassThroughRule(exampleSetInput, exampleSetOutput, SetRelation.SUBSET));
        getTransformer().addGenerationRule(attributeWeightsOutput, AttributeWeights.class);

        addValue(new ValueDouble("generation", "The number of the current generation.") {

            @Override
            public double getDoubleValue() {
                if (population == null) {
                    return 0;
                }
                return population.getGeneration();
            }
        });
        addValue(new ValueDouble("performance", "The performance of the current generation (main criterion).") {

            @Override
            public double getDoubleValue() {
                if (population == null) {
                    return Double.NaN;
                }
                PerformanceVector pv = population.getCurrentBestPerformance();
                if (pv == null) {
                    return Double.NaN;
                }
                return pv.getMainCriterion().getAverage();
            }
        });
        addValue(new ValueDouble("best", "The performance of the best individual ever (main criterion).") {

            @Override
            public double getDoubleValue() {
                if (population == null) {
                    return Double.NaN;
                }
                PerformanceVector pv = population.getBestPerformanceEver();
                if (pv == null) {
                    return Double.NaN;
                }
                return pv.getMainCriterion().getAverage();
            }
        });
        addValue(new ValueDouble("average_length", "The average number of attributes.") {

            @Override
            public double getDoubleValue() {
                if (population == null) {
                    return Double.NaN;
                }
                // floating point sum: an empty population yields NaN instead of an exception
                double lengthSum = 0.0d;
                for (int i = 0; i < population.getNumberOfIndividuals(); i++) {
                    lengthSum += population.get(i).getExampleSet().getNumberOfUsedAttributes();
                }
                return lengthSum / population.getNumberOfIndividuals();
            }
        });
        addValue(new ValueDouble("best_length", "The number of attributes of the best example set.") {

            @Override
            public double getDoubleValue() {
                if (population == null) {
                    return Double.NaN;
                }
                ExampleSetBasedIndividual individual = population.getBestIndividualEver();
                if (individual == null) {
                    return Double.NaN;
                }
                AttributeWeightedExampleSet eSet = individual.getExampleSet();
                if (eSet == null) {
                    return Double.NaN;
                }
                return eSet.getNumberOfUsedAttributes();
            }
        });
    }

    /**
     * Create an initial population. The example set will be cloned before the method is invoked.
     * This method is invoked after the pre- and post-evaluation population operators were
     * collected.
     */
    public abstract ExampleSetBasedPopulation createInitialPopulation(ExampleSet es) throws OperatorException;

    /**
     * Must return a list of <tt>PopulationOperator</tt>s. All operators are applied to the
     * population in their order within the list before the population is evaluated. Since this
     * method is invoked only once the list cannot be dynamically changed during runtime.
     */
    public abstract List<ExampleSetBasedPopulationOperator> getPreEvaluationPopulationOperators(ExampleSet input)
            throws OperatorException;

    /**
     * Must return a list of <tt>PopulationOperator</tt>s. All operators are applied to the
     * population in their order within the list after the population is evaluated. Since this
     * method is invoked only once the list cannot be dynamically changed during runtime.
     */
    public abstract List<ExampleSetBasedPopulationOperator> getPostEvaluationPopulationOperators(ExampleSet input)
            throws OperatorException;

    /**
     * Has to return true if the main loop can be stopped because a solution is considered to be
     * good enough according to some criterion.
     */
    public abstract boolean solutionGoodEnough(ExampleSetBasedPopulation pop) throws OperatorException;

    /** Returns the random generator obtained at the start of the last {@link #doWork()} call. */
    protected RandomGenerator getRandom() {
        return random;
    }

    /** Returns the current population, or {@code null} if {@link #doWork()} was not yet invoked. */
    protected ExampleSetBasedPopulation getPopulation() {
        return population;
    }

    /**
     * Applies the feature operator:
     * <ol>
     * <li>collects the pre- and postevaluation operators
     * <li>create an initial population
     * <li>evaluate the initial population and update the population's best individual
     * <li>loop as long as solution is not good enough
     * <ol>
     * <li>apply all pre evaluation operators
     * <li>evaluate the population
     * <li>update the population's best individual
     * <li>apply all post evaluation operators
     * </ol>
     * <li>return all generation's best individual
     * </ol>
     *
     * @throws OperatorException
     *             if the input example set has no attributes, or if a population operator or the
     *             inner evaluation process fails
     */
    @Override
    public void doWork() throws OperatorException {
        // init
        this.random = RandomGenerator.getRandomGenerator(this);
        this.evaluationCounter = 0;
        this.totalEvaluations = 0;
        this.maximalFitness = getParameterAsDouble(PARAMETER_MAXIMAL_FITNESS);

        ExampleSet es = exampleSetInput.getData(ExampleSet.class);
        if (es.getAttributes().size() == 0) {
            throw new UserError(this, 125, 0, 1);
        }
        List<ExampleSetBasedPopulationOperator> preOps = getPreEvaluationPopulationOperators(es);
        List<ExampleSetBasedPopulationOperator> postOps = getPostEvaluationPopulationOperators(es);

        // create initial population
        population = createInitialPopulation(es);
        log("Initial population has " + population.getNumberOfIndividuals() + " individuals.");
        evaluate(population);
        // Record the best individual of the initial population as well. Otherwise the first
        // termination check below works on a population without any known best performance, and
        // the result creation fails if the loop body is never entered.
        population.updateEvaluation();

        getProgress().setTotal(getMaxGenerations());
        getProgress().setCheckForStop(false);

        // optimization loop
        while (!solutionGoodEnough(population) && !isMaximumReached()) {
            population.nextGeneration();

            applyOpList(preOps, population);

            log(Tools.ordinalNumber(population.getGeneration()) + " generation has "
                    + population.getNumberOfIndividuals() + " individuals.");
            log("Evaluating " + Tools.ordinalNumber(population.getGeneration()) + " population.");

            evaluate(population);
            population.updateEvaluation();
            applyOpList(postOps, population);
            applyLoopOperations();
        }

        // optimization finished
        applyOpList(postOps, population);
        log("Optimization finished. " + evaluationCounter + " / " + totalEvaluations + " evaluations performed.");

        // create result example set and the corresponding weights
        ExampleSetBasedIndividual bestEver = population.getBestIndividualEver();
        AttributeWeightedExampleSet weightedResultSet = bestEver.getExampleSet();
        AttributeWeights weights = createResultWeights(weightedResultSet);

        exampleSetOutput.deliver(weightedResultSet.createCleanClone());
        attributeWeightsOutput.deliver(weights);
        performanceOutput.deliver(bestEver.getPerformance());
    }

    /**
     * Builds the normalized attribute weights for the given result set: attributes of the set
     * whose weight is NaN get the weight 1, weight entries whose attribute is not part of the set
     * get the weight 0.
     */
    private AttributeWeights createResultWeights(AttributeWeightedExampleSet weightedResultSet) {
        for (Attribute attribute : weightedResultSet.getAttributes()) {
            if (Double.isNaN(weightedResultSet.getWeight(attribute))) {
                weightedResultSet.setWeight(attribute, 1.0d);
            }
        }

        AttributeWeights weights = weightedResultSet.getAttributeWeights();
        Iterator<String> n = weights.getAttributeNames().iterator();
        while (n.hasNext()) {
            String name = n.next();
            if (weightedResultSet.getAttributes().get(name) == null) {
                weights.setWeight(name, 0.0d);
            }
        }

        // normalize weights
        weights.normalize();
        return weights;
    }

    /**
     * Applies all PopulationOperators in opList to the population. Operators which do not want to
     * be performed in the current generation are skipped. A warning is logged for each individual
     * that has no used attributes after an operator was applied.
     *
     * @throws OperatorException
     *             a {@link ProcessStoppedException} is passed on unchanged; every other exception
     *             raised by a population operator is wrapped into a {@link UserError} (108)
     */
    void applyOpList(List<ExampleSetBasedPopulationOperator> opList, ExampleSetBasedPopulation population)
            throws OperatorException {
        for (ExampleSetBasedPopulationOperator op : opList) {
            if (!op.performOperation(population.getGeneration())) {
                continue;
            }
            try {
                op.operate(population);
                for (int k = 0; k < population.getNumberOfIndividuals(); k++) {
                    if (population.get(k).getExampleSet().getNumberOfUsedAttributes() <= 0) {
                        getLogger().warning(
                                "Population operator " + op + " has produced an example set without attributes!");
                    }
                }
            } catch (Exception e) {
                if (e instanceof ProcessStoppedException) {
                    // a stop request is not a failure of the population operator and must not
                    // be reported as an error
                    throw (ProcessStoppedException) e;
                }
                throw new UserError(this, e, 108, e.toString());
            }
        }
    }

    /**
     * Evaluates all individuals in the population by applying the inner operators.
     */
    protected void evaluate(ExampleSetBasedPopulation population) throws OperatorException {
        for (int i = 0; i < population.getNumberOfIndividuals(); i++) {
            evaluate(population.get(i));
        }
    }

    /**
     * Evaluates the given individual. If the individual already has a performance, this
     * performance is returned and the inner process is not executed again. Otherwise a clean
     * clone of its example set is delivered to the inner process and the resulting performance is
     * stored in the individual and returned.
     */
    protected PerformanceVector evaluate(ExampleSetBasedIndividual individual) throws OperatorException {
        totalEvaluations++;
        PerformanceVector knownPerformance = individual.getPerformance();
        if (knownPerformance != null) {
            return knownPerformance;
        }

        evaluationCounter++;
        AttributeWeightedExampleSet clone = individual.getExampleSet().createCleanClone();
        innerExampleSetSource.deliver(clone);
        getSubprocess(0).execute();
        PerformanceVector performanceVector = innerPerformanceSink.getData(PerformanceVector.class);
        individual.setPerformance(performanceVector);
        return performanceVector;
    }

    /**
     * This method checks if the maximum was reached for the main criterion. Always returns false
     * if the check was disabled via {@link #setCheckForMaximum(boolean)} or if no best
     * performance is known yet.
     */
    private boolean isMaximumReached() {
        if (!checkForMaximalFitness) {
            return false;
        }
        PerformanceVector pv = population.getBestPerformanceEver();
        if (pv == null) {
            return false;
        }
        if (pv.getMainCriterion().getFitness() == Double.POSITIVE_INFINITY) {
            return true;
        }
        if (pv.getMainCriterion().getMaxFitness() == pv.getMainCriterion().getFitness()) {
            return true;
        }
        return pv.getMainCriterion().getFitness() >= maximalFitness;
    }

    /**
     * Sets if the operator should check if the maximum was reached for the main criterion.
     * Subclasses may want to set this to false, e.g. for multiobjective optimization.
     */
    protected void setCheckForMaximum(boolean checkForMaximalFitness) {
        this.checkForMaximalFitness = checkForMaximalFitness;
    }

    /**
     * Returns if the operator should check if the maximum was reached for the main criterion.
     * Subclasses may want to set this to false, e.g. for multiobjective optimization.
     */
    protected boolean getCheckForMaximum() {
        return this.checkForMaximalFitness;
    }

    /**
     * Returns the parameters of the super class, followed by the random generator parameters and
     * the {@link #PARAMETER_MAXIMAL_FITNESS} parameter.
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        types.addAll(RandomGenerator.getRandomGeneratorParameters(this));
        types.add(new ParameterTypeDouble(PARAMETER_MAXIMAL_FITNESS,
                "The optimization will stop if the fitness reaches the defined maximum.", 0.0d,
                Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY));
        return types;
    }

    /**
     * This method should call {@link #inApplyLoop()} and perform operations which should be done
     * after each iteration of the inner Process.
     *
     * @throws ProcessStoppedException
     */
    protected void applyLoopOperations() throws ProcessStoppedException {
        inApplyLoop();
    }

    /**
     * This method returns the number of the maximum generations. This is used to determine the
     * total progress of the operators progress bar. The default value -1 leads to an alternating
     * progress bar. This should be overwritten by a subclass, if the number of max generations can
     * be determined.
     *
     * @return Number of maximum generations or -1 (if the max generations cannot be determined)
     */
    protected int getMaxGenerations() {
        return -1;
    }
}