/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.features.selection;
import java.util.LinkedList;
import java.util.List;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.AttributeWeights;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.AttributeWeightedExampleSet;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.features.FeatureOperator;
import com.rapidminer.operator.features.Individual;
import com.rapidminer.operator.features.Population;
import com.rapidminer.operator.features.PopulationOperator;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.UndefinedParameterError;
/**
* <p>
* This operator uses input attribute weights to determine the order of features
* added to the feature set starting with the feature set containing only the
* feature with highest weight. The inner operators must provide a performance
* vector to determine the fitness of the current feature set, e.g. a cross
* validation of a learning scheme for a wrapper evaluation. Stops if adding the
* last <code>k</code> features does not increase the performance or if all
* features were added. The value of <code>k</code> can be set with the
* parameter <code>generations_without_improval</code>.
* </p>
*
* @author Ingo Mierswa
* @version $Id: WeightGuidedSelectionOperator.java,v 1.1 2006/04/14 11:42:27
* ingomierswa Exp $
*/
public class WeightGuidedSelectionOperator extends FeatureOperator {

    /** The parameter name for "Stop after n generations without improval of the performance (-1: stops if the number of features is reached)." */
    public static final String PARAMETER_GENERATIONS_WITHOUT_IMPROVAL = "generations_without_improval";

    /** The parameter name for "Indicates that the absolute values of the input weights should be used to determine the feature adding order." */
    public static final String PARAMETER_USE_ABSOLUTE_WEIGHTS = "use_absolute_weights";

    /**
     * Number of generations without improvement after which the search stops.
     * Non-positive values disable this stopping criterion (see
     * {@link #solutionGoodEnough(Population)}).
     */
    private int generationsWOImp;

    /** Upper bound for the number of generations: the number of attributes of the input example set. */
    private int maxGenerations;

    /**
     * All attribute names sorted by descending weight. Filled by
     * {@link #getPreEvaluationPopulationOperators(ExampleSet)} and read by
     * {@link #createInitialPopulation(ExampleSet)}.
     */
    private String[] attributeNames;

    public WeightGuidedSelectionOperator(OperatorDescription description) {
        super(description);
    }

    /**
     * Returns a population with a single individual: a clone of the given
     * example set in which all attributes are deselected except the one with
     * the biggest weight (the first entry of the sorted attribute names).
     * Also reads the stopping parameter and remembers the number of
     * attributes as the maximum number of generations.
     *
     * @param es the input example set
     * @return the initial population containing exactly one individual
     * @throws UndefinedParameterError if the generations parameter cannot be read
     */
    public Population createInitialPopulation(ExampleSet es) throws UndefinedParameterError {
        this.generationsWOImp = getParameterAsInt(PARAMETER_GENERATIONS_WITHOUT_IMPROVAL);
        this.maxGenerations = es.getAttributes().size();

        Population initP = new Population();
        AttributeWeightedExampleSet exampleSet = new AttributeWeightedExampleSet((ExampleSet) es.clone());
        exampleSet.deselectAll();
        // NOTE(review): relies on attributeNames having been filled by
        // getPreEvaluationPopulationOperators(...) beforehand -- presumably the
        // FeatureOperator framework calls that method first; confirm.
        exampleSet.setWeight(exampleSet.getAttributes().getRegular(attributeNames[0]), 1.0d);
        initP.add(new Individual(exampleSet));
        return initP;
    }

    /**
     * Collects the attribute names of the input, sorts them by descending
     * weight (absolute or original weights, depending on the parameter
     * {@link #PARAMETER_USE_ABSOLUTE_WEIGHTS}) and returns a single operator
     * which adds the feature with the next highest weight.
     *
     * @param input the input example set
     * @return a list containing one {@link IterativeFeatureAdding} operator
     * @throws OperatorException if the attribute weights input or a parameter is not available
     */
    public List<PopulationOperator> getPreEvaluationPopulationOperators(ExampleSet input) throws OperatorException {
        List<PopulationOperator> preOp = new LinkedList<PopulationOperator>();

        attributeNames = new String[input.getAttributes().size()];
        int index = 0;
        for (Attribute attribute : input.getAttributes()) {
            attributeNames[index++] = attribute.getName();
        }

        AttributeWeights attributeWeights = getInput(AttributeWeights.class);
        // Highest weight first: index 0 seeds the initial population and the
        // remaining names are added in this order.
        attributeWeights.sortByWeight(
                attributeNames,
                AttributeWeights.DECREASING,
                getParameterAsBoolean(PARAMETER_USE_ABSOLUTE_WEIGHTS) ? AttributeWeights.ABSOLUTE_WEIGHTS : AttributeWeights.ORIGINAL_WEIGHTS);

        preOp.add(new IterativeFeatureAdding(attributeNames, 1));
        return preOp;
    }

    /** Returns an empty list. */
    public List<PopulationOperator> getPostEvaluationPopulationOperators(ExampleSet input) throws OperatorException {
        return new LinkedList<PopulationOperator>();
    }

    /**
     * Returns true if the search should stop, i.e. if the population is
     * empty, if the limit of generations without improvement is positive and
     * has been reached, or if the number of generations has reached the
     * number of attributes.
     *
     * @param pop the current population
     * @return true if one of the stopping criteria is fulfilled
     */
    public boolean solutionGoodEnough(Population pop) throws OperatorException {
        return pop.empty()
                || ((generationsWOImp > 0) && (pop.getGenerationsWithoutImproval() >= generationsWOImp))
                || (pop.getGeneration() >= maxGenerations);
    }

    /**
     * Returns the parameter types of the super class extended by the stopping
     * criterion parameter and the absolute weights flag.
     */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        types.add(new ParameterTypeInt(PARAMETER_GENERATIONS_WITHOUT_IMPROVAL, "Stop after n generations without improval of the performance (-1: stops if the number of features is reached).", -1, Integer.MAX_VALUE, 1));
        types.add(new ParameterTypeBoolean(PARAMETER_USE_ABSOLUTE_WEIGHTS, "Indicates that the absolute values of the input weights should be used to determine the feature adding order.", true));
        return types;
    }
}