/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.meta;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.IOContainer;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.OperatorChain;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ValueDouble;
import com.rapidminer.operator.ValueString;
import com.rapidminer.operator.condition.InnerOperatorCondition;
import com.rapidminer.operator.condition.LastInnerOperatorCondition;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeInt;
/**
 * <p>This meta operator iterates through all possible feature subsets within the specified range
 * and applies the inner operators on the feature subsets. This might be useful in combination
 * with the ProcessLog operator and, for example, a performance evaluation. In contrast
 * to the BruteForce feature selection, which performs a similar task, this iterative approach needs
 * much less memory and can be performed on larger data sets.</p>
 *
 * <p>Subsets of one size are visited in lexicographic order of the attribute positions in the
 * regular attribute array. If an exact number of attributes is given (greater than 0), the
 * min / max range is ignored.</p>
 *
 * @author Ingo Mierswa
 * @version $Id: FeatureSubsetIteration.java,v 1.3 2008/07/13 23:25:24 ingomierswa Exp $
 */
public class FeatureSubsetIteration extends OperatorChain {

    /** Parameter key: upper bound of the subset size (values below 1 mean "no limit"). */
    public static final String PARAMETER_MAX_NUMBER_OF_ATTRIBUTES = "max_number_of_attributes";

    /** Parameter key: lower bound of the subset size. */
    public static final String PARAMETER_MIN_NUMBER_OF_ATTRIBUTES = "min_number_of_attributes";

    /** Parameter key: exact subset size; if greater than 0 it overrides the min / max range. */
    public static final String PARAMETER_EXACT_NUMBER_OF_ATTRIBUTES = "exact_number_of_attributes";

    /** Number of subsets the inner operators have been applied on so far (-1 before the first run). */
    private int iteration = -1;

    /** Size of the subsets currently being iterated (-1 before the first run). */
    private int featureNumber = -1;

    /** Comma separated names of the attributes of the current subset. */
    private String featureNames = null;

    /**
     * Creates the operator and registers the loggable values <code>iteration</code>,
     * <code>feature_number</code> and <code>feature_names</code>.
     */
    public FeatureSubsetIteration(OperatorDescription description) {
        super(description);
        addValue(new ValueDouble("iteration", "The current iteration.") {
            public double getDoubleValue() {
                return iteration;
            }
        });
        addValue(new ValueDouble("feature_number", "The number of used features in the current iteration.") {
            public double getDoubleValue() {
                return featureNumber;
            }
        });
        addValue(new ValueString("feature_names", "The names of the used features in the current iteration.") {
            public String getStringValue() {
                return featureNames;
            }
        });
    }

    /**
     * Reads the input example set, determines the subset sizes to iterate over and applies the
     * inner operators once for every feature subset of these sizes.
     *
     * @return an array containing only the (unchanged) input example set
     * @throws UserError if a maximum is defined and the minimum is larger than this maximum
     * @throws OperatorException if an inner operator fails
     */
    public IOObject[] apply() throws OperatorException {
        ExampleSet exampleSet = getInput(ExampleSet.class);

        // init
        int minNumberOfFeatures = getParameterAsInt(PARAMETER_MIN_NUMBER_OF_ATTRIBUTES);
        int maxNumberOfFeatures = getParameterAsInt(PARAMETER_MAX_NUMBER_OF_ATTRIBUTES);
        int exactNumberOfFeatures = getParameterAsInt(PARAMETER_EXACT_NUMBER_OF_ATTRIBUTES);

        // checks
        if (exactNumberOfFeatures > 0) {
            log("Using exact number of features for feature subset iteration (" + exactNumberOfFeatures + "), ignoring possibly defined range for the number of features.");
        } else if (maxNumberOfFeatures > 0) {
            // an explicit maximum was given: it must not be smaller than the minimum and it
            // must be kept (it was previously overwritten by the attribute count)
            if (minNumberOfFeatures > maxNumberOfFeatures) {
                throw new UserError(this, 210, PARAMETER_MAX_NUMBER_OF_ATTRIBUTES, PARAMETER_MIN_NUMBER_OF_ATTRIBUTES);
            }
        } else {
            // no maximum defined: go up to the number of attributes of the example set
            maxNumberOfFeatures = exampleSet.getAttributes().size();
        }

        // run
        this.iteration = 0;
        this.featureNumber = 0;
        this.featureNames = "?";

        Attribute[] allAttributes = exampleSet.getAttributes().createRegularAttributeArray();
        if (exactNumberOfFeatures > 0) {
            applyOnAllWithExactNumber(exampleSet, allAttributes, exactNumberOfFeatures);
        } else {
            applyOnAllInRange(exampleSet, allAttributes, minNumberOfFeatures, maxNumberOfFeatures);
        }

        return new IOObject[] { exampleSet };
    }

    /**
     * Applies all inner operators in a chain: the first one receives a container holding the
     * given example set, each following one receives the output of its predecessor. The output
     * of the last inner operator is discarded.
     */
    private void applyInnerOperators(ExampleSet exampleSet) throws OperatorException {
        IOContainer input = new IOContainer(exampleSet);
        for (int i = 0; i < getNumberOfOperators(); i++) {
            input = getOperator(i).apply(input);
        }
    }

    /**
     * Applies the inner operators on every attribute combination of the given fixed size.
     * The combinations are enumerated in lexicographic order of their positions in
     * <code>allAttributes</code>. Nothing is applied if the size is smaller than 1 or larger
     * than the number of available attributes.
     */
    private void applyOnAllWithExactNumber(ExampleSet exampleSet, Attribute[] allAttributes, int exactNumberOfFeatures) throws OperatorException {
        this.featureNumber = exactNumberOfFeatures;

        int numberOfAttributes = allAttributes.length;
        if ((exactNumberOfFeatures < 1) || (exactNumberOfFeatures > numberOfAttributes)) {
            return; // no subset of this size exists
        }

        // work on a clone so that the attribute set of the input example set stays untouched
        ExampleSet workingSet = (ExampleSet) exampleSet.clone();

        // start with the lexicographically first combination {0, 1, ..., k - 1}
        int[] indices = new int[exactNumberOfFeatures];
        for (int i = 0; i < indices.length; i++) {
            indices[i] = i;
        }

        do {
            applyOnSubset(workingSet, allAttributes, indices);
        } while (advanceToNextCombination(indices, numberOfAttributes));
    }

    /**
     * Restricts the regular attributes of the working set to the attributes at the given
     * positions, updates the loggable values and applies the inner operators.
     */
    private void applyOnSubset(ExampleSet workingSet, Attribute[] allAttributes, int[] indices) throws OperatorException {
        StringBuilder nameBuffer = new StringBuilder();
        workingSet.getAttributes().clearRegular();
        for (int i = 0; i < indices.length; i++) {
            Attribute attribute = allAttributes[indices[i]];
            workingSet.getAttributes().addRegular(attribute);
            if (i > 0) {
                nameBuffer.append(", ");
            }
            nameBuffer.append(attribute.getName());
        }

        // apply inner
        this.iteration++;
        this.featureNames = nameBuffer.toString();
        applyInnerOperators(workingSet);
    }

    /**
     * Moves the strictly increasing index array to the next combination in lexicographic order.
     *
     * @param indices the current combination, modified in place
     * @param numberOfAttributes the number of selectable positions (exclusive upper index bound)
     * @return <code>false</code> if the given combination was already the last one
     */
    private static boolean advanceToNextCombination(int[] indices, int numberOfAttributes) {
        int size = indices.length;

        // find the rightmost position which has not yet reached its largest possible value;
        // position p can hold at most numberOfAttributes - size + p
        int position = size - 1;
        while ((position >= 0) && (indices[position] == numberOfAttributes - size + position)) {
            position--;
        }
        if (position < 0) {
            return false;
        }

        // advance it and reset everything to its right to the smallest valid values
        indices[position]++;
        for (int i = position + 1; i < size; i++) {
            indices[i] = indices[i - 1] + 1;
        }
        return true;
    }

    /**
     * Applies the inner operators on all attribute combinations of every size from
     * <code>minNumberOfFeatures</code> to <code>maxNumberOfFeatures</code> (both inclusive).
     */
    private void applyOnAllInRange(ExampleSet exampleSet, Attribute[] allAttributes, int minNumberOfFeatures, int maxNumberOfFeatures) throws OperatorException {
        for (int size = minNumberOfFeatures; size <= maxNumberOfFeatures; size++) {
            applyOnAllWithExactNumber(exampleSet, allAttributes, size);
        }
    }

    /** Builds a {@link LastInnerOperatorCondition} from an ExampleSet class array and an empty class array. */
    public InnerOperatorCondition getInnerOperatorCondition() {
        return new LastInnerOperatorCondition(new Class[] { ExampleSet.class }, new Class[0]);
    }

    /** Returns <code>Integer.MAX_VALUE</code>. */
    public int getMaxNumberOfInnerOperators() {
        return Integer.MAX_VALUE;
    }

    /** Returns 1. */
    public int getMinNumberOfInnerOperators() {
        return 1;
    }

    /**
     * Returns an empty array.
     * NOTE(review): apply() reads an ExampleSet from the input although none is declared
     * here - confirm that this is intended.
     */
    public Class<?>[] getInputClasses() {
        return new Class[0];
    }

    /**
     * Returns an empty array.
     * NOTE(review): apply() returns the input ExampleSet although no output is declared
     * here - confirm that this is intended.
     */
    public Class<?>[] getOutputClasses() {
        return new Class[0];
    }

    /** Adds the three (non-expert) integer parameters for the minimum, maximum and exact subset size. */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();

        ParameterType type = new ParameterTypeInt(PARAMETER_MIN_NUMBER_OF_ATTRIBUTES, "Determines the minimum number of features used for the combinations.", 1, Integer.MAX_VALUE, 1);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeInt(PARAMETER_MAX_NUMBER_OF_ATTRIBUTES, "Determines the maximum number of features used for the combinations (-1: try all combinations up to possible maximum)", -1, Integer.MAX_VALUE, -1);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeInt(PARAMETER_EXACT_NUMBER_OF_ATTRIBUTES, "Determines the exact number of features used for the combinations (-1: use the feature range defined by min and max).", -1, Integer.MAX_VALUE, -1);
        type.setExpert(false);
        types.add(type);

        return types;
    }
}