/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.validation;
import java.util.BitSet;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.InputDescription;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.performance.EstimatedPerformance;
import com.rapidminer.operator.performance.PerformanceVector;
import com.rapidminer.tools.WekaInstancesAdaptor;
import com.rapidminer.tools.WekaTools;
import weka.attributeSelection.ConsistencySubsetEval;
import weka.core.Instances;
/**
* <p>
* Consistency attribute subset evaluator. For more information see: <br/> Liu,
* H., and Setiono, R., (1996). A probabilistic approach to feature selection -
* A filter solution. In 13th International Conference on Machine Learning
* (ICML'96), July 1996, pp. 319-327. Bari, Italy.
* </p>
*
* <p>
* This operator evaluates the worth of a subset of attributes by the level of
* consistency in the class values when the training instances are projected
* onto the subset of attributes. Consistency of any subset can never be lower
* than that of the full set of attributes, hence the usual practice is to use
* this subset evaluator in conjunction with a Random or Exhaustive search which
* looks for the smallest subset with consistency equal to that of the full set
* of attributes.
* </p>
*
* <p>
* This operator can only be applied for classification data sets, i.e. where
* the label attribute is nominal.
* </p>
*
* @author Ingo Mierswa
* @version $Id: ConsistencyFeatureSetEvaluator.java,v 1.4 2006/04/05 09:42:02
* ingomierswa Exp $
*/
public class ConsistencyFeatureSetEvaluator extends Operator {
public ConsistencyFeatureSetEvaluator(OperatorDescription description) {
super(description);
}
/** Shows a parameter keep_example_set with default value "false". */
public InputDescription getInputDescription(Class cls) {
if (ExampleSet.class.isAssignableFrom(cls)) {
return new InputDescription(cls, false, true);
} else {
return super.getInputDescription(cls);
}
}
public IOObject[] apply() throws OperatorException {
ExampleSet exampleSet = getInput(ExampleSet.class);
Instances instances = WekaTools.toWekaInstances(exampleSet, "TempInstances", WekaInstancesAdaptor.LEARNING);
double performance = 0.0d;
try {
ConsistencySubsetEval evaluator = new ConsistencySubsetEval();
evaluator.buildEvaluator(instances);
BitSet bitSet = new BitSet(exampleSet.getAttributes().size());
bitSet.flip(0, exampleSet.getAttributes().size());
performance = evaluator.evaluateSubset(bitSet);
} catch (Exception e) {
throw new UserError(this, e, 905, new Object[] { "ConsistencySubsetEval", e.getMessage() });
}
PerformanceVector result = new PerformanceVector();
result.addCriterion(new EstimatedPerformance("ConsistencyFS", performance, 1, false));
return new IOObject[] { result };
}
public Class<?>[] getInputClasses() {
return new Class[] { ExampleSet.class };
}
public Class<?>[] getOutputClasses() {
return new Class[] { PerformanceVector.class };
}
}