/* * RapidMiner * * Copyright (C) 2001-2008 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.learner.rules; import java.util.List; import com.rapidminer.example.Attribute; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.Statistics; import com.rapidminer.operator.Model; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.learner.AbstractLearner; import com.rapidminer.operator.learner.LearnerCapability; import com.rapidminer.operator.learner.tree.SplitCondition; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeDouble; /** * This operator builds an unpruned rule set of classification rules. It is based on * the paper Cendrowska, 1987: PRISM: An algorithm for inducing modular rules. * * @author Sebastian Land, Ingo Mierswa * @version $Id: SimpleRuleLearner.java,v 1.8 2008/05/09 19:23:14 ingomierswa Exp $ */ public class SimpleRuleLearner extends AbstractLearner { public static final String PARAMETER_PURENESS = "pureness"; public SimpleRuleLearner(OperatorDescription description) { super(description); } public Model learn(ExampleSet exampleSet) throws OperatorException { Attribute label = exampleSet.getAttributes().getLabel(); RuleModel ruleModel = new RuleModel(exampleSet); double pureness = getParameterAsDouble(PARAMETER_PURENESS); TermDetermination termDetermination = new TermDetermination(new AccuracyCriterion(), 0.5d); ExampleSet trainingSet = (ExampleSet)exampleSet.clone(); for (String labelName : label.getMapping().getValues()) { trainingSet.recalculateAttributeStatistics(label); int oldSize = -1; while ((trainingSet.size() > 0) && (trainingSet.size() != oldSize) && (trainingSet.getStatistics(label, Statistics.COUNT, labelName) > 0)) { Rule rule = new Rule(labelName); ExampleSet oldTrainingSet = (ExampleSet)trainingSet.clone(); // grow rule int growOldSize = -1; ExampleSet growSet = (ExampleSet)trainingSet.clone(); while ((growSet.size() > 0) && (growSet.size() != growOldSize) && (!rule.isPure(growSet, pureness)) && (growSet.getAttributes().size() > 0)) { SplitCondition term = termDetermination.getBestTerm(growSet, labelName); if (term == null) break; rule.addTerm(term); Attribute splitAttribute = growSet.getAttributes().get(term.getAttributeName()); growSet.getAttributes().remove(splitAttribute); growOldSize = growSet.size(); growSet = rule.getCovered(growSet); } // add rule if not empty if (rule.getTerms().size() > 0) { growSet = rule.getCovered(trainingSet); growSet.recalculateAttributeStatistics(label); int[] frequencies = new int[label.getMapping().size()]; int counter = 0; for (String value : label.getMapping().getValues()) frequencies[counter++] = (int)growSet.getStatistics(label, Statistics.COUNT, value); rule.setFrequencies(frequencies); ruleModel.addRule(rule); oldSize = trainingSet.size(); trainingSet = rule.removeCovered(oldTrainingSet); } else { break; // no other terms found for this class --> next class } trainingSet.recalculateAttributeStatistics(label); } checkForStop(); } // training set not empty? add default rule if (trainingSet.size() > 0) { trainingSet.recalculateAttributeStatistics(label); int index = (int)trainingSet.getStatistics(label, Statistics.MODE); String defaultLabel = label.getMapping().mapIndex(index); Rule defaultRule = new Rule(defaultLabel); int[] frequencies = new int[label.getMapping().size()]; int counter = 0; for (String value : label.getMapping().getValues()) frequencies[counter++] = (int)trainingSet.getStatistics(label, Statistics.COUNT, value); defaultRule.setFrequencies(frequencies); ruleModel.addRule(defaultRule); } return ruleModel; } public boolean supportsCapability(LearnerCapability capability) { if (capability == com.rapidminer.operator.learner.LearnerCapability.BINOMINAL_ATTRIBUTES) return true; if (capability == com.rapidminer.operator.learner.LearnerCapability.POLYNOMINAL_ATTRIBUTES) return true; if (capability == com.rapidminer.operator.learner.LearnerCapability.NUMERICAL_ATTRIBUTES) return true; if (capability == com.rapidminer.operator.learner.LearnerCapability.POLYNOMINAL_CLASS) return true; if (capability == com.rapidminer.operator.learner.LearnerCapability.BINOMINAL_CLASS) return true; if (capability == com.rapidminer.operator.learner.LearnerCapability.WEIGHTED_EXAMPLES) return true; return false; } public List<ParameterType> getParameterTypes() { List<ParameterType> types = super.getParameterTypes(); types.add(new ParameterTypeDouble(PARAMETER_PURENESS, "The desired pureness, i.e. the necessary amount of the major class in a covered subset in order become pure.", 0.0d, 1.0d, 0.9d)); return types; } }