/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.learner.rules;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;

/**
 * The info gain criterion for rule learning.
 *
 * <p>For a label value with weight <code>p</code> inside a partition of total weight
 * <code>t</code>, and an overall label fraction <code>d</code> (label weight over all
 * weight in both partitions), the gain computed by this class is</p>
 *
 * <pre>
 *   p * (log2((p + 1) / (t + 1)) - log2(d))
 * </pre>
 *
 * <p>The <code>+ 1</code> terms smooth the in-partition fraction; the overall fraction
 * <code>d</code> is not smoothed.</p>
 *
 * @author Ingo Mierswa
 * @version $Id: InfoGainCriterion.java,v 1.7 2008/05/09 19:23:13 ingomierswa Exp $
 */
public class InfoGainCriterion extends AbstractCriterion {

    /** Multiplier that turns a natural logarithm into a base-2 logarithm. */
    private static final double LOG_FACTOR = 1d / Math.log(2);

    /**
     * Computes the info gain of the covered partition for the given label value by
     * iterating over both example sets.
     *
     * <p>If the weight attribute is absent in a set, each of its examples counts with
     * weight 1.</p>
     *
     * <p>NOTE: the result is <code>NaN</code> when both sets are empty (0 / 0), and also
     * when the label value has zero weight in both sets, because the gain then evaluates
     * to <code>0 * Infinity</code>.</p>
     *
     * @param coveredSet the examples covered by the candidate rule
     * @param uncoveredSet the examples not covered by the candidate rule
     * @param labelName the nominal label value the gain is computed for
     * @return a two-element array: <code>[0]</code> the info gain of the covered
     *         partition, <code>[1]</code> the number of examples in <code>coveredSet</code>
     *         (the unweighted size, not the summed weight)
     */
    public double[] getBenefit(ExampleSet coveredSet, ExampleSet uncoveredSet, String labelName) {
        // The covered set is scanned first, then the uncovered set, as before.
        double[] covered = sumLabelAndTotalWeight(coveredSet, labelName);
        double[] uncovered = sumLabelAndTotalWeight(uncoveredSet, labelName);

        double coveredAccuracy = covered[0];
        double coveredCoverage = covered[1];
        double defaultAccuracy = (coveredAccuracy + uncovered[0]) / (coveredCoverage + uncovered[1]);

        double infoGain = infoGain(coveredAccuracy, coveredCoverage, defaultAccuracy);
        return new double[] { infoGain, coveredSet.size() };
    }

    /**
     * Computes the info gain for both sides of the current split from the running
     * weight statistics (<code>labelWeights</code>, <code>weight</code>,
     * <code>totalLabelWeights</code>, <code>totalWeight</code>), which are not declared
     * in this class and are therefore expected to be provided by the superclass.
     *
     * @param example not used by this implementation
     * @param labelIndex index of the label value the gain is computed for; used to
     *        index <code>labelWeights</code> and <code>totalLabelWeights</code>
     * @return a four-element array: <code>[0]</code> info gain of the covered side,
     *         <code>[1]</code> weight of the covered side, <code>[2]</code> info gain of
     *         the uncovered side, <code>[3]</code> weight of the uncovered side
     */
    public double[] getOnlineBenefit(Example example, int labelIndex) {
        double coveredAccuracy = labelWeights[labelIndex];
        double coveredWeight = weight;
        // Whatever is not on the covered side belongs to the uncovered side.
        double uncoveredAccuracy = totalLabelWeights[labelIndex] - labelWeights[labelIndex];
        double uncoveredWeight = totalWeight - weight;

        double defaultAccuracy = (coveredAccuracy + uncoveredAccuracy) / (coveredWeight + uncoveredWeight);

        double infoGain = infoGain(coveredAccuracy, coveredWeight, defaultAccuracy);
        double reverseInfoGain = infoGain(uncoveredAccuracy, uncoveredWeight, defaultAccuracy);
        return new double[] { infoGain, coveredWeight, reverseInfoGain, uncoveredWeight };
    }

    /**
     * Sums the weight of the examples carrying the given label value and the weight of
     * all examples of one example set.
     *
     * @param exampleSet the examples to scan
     * @param labelName the nominal label value to count
     * @return a two-element array: <code>[0]</code> summed weight of the examples whose
     *         label equals <code>labelName</code>, <code>[1]</code> summed weight of all
     *         examples
     */
    private static double[] sumLabelAndTotalWeight(ExampleSet exampleSet, String labelName) {
        Attribute weightAttribute = exampleSet.getAttributes().getWeight();
        Attribute labelAttribute = exampleSet.getAttributes().getLabel();
        int labelIndex = labelAttribute.getMapping().getIndex(labelName);

        double labelWeight = 0.0d;
        double totalSetWeight = 0.0d;
        for (Example e : exampleSet) {
            // Without a weight attribute every example counts once.
            double exampleWeight = 1;
            if (weightAttribute != null) {
                exampleWeight = e.getValue(weightAttribute);
            }
            totalSetWeight += exampleWeight;
            if (e.getValue(labelAttribute) == labelIndex) {
                labelWeight += exampleWeight;
            }
        }
        return new double[] { labelWeight, totalSetWeight };
    }

    /**
     * Evaluates <code>labelWeight * (log2((labelWeight + 1) / (partitionWeight + 1))
     * - log2(defaultAccuracy))</code>.
     *
     * @param labelWeight weight of the label value inside the partition
     * @param partitionWeight total weight of the partition
     * @param defaultAccuracy fraction of the label value over both partitions
     * @return the info gain of the partition
     */
    private static double infoGain(double labelWeight, double partitionWeight, double defaultAccuracy) {
        return labelWeight * (log2((labelWeight + 1.0d) / (partitionWeight + 1.0d)) - log2(defaultAccuracy));
    }

    /**
     * Returns the base-2 logarithm of the given value.
     *
     * @param value the argument of the logarithm
     * @return <code>Math.log(value)</code> scaled to base 2
     */
    private static double log2(double value) {
        return Math.log(value) * LOG_FACTOR;
    }
}