/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.associations;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.learner.associations.fpgrowth.FPGrowth;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.GenerateNewMDRule;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.conditions.EqualTypeCondition;
/**
* <p>
* This operator generates association rules from frequent item sets. In RapidMiner, the process of
* frequent item set mining is divided into two parts: first, the generation of frequent item sets
* and second, the generation of association rules from these sets.
* </p>
*
* <p>
* For the generation of frequent item sets, you can use for example the operator {@link FPGrowth}.
* The result will be a set of frequent item sets which could be used as input for this operator.
* </p>
*
* @author Sebastian Land, Ingo Mierswa
*/
public class AssociationRuleGenerator extends Operator {
private InputPort itemSetsInput = getInputPorts().createPort("item sets", FrequentItemSets.class);
private OutputPort rulesOutput = getOutputPorts().createPort("rules");
private OutputPort itemSetsOutput = getOutputPorts().createPort("item sets");
public static final String PARAMETER_CRITERION = "criterion";
public static final String PARAMETER_MIN_CONFIDENCE = "min_confidence";
public static final String PARAMETER_MIN_CRITERION_VALUE = "min_criterion_value";
public static final String PARAMETER_GAIN_THETA = "gain_theta";
public static final String PARAMETER_LAPLACE_K = "laplace_k";
public static final String[] CRITERIA = { "confidence", "lift", "conviction", "ps", "gain", "laplace" };
public static final int CONFIDENCE = 0;
public static final int LIFT = 1;
public static final int CONVICTION = 2;
public static final int PS = 3;
public static final int GAIN = 4;
public static final int LAPLACE = 5;
public AssociationRuleGenerator(OperatorDescription description) {
super(description);
getTransformer().addRule(new GenerateNewMDRule(rulesOutput, AssociationRules.class));
getTransformer().addPassThroughRule(itemSetsInput, itemSetsOutput);
}
@Override
public void doWork() throws OperatorException {
double minValue = getParameterAsDouble(PARAMETER_MIN_CONFIDENCE);
if (getParameterAsInt(PARAMETER_CRITERION) != CONFIDENCE) {
minValue = getParameterAsDouble(PARAMETER_MIN_CRITERION_VALUE);
}
double theta = getParameterAsDouble(PARAMETER_GAIN_THETA);
double laplaceK = getParameterAsDouble(PARAMETER_LAPLACE_K);
int criterion = getParameterAsInt(PARAMETER_CRITERION);
FrequentItemSets sets = itemSetsInput.getData(FrequentItemSets.class);
AssociationRules rules = new AssociationRules();
HashMap<Collection<Item>, Integer> setFrequencyMap = new HashMap<Collection<Item>, Integer>();
int numberOfTransactions = sets.getNumberOfTransactions();
// iterating sorted over every frequent Set, generating every possible rule and building
// frequency map
sets.sortSets();
int progressCounter = 0;
getProgress().setTotal(sets.size());
for (FrequentItemSet set : sets) {
setFrequencyMap.put(set.getItems(), set.getFrequency());
// generating rule by splitting set in every two parts for head and body of rule
if (set.getItems().size() > 1) {
PowerSet<Item> powerSet = new PowerSet<Item>(set.getItems());
for (Collection<Item> premises : powerSet) {
if (premises.size() > 0 && premises.size() < set.getItems().size()) {
Collection<Item> conclusion = powerSet.getComplement(premises);
int totalFrequency = set.getFrequency();
int preconditionFrequency = setFrequencyMap.get(premises);
int conclusionFrequency = setFrequencyMap.get(conclusion);
double value = getCriterionValue(totalFrequency, preconditionFrequency, conclusionFrequency,
numberOfTransactions, theta, laplaceK, criterion);
if (value >= minValue) {
AssociationRule rule = new AssociationRule(premises, conclusion,
getSupport(totalFrequency, numberOfTransactions));
rule.setConfidence(getConfidence(totalFrequency, preconditionFrequency));
rule.setLift(getLift(totalFrequency, preconditionFrequency, conclusionFrequency,
numberOfTransactions));
rule.setConviction(getConviction(totalFrequency, preconditionFrequency, conclusionFrequency,
numberOfTransactions));
rule.setPs(
getPs(totalFrequency, preconditionFrequency, conclusionFrequency, numberOfTransactions));
rule.setGain(getGain(theta, totalFrequency, preconditionFrequency, conclusionFrequency,
numberOfTransactions));
rule.setLaplace(getLaPlace(laplaceK, totalFrequency, preconditionFrequency, conclusionFrequency,
numberOfTransactions));
rules.addItemRule(rule);
}
}
}
}
if (++progressCounter % 100 == 0) {
getProgress().step(100);
}
}
rules.sort();
rulesOutput.deliver(rules);
itemSetsOutput.deliver(sets);
}
private double getCriterionValue(int totalFrequency, int preconditionFrequency, int conclusionFrequency,
int numberOfTransactions, double theta, double laplaceK, int criterion) {
switch (criterion) {
case LIFT:
return getLift(totalFrequency, preconditionFrequency, conclusionFrequency, numberOfTransactions);
case CONVICTION:
return getConviction(totalFrequency, preconditionFrequency, conclusionFrequency, numberOfTransactions);
case PS:
return getPs(totalFrequency, preconditionFrequency, conclusionFrequency, numberOfTransactions);
case GAIN:
return getGain(theta, totalFrequency, preconditionFrequency, conclusionFrequency, numberOfTransactions);
case LAPLACE:
return getLaPlace(laplaceK, totalFrequency, preconditionFrequency, conclusionFrequency,
numberOfTransactions);
case CONFIDENCE:
default:
return getConfidence(totalFrequency, preconditionFrequency);
}
}
private double getGain(double theta, int totalFrequency, int preconditionFrequency, int conclusionFrequency,
int numberOfTransactions) {
return getSupport(totalFrequency, numberOfTransactions)
- theta * getSupport(preconditionFrequency, numberOfTransactions);
}
private double getLift(int totalFrequency, int preconditionFrequency, int conclusionFrequency,
int numberOfTransactions) {
return (double) totalFrequency * (double) numberOfTransactions
/ ((double) preconditionFrequency * conclusionFrequency);
}
private double getPs(int totalFrequency, int preconditionFrequency, int conclusionFrequency, int numberOfTransactions) {
return getSupport(totalFrequency, numberOfTransactions) - getSupport(preconditionFrequency, numberOfTransactions)
* getSupport(conclusionFrequency, numberOfTransactions);
}
private double getLaPlace(double k, int totalFrequency, int preconditionFrequency, int conclusionFrequency,
int numberOfTransactions) {
return (getSupport(totalFrequency, numberOfTransactions) + 1d)
/ (getSupport(preconditionFrequency, numberOfTransactions) + k);
}
private double getConviction(int totalFrequency, int preconditionFrequency, int conclusionFrequency,
int numberOfTransactions) {
double numerator = preconditionFrequency * (numberOfTransactions - conclusionFrequency);
double denumerator = numberOfTransactions * (preconditionFrequency - totalFrequency);
return numerator / denumerator;
}
private double getConfidence(int totalFrequency, int preconditionFrequency) {
return (double) totalFrequency / (double) preconditionFrequency;
}
private double getSupport(int frequency, int completeSize) {
return (double) frequency / (double) completeSize;
}
@Override
public boolean shouldAutoConnect(OutputPort port) {
if (port == itemSetsOutput) {
return getParameterAsBoolean("keep_frequent_item_sets");
} else {
return super.shouldAutoConnect(port);
}
}
@Override
public List<ParameterType> getParameterTypes() {
List<ParameterType> types = super.getParameterTypes();
ParameterType type = new ParameterTypeCategory(PARAMETER_CRITERION,
"The criterion which is used for the selection of rules", CRITERIA, 0);
type.setExpert(false);
types.add(type);
type = new ParameterTypeDouble(PARAMETER_MIN_CONFIDENCE, "The minimum confidence of the rules", 0.0d, 1.0d, 0.8d);
type.setExpert(false);
type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_CRITERION, CRITERIA, true, CONFIDENCE));
types.add(type);
type = new ParameterTypeDouble(PARAMETER_MIN_CRITERION_VALUE,
"The minimum value of the rules for the selected criterion", Double.NEGATIVE_INFINITY,
Double.POSITIVE_INFINITY, 0.8d);
type.setExpert(false);
type.registerDependencyCondition(
new EqualTypeCondition(this, PARAMETER_CRITERION, CRITERIA, true, LIFT, CONVICTION, PS, GAIN, LAPLACE));
types.add(type);
type = new ParameterTypeDouble(PARAMETER_GAIN_THETA, "The Parameter Theta in Gain calculation",
Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 2d);
type.setExpert(true);
types.add(type);
type = new ParameterTypeDouble(PARAMETER_LAPLACE_K, "The Parameter k in LaPlace function calculation", 1,
Double.POSITIVE_INFINITY, 1d);
type.setExpert(true);
types.add(type);
return types;
}
}