/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.associations;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.MissingIOObjectException;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.tools.Tupel;
/**
* This operator compares a number of FrequentItemSet sets and removes every
* not unique FrequentItemSet.
*
* @author Sebastian Land
* @version $Id: FrequentItemSetUnificator.java,v 1.5 2008/07/07 07:06:46 ingomierswa Exp $
*/
public class FrequentItemSetUnificator extends Operator {
// private static final String PARAMETER_MINIMAL_DIFFERENCE = "minimal_support_difference";
// private static final String PARAMETER_MINIMAL_SUPPORT = "minimal_support";
private class FrequencyIgnoringSetComparator implements Comparator<FrequentItemSet> {
public int compare(FrequentItemSet o1, FrequentItemSet o2) {
// compare size
Collection<Item> items = o1.getItems();
Collection<Item> hisItems = o2.getItems();
if (items.size() < hisItems.size()) {
return -1;
} else if (items.size() > hisItems.size()) {
return 1;
} else {
// compare items
Iterator<Item> iterator = hisItems.iterator();
for (Item myCurrentItem : items) {
int relation = myCurrentItem.toString().compareTo(iterator.next().toString());
if (relation != 0) {
return relation;
}
}
// equal sets
return 0;
}
}
}
private class TupelComparator implements Comparator<Tupel<FrequentItemSet, Iterator<FrequentItemSet>>> {
public int compare(Tupel<FrequentItemSet, Iterator<FrequentItemSet>> o1, Tupel<FrequentItemSet, Iterator<FrequentItemSet>> o2) {
FrequencyIgnoringSetComparator comparator = new FrequencyIgnoringSetComparator();
return comparator.compare(o1.getFirst(), o2.getFirst());
}
}
public FrequentItemSetUnificator(OperatorDescription description) {
super(description);
}
public IOObject[] apply() throws OperatorException {
// double exampleSetSize = getInput(ExampleSet.class).size();
// double minDifference = getParameterAsDouble(PARAMETER_MINIMAL_DIFFERENCE);
// double minSupport = getParameterAsDouble(PARAMETER_MINIMAL_SUPPORT);
ArrayList<FrequentItemSets> sets = new ArrayList<FrequentItemSets>();
try {
int i = 0;
while (true) {
FrequentItemSets set = getInput(FrequentItemSets.class, 0);
set.sortSets(new FrequencyIgnoringSetComparator());
sets.add(set);
i++;
}
} catch (MissingIOObjectException e) {
}
ArrayList<Tupel<FrequentItemSet, Iterator<FrequentItemSet>>> iteratorTupels = new ArrayList<Tupel<FrequentItemSet, Iterator<FrequentItemSet>>>(2);
for (FrequentItemSets classSets: sets) {
Iterator<FrequentItemSet> iterator = classSets.iterator();
iteratorTupels.add(new Tupel<FrequentItemSet, Iterator<FrequentItemSet>>(iterator.next(), iterator));
}
// running through itterators
while(haveNext(iteratorTupels)) {
// filling set to test if all frequent item sets are equall
Set<FrequentItemSet> currentSets = new TreeSet<FrequentItemSet>(new FrequencyIgnoringSetComparator());
for (Tupel<FrequentItemSet, Iterator<FrequentItemSet>> tupel: iteratorTupels) {
currentSets.add(tupel.getFirst());
}
if (currentSets.size() == 1) {
// not unique: deletion
ArrayList<Tupel<FrequentItemSet, Iterator<FrequentItemSet>>> newTupels = new ArrayList<Tupel<FrequentItemSet, Iterator<FrequentItemSet>>>(2);
for (Tupel<FrequentItemSet, Iterator<FrequentItemSet>> tupel: iteratorTupels) {
Iterator<FrequentItemSet> currentIterator = tupel.getSecond();
currentIterator.remove();
if (currentIterator.hasNext())
newTupels.add(new Tupel<FrequentItemSet, Iterator<FrequentItemSet>>(currentIterator.next(), currentIterator));
}
iteratorTupels = newTupels;
} else {
// unique: no deletion but forward smallest iterator
Collections.sort(iteratorTupels, new TupelComparator());
Iterator<FrequentItemSet> currentIterator = iteratorTupels.get(0).getSecond();
if (currentIterator.hasNext())
iteratorTupels.add(new Tupel<FrequentItemSet, Iterator<FrequentItemSet>>(currentIterator.next(), currentIterator));
iteratorTupels.remove(0);
}
}
IOObject[] objects = new IOObject[sets.size()];
int i = 0;
for (FrequentItemSets currentSets: sets) {
objects[i] = currentSets;
i++;
}
return (objects);
}
private boolean haveNext(ArrayList<Tupel<FrequentItemSet, Iterator<FrequentItemSet>>> iterators) {
boolean hasNext = iterators.size() > 0;
for (Tupel<FrequentItemSet, Iterator<FrequentItemSet>> iterator: iterators) {
hasNext = hasNext || iterator.getSecond().hasNext();
}
return hasNext;
}
public Class<?>[] getInputClasses() {
return new Class[] {FrequentItemSets.class, ExampleSet.class};
}
public Class<?>[] getOutputClasses() {
return new Class[] {FrequentItemSets.class};
}
public List<ParameterType> getParameterTypes() {
List<ParameterType> parameters = super.getParameterTypes();
// parameters.add(new ParameterTypeDouble(PARAMETER_MINIMAL_DIFFERENCE, "specifies the support threshold to regard two itemsets as different", 0, Double.POSITIVE_INFINITY, 0.2));
// parameters.add(new ParameterTypeDouble(PARAMETER_MINIMAL_SUPPORT, "specifies the support threshold to keep an itemset", 0, Double.POSITIVE_INFINITY, 0.3));
return parameters;
}
}