/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.fpm.pfpgrowth;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import com.google.common.collect.Lists;
import org.apache.commons.lang.mutable.MutableLong;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.Parameters;
import org.apache.mahout.fpm.pfpgrowth.convertors.ContextStatusUpdater;
import org.apache.mahout.fpm.pfpgrowth.convertors.ContextWriteOutputCollector;
import org.apache.mahout.fpm.pfpgrowth.convertors.integer.IntegerStringOutputConverter;
import org.apache.mahout.fpm.pfpgrowth.convertors.string.TopKStringPatterns;
import org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth;
import org.apache.mahout.math.list.IntArrayList;
import org.apache.mahout.math.map.OpenLongObjectHashMap;
import org.apache.mahout.math.map.OpenObjectIntHashMap;
/**
 * Takes each group of transactions, runs vanilla FPGrowth on it and
 * outputs the top K frequent patterns for each group.
 */
public class ParallelFPGrowthReducer extends Reducer<LongWritable,TransactionTree,Text,TopKStringPatterns> {

  /** Feature names in f-list order; the list index is the integer id stored in {@link #fMap}. */
  private final List<String> featureReverseMap = Lists.newArrayList();

  /** Feature name to integer id (position in the f-list), filled in {@link #setup}. */
  private final OpenObjectIntHashMap<String> fMap = new OpenObjectIntHashMap<String>();

  /** Group id to the integer ids of the features assigned to that group, filled in {@link #setup}. */
  private final OpenLongObjectHashMap<IntArrayList> groupFeatures = new OpenLongObjectHashMap<IntArrayList>();

  /** Heap size handed to FPGrowth; overridden from {@code PFPGrowth.MAX_HEAPSIZE} in {@link #setup}. */
  private int maxHeapSize = 50;

  /** Minimum support handed to FPGrowth; overridden from {@code PFPGrowth.MIN_SUPPORT} in {@link #setup}. */
  private int minSupport = 3;

  /**
   * Merges every transaction tree received for the group into a single tree, builds the
   * group-local frequency list sorted with {@link CountDescendingPairComparator}, and runs
   * FPGrowth restricted to the features registered for this group. Patterns are written to
   * {@code context} through a {@link ContextWriteOutputCollector}.
   *
   * @param key     the group id
   * @param values  the transaction trees emitted for this group
   * @param context the reducer context used for output and status updates
   * @throws IOException           if FPGrowth fails while writing its output
   * @throws IllegalStateException if no features were registered for {@code key} during setup
   */
  @Override
  protected void reduce(LongWritable key, Iterable<TransactionTree> values, Context context) throws IOException {
    long groupId = key.get();

    // Fail with a descriptive message instead of a bare NullPointerException when the
    // group list read in setup() does not know this group id.
    IntArrayList features = groupFeatures.get(groupId);
    if (features == null) {
      throw new IllegalStateException("No features registered for group " + groupId
          + "; the group list and the mapper output are inconsistent");
    }

    TransactionTree cTree = mergeTrees(values);
    List<Pair<Integer,Long>> localFList = buildLocalFList(cTree);

    FPGrowth<Integer> fpGrowth = new FPGrowth<Integer>();
    fpGrowth.generateTopKFrequentPatterns(
        cTree.iterator(),
        localFList,
        minSupport,
        maxHeapSize,
        new HashSet<Integer>(features.toList()),
        new IntegerStringOutputConverter(
            new ContextWriteOutputCollector<LongWritable,TransactionTree,Text,TopKStringPatterns>(context),
            featureReverseMap),
        new ContextStatusUpdater<LongWritable,TransactionTree,Text,TopKStringPatterns>(context));
  }

  /**
   * Reads the job parameters, the f-list and the group list from the job configuration and
   * populates {@link #featureReverseMap}, {@link #fMap}, {@link #groupFeatures},
   * {@link #maxHeapSize} and {@link #minSupport}.
   *
   * @param context the reducer context supplying the job configuration
   * @throws IOException          if the f-list or group list cannot be read
   * @throws InterruptedException if the superclass setup is interrupted
   */
  @Override
  protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Parameters params = new Parameters(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));

    // Feature ids are assigned in f-list order, so featureReverseMap.get(id) is the feature name.
    int nextId = 0;
    for (Pair<String,Long> e : PFPGrowth.readFList(context.getConfiguration())) {
      featureReverseMap.add(e.getFirst());
      fMap.put(e.getFirst(), nextId++);
    }

    Map<String,Long> gList = PFPGrowth.readGList(context.getConfiguration());
    for (Entry<String,Long> entry : gList.entrySet()) {
      String feature = entry.getKey();
      // OpenObjectIntHashMap.get() reports 0 for an absent key, which would silently alias an
      // unknown feature to feature id 0; skip features that are not part of the f-list.
      if (!fMap.containsKey(feature)) {
        continue;
      }
      int featureId = fMap.get(feature);
      long groupId = entry.getValue();

      IntArrayList groupList = groupFeatures.get(groupId);
      if (groupList == null) {
        groupList = new IntArrayList();
        groupFeatures.put(groupId, groupList);
      }
      groupList.add(featureId);
    }

    maxHeapSize = Integer.parseInt(params.get(PFPGrowth.MAX_HEAPSIZE, "50"));
    minSupport = Integer.parseInt(params.get(PFPGrowth.MIN_SUPPORT, "3"));
  }

  /** Adds every pattern of every incoming tree, with its count, to one combined tree. */
  private static TransactionTree mergeTrees(Iterable<TransactionTree> trees) {
    TransactionTree merged = new TransactionTree();
    for (TransactionTree tree : trees) {
      for (Pair<List<Integer>,Long> pattern : tree) {
        merged.addPattern(pattern.getFirst(), pattern.getSecond());
      }
    }
    return merged;
  }

  /** Turns the tree's f-list into (feature id, count) pairs sorted with CountDescendingPairComparator. */
  private static List<Pair<Integer,Long>> buildLocalFList(TransactionTree tree) {
    List<Pair<Integer,Long>> localFList = Lists.newArrayList();
    for (Entry<Integer,MutableLong> fItem : tree.generateFList().entrySet()) {
      localFList.add(new Pair<Integer,Long>(fItem.getKey(), fItem.getValue().toLong()));
    }
    Collections.sort(localFList, new CountDescendingPairComparator<Integer,Long>());
    return localFList;
  }
}