/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.fpm.pfpgrowth;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import com.google.common.collect.Lists;
import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.Pair;
import org.apache.mahout.fpm.pfpgrowth.convertors.ContextStatusUpdater;
import org.apache.mahout.fpm.pfpgrowth.convertors.SequenceFileOutputCollector;
import org.apache.mahout.fpm.pfpgrowth.convertors.string.StringOutputConverter;
import org.apache.mahout.fpm.pfpgrowth.convertors.string.TopKStringPatterns;
import org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth;
import org.junit.Test;
public final class FPGrowthTest extends MahoutTestCase {
@Test
public void testMaxHeapFPGrowth() throws Exception {
FPGrowth<String> fp = new FPGrowth<String>();
Collection<Pair<List<String>,Long>> transactions = Lists.newArrayList();
transactions.add(new Pair<List<String>,Long>(Arrays.asList("E", "A", "D", "B"), 1L));
transactions.add(new Pair<List<String>,Long>(Arrays.asList("D", "A", "C", "E", "B"), 1L));
transactions.add(new Pair<List<String>,Long>(Arrays.asList("C", "A", "B", "E"), 1L));
transactions.add(new Pair<List<String>,Long>(Arrays.asList("B", "A", "D"), 1L));
transactions.add(new Pair<List<String>,Long>(Arrays.asList("D"), 1L));
transactions.add(new Pair<List<String>,Long>(Arrays.asList("D", "B"), 1L));
transactions.add(new Pair<List<String>,Long>(Arrays.asList("A", "D", "E"), 1L));
transactions.add(new Pair<List<String>,Long>(Arrays.asList("B", "C"), 1L));
Path path = getTestTempFilePath("fpgrowthTest.dat");
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
SequenceFile.Writer writer =
new SequenceFile.Writer(fs, conf, path, Text.class, TopKStringPatterns.class);
try {
fp.generateTopKFrequentPatterns(
transactions.iterator(),
fp.generateFList(transactions.iterator(), 3),
3,
100,
new HashSet<String>(),
new StringOutputConverter(new SequenceFileOutputCollector<Text,TopKStringPatterns>(writer)),
new ContextStatusUpdater(null));
} finally {
Closeables.closeQuietly(writer);
}
List<Pair<String, TopKStringPatterns>> frequentPatterns = FPGrowth.readFrequentPattern(conf, path);
assertEquals(
"[(C,([B, C],3)), "
+ "(E,([A, E],4), ([A, B, E],3), ([A, D, E],3)), "
+ "(A,([A],5), ([A, D],4), ([A, E],4), ([A, B],4), ([A, B, E],3), ([A, D, E],3), ([A, B, D],3)), "
+ "(D,([D],6), ([B, D],4), ([A, D],4), ([A, D, E],3), ([A, B, D],3)), "
+ "(B,([B],6), ([A, B],4), ([B, D],4), ([A, B, D],3), ([A, B, E],3), ([B, C],3))]",
frequentPatterns.toString());
}
/**
* Trivial test for MAHOUT-617
*/
@Test
public void testMaxHeapFPGrowthData1() throws Exception {
FPGrowth<String> fp = new FPGrowth<String>();
Collection<Pair<List<String>,Long>> transactions = Lists.newArrayList();
transactions.add(new Pair<List<String>,Long>(Arrays.asList("X"), 12L));
transactions.add(new Pair<List<String>,Long>(Arrays.asList("Y"), 4L));
transactions.add(new Pair<List<String>,Long>(Arrays.asList("X", "Y"), 10L));
Path path = getTestTempFilePath("fpgrowthTestData1.dat");
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
System.out.println(fp.generateFList(transactions.iterator(), 2));
SequenceFile.Writer writer =
new SequenceFile.Writer(fs, conf, path, Text.class, TopKStringPatterns.class);
try {
fp.generateTopKFrequentPatterns(
transactions.iterator(),
fp.generateFList(transactions.iterator(), 2),
2,
100,
new HashSet<String>(),
new StringOutputConverter(new SequenceFileOutputCollector<Text,TopKStringPatterns>(writer)),
new ContextStatusUpdater(null));
} finally {
Closeables.closeQuietly(writer);
}
List<Pair<String, TopKStringPatterns>> frequentPatterns = FPGrowth.readFrequentPattern(conf, path);
assertEquals(
"[(Y,([Y],14), ([X, Y],10)), (X,([X],22), ([X, Y],10))]", frequentPatterns.toString());
}
/**
* Trivial test for MAHOUT-617
*/
@Test
public void testMaxHeapFPGrowthData2() throws Exception {
FPGrowth<String> fp = new FPGrowth<String>();
Collection<Pair<List<String>,Long>> transactions = Lists.newArrayList();
transactions.add(new Pair<List<String>,Long>(Arrays.asList("X"), 12L));
transactions.add(new Pair<List<String>,Long>(Arrays.asList("Y"), 4L));
transactions.add(new Pair<List<String>,Long>(Arrays.asList("X", "Y"), 10L));
transactions.add(new Pair<List<String>,Long>(Arrays.asList("X", "Y", "Z"), 11L));
Path path = getTestTempFilePath("fpgrowthTestData2.dat");
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
System.out.println(fp.generateFList(transactions.iterator(), 2));
SequenceFile.Writer writer =
new SequenceFile.Writer(fs, conf, path, Text.class, TopKStringPatterns.class);
try {
fp.generateTopKFrequentPatterns(
transactions.iterator(),
fp.generateFList(transactions.iterator(), 2),
2,
100,
new HashSet<String>(),
new StringOutputConverter(new SequenceFileOutputCollector<Text,TopKStringPatterns>(writer)),
new ContextStatusUpdater(null));
} finally {
Closeables.closeQuietly(writer);
}
List<Pair<String, TopKStringPatterns>> frequentPatterns = FPGrowth.readFrequentPattern(conf, path);
assertEquals(
"[(Z,([X, Y, Z],11)), (Y,([Y],25), ([X, Y],21), ([X, Y, Z],11)), (X,([X],33), ([X, Y],21), ([X, Y, Z],11))]",
frequentPatterns.toString());
}
/**
* Trivial test for MAHOUT-355
*/
@Test
public void testNoNullPointerExceptionWhenReturnableFeaturesIsNull() throws Exception {
FPGrowth<String> fp = new FPGrowth<String>();
Collection<Pair<List<String>,Long>> transactions = Lists.newArrayList();
transactions.add(new Pair<List<String>,Long>(Arrays.asList("E", "A", "D", "B"), 1L));
OutputCollector<String, List<Pair<List<String>, Long>>> noOutput =
new OutputCollector<String,List<Pair<List<String>,Long>>>() {
@Override
public void collect(String arg0, List<Pair<List<String>, Long>> arg1) {
}
};
fp.generateTopKFrequentPatterns(
transactions.iterator(),
fp.generateFList(transactions.iterator(), 3),
3,
100,
null,
noOutput,
new ContextStatusUpdater(null));
}
}