package com.skp.experiment.cf.evaluate.hadoop; import java.io.IOException; import java.util.Comparator; import java.util.Iterator; import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.mahout.cf.taste.common.TopK; import org.apache.mahout.cf.taste.impl.recommender.GenericRecommendedItem; import org.apache.mahout.cf.taste.recommender.RecommendedItem; import org.apache.mahout.common.AbstractJob; import org.apache.mahout.math.Vector; import org.apache.mahout.math.VectorWritable; import com.google.common.collect.Lists; import com.google.common.primitives.Floats; public class TopKRecommendationsJob extends AbstractJob { private static final String RECOMMENDATIONS_PER_USER = "recommendationsPerUser"; private static final String DELIMETER = ","; @Override public int run(String[] args) throws Exception { addInputOption(); addOutputOption(); addOption(RECOMMENDATIONS_PER_USER, "k", "recommendations per user."); if (parseArguments(args) == null) { return -1; } Job job = prepareJob(getInputPath(), getOutputPath(), SequenceFileInputFormat.class, TopKRecommendationsMapper.class, IntWritable.class, Text.class, SequenceFileOutputFormat.class); Configuration conf = job.getConfiguration(); conf.setInt(RECOMMENDATIONS_PER_USER, Integer.parseInt(getOption(RECOMMENDATIONS_PER_USER))); job.waitForCompletion(true); return 0; } private static final Comparator<RecommendedItem> BY_PREFERENCE_VALUE = new Comparator<RecommendedItem>() { @Override public int compare(RecommendedItem one, RecommendedItem two) { return Floats.compare(one.getValue(), two.getValue()); } }; public static class TopKRecommendationsMapper extends Mapper<IntWritable, VectorWritable, IntWritable, Text> { private static Text outValue = new Text(); private static int recommendationsPerUser = 0; @Override protected void setup(Context context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); recommendationsPerUser = conf.getInt(RECOMMENDATIONS_PER_USER, 0); } @Override protected void map(IntWritable key, VectorWritable value, Context context) throws IOException, InterruptedException { Vector v = value.get(); Iterator<Vector.Element> iter = v.iterateNonZero(); TopK<RecommendedItem> topKItems = new TopK<RecommendedItem>(recommendationsPerUser, BY_PREFERENCE_VALUE); while (iter.hasNext()) { Vector.Element e = iter.next(); topKItems.offer(new GenericRecommendedItem(e.index(), (float)e.get())); } List<RecommendedItem> recommendedItems = Lists.newArrayListWithExpectedSize(recommendationsPerUser); for (RecommendedItem topItem : topKItems.retrieve()) { recommendedItems.add(new GenericRecommendedItem(topItem.getItemID(), topItem.getValue())); } if (recommendedItems.size() > 0) { StringBuffer sb = new StringBuffer(); for (int i = 0; i < recommendedItems.size(); i++) { RecommendedItem item = recommendedItems.get(i); if (i != 0) { sb.append(DELIMETER); } sb.append(item.getItemID()).append(DELIMETER).append(item.getValue()); } outValue.set(sb.toString()); context.write(key, outValue); } } } }