package com.skp.experiment.cf.als.hadoop;

import java.util.Map;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.common.AbstractJob;

import com.skp.experiment.cf.evaluate.hadoop.EvaluatorJob;
import com.skp.experiment.common.KeyValuesCountJob;
import com.skp.experiment.common.join.ImprovedRepartitionJoinAndFilterJob;

/**
 * <h1>Evaluate recommendations using MAP (mean average precision), precision, and recall metrics</h1>
 * <p>A thin wrapper job that evaluates recommendations against a probe set.</p>
 *
 * <p>Command line arguments specific to this class are:</p>
 *
 * <ol>
 * <li>--input (path): recommendations.</li>
 * <li>--output (path): path where output should go.</li>
 * <li>--probeSet (path): path to the probe set. The probe set must be index-encoded,
 *     since the recommendations are index-encoded as well.</li>
 * <li>--topK (int): top-K threshold for recommendations per key.</li>
 * </ol>
 */
public class RecommendEvaluator extends AbstractJob {

  private static final int DEFAULT_TOP_K = 100;

  public static void main(String[] args) throws Exception {
    ToolRunner.run(new RecommendEvaluator(), args);
  }

  @Override
  public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption("probeSet", null, "path to test set.");
    addOption("topK", null, "threshold for evaluation", String.valueOf(DEFAULT_TOP_K));

    Map<String, String> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
      return -1;
    }

    Path probeSet = new Path(getOption("probeSet"));
    int topK = getOption("topK") == null || getOption("topK").isEmpty()
        ? DEFAULT_TOP_K : Integer.parseInt(getOption("topK"));

    /* step 1: count the number of probe items per user. */
    KeyValuesCountJob userItemCntJob = new KeyValuesCountJob();
    userItemCntJob.setConf(getConf());
    if (userItemCntJob.run(new String[] {
        "-i", probeSet.toString(),
        "-o", getTempPath("userItemCnts").toString(),
        "-inType", "text", "-outType", "text"}) != 0) {
      return -1;
    }
    // output: <user_id:string, item_count:integer>

    /* step 2: inner join the recommendations with the per-user item counts,
       dropping users that have no probe data. */
    ImprovedRepartitionJoinAndFilterJob joinJob = new ImprovedRepartitionJoinAndFilterJob();
    joinJob.setConf(getConf());
    if (joinJob.run(new String[] {
        "-i", getInputPath().toString(),
        "-o", getTempPath("recAndTest").toString(),
        "-sidx", "0",
        "-tgt", getTempPath("userItemCnts").toString() + ":0:0:1:inner"}) != 0) {
      return -1;
    }
    // output: <user_id, item_id, score, reason_id, date, item_count>

    /* step 3: left outer join with the probe set to append a hit flag per
       (user, item) pair; pairs absent from the probe set get the default value -1.0. */
    ImprovedRepartitionJoinAndFilterJob outerJob = new ImprovedRepartitionJoinAndFilterJob();
    outerJob.setConf(getConf());
    if (outerJob.run(new String[] {
        "-i", getTempPath("recAndTest").toString(),
        "-o", getTempPath("recAndTestMerged").toString(),
        "-sidx", "0,1",
        "-tgt", probeSet.toString() + ":0,1:0,1:3:outer",
        "--defaultValue", "-1.0"}) != 0) {
      return -1;
    }
    // output: <user_id, item_id, score, reason_id, date, item_count, flag(0 or 1)>

    /* step 4: compute the evaluation metrics over the merged records. */
    EvaluatorJob evalJob = new EvaluatorJob();
    evalJob.setConf(getConf());
    if (evalJob.run(new String[] {
        "-i", getTempPath("recAndTestMerged").toString(),
        "-o", getOutputPath().toString(),
        "--topK", String.valueOf(topK)}) != 0) {
      return -1;
    }
    return 0;
  }
}
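
// A minimal invocation sketch for this driver. The jar name (experiment-job.jar) and
// all HDFS paths below are hypothetical placeholders; only the class name and the
// options registered in run() above are taken from the code. Since this class extends
// Mahout's AbstractJob, the standard --tempDir option is also available and controls
// where the intermediate userItemCnts/recAndTest/recAndTestMerged outputs are written.
//
//   hadoop jar experiment-job.jar com.skp.experiment.cf.als.hadoop.RecommendEvaluator \
//     --input /rec/als/recommendations \
//     --output /rec/als/evaluation \
//     --probeSet /rec/als/probeSetIdx \
//     --topK 100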