/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.cf.taste.example.kddcup.track1;
import java.io.File;
import java.util.Collection;
import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicInteger;
import com.google.common.collect.Lists;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.DataModelBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.example.kddcup.DataFileIterable;
import org.apache.mahout.cf.taste.example.kddcup.KDDCupDataModel;
import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
import org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev;
import org.apache.mahout.cf.taste.impl.common.RunningAverage;
import org.apache.mahout.cf.taste.impl.common.RunningAverageAndStdDev;
import org.apache.mahout.cf.taste.impl.eval.AbstractDifferenceRecommenderEvaluator;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.Preference;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.common.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Attempts to run an evaluation just like that dictated for Yahoo's KDD Cup, Track 1.
* It will compute the RMSE of a validation data set against the predicted ratings from
* the training data set.
*/
public final class Track1RecommenderEvaluator extends AbstractDifferenceRecommenderEvaluator {
private static final Logger log = LoggerFactory.getLogger(Track1RecommenderEvaluator.class);
private RunningAverage average;
private final File dataFileDirectory;
public Track1RecommenderEvaluator(File dataFileDirectory) {
setMaxPreference(100.0f);
setMinPreference(0.0f);
average = new FullRunningAverage();
this.dataFileDirectory = dataFileDirectory;
}
@Override
public double evaluate(RecommenderBuilder recommenderBuilder,
DataModelBuilder dataModelBuilder,
DataModel dataModel,
double trainingPercentage,
double evaluationPercentage) throws TasteException {
Recommender recommender = recommenderBuilder.buildRecommender(dataModel);
Collection<Callable<Void>> estimateCallables = Lists.newArrayList();
AtomicInteger noEstimateCounter = new AtomicInteger();
for (Pair<PreferenceArray,long[]> userData :
new DataFileIterable(KDDCupDataModel.getValidationFile(dataFileDirectory))) {
PreferenceArray validationPrefs = userData.getFirst();
long userID = validationPrefs.get(0).getUserID();
estimateCallables.add(
new PreferenceEstimateCallable(recommender, userID, validationPrefs, noEstimateCounter));
}
RunningAverageAndStdDev timing = new FullRunningAverageAndStdDev();
execute(estimateCallables, noEstimateCounter, timing);
double result = computeFinalEvaluation();
log.info("Evaluation result: {}", result);
return result;
}
// Use RMSE scoring:
@Override
protected void reset() {
average = new FullRunningAverage();
}
@Override
protected void processOneEstimate(float estimatedPreference, Preference realPref) {
double diff = realPref.getValue() - estimatedPreference;
average.addDatum(diff * diff);
}
@Override
protected double computeFinalEvaluation() {
return Math.sqrt(average.getAverage());
}
}