/*
 * Copyright (C) 2012 Sebastian Schelter <sebastian.schelter [at] tu-berlin.de>
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */

package de.tuberlin.dima.recsys.ssnmm.ratingprediction;

import de.tuberlin.dima.recsys.ssnmm.Utils;
import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
import org.apache.mahout.cf.taste.impl.common.RunningAverage;

import java.io.File;
import java.io.FilenameFilter;
import java.util.regex.Pattern;

/**
 * Computes the average rating over all training files of a dataset directory.
 *
 * <p>Every file in the directory whose name starts with {@code train_} is read line by line via
 * {@link Utils#readLines}; the third tab-separated field of each line is parsed as an integer rating and fed into a
 * running average, so individual ratings are not retained by this class.
 */
public class AverageRating {

  /** Directory used when no directory is passed on the command line. */
  private static final String DEFAULT_DATASET_DIR = "/home/ssc/Entwicklung/datasets/yahoo-songs/";

  /** File name prefix that identifies a training file. */
  private static final String TRAINING_FILE_PREFIX = "train_";

  /** Zero-based index of the rating field within a tab-separated line. */
  private static final int RATING_FIELD = 2;

  /** A progress message is printed each time this many ratings have been processed. */
  private static final int PROGRESS_INTERVAL = 10000000;

  /** Field separator of the training files; compiled once and reused for every line. */
  private static final Pattern SEP = Pattern.compile("\t");

  /**
   * Prints the average rating found in the training files to standard output.
   *
   * @param args optional; {@code args[0]} is the dataset directory. When absent, the built-in default directory is
   *     used.
   * @throws IllegalStateException if the dataset directory cannot be listed (it does not exist, is not a directory,
   *     or an I/O error occurred)
   */
  public static void main(String[] args) {
    String dirName = (args != null && args.length > 0) ? args[0] : DEFAULT_DATASET_DIR;
    File dir = new File(dirName);

    File[] trainingFiles = dir.listFiles(new FilenameFilter() {
      @Override
      public boolean accept(File dir, String name) {
        return name.startsWith(TRAINING_FILE_PREFIX);
      }
    });

    // listFiles() signals "not a directory" or an I/O error with null rather than an exception; fail with a
    // message that names the path instead of hitting a NullPointerException in the loop below
    if (trainingFiles == null) {
      throw new IllegalStateException("Unable to list training files in " + dir.getAbsolutePath()
          + " (not a directory or not readable)");
    }

    RunningAverage avg = new FullRunningAverage();
    // long rather than int so that the counter cannot wrap around on very large datasets
    long ratingsProcessed = 0;

    for (File trainingFile : trainingFiles) {
      for (String line : Utils.readLines(trainingFile)) {
        int rating = Integer.parseInt(SEP.split(line)[RATING_FIELD]);
        avg.addDatum(rating);

        if (++ratingsProcessed % PROGRESS_INTERVAL == 0) {
          System.out.println(ratingsProcessed + " ratings processed");
        }
      }
    }

    System.out.println("average rating " + avg.getAverage());
  }
}