/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mahout.cf.taste.impl.transforms;

import java.util.Collection;

import com.google.common.base.Preconditions;
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.Cache;
import org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev;
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.impl.common.Retriever;
import org.apache.mahout.cf.taste.impl.common.RunningAverageAndStdDev;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.Preference;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.transforms.PreferenceTransform;

/**
 * <p>
 * Normalizes preference values for a user by converting them to <a
 * href="http://mathworld.wolfram.com/z-Score.html">"z-scores"</a>. This process normalizes preference values
 * to adjust for variation in mean and variance of a user's preferences.
 * </p>
 *
 * <p>
 * Imagine two users, one who tends to rate every movie he/she sees four or five stars, and another who uses
 * the full one to five star range when assigning ratings. This transform normalizes away the difference in
 * scale used by the two users so that both have a mean preference of 0.0 and a standard deviation of 1.0.
 * </p>
 *
 * <p>
 * Per-user mean and standard deviation are computed on demand from the underlying {@link DataModel} and
 * kept in a {@link Cache} keyed by user ID until the next call to {@link #refresh(Collection)}.
 * </p>
 */
public final class ZScore implements PreferenceTransform {

  /** Source of the per-user preferences the statistics are computed from. */
  private final DataModel dataModel;

  /** Per-user running mean / standard deviation, keyed by user ID and filled by {@link MeanStdevRetriever}. */
  private final Cache<Long,RunningAverageAndStdDev> meanAndStdevs;

  /**
   * Creates a transform backed by the given model and immediately performs an initial
   * {@link #refresh(Collection) refresh}.
   *
   * @param dataModel model supplying each user's preferences; must not be {@code null}
   * @throws NullPointerException if {@code dataModel} is {@code null}
   */
  public ZScore(DataModel dataModel) {
    this.dataModel = Preconditions.checkNotNull(dataModel);
    this.meanAndStdevs = new Cache<Long,RunningAverageAndStdDev>(new MeanStdevRetriever());
    refresh(null);
  }

  /**
   * Converts a preference value into its z-score relative to all preferences of the same user.
   *
   * @param pref preference to transform; its user ID selects the statistics that are applied
   * @return {@code (value - mean) / stdev} for the preference's user, or {@code 0.0f} when that user has at
   *         most one recorded preference or the standard deviation is not strictly positive
   * @throws TasteException if the user's statistics cannot be retrieved through the cache
   */
  @Override
  public float getTransformedValue(Preference pref) throws TasteException {
    RunningAverageAndStdDev stats = meanAndStdevs.get(pref.getUserID());
    // A single data point (or none) has no meaningful spread to normalize against.
    if (stats.getCount() <= 1) {
      return 0.0f;
    }
    double stdev = stats.getStandardDeviation();
    // "stdev > 0.0" is also false for NaN, so a degenerate deviation never reaches the division.
    if (stdev > 0.0) {
      return (float) ((pref.getValue() - stats.getAverage()) / stdev);
    }
    return 0.0f;
  }

  /**
   * Refreshes the underlying {@link DataModel} (unless {@link RefreshHelper} decides it has already been
   * refreshed) and then discards all cached per-user statistics.
   *
   * <p>
   * The model is refreshed <em>before</em> the cache is cleared. Clearing first would let a
   * {@link #getTransformedValue(Preference)} call that arrives while the model is still refreshing
   * re-populate the cache from pre-refresh data, and that entry would then survive until the next refresh.
   * The cache is cleared even if the model refresh throws.
   * </p>
   *
   * @param alreadyRefreshed components already refreshed in this cycle; may be {@code null} (the constructor
   *        passes {@code null}), it is handed to {@link RefreshHelper#buildRefreshed(Collection)} before use
   */
  @Override
  public void refresh(Collection<Refreshable> alreadyRefreshed) {
    Collection<Refreshable> refreshed = RefreshHelper.buildRefreshed(alreadyRefreshed);
    try {
      RefreshHelper.maybeRefresh(refreshed, dataModel);
    } finally {
      // Drop cached statistics last so nothing derived from pre-refresh data is left behind.
      meanAndStdevs.clear();
    }
  }

  @Override
  public String toString() {
    return "ZScore";
  }

  /**
   * Cache loader that computes a user's mean and standard deviation from scratch by feeding every one of
   * the user's preference values into a {@link FullRunningAverageAndStdDev}. Deliberately a non-static
   * inner class: it reads the enclosing instance's {@code dataModel}.
   */
  private final class MeanStdevRetriever implements Retriever<Long,RunningAverageAndStdDev> {

    /**
     * @param userID ID of the user whose preferences are aggregated
     * @return statistics accumulated over all of the user's preference values
     * @throws TasteException if the data model cannot supply the user's preferences
     */
    @Override
    public RunningAverageAndStdDev get(Long userID) throws TasteException {
      RunningAverageAndStdDev running = new FullRunningAverageAndStdDev();
      PreferenceArray prefs = dataModel.getPreferencesFromUser(userID);
      int size = prefs.length();
      for (int i = 0; i < size; i++) {
        running.addDatum(prefs.getValue(i));
      }
      return running;
    }
  }
}