/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.cf.taste.impl.recommender.knn;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.recommender.TopItems;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.recommender.CandidateItemsStrategy;
import org.apache.mahout.cf.taste.recommender.MostSimilarItemsCandidateItemsStrategy;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Rescorer;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
import org.apache.mahout.common.LongPair;
/**
* <p>
* The weights used to compute the final predicted preferences are calculated by linear interpolation, using
* an {@link Optimizer}. This algorithm is based on the paper by Robert M. Bell and Yehuda Koren in ICDM '07.
* </p>
*/
public final class KnnItemBasedRecommender extends GenericItemBasedRecommender {

  /**
   * Shrinkage constant: the weight given to the average matrix/vector entries when every entry of the
   * interpolation system is blended with its average in {@link #getInterpolations}.
   */
  private static final double BETA = 500.0;

  /** Solves the regularized linear system for the interpolation weights. */
  private final Optimizer optimizer;

  /** Number of most-similar items requested as the neighborhood of the item being estimated. */
  private final int neighborhoodSize;

  /**
   * Creates a recommender with explicit candidate-item strategies.
   *
   * @param dataModel source of preference data
   * @param similarity item-item similarity used to pick the neighborhood
   * @param optimizer solver used to derive the interpolation weights
   * @param candidateItemsStrategy passed through to {@link GenericItemBasedRecommender}
   * @param mostSimilarItemsCandidateItemsStrategy passed through to {@link GenericItemBasedRecommender}
   * @param neighborhoodSize how many similar items to request when building a neighborhood
   */
  public KnnItemBasedRecommender(DataModel dataModel,
                                 ItemSimilarity similarity,
                                 Optimizer optimizer,
                                 CandidateItemsStrategy candidateItemsStrategy,
                                 MostSimilarItemsCandidateItemsStrategy mostSimilarItemsCandidateItemsStrategy,
                                 int neighborhoodSize) {
    super(dataModel, similarity, candidateItemsStrategy, mostSimilarItemsCandidateItemsStrategy);
    this.optimizer = optimizer;
    this.neighborhoodSize = neighborhoodSize;
  }

  /**
   * Creates a recommender using the default candidate-item strategies of the superclass.
   *
   * @param dataModel source of preference data
   * @param similarity item-item similarity used to pick the neighborhood
   * @param optimizer solver used to derive the interpolation weights
   * @param neighborhoodSize how many similar items to request when building a neighborhood
   */
  public KnnItemBasedRecommender(DataModel dataModel,
                                 ItemSimilarity similarity,
                                 Optimizer optimizer,
                                 int neighborhoodSize) {
    this(dataModel, similarity, optimizer, getDefaultCandidateItemsStrategy(),
         getDefaultMostSimilarItemsCandidateItemsStrategy(), neighborhoodSize);
  }

  /**
   * Ranks the candidate items with a {@code MostSimilarEstimator} built for {@code itemID} and returns
   * the top {@code howMany} of them.
   *
   * @param itemID item the candidates are compared against
   * @param possibleItemIDs candidate item IDs
   * @param howMany maximum number of items to return
   * @param rescorer optional rescorer handed to the estimator; may be {@code null}
   * @return the top-ranked candidates
   * @throws TasteException if the underlying lookup fails
   */
  private List<RecommendedItem> mostSimilarItems(long itemID,
                                                 LongPrimitiveIterator possibleItemIDs,
                                                 int howMany,
                                                 Rescorer<LongPair> rescorer) throws TasteException {
    TopItems.Estimator<Long> estimator = new MostSimilarEstimator(itemID, getSimilarity(), rescorer);
    return TopItems.getTopItems(howMany, possibleItemIDs, null, estimator);
  }

  /**
   * Builds the linear system whose solution gives the interpolation weights of the neighbors and hands
   * it to the {@link Optimizer}.
   *
   * <p>With {@code k} neighbors and {@code n} users, {@code A[i][j]} is the average over the users of
   * the product of their ratings for neighbors {@code i} and {@code j}, and {@code b[i]} is the average
   * product of their ratings for neighbor {@code i} and the target item. Every entry is then blended
   * with an average entry, weighted by {@link #BETA}, before the system is solved.</p>
   *
   * @param itemID the item whose preference is being estimated
   * @param itemNeighborhood IDs of the neighbor items; must not be empty and must not contain
   *        {@code itemID}. The array is not modified.
   * @param usersRatedNeighborhood users that have a preference for every neighbor and for
   *        {@code itemID}; must not be empty
   * @return one weight per entry of {@code itemNeighborhood}, in the same order
   * @throws TasteException if a preference lookup fails
   */
  private double[] getInterpolations(long itemID,
                                     long[] itemNeighborhood,
                                     Collection<Long> usersRatedNeighborhood) throws TasteException {
    int k = itemNeighborhood.length;
    int numUsers = usersRatedNeighborhood.size();
    DataModel dataModel = getDataModel();

    // Fetch each rating exactly once; the products below would otherwise repeat the same lookups
    // for every pair of neighbors.
    float[][] neighborPrefs = new float[k][numUsers];
    float[] targetPrefs = new float[numUsers];
    int userIndex = 0;
    for (long user : usersRatedNeighborhood) {
      for (int i = 0; i < k; i++) {
        neighborPrefs[i][userIndex] = dataModel.getPreferenceValue(user, itemNeighborhood[i]);
      }
      targetPrefs[userIndex] = dataModel.getPreferenceValue(user, itemID);
      userIndex++;
    }

    double[][] aMatrix = new double[k][k];
    double[] b = new double[k];
    for (int i = 0; i < k; i++) {
      // The matrix is symmetric, so only the upper triangle is computed and then mirrored.
      for (int j = i; j < k; j++) {
        // The accumulator starts from zero for every cell so that each entry only reflects
        // its own pair of items.
        double pairSum = 0.0;
        for (int u = 0; u < numUsers; u++) {
          pairSum += neighborPrefs[i][u] * neighborPrefs[j][u];
        }
        double cell = pairSum / numUsers;
        aMatrix[i][j] = cell;
        aMatrix[j][i] = cell;
      }
      double targetSum = 0.0;
      for (int u = 0; u < numUsers; u++) {
        targetSum += neighborPrefs[i][u] * targetPrefs[u];
      }
      b[i] = targetSum / numUsers;
    }

    // Find the larger of the main diagonal and the anti-diagonal and calculate its average
    double avgDiagonal = 0.0;
    if (k > 1) {
      double diagonalA = 0.0;
      double diagonalB = 0.0;
      for (int i = 0; i < k; i++) {
        diagonalA += aMatrix[i][i];
        diagonalB += aMatrix[k - 1 - i][i];
      }
      avgDiagonal = Math.max(diagonalA, diagonalB) / k;
    }

    // Calculate the average of the non-diagonal values (of the single value when k == 1)
    double avgMatrixA = 0.0;
    double avgVectorB = 0.0;
    for (int i = 0; i < k; i++) {
      for (int j = 0; j < k; j++) {
        if (i != j || k <= 1) {
          avgMatrixA += aMatrix[i][j];
        }
      }
      avgVectorB += b[i];
    }
    if (k > 1) {
      avgMatrixA /= k * k - k;
    }
    avgVectorB /= k;

    // Blend every entry with its average: the fewer users support the entry, the more the
    // average dominates.
    double numUsersPlusBeta = numUsers + BETA;
    for (int i = 0; i < k; i++) {
      for (int j = 0; j < k; j++) {
        double average = (i == j && k > 1) ? avgDiagonal : avgMatrixA;
        aMatrix[i][j] = (numUsers * aMatrix[i][j] + BETA * average) / numUsersPlusBeta;
      }
      b[i] = (numUsers * b[i] + BETA * avgVectorB) / numUsersPlusBeta;
    }

    return optimizer.optimize(aMatrix, b);
  }

  /**
   * Collects the users, other than {@code theUserID}, that have a preference for every item in
   * {@code itemNeighborhood} and for {@code itemID}. Users are returned in the order in which they
   * appear in the preferences of the first neighbor.
   *
   * @param theUserID user to exclude from the result
   * @param itemID the item whose preference is being estimated
   * @param itemNeighborhood neighbor item IDs; must not be empty
   * @return the matching user IDs, possibly empty
   * @throws TasteException if the preferences of an item cannot be loaded
   */
  private List<Long> findUsersRatedNeighborhood(long theUserID,
                                                long itemID,
                                                long[] itemNeighborhood) throws TasteException {
    DataModel dataModel = getDataModel();
    PreferenceArray seedPrefs = dataModel.getPreferencesForItem(itemNeighborhood[0]);

    // Preferences of the remaining neighbors followed by those of the target item.
    PreferenceArray[] requiredPrefs = new PreferenceArray[itemNeighborhood.length];
    for (int i = 1; i < itemNeighborhood.length; i++) {
      requiredPrefs[i - 1] = dataModel.getPreferencesForItem(itemNeighborhood[i]);
    }
    requiredPrefs[itemNeighborhood.length - 1] = dataModel.getPreferencesForItem(itemID);

    List<Long> users = new ArrayList<Long>();
    int numSeedPrefs = seedPrefs.length();
    for (int p = 0; p < numSeedPrefs; p++) {
      long candidate = seedPrefs.getUserID(p);
      if (candidate != theUserID && hasPrefInAll(candidate, requiredPrefs)) {
        users.add(candidate);
      }
    }
    return users;
  }

  /** Tells whether {@code userID} has a preference in every one of the given preference arrays. */
  private static boolean hasPrefInAll(long userID, PreferenceArray[] prefsPerItem) {
    for (PreferenceArray prefs : prefsPerItem) {
      if (!prefs.hasPrefWithUserID(userID)) {
        return false;
      }
    }
    return true;
  }

  /**
   * Estimates the preference of {@code theUserID} for {@code itemID} as the weighted average of the
   * user's preferences for the neighbor items, using the interpolation weights from
   * {@link #getInterpolations}.
   *
   * <p>The neighborhood is chosen among the items the user already has a preference for (excluding
   * {@code itemID} itself).</p>
   *
   * @param theUserID user for whom the estimate is made
   * @param preferencesFromUser the user's known preferences
   * @param itemID item whose preference is estimated
   * @return the estimated preference, or {@link Float#NaN} when there is no neighbor, when no other
   *         user has rated the whole neighborhood together with {@code itemID}, or when the
   *         applicable weights sum to zero
   * @throws TasteException if the data model cannot be queried
   */
  @Override
  protected float doEstimatePreference(long theUserID, PreferenceArray preferencesFromUser, long itemID)
    throws TasteException {
    DataModel dataModel = getDataModel();

    int size = preferencesFromUser.length();
    FastIDSet possibleItemIDs = new FastIDSet(size);
    for (int i = 0; i < size; i++) {
      possibleItemIDs.add(preferencesFromUser.getItemID(i));
    }
    possibleItemIDs.remove(itemID);

    List<RecommendedItem> mostSimilar = mostSimilarItems(itemID, possibleItemIDs.iterator(),
                                                         neighborhoodSize, null);
    if (mostSimilar.isEmpty()) {
      // Nothing to interpolate from.
      return Float.NaN;
    }

    long[] theNeighborhood = new long[mostSimilar.size()];
    int nOffset = 0;
    for (RecommendedItem rec : mostSimilar) {
      theNeighborhood[nOffset++] = rec.getItemID();
    }

    List<Long> usersRatedNeighborhood = findUsersRatedNeighborhood(theUserID, itemID, theNeighborhood);
    if (usersRatedNeighborhood.isEmpty()) {
      // Without co-rating users every entry of the linear system would be 0/0.
      return Float.NaN;
    }

    double[] weights = getInterpolations(itemID, theNeighborhood, usersRatedNeighborhood);

    double preference = 0.0;
    double totalSimilarity = 0.0;
    for (int i = 0; i < theNeighborhood.length; i++) {
      Float pref = dataModel.getPreferenceValue(theUserID, theNeighborhood[i]);
      if (pref != null) {
        double weight = weights[i];
        preference += pref * weight;
        totalSimilarity += weight;
      }
    }
    return totalSimilarity == 0.0 ? Float.NaN : (float) (preference / totalSimilarity);
  }
}