/* * Copyright (C) 2016 RankSys http://ranksys.org * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ package es.uam.eps.ir.ranksys.nn.sim; import es.uam.eps.ir.ranksys.fast.preference.FastPreferenceData; import static java.lang.Math.pow; import static java.lang.Math.sqrt; import static org.apache.mahout.math.stats.LogLikelihood.logLikelihoodRatio; /** * Static methods from constructing similarities. * * @author Saúl Vargas (Saul@VargasSandoval.es) */ public class Similarities { /** * Set cosine similarity. * * @param preferences preference data * @param dense true for array-based calculations, false to map-based * @param alpha asymmetry factor, set to 0.5 to standard cosine. * @return similarity */ public static SetSimilarity setCosine(FastPreferenceData<?, ?> preferences, boolean dense, double alpha) { return new SetSimilarity(preferences, dense) { @Override protected double sim(int intersectionSize, int na, int nb) { return intersectionSize / (pow(na, alpha) * pow(nb, 1.0 - alpha)); } }; } /** * Set Jaccard similarity. * * @param preferences preference data * @param dense true for array-based calculations, false to map-based * @return similarity */ public static SetSimilarity setJaccard(FastPreferenceData<?, ?> preferences, boolean dense) { return new SetSimilarity(preferences, dense) { @Override protected double sim(int intersectionSize, int na, int nb) { return intersectionSize / (double) (na + nb - intersectionSize); } }; } /** * Vector cosine similarity. * * @param preferences preference data * @param dense true for array-based calculations, false to map-based * @return similarity */ public static VectorSimilarity vectorCosine(FastPreferenceData<?, ?> preferences, boolean dense) { return new VectorSimilarity(preferences, dense) { @Override protected double sim(double product, double norm2A, double norm2B) { return product / sqrt(norm2A * norm2B); } }; } /** * Vector Jaccard similarity. * * @param preferences preference data * @param dense true for array-based calculations, false to map-based * @return similarity */ public static VectorSimilarity vectorJaccard(FastPreferenceData<?, ?> preferences, boolean dense) { return new VectorSimilarity(preferences, dense) { @Override protected double sim(double product, double norm2A, double norm2B) { return product / (norm2A + norm2B - product); } }; } /** * Log likelihood similarity. * * @param preferences preference data * @param dense true for array-based calculations, false to map-based * @return similarity */ public static SetSimilarity logLikelihood(FastPreferenceData<?, ?> preferences, boolean dense) { return new SetSimilarity(preferences, dense) { @Override protected double sim(int intersectionSize, int na, int nb) { double logLikelihood = logLikelihoodRatio((long) intersectionSize, (long) (nb - intersectionSize), (long) (na - intersectionSize), (long) (data.numItems() - na - nb + intersectionSize)); return 1.0 - 1.0 / (1.0 + logLikelihood); } }; } }