/*
* Copyright (C) 2016 RankSys http://ranksys.org
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
package es.uam.eps.ir.ranksys.nn.sim;
import es.uam.eps.ir.ranksys.fast.preference.FastPreferenceData;
import static java.lang.Math.pow;
import static java.lang.Math.sqrt;
import static org.apache.mahout.math.stats.LogLikelihood.logLikelihoodRatio;
/**
* Static methods for constructing similarities.
*
* @author Saúl Vargas (Saul@VargasSandoval.es)
*/
public class Similarities {

    /**
     * Creates a set-based cosine similarity with an optional asymmetry.
     * <p>
     * The score is the intersection size divided by
     * {@code na^alpha * nb^(1 - alpha)}.
     *
     * @param preferences preference data
     * @param dense true for array-based calculations, false for map-based
     * @param alpha asymmetry factor; use 0.5 for the standard cosine
     * @return similarity
     */
    public static SetSimilarity setCosine(FastPreferenceData<?, ?> preferences, boolean dense, double alpha) {
        return new SetSimilarity(preferences, dense) {

            @Override
            protected double sim(int common, int sizeA, int sizeB) {
                double weightA = pow(sizeA, alpha);
                double weightB = pow(sizeB, 1.0 - alpha);
                double scale = weightA * weightB;

                return common / scale;
            }
        };
    }

    /**
     * Creates a set-based Jaccard similarity.
     * <p>
     * The score is the intersection size divided by the union size, where the
     * union size is obtained as {@code na + nb - intersectionSize}.
     *
     * @param preferences preference data
     * @param dense true for array-based calculations, false for map-based
     * @return similarity
     */
    public static SetSimilarity setJaccard(FastPreferenceData<?, ?> preferences, boolean dense) {
        return new SetSimilarity(preferences, dense) {

            @Override
            protected double sim(int common, int sizeA, int sizeB) {
                // Integer arithmetic first, then widened to double for the division.
                double unionSize = sizeA + sizeB - common;

                return common / unionSize;
            }
        };
    }

    /**
     * Creates a vector-based cosine similarity.
     * <p>
     * The score is the product divided by the square root of the product of
     * both squared norms.
     *
     * @param preferences preference data
     * @param dense true for array-based calculations, false for map-based
     * @return similarity
     */
    public static VectorSimilarity vectorCosine(FastPreferenceData<?, ?> preferences, boolean dense) {
        return new VectorSimilarity(preferences, dense) {

            @Override
            protected double sim(double dot, double sqNormA, double sqNormB) {
                double normProduct = sqrt(sqNormA * sqNormB);

                return dot / normProduct;
            }
        };
    }

    /**
     * Creates a vector-based Jaccard similarity.
     * <p>
     * The score is the product divided by the sum of both squared norms minus
     * the product.
     *
     * @param preferences preference data
     * @param dense true for array-based calculations, false for map-based
     * @return similarity
     */
    public static VectorSimilarity vectorJaccard(FastPreferenceData<?, ?> preferences, boolean dense) {
        return new VectorSimilarity(preferences, dense) {

            @Override
            protected double sim(double dot, double sqNormA, double sqNormB) {
                double denominator = sqNormA + sqNormB - dot;

                return dot / denominator;
            }
        };
    }

    /**
     * Creates a log-likelihood similarity.
     * <p>
     * The four counts (in both sets, only in the second, only in the first,
     * in neither) are passed to Mahout's {@code logLikelihoodRatio}, and the
     * resulting ratio {@code r} is mapped to {@code 1 - 1 / (1 + r)}.
     *
     * @param preferences preference data
     * @param dense true for array-based calculations, false for map-based
     * @return similarity
     */
    public static SetSimilarity logLikelihood(FastPreferenceData<?, ?> preferences, boolean dense) {
        return new SetSimilarity(preferences, dense) {

            @Override
            protected double sim(int common, int sizeA, int sizeB) {
                long inBoth = common;
                long onlyInB = sizeB - common;
                long onlyInA = sizeA - common;
                // Remaining items: total item count minus everything in either set.
                long inNeither = data.numItems() - sizeA - sizeB + common;

                double ratio = logLikelihoodRatio(inBoth, onlyInB, onlyInA, inNeither);

                return 1.0 - 1.0 / (1.0 + ratio);
            }
        };
    }
}