/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.math.hadoop.similarity.cooccurrence.measures;
import org.apache.mahout.common.ClassUtils;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.junit.Test;
/**
 * Unit tests for the {@link VectorSimilarityMeasure} implementations in this package.
 *
 * <p>Every test feeds a pair of equal-length dense arrays through
 * {@code distributedSimilarity} and compares the outcome with a precomputed constant.</p>
 */
public class VectorSimilarityMeasuresTest extends MahoutTestCase {

  /** First 0/1 fixture shared by the tests that work on binary data; it is only read, never modified. */
  private static final double[] BINARY_ONE = { 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0 };

  /** Second 0/1 fixture shared by the tests that work on binary data; it is only read, never modified. */
  private static final double[] BINARY_TWO = { 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1 };

  /**
   * Computes the similarity of two dense arrays by driving a {@link VectorSimilarityMeasure}
   * through its {@code normalize}, {@code norm}, {@code aggregate} and {@code similarity} steps.
   * Judging by the method name this presumably mirrors what the distributed job does; confirm
   * against the job implementation.
   *
   * <p>Only the positions {@code 0 .. one.length - 1} are visited, and {@code one.length} is what
   * gets handed to {@code similarity} as its last argument. All callers in this class pass arrays
   * of equal length.</p>
   *
   * @param one first input, as a dense array
   * @param two second input, as a dense array
   * @param similarityMeasureClass measure to instantiate via {@link ClassUtils#instantiateAs}
   * @return the value returned by the measure's {@code similarity} method
   */
  static double distributedSimilarity(double[] one,
                                      double[] two,
                                      Class<? extends VectorSimilarityMeasure> similarityMeasureClass) {
    VectorSimilarityMeasure measure =
        ClassUtils.instantiateAs(similarityMeasureClass, VectorSimilarityMeasure.class);

    Vector first = measure.normalize(asSparseVector(one));
    Vector second = measure.normalize(asSparseVector(two));

    double firstNorm = measure.norm(first);
    double secondNorm = measure.norm(second);

    int length = one.length;
    double aggregated = 0;
    for (int index = 0; index < length; index++) {
      double left = first.get(index);
      if (left == 0) {
        continue;
      }
      double right = second.get(index);
      if (right == 0) {
        continue;
      }
      // Only positions that are non-zero in both normalized vectors contribute.
      aggregated += measure.aggregate(left, right);
    }

    return measure.similarity(aggregated, firstNorm, secondNorm, length);
  }

  /**
   * Copies the non-zero entries of a dense array into a new {@link RandomAccessSparseVector}
   * that is created with cardinality {@link Integer#MAX_VALUE}.
   *
   * @param values dense source values; zero entries are skipped
   * @return a sparse vector holding {@code values[i]} at index {@code i} for every non-zero entry
   */
  static Vector asSparseVector(double[] values) {
    Vector sparse = new RandomAccessSparseVector(Integer.MAX_VALUE);
    for (int index = 0; index < values.length; index++) {
      double value = values[index];
      if (value == 0) {
        continue;
      }
      sparse.setQuick(index, value);
    }
    return sparse;
  }

  /** Checks {@link CooccurrenceCountSimilarity} on the binary fixtures, using an exact comparison. */
  @Test
  public void testCooccurrenceCountSimilarity() {
    double actual = distributedSimilarity(BINARY_ONE, BINARY_TWO, CooccurrenceCountSimilarity.class);
    // 5 is the number of positions at which both fixtures are non-zero.
    assertEquals(5.0, actual, 0);
  }

  /** Checks {@link TanimotoCoefficientSimilarity} on the binary fixtures. */
  @Test
  public void testTanimotoCoefficientSimilarity() {
    double actual = distributedSimilarity(BINARY_ONE, BINARY_TWO, TanimotoCoefficientSimilarity.class);
    assertEquals(0.454545455, actual, EPSILON);
  }

  /** Checks {@link CityBlockSimilarity} on the binary fixtures. */
  @Test
  public void testCityblockSimilarity() {
    double actual = distributedSimilarity(BINARY_ONE, BINARY_TWO, CityBlockSimilarity.class);
    assertEquals(0.142857143, actual, EPSILON);
  }

  /** Checks {@link LoglikelihoodSimilarity} on the binary fixtures. */
  @Test
  public void testLoglikelihoodSimilarity() {
    double actual = distributedSimilarity(BINARY_ONE, BINARY_TWO, LoglikelihoodSimilarity.class);
    assertEquals(0.03320155369284261, actual, EPSILON);
  }

  /** Checks {@link CosineSimilarity} on a pair of weighted arrays. */
  @Test
  public void testCosineSimilarity() {
    double[] left = { 0, 2, 0, 0, 8, 3, 0, 6, 0, 1, 2, 2, 0 };
    double[] right = { 3, 0, 0, 0, 7, 0, 2, 2, 1, 3, 2, 1, 1 };
    double actual = distributedSimilarity(left, right, CosineSimilarity.class);
    assertEquals(0.769846046, actual, EPSILON);
  }

  /** Checks {@link PearsonCorrelationSimilarity} on a pair of weighted arrays. */
  @Test
  public void testPearsonCorrelationSimilarity() {
    double[] left = { 0, 2, 0, 0, 8, 3, 0, 6, 0, 1, 1, 2, 1 };
    double[] right = { 3, 0, 0, 0, 7, 0, 2, 2, 1, 3, 2, 4, 3 };
    double actual = distributedSimilarity(left, right, PearsonCorrelationSimilarity.class);
    assertEquals(0.5303300858899108, actual, EPSILON);
  }

  /** Checks {@link EuclideanDistanceSimilarity} on a pair of weighted arrays. */
  @Test
  public void testEuclideanDistanceSimilarity() {
    double[] left = { 0, 2, 0, 0, 8, 3, 0, 6, 0, 1, 1, 2, 1 };
    double[] right = { 3, 0, 0, 0, 7, 0, 2, 2, 1, 3, 2, 4, 4 };
    double actual = distributedSimilarity(left, right, EuclideanDistanceSimilarity.class);
    assertEquals(0.11268865367232477, actual, EPSILON);
  }
}