/* * Copyright (C) 2015 Information Retrieval Group at Universidad Autónoma * de Madrid, http://ir.ii.uam.es * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ package es.uam.eps.ir.ranksys.nn.sim; import es.uam.eps.ir.ranksys.fast.preference.FastPreferenceData; import es.uam.eps.ir.ranksys.fast.preference.IdxPref; import it.unimi.dsi.fastutil.ints.Int2IntMap; import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap; import it.unimi.dsi.fastutil.ints.IntIterator; import it.unimi.dsi.fastutil.ints.IntOpenHashSet; import it.unimi.dsi.fastutil.ints.IntSet; import java.util.function.IntToDoubleFunction; import static java.util.stream.IntStream.range; import java.util.stream.Stream; import org.ranksys.core.util.tuples.Tuple2id; import static org.ranksys.core.util.tuples.Tuples.tuple; /** * Set similarity. Based on the intersection of item/user profiles as sets. * * @author Saúl Vargas (saul.vargas@uam.es) */ public abstract class SetSimilarity implements Similarity { /** * User-item preferences. */ protected final FastPreferenceData<?, ?> data; /** * If true dense vectors are used to calculate similarities. */ protected final boolean dense; /** * Constructor. * * @param data preference data * @param dense true for array-based calculations, false to map-based */ public SetSimilarity(FastPreferenceData<?, ?> data, boolean dense) { this.data = data; this.dense = dense; } @Override public IntToDoubleFunction similarity(int idx1) { IntSet set = new IntOpenHashSet(); data.getUidxPreferences(idx1).map(IdxPref::v1).forEach(set::add); return idx2 -> { int coo = (int) data.getUidxPreferences(idx2) .map(IdxPref::v1) .filter(set::contains) .count(); return sim(coo, set.size(), data.numItems(idx2)); }; } private Int2IntMap getIntersectionMap(int idx1) { Int2IntOpenHashMap intersectionMap = new Int2IntOpenHashMap(); intersectionMap.defaultReturnValue(0); data.getUidxPreferences(idx1) .forEach(ip -> data.getIidxPreferences(ip.v1) .forEach(up -> intersectionMap.addTo(up.v1, 1))); intersectionMap.remove(idx1); return intersectionMap; } private int[] getIntersectionArray(int idx1) { int[] intersectionMap = new int[data.numUsers()]; data.getUidxPreferences(idx1) .forEach(ip -> data.getIidxPreferences(ip.v1) .forEach(up -> intersectionMap[up.v1]++)); intersectionMap[idx1] = 0; return intersectionMap; } private Int2IntMap getFasterIntersectionMap(int uidx) { Int2IntOpenHashMap intersectionMap = new Int2IntOpenHashMap(); intersectionMap.defaultReturnValue(0); IntIterator iidxs = data.getUidxIidxs(uidx); while (iidxs.hasNext()) { IntIterator vidxs = data.getIidxUidxs(iidxs.nextInt()); while (vidxs.hasNext()) { intersectionMap.addTo(vidxs.nextInt(), 1); } } intersectionMap.remove(uidx); return intersectionMap; } private int[] getFasterIntersectionArray(int uidx) { int[] intersectionMap = new int[data.numUsers()]; IntIterator iidxs = data.getUidxIidxs(uidx); while (iidxs.hasNext()) { IntIterator vidxs = data.getIidxUidxs(iidxs.nextInt()); while (vidxs.hasNext()) { intersectionMap[vidxs.nextInt()]++; } } intersectionMap[uidx] = 0; return intersectionMap; } @Override public Stream<Tuple2id> similarElems(int idx1) { int na = data.numItems(idx1); if (data.useIteratorsPreferentially()) { if (dense) { int[] intersectionMap = getFasterIntersectionArray(idx1); return range(0, intersectionMap.length) .filter(i -> intersectionMap[i] != 0) .mapToObj(i -> tuple(i, sim(intersectionMap[i], na, data.numItems(i)))); } else { return getFasterIntersectionMap(idx1).int2IntEntrySet().stream() .map(e -> { int idx2 = e.getIntKey(); int coo = e.getIntValue(); int nb = data.numItems(idx2); return tuple(idx2, sim(coo, na, nb)); }); } } else { if (dense) { int[] intersectionMap = getIntersectionArray(idx1); return range(0, intersectionMap.length) .filter(i -> intersectionMap[i] != 0) .mapToObj(i -> tuple(i, sim(intersectionMap[i], na, data.numItems(i)))); } else { return getIntersectionMap(idx1).int2IntEntrySet().stream() .map(e -> { int idx2 = e.getIntKey(); int coo = e.getIntValue(); int nb = data.numItems(idx2); return tuple(idx2, sim(coo, na, nb)); }); } } } /** * Calculates the similarity value. * * @param intersectionSize size of the intersection of sets * @param na size of the first set * @param nb size of the second set * @return similarity value */ protected abstract double sim(int intersectionSize, int na, int nb); }