package ldbc.snb.datagen.generator.tools; import ldbc.snb.datagen.objects.Knows; import java.util.ArrayList; import java.util.Random; import java.util.Set; /** * Created by aprat on 17/06/15. */ public class MinHash<T> { private int numHashes_ = 0; private int a[]; private int b[]; private int p[]; private Random random_; public MinHash( int numHashes, int seed ) { numHashes_ = numHashes; random_ =new Random(); a = new int[numHashes]; b = new int[numHashes]; p = new int[numHashes]; for(int i = 0; i < numHashes; ++i ) { a[i] = random_.nextInt(); b[i] = random_.nextInt(); p[i] = random_.nextInt(); } } public ArrayList<Long> minHash( Set<Long> set ) { ArrayList<Long> minHashes = new ArrayList<Long>(); for(int i = 0; i < numHashes_; ++i ) { long min = Long.MAX_VALUE; long minl = 0; for( Long l : set ) { long hash = (a[i]*l+b[i]) % p[i]; if(hash <= min) { min = hash; minl = l; } } minHashes.add(minl); } return minHashes; } }