/*
 * Seldon -- open source prediction engine
 * =======================================
 * Copyright 2011-2015 Seldon Technologies Ltd and Rummble Ltd (http://www.seldon.io/)
 *
 **********************************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 **********************************************************************************************
 */
package io.seldon.stream.itemsim;

import io.seldon.stream.itemsim.minhash.Hasher;

import java.util.ArrayList;
import java.util.List;
import java.util.function.Predicate;

/**
 * Tracks the smallest hash value seen among ids added within a rolling window.
 *
 * <p>Internally a list of {@link MinHashEntry} is kept whose {@code minHash}
 * values strictly increase from the head to the tail: every {@link #add} first
 * drops tail entries with a larger hash, then either merges into the tail
 * (equal hash) or appends. The head is therefore the smallest retained hash.
 *
 * <p>{@link #add}, {@link #getMinHash} and {@link #getCount} are
 * {@code synchronized} on this instance; {@link #toString()} is not.
 */
public class RollingWindowedMinHash implements MinHasher {

    /** Retained entries, smallest hash first. */
    List<MinHashEntry> hashes = new ArrayList<>();
    /** Maps an id to the hash value that is compared and stored. */
    Hasher hasher;
    /** Window length, expressed in the same scale as the {@code time} arguments. */
    long window;

    /**
     * @param h      hasher applied to every id passed to {@link #add}
     * @param window window length; entries whose time is at or before
     *               {@code time - window} are discarded
     */
    public RollingWindowedMinHash(Hasher h, long window) {
        this.hasher = h;
        this.window = window;
    }

    /**
     * Records an occurrence of {@code id} at {@code time}.
     *
     * <p>Tail entries with a hash larger than the new one are removed and their
     * counts are folded into the entry that receives this occurrence. Expired
     * entries are pruned afterwards, relative to {@code time}.
     *
     * @param id   identifier to hash
     * @param time timestamp of the occurrence
     */
    @Override
    public synchronized void add(long id, long time) {
        long hash = hasher.hash(id);

        // Pop every tail entry that the new hash undercuts, remembering how
        // many occurrences those entries represented.
        int absorbed = 0;
        int tail = hashes.size() - 1;
        while (tail >= 0 && hashes.get(tail).minHash > hash) {
            absorbed += hashes.remove(tail).count;
            tail--;
        }

        if (tail < 0) {
            // Everything was undercut (or the list was empty): start afresh.
            hashes = new ArrayList<>();
            hashes.add(new MinHashEntry(hash, time, absorbed + 1));
        } else {
            // Only tail entries were removed, so 'tail' is now the last index.
            MinHashEntry newest = hashes.get(tail);
            if (newest.minHash == hash) {
                newest.time = time;
                newest.count += absorbed + 1;
            } else {
                hashes.add(new MinHashEntry(hash, time, absorbed + 1));
            }
        }

        removeOldEntries(time);
    }

    /**
     * Drops every entry whose time is at or before {@code time - window}.
     *
     * @param time reference timestamp marking the end of the window
     */
    private void removeOldEntries(long time) {
        final long cutoff = time - window;
        Predicate<MinHashEntry> expired = entry -> entry.time <= cutoff;
        hashes.removeIf(expired);
    }

    /**
     * One retained hash value together with the time it was last recorded and
     * the number of occurrences it accounts for.
     */
    public static class MinHashEntry {
        long minHash;
        long time;
        int count;

        /**
         * @param minHash hash value held by this entry
         * @param time    timestamp associated with the entry
         * @param count   number of occurrences attributed to the entry
         */
        public MinHashEntry(long minHash, long time, int count) {
            super();
            this.minHash = minHash;
            this.time = time;
            this.count = count;
        }

        @Override
        public String toString() {
            StringBuilder text = new StringBuilder("MinHashEntry [minHash=");
            text.append(minHash);
            text.append(", time=").append(time);
            text.append(", count=").append(count);
            text.append("]");
            return text.toString();
        }
    }

    /**
     * Prunes expired entries relative to {@code time} and reports the smallest
     * retained hash.
     *
     * @param time reference timestamp marking the end of the window
     * @return the hash of the head entry, or {@code null} when nothing is retained
     */
    @Override
    public synchronized Long getMinHash(long time) {
        removeOldEntries(time);
        if (hashes.isEmpty()) {
            return null;
        }
        return hashes.get(0).minHash;
    }

    /**
     * Prunes expired entries relative to {@code time} and totals the counts of
     * the entries that remain.
     *
     * @param time reference timestamp marking the end of the window
     * @return sum of {@code count} over all retained entries (0 when empty)
     */
    @Override
    public synchronized int getCount(long time) {
        removeOldEntries(time);
        return hashes.stream().mapToInt(entry -> entry.count).sum();
    }

    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("RollingWindowedMinHash [hashes=");
        text.append(hashes);
        text.append(", window=").append(window);
        text.append("]");
        return text.toString();
    }
}