package org.streaminer.stream.frequency;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.streaminer.stream.frequency.util.CountEntry;
/**
*
* This is a simple implementation of a stream-counting model. The model is
* updatable and will - for a given threshold <code>k</code> - approximate the
* counts of the top-k elements within an example-set/stream.
*
* @author Christian Bockermann <christian.bockermann@udo.edu>
*
*/
public class SimpleTopKCounting<T> extends BaseFrequency<T> {
    private static final long serialVersionUID = 4365995573179300743L;
    private static final Logger LOG = LoggerFactory.getLogger(SimpleTopKCounting.class);

    /** Maps each currently monitored item to its (approximate) counter. */
    private HashMap<T, Long> dataStructure;

    /** The maximum number of distinct items monitored at the same time. */
    private int k;

    /**
     * The number of calls to {@link #add(Object, long)} processed so far.
     * Note that this counts calls, not the sum of the increments.
     */
    private Long elementsCounted = 0L;

    /**
     * Creates a counting model for streams that monitors at most
     * <code>k</code> distinct items at any time in order to approximate the
     * item counts within the stream.
     *
     * @param k The maximum number of items that may be tracked/monitored by
     *          the model.
     */
    public SimpleTopKCounting(int k) {
        this.k = k;
        this.dataStructure = new HashMap<T, Long>();
        LOG.debug("Creating top-k counter with k = {}", k);
    }

    /**
     * Incorporates one observation of <code>item</code> into the model.
     * <ul>
     * <li>If the item is already monitored, its counter is raised by
     * <code>incrementCount</code>.</li>
     * <li>If it is not monitored and fewer than <code>k</code> items are
     * tracked, it is inserted with counter <code>incrementCount</code>.</li>
     * <li>Otherwise the monitored item with the smallest counter is evicted
     * and the new item takes its place with a counter of
     * <code>evictedCount + incrementCount</code>.</li>
     * </ul>
     *
     * @param item           the observed item
     * @param incrementCount the amount to add to the item's counter
     * @return <code>true</code> if the item was not monitored before this
     *         call, <code>false</code> if an existing counter was updated
     * @throws FrequencyException declared by the overridden method; this
     *         implementation contains no statement that throws it
     */
    @Override
    public boolean add(T item, long incrementCount) throws FrequencyException {
        elementsCounted++;
        if (elementsCounted % 100 == 0) {
            LOG.debug(" space used: {}/{}", dataStructure.size(), k);
        }

        // Stored values are never null, so a null result means "not monitored".
        Long current = dataStructure.get(item);
        if (current != null) {
            LOG.debug("Incrementing count of top-k element {}", item);
            dataStructure.put(item, current + incrementCount);
            return false;
        }

        if (dataStructure.size() >= k) {
            // we must not monitor more than k elements
            LOG.debug("Need to replace the most in-frequent top-k element with {}", item);
            long evictedCount = evictLeastFrequent();
            dataStructure.put(item, evictedCount + incrementCount);
        } else {
            // ok, there is space left in our monitor-list
            LOG.debug("Enough space to add new element {}", item);
            LOG.debug(" space used: {}/{}", dataStructure.size(), k);
            // Start from the supplied increment rather than a hard-coded 1,
            // consistent with the update and replacement branches.
            dataStructure.put(item, incrementCount);
        }
        return true;
    }

    /**
     * Removes the monitored entry with the smallest counter (the first such
     * entry in map iteration order on ties) and returns that counter.
     *
     * @return the counter of the removed entry, or 0 if nothing is monitored
     */
    private long evictLeastFrequent() {
        // Track the entry itself rather than its key: a null key is a legal
        // HashMap key and must not be confused with "nothing selected yet".
        Map.Entry<T, Long> least = null;
        for (Map.Entry<T, Long> entry : dataStructure.entrySet()) {
            if (least == null || entry.getValue() < least.getValue()) {
                least = entry;
            }
        }
        if (least == null) {
            return 0L;
        }
        // Read the counter before removal; the entry is not used afterwards.
        long min = least.getValue();
        dataStructure.remove(least.getKey());
        return min;
    }

    /**
     * Returns the current counter of <code>item</code>.
     *
     * @param item the item to look up
     * @return the stored counter, or 0 if the item is not currently monitored
     */
    @Override
    public long estimateCount(T item) {
        Long count = dataStructure.get(item);
        return count == null ? 0L : count;
    }

    /**
     * @param item the item to look up
     * @return <code>true</code> if the item is currently monitored
     */
    public boolean contains(T item) {
        return dataStructure.containsKey(item);
    }

    /**
     * @return the number of calls to {@link #add(Object, long)} processed so
     *         far (each call counts once, regardless of its increment)
     */
    @Override
    public long size() {
        return elementsCounted;
    }

    /**
     * @return the key set of the internal map, i.e. the currently monitored
     *         items; this is the map's own view, not a copy
     */
    @Override
    public Set<T> keySet() {
        return dataStructure.keySet();
    }

    /**
     * Returns one {@link CountEntry} for every currently monitored item.
     *
     * NOTE(review): <code>minSupport</code> is not applied by this
     * implementation - all monitored items are returned. Confirm the intended
     * semantics of the threshold before adding a filter.
     *
     * @param minSupport currently unused
     * @return a new list holding every monitored item with its counter
     */
    public List<CountEntry<T>> getFrequentItems(double minSupport) {
        List<CountEntry<T>> result = new ArrayList<CountEntry<T>>(dataStructure.size());
        for (Map.Entry<T, Long> entry : dataStructure.entrySet()) {
            result.add(new CountEntry<T>(entry.getKey(), entry.getValue()));
        }
        return result;
    }
}