package org.streaminer.stream.frequency;
import org.streaminer.stream.frequency.util.CountEntry;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
/**
* <p>
* A naive counter implementation. It simply keeps an exact counter
* for each element, which is incremented in a deterministic way by the {@code add}
* method. This implementation is not meant for a "live environment"; instead
* it is intended to be used as "the truth" for evaluation purposes.
* </p>
*
* @author Benedikt Kulmann, office@kulmann.biz
* @param <T>
*/
public class RealCounting<T> extends BaseFrequency<T> {
    /**
     * Top-K parameter supplied at construction time. It is stored here but not
     * read by any method of this class.
     */
    private final int k;

    /**
     * Support threshold supplied at construction time. It is also handed to the
     * superclass constructor; {@link #getFrequentItems(double)} uses its own
     * argument rather than this field.
     */
    private final double minSupport;

    /**
     * Total number of occurrences of all elements so far, i.e. the sum of every
     * increment passed to {@link #add(Object, long)}.
     */
    private long elementsCounted;

    /**
     * Internal data structure holding the exact count entry of each element.
     */
    private final Map<T, CountEntry<T>> dataStructure;

    /**
     * Creates a counter with a minimum support of {@code 0.0} and {@code k = 0}.
     */
    public RealCounting() {
        this(0.0);
    }

    /**
     * Creates a counter with the given minimum support and {@code k = 0}.
     *
     * @param minSupport support threshold, as a fraction of the total count
     */
    public RealCounting(double minSupport) {
        this(minSupport, 0);
    }

    /**
     * Creates a counter with a minimum support of {@code 0.0} and the given k.
     *
     * @param k top-K parameter
     */
    public RealCounting(int k) {
        this(0.0, k);
    }

    /**
     * Creates an empty counter.
     *
     * @param minSupport support threshold, as a fraction of the total count;
     *                   also passed to the superclass constructor
     * @param k          top-K parameter
     */
    public RealCounting(double minSupport, int k) {
        super(minSupport);
        this.minSupport = minSupport;
        this.k = k;
        this.elementsCounted = 0;
        this.dataStructure = new ConcurrentHashMap<T, CountEntry<T>>();
    }

    /**
     * Adds {@code incrementCount} occurrences of {@code item}.
     *
     * <p>The per-item frequency and the running total are both advanced by
     * {@code incrementCount}, so the total always equals the sum of all item
     * frequencies. The lookup, the entry update and the total update are
     * separate, non-atomic steps.</p>
     *
     * @param item           the item to count; the backing
     *                       {@link ConcurrentHashMap} rejects {@code null} keys
     * @param incrementCount the number of occurrences to add
     * @return {@code true} if no counter existed for the item before this call,
     *         {@code false} if an existing counter was incremented
     */
    @Override
    public boolean add(T item, long incrementCount) {
        CountEntry<T> entry = dataStructure.get(item);
        boolean isNew = (entry == null);
        if (isNew) {
            dataStructure.put(item, new CountEntry<T>(item, incrementCount));
        } else {
            entry.frequency += incrementCount;
        }
        // Keep the total in step with the weight that was actually added, not
        // with the number of calls; otherwise weighted adds would distort
        // size() and the relative support computed in isFrequent().
        elementsCounted += incrementCount;
        return isNew;
    }

    /**
     * Returns the exact count recorded for an item.
     *
     * @param item the item to look up
     * @return the recorded frequency, or {@code 0} if the item was never added
     */
    @Override
    public long estimateCount(T item) {
        CountEntry<T> entry = dataStructure.get(item);
        return entry == null ? 0L : entry.frequency;
    }

    /**
     * Returns whether a counter exists for the given item.
     *
     * @param item the item to look up
     * @return {@code true} if the item has been added at least once
     */
    public boolean contains(T item) {
        return dataStructure.containsKey(item);
    }

    /**
     * Returns the total number of occurrences counted so far.
     *
     * @return the sum of all increments passed to {@link #add(Object, long)}
     */
    @Override
    public long size() {
        return elementsCounted;
    }

    /**
     * Returns the set of distinct items that have a counter.
     *
     * @return the key set of the internal map (a live view, not a copy)
     */
    @Override
    public Set<T> keySet() {
        return dataStructure.keySet();
    }

    /**
     * Collects every item whose frequency reaches the given relative support.
     *
     * @param minSupport threshold as a fraction of the total count
     * @return a new list with the count entries of all frequent items, in the
     *         iteration order of the internal map
     */
    public List<CountEntry<T>> getFrequentItems(double minSupport) {
        List<CountEntry<T>> frequentItems = new ArrayList<CountEntry<T>>();
        for (CountEntry<T> entry : dataStructure.values()) {
            if (isFrequent(entry.frequency, minSupport)) {
                frequentItems.add(entry);
            }
        }
        return frequentItems;
    }

    /**
     * Determines whether a frequency is currently (i.e. in relation to the
     * current total number of counted occurrences) frequent for a threshold.
     *
     * @param frequency  the frequency in question
     * @param minSupport the threshold as a fraction of the total count
     * @return {@code true} if {@code frequency >= minSupport * total}
     */
    private boolean isFrequent(long frequency, double minSupport) {
        return frequency >= minSupport * elementsCounted;
    }

    /**
     * Renders all counters as {@code RealCountingModel[key entry;key entry;...]}.
     *
     * @return a textual dump of the internal map
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder("RealCountingModel[");
        for (Map.Entry<T, CountEntry<T>> mapping : dataStructure.entrySet()) {
            sb.append(mapping.getKey()).append(" ").append(mapping.getValue()).append(";");
        }
        sb.append("]");
        return sb.toString();
    }
}