/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.data.filter.value;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import com.addthis.codec.annotations.FieldConfig;
/**
 * This {@link AbstractValueFilter ValueFilter} <span class="hydra-summary">filters values that occur fewer than {@link #minHits minHits} times
 * or more than {@link #maxHits maxHits} times</span>.
 * <p>A value is emitted only after it has been observed for {@link #minHits minHits} instances.
 * The value will continue to be emitted until it has been observed for {@link #maxHits maxHits} instances.
 * Concretely, the first {@link #minHits minHits} observations of a value are dropped and
 * observations {@code minHits + 1} through {@code maxHits} are emitted.
 * At most {@link #maxKeys maxKeys} can be tracked at any one time. Assigning {@link #maxHits maxHits}
 * a value of 0 will disable filtering on the upper bound. Assigning {@link #maxKeys maxKeys} a value
 * of 0 will disable the bound on the number of unique keys. The {@link #whitelist whitelist} can be used
 * to specify values that are never to be filtered.</p>
 * <p>Whitelisted and {@code null} values are passed through without being counted.</p>
 * <p>Example:</p>
 * <pre>
 * {from:"IP", band-pass {maxKeys:100000, minHits:10, maxHits:100}}
 * </pre>
 *
 * @user-reference
 */
@SuppressWarnings("serial")
public class ValueFilterBandPass extends StringFilter {

    /**
     * The minimum number of times a value is observed before it is emitted. Default is 0.
     */
    @FieldConfig(codable = true)
    private int minHits;

    /**
     * The maximum number of times a value can be observed. Default is 0 which disables the upper
     * bound.
     */
    @FieldConfig(codable = true)
    private int maxHits;

    /**
     * The maximum number of unique values that can be tracked.
     * When this threshold is exceeded, then the oldest observed value
     * resets its count information. Default is 0 which allows an unbounded number of keys.
     */
    @FieldConfig(codable = true)
    private int maxKeys;

    /**
     * Stores a set of values that are not to be filtered. May be left unset ({@code null}),
     * in which case no value is exempt.
     */
    @FieldConfig(codable = true)
    private HashSet<String> whitelist;

    /**
     * Observation count per value, kept in insertion order. Doubles as the monitor that
     * guards every read and write of the counts in {@link #filter(String)}.
     */
    private final LinkedHashMap<String, Integer> map = new LinkedHashMap<String, Integer>() {
        /**
         * Drops the least recently inserted value once more than {@link #maxKeys maxKeys}
         * values are tracked; never evicts when {@code maxKeys} is 0 or negative.
         */
        @Override
        protected boolean removeEldestEntry(Map.Entry<String, Integer> eldest) {
            return maxKeys > 0 && size() > maxKeys;
        }
    };

    /**
     * Records one observation of {@code value} and decides whether it falls inside the band.
     *
     * @param value the value to test; may be {@code null}
     * @return {@code value} itself when it is {@code null}, whitelisted, or its number of
     *         prior observations is at least {@code minHits} and (unless {@code maxHits} is 0)
     *         less than {@code maxHits}; otherwise {@code null}
     */
    @Override
    public String filter(String value) {
        if (value == null) {
            return value;
        }
        if (whitelist != null && whitelist.contains(value)) {
            return value;
        }
        synchronized (map) {
            Integer previous = map.get(value);
            // Number of times this value was seen before the current call.
            int seen = (previous == null) ? 0 : previous;
            // Saturate instead of wrapping: an overflowed (negative) count would make a value
            // that has been passing the lower bound start failing it when maxHits is 0.
            int updated = (seen == Integer.MAX_VALUE) ? seen : seen + 1;
            map.put(value, updated);
            boolean reachedLowerBound = minHits == 0 || seen >= minHits;
            boolean underUpperBound = maxHits == 0 || seen < maxHits;
            return (reachedLowerBound && underUpperBound) ? value : null;
        }
    }
}