/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.data.util;
import javax.annotation.Nonnull;

import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;

import com.addthis.basis.util.Varint;
import com.addthis.codec.annotations.FieldConfig;
import com.addthis.codec.codables.BytesCodable;
import com.addthis.codec.codables.Codable;

import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;

import io.netty.buffer.ByteBuf;
import io.netty.buffer.PooledByteBufAllocator;
import io.netty.buffer.Unpooled;
/**
 * Class that helps maintain a top N list for any String Map.
 *
 * <p>Keys are mapped to non-negative counts. The capacity ("N") is not stored
 * in the object; it is passed as {@code maxsize} on every mutating call. The
 * smallest entry is cached in {@link #getMinKey()} / {@link #getMinVal()} and
 * is regenerated lazily, so both may be unset ({@code null} / {@code 0})
 * until an operation needs them.
 *
 * <p>{@link #init()} (or a decode) must run before any method that reads the
 * underlying map, because the no-argument constructor leaves the map unset.
 */
public final class KeyTopper implements Codable, BytesCodable {

    /** Serialized form of a topper that holds no entries. */
    private static final byte[] EMPTY = new byte[0];

    /** Orders entries from the largest count to the smallest count. */
    private static final Comparator<Map.Entry<String, Long>> ENTRIES_COMPARATOR =
            (left, right) -> Long.compare(right.getValue(), left.getValue());

    public KeyTopper() {
    }

    @FieldConfig(codable = true, required = true)
    private HashMap<String, Long> map;

    /**
     * Minimum value in the data structure. Not serialized
     * to byte representation. Regenerated as needed.
     */
    @FieldConfig(codable = true)
    private long minVal;

    /**
     * Minimum key in the data structure. Not serialized
     * to byte representation. Regenerated as needed.
     */
    @FieldConfig(codable = true)
    private String minKey;

    @FieldConfig(codable = true)
    private boolean lossy;

    /**
     * Error estimates are only supported in the BytesCodable
     * serialization format. They are not supported
     * in the older serialization format to preserve
     * serialization compatibility.
     */
    @FieldConfig(codable = false)
    private HashMap<String, Long> errors;

    @Override
    public String toString() {
        // Plain concatenation prints "null" instead of throwing when init() has not run yet.
        return "topper(min:" + minKey + "=" + minVal + "->" + map + ",lossy:" + lossy + ")";
    }

    /**
     * Installs a fresh, empty map and forgets the cached minimum so that it
     * cannot refer to an entry of the discarded map.
     *
     * @return this instance, for chaining
     */
    public KeyTopper init() {
        map = new HashMap<>();
        minKey = null;
        minVal = 0;
        return this;
    }

    /**
     * Selects lossy counting. In lossy mode a key that is new to a full top
     * starts counting from the current minimum value instead of from zero
     * (see {@link #increment(String, int, int)}).
     *
     * @param isLossy true to enable lossy counting
     * @return this instance, for chaining
     */
    public KeyTopper setLossy(boolean isLossy) {
        lossy = isLossy;
        return this;
    }

    /**
     * Turns error tracking on or off. Enabling always starts from an empty
     * set of error estimates; disabling discards any existing estimates.
     *
     * @param enable true to track error estimates
     * @return this instance, for chaining
     */
    public KeyTopper enableErrors(boolean enable) {
        errors = enable ? new HashMap<>() : null;
        return this;
    }

    /**
     * @return true if error estimates are being tracked
     */
    public boolean hasErrors() {
        return errors != null;
    }

    /**
     * @return true if lossy counting is enabled
     */
    public boolean isLossy() {
        return lossy;
    }

    /**
     * @return number of keys currently held
     */
    public int size() {
        return map.size();
    }

    /**
     * @param key input key
     * @return count associated with the key, or null if the key is not held
     */
    public Long get(@Nonnull String key) {
        return map.get(key);
    }

    /**
     * Retrieve an upper bound on the error
     * associated with an estimate or null
     * if errors have not been enabled.
     *
     * @param key input key
     * @return error estimate (0 when none was recorded for the key) or null
     */
    public Long getError(@Nonnull String key) {
        if (errors == null) {
            return null;
        }
        Long error = errors.get(key);
        if (error == null) {
            return 0L;
        }
        return error;
    }

    /**
     * Returns the entries sorted from greatest to least count.
     *
     * <p>The array elements are the entry objects handed out by the backing
     * map's entry set, not copies.
     *
     * @return newly allocated array of the current entries
     */
    @SuppressWarnings("unchecked")
    public Map.Entry<String, Long>[] getSortedEntries() {
        // Generic array creation is not expressible; the raw array only ever holds map entries.
        Map.Entry<String, Long>[] entries = map.entrySet().toArray(new Map.Entry[map.size()]);
        Arrays.sort(entries, ENTRIES_COMPARATOR);
        return entries;
    }

    /**
     * Recreate the minimum key and minimum value if the map
     * contains one or more elements and current minimum key is null
     * or the {@code force} parameter is true. Use {@code force}
     * when the minimum key has been evicted from the data structure
     * or the count associated with the minimum key has been updated.
     *
     * <p>When the map is empty the cached minimum is cleared.
     *
     * Postcondition: Either the top N is empty or the minimum key
     * is a non-null value.
     *
     * @param force if true then always recreate minimum key and value
     */
    private void recreateMinimum(boolean force) {
        if (map.isEmpty()) {
            // Nothing to point at; drop anything left over from earlier contents.
            minKey = null;
            minVal = 0;
            return;
        }
        if (minKey == null || force) {
            boolean found = false;
            String smallestKey = null;
            long smallestVal = Long.MAX_VALUE;
            for (Map.Entry<String, Long> entry : map.entrySet()) {
                long candidate = entry.getValue();
                // The first entry is always taken so that a map holding only
                // Long.MAX_VALUE counts still yields a minimum key.
                if (!found || candidate < smallestVal) {
                    found = true;
                    smallestKey = entry.getKey();
                    smallestVal = candidate;
                }
            }
            minKey = smallestKey;
            minVal = smallestVal;
        }
        assert minKey != null;
    }

    /**
     * Adds 1 to the count of 'id'. Adds 'id' to the top N if: 1) there are
     * more empty slots or 2) count > smallest top count in the list
     *
     * @param id      key to increment
     * @param maxsize maximum number of keys to retain; must be positive
     * @return element dropped from top or null if accepted into top with no
     *         drops
     */
    public String increment(@Nonnull String id, int maxsize) {
        return increment(id, 1, maxsize);
    }

    /**
     * Adds 'id' to the top N if: 1) there are more empty slots or 2) count >
     * smallest top count in the list
     * This one increments weight
     *
     * <p>A key that is not yet held starts from 0, except in lossy mode when
     * the top is already full: then it starts from the current minimum value.
     * The resulting count is handed to {@link #update(String, long, int)}.
     *
     * @param id      key to increment
     * @param weight  amount to add to the key's count
     * @param maxsize maximum number of keys to retain; must be positive
     * @return element dropped from top or null if accepted into top with no
     *         drops
     */
    public String increment(@Nonnull String id, int weight, int maxsize) {
        Long current = map.get(id);
        long base;
        if (current != null) {
            base = current;
        } else if (lossy && map.size() >= maxsize) {
            recreateMinimum(false);
            base = minVal;
        } else {
            base = 0L;
        }
        return update(id, base + weight, maxsize);
    }

    /**
     * Increments the count for 'ID' in the top map if 'ID' already exists in
     * the map. This method is used if you want to increment a lossy top without
     * removing an element. Used when there is a two stage update for new data
     * elements
     *
     * @param id the id to increment if it already exists in the map
     * @return whether the element was in the map
     */
    public boolean incrementExisting(@Nonnull String id) {
        Long value = map.get(id);
        if (value == null) {
            return false;
        }
        map.put(id, value + 1L);
        // The cached minimum is out of date once the minimum key itself grows.
        if (id.equals(minKey)) {
            recreateMinimum(true);
        }
        return true;
    }

    /**
     * Adds 'id' the top N if: (1) there are more empty slots or
     * (2) value >= minimum value in the top N.
     *
     * <p>When a new key displaces the minimum key and error tracking is
     * enabled, the displaced key's count is recorded as the error estimate
     * of the new key.
     *
     * @param id      key to insert or update
     * @param value   count to associate with the key; must be non-negative
     * @param maxsize maximum number of keys to retain; must be positive
     * @return element dropped from top or null if accepted into top with no
     *         drops. returns the offered key if it was rejected for update
     *         or inclusion in the top.
     * @throws IllegalArgumentException if {@code value} is negative or
     *                                  {@code maxsize} is not positive
     */
    public String update(@Nonnull String id, long value, int maxsize) {
        Preconditions.checkArgument(value >= 0, "Argument was %s but expected nonnegative", value);
        Preconditions.checkArgument(maxsize > 0, "Argument was %s but expected positive integer", maxsize);
        /** There is guaranteed capacity to update or insert value */
        if (map.size() < maxsize) {
            map.put(id, value);
            if (value < minVal) {
                /** new minimum key has been identified */
                minKey = id;
                minVal = value;
            } else if (id.equals(minKey)) {
                /** recalculate min if the minimum key was updated */
                recreateMinimum(true);
            }
            return null;
        }
        /** compute minimum key and value if they are missing */
        recreateMinimum(false);
        /** not eligible for top */
        if (value < minVal) {
            return id;
        }
        /** insert or update key. Evict if necessary */
        String evicted = null;
        /** only remove if topN is full and we're not updating an existing entry */
        boolean remove = !map.containsKey(id) && (minKey != null);
        if (remove) {
            map.remove(minKey);
            if (hasErrors()) {
                errors.remove(minKey);
                errors.put(id, minVal);
            }
            evicted = minKey;
        }
        /** update or add entry */
        map.put(id, value);
        /** recalculate min *only* if the min entry was removed or updated */
        if (remove || id.equals(minKey)) {
            recreateMinimum(true);
        }
        return evicted;
    }

    /**
     * Encode the data structure into a serialized representation.
     * Encode the number of elements followed by each (key, value)
     * pair. If the error estimation is used then encode the special
     * byte value 0 (since we will never encode 0 as the size
     * of a non-empty map) at the head of the byte array, and follow
     * each value with the error estimate of its key (0 when none is recorded).
     *
     * <p>Keys are written as a length-prefixed UTF-8 byte sequence. An empty
     * topper encodes to a zero-length array regardless of whether error
     * estimation is enabled.
     *
     * @param version not used by this implementation
     * @return serialized bytes; zero-length when the topper holds no entries
     * @throws NullPointerException if the map contains a null key
     */
    @Override public byte[] bytesEncode(long version) {
        if (map.isEmpty()) {
            return EMPTY;
        }
        ByteBuf byteBuf = PooledByteBufAllocator.DEFAULT.buffer();
        try {
            boolean withErrors = hasErrors();
            if (withErrors) {
                byteBuf.writeByte(0);
            }
            Varint.writeUnsignedVarInt(map.size(), byteBuf);
            for (Map.Entry<String, Long> mapEntry : map.entrySet()) {
                String key = mapEntry.getKey();
                if (key == null) {
                    throw new NullPointerException("KeyTopper cannot encode null key");
                }
                byte[] keyBytes = key.getBytes(StandardCharsets.UTF_8);
                Varint.writeUnsignedVarInt(keyBytes.length, byteBuf);
                byteBuf.writeBytes(keyBytes);
                Varint.writeUnsignedVarLong(mapEntry.getValue(), byteBuf);
                if (withErrors) {
                    Long error = errors.get(key);
                    long errorValue = (error != null) ? error : 0L;
                    Varint.writeUnsignedVarLong(errorValue, byteBuf);
                }
            }
            byte[] retBytes = new byte[byteBuf.readableBytes()];
            byteBuf.readBytes(retBytes);
            return retBytes;
        } finally {
            // The pooled buffer must be handed back on every path.
            byteBuf.release();
        }
    }

    /**
     * Replaces the contents of this topper with the entries encoded in
     * {@code b}, using the layout produced by {@link #bytesEncode(long)}.
     * A leading 0 byte turns error tracking on; otherwise it is turned off.
     * The cached minimum key and value are cleared because they are not part
     * of the byte representation and are regenerated as needed.
     *
     * @param b       serialized bytes; a zero-length array yields an empty topper
     * @param version not used by this implementation
     */
    @Override
    public void bytesDecode(byte[] b, long version) {
        // Drop all state that belongs to the previous contents of this instance.
        errors = null;
        minKey = null;
        minVal = 0;
        if (b.length == 0) {
            map = new HashMap<>();
            return;
        }
        ByteBuf byteBuf = Unpooled.wrappedBuffer(b);
        try {
            byte marker = byteBuf.getByte(byteBuf.readerIndex());
            if (marker == 0) {
                errors = new HashMap<>();
                // Consume the sentinel byte value
                byteBuf.readByte();
            }
            int mapSize = Varint.readUnsignedVarInt(byteBuf);
            try {
                if (mapSize > 0) {
                    map = new HashMap<>(mapSize);
                    for (int i = 0; i < mapSize; i++) {
                        int keyLength = Varint.readUnsignedVarInt(byteBuf);
                        byte[] keybytes = new byte[keyLength];
                        byteBuf.readBytes(keybytes);
                        String k = new String(keybytes, StandardCharsets.UTF_8);
                        long value = Varint.readUnsignedVarLong(byteBuf);
                        map.put(k, value);
                        if (hasErrors()) {
                            long error = Varint.readUnsignedVarLong(byteBuf);
                            // Zero is the "no estimate" placeholder and is not stored.
                            if (error != 0) {
                                errors.put(k, error);
                            }
                        }
                    }
                } else {
                    map = new HashMap<>();
                }
            } catch (Exception e) {
                throw Throwables.propagate(e);
            }
        } finally {
            byteBuf.release();
        }
    }

    /**
     * @return cached minimum value; only meaningful while {@link #getMinKey()}
     *         is non-null
     */
    public long getMinVal() {
        return minVal;
    }

    /**
     * @return cached minimum key, or null if it has not been (re)generated yet
     */
    public String getMinKey() {
        return minKey;
    }
}