/***************************************************************************
 *   Copyright (C) 2011 by H-Store Project                                 *
 *   Brown University                                                      *
 *   Massachusetts Institute of Technology                                 *
 *   Yale University                                                       *
 *                                                                         *
 *   http://hstore.cs.brown.edu/                                           *
 *                                                                         *
 *   Permission is hereby granted, free of charge, to any person obtaining *
 *   a copy of this software and associated documentation files (the       *
 *   "Software"), to deal in the Software without restriction, including   *
 *   without limitation the rights to use, copy, modify, merge, publish,   *
 *   distribute, sublicense, and/or sell copies of the Software, and to    *
 *   permit persons to whom the Software is furnished to do so, subject to *
 *   the following conditions:                                             *
 *                                                                         *
 *   The above copyright notice and this permission notice shall be        *
 *   included in all copies or substantial portions of the Software.       *
 *                                                                         *
 *   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       *
 *   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    *
 *   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*
 *   IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR     *
 *   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
 *   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
 *   OTHER DEALINGS IN THE SOFTWARE.                                       *
 ***************************************************************************/
package edu.brown.statistics;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.log4j.Logger;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONStringer;
import org.voltdb.VoltType;
import org.voltdb.VoltTypeException;
import org.voltdb.catalog.Database;
import org.voltdb.utils.VoltTypeUtil;

import edu.brown.utils.CollectionUtil;
import edu.brown.utils.JSONUtil;

/**
 * A simple generic Histogram that maps each distinct value to the number of
 * times it has been sampled. The counts are stored in a {@link HashMap}; the
 * derived min/max information is recomputed lazily whenever the histogram has
 * been modified since the last time it was requested.
 *
 * @author svelagap
 * @author pavlo
 */
public class ObjectHistogram<X> implements Histogram<X> {
    private static final Logger LOG = Logger.getLogger(ObjectHistogram.class);

    /**
     * The members that are written by {@link #toJSON(JSONStringer)} and read
     * back by {@link #fromJSON(JSONObject, Database)}
     */
    public enum Members {
        VALUE_TYPE,
        HISTOGRAM,
        NUM_SAMPLES,
        KEEP_ZERO_ENTRIES,
    }

    /** The inferred type of the values; INVALID until the first value is added */
    protected VoltType value_type = VoltType.INVALID;
    /** Value -> Count */
    protected final Map<X, Long> histogram = new HashMap<X, Long>();
    /** Running sum of all the deltas passed to the put/dec methods */
    protected int num_samples = 0;
    /** Set whenever the derived min/max fields below may be stale */
    private transient boolean dirty = false;

    private transient Map<Object, String> debug_names;
    private transient boolean debug_percentages = false;

    /**
     * The Min/Max values are the smallest/greatest values we have seen based
     * on some natural ordering
     */
    private transient Comparable<X> min_value;
    protected transient Comparable<X> max_value;

    /**
     * The Min/Max counts are the values that have the smallest/greatest number of
     * occurrences in the histogram
     */
    protected transient long min_count = 0;
    protected transient List<X> min_count_values;
    protected transient long max_count = 0;
    protected transient List<X> max_count_values;

    /**
     * A switchable flag that determines whether entries whose count drops to
     * zero are kept in the histogram (true) or removed from it (false)
     */
    protected boolean keep_zero_entries = false;

    /**
     * Constructor
     */
    public ObjectHistogram() {
        // Nothing...
    }

    /**
     * Constructor
     * @param keepZeroEntries whether entries with a zero count should be retained
     */
    public ObjectHistogram(boolean keepZeroEntries) {
        this.keep_zero_entries = keepZeroEntries;
    }

    /**
     * Copy Constructor.
     * This is the same as creating an empty histogram and then calling
     * {@link #put(Histogram)} with the other histogram. Note that this means
     * that only the entries with a positive count are copied and that the
     * keep-zero-entries flag of the other histogram is not copied.
     * @param other
     */
    public ObjectHistogram(ObjectHistogram<X> other) {
        assert(other != null);
        this.put(other);
    }

    /**
     * Two histograms are equal if their value->count mappings are equal.
     */
    @Override
    public boolean equals(Object obj) {
        if (obj instanceof ObjectHistogram<?>) {
            ObjectHistogram<?> other = (ObjectHistogram<?>)obj;
            return (this.histogram.equals(other.histogram));
        }
        return (false);
    }

    /**
     * Hash code derived from the value->count mappings so that it is
     * consistent with {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        return (this.histogram.hashCode());
    }

    /**
     * Enable or disable the retention of entries with a zero count.
     * When the option is switched from enabled to disabled, all of the
     * entries that currently have a zero count are removed.
     * @param flag
     * @return this histogram
     */
    @Override
    public ObjectHistogram<X> setKeepZeroEntries(boolean flag) {
        // When this option is disabled, we need to remove all of the zeroed entries
        if (!flag && this.keep_zero_entries) {
            synchronized (this) {
                Iterator<Entry<X, Long>> it = this.histogram.entrySet().iterator();
                int ctr = 0;
                while (it.hasNext()) {
                    if (it.next().getValue().longValue() == 0) {
                        it.remove();
                        ctr++;
                        this.dirty = true;
                    }
                } // WHILE
                if (ctr > 0) LOG.debug("Removed " + ctr + " zero entries from histogram");
            } // SYNCHRONIZED
        }
        this.keep_zero_entries = flag;
        return (this);
    }

    @Override
    public boolean isZeroEntriesEnabled() {
        return this.keep_zero_entries;
    }

    /**
     * The main method that updates a value in the histogram with a given sample count
     * This should be called by one of the public interface methods that are synchronized
     * This method is not synchronized on purpose for performance
     * <p>
     * The update is validated before any state is modified, so a rejected
     * update leaves the histogram untouched.
     * @param value the key to update; a null value is ignored
     * @param count the (possibly negative) delta to apply to the key's count
     * @return Return the new count for the given key
     * @throws IllegalArgumentException if the resulting count would be negative
     */
    private long _put(X value, long count) {
        // If we're giving a null value, then the count will always be zero
        if (value == null) return (0);

        // If we already have this value in our histogram, then add the new
        // count to its existing total
        Long existing = this.histogram.get(value);
        long new_count = count + (existing != null ? existing.longValue() : 0);

        // We can't have a negative value. Check this *before* we touch any of
        // our internal state so that a bad update does not corrupt the sample count
        if (new_count < 0) {
            String msg = String.format("Invalid negative count for key '%s' [count=%d]", value, new_count);
            throw new IllegalArgumentException(msg);
        }

        // HACK: Try to infer the internal type if we don't have it already
        if (this.value_type == VoltType.INVALID) {
            try {
                this.value_type = VoltType.typeFromClass(value.getClass());
            } catch (VoltTypeException ex) {
                this.value_type = VoltType.NULL;
            }
        }

        this.num_samples += count;

        // If the new count is zero, then completely remove it if we're not
        // allowed to have zero entries
        if (new_count == 0 && this.keep_zero_entries == false) {
            this.histogram.remove(value);
        }
        // Otherwise throw it into our map
        else {
            this.histogram.put(value, Long.valueOf(new_count));
        }

        // Mark ourselves as dirty so that we will always recompute
        // internal values (min/max) when they ask for them
        this.dirty = true;

        return (new_count);
    }

    /**
     * Recalculate the min/max values and the min/max count value sets
     * Since this is expensive, this should only be done whenever that information is needed
     * <p>
     * For an empty histogram the min/max counts are both zero, the min/max
     * values are null and the min/max count value lists are empty. Values that
     * do not implement {@link Comparable} are ignored when determining the
     * min/max value but still take part in the min/max counts.
     */
    @SuppressWarnings("unchecked")
    private synchronized void calculateInternalValues() {
        // Do this before we check whether we're dirty so that the getters
        // never hand out a null list
        if (this.min_count_values == null) this.min_count_values = new ArrayList<X>();
        if (this.max_count_values == null) this.max_count_values = new ArrayList<X>();

        if (this.dirty == false) return;

        // Start from scratch. The reason we have to loop through and check every
        // entry is that the previous min/max may have been changed or removed
        this.min_count_values.clear();
        this.max_count_values.clear();
        this.min_value = null;
        this.max_value = null;
        this.max_count = 0;
        this.min_count = (this.histogram.isEmpty() ? 0 : Long.MAX_VALUE);

        for (Entry<X, Long> e : this.histogram.entrySet()) {
            X value = e.getKey();
            long cnt = e.getValue().longValue();

            // Is this value the new min/max value?
            // These two checks are independent: the same value can be both the
            // smallest and the greatest (e.g., when there is only one entry)
            if (value instanceof Comparable) {
                Comparable<X> comparable = (Comparable<X>)value;
                if (this.min_value == null || this.min_value.compareTo(value) > 0) {
                    this.min_value = comparable;
                }
                if (this.max_value == null || this.max_value.compareTo(value) < 0) {
                    this.max_value = comparable;
                }
            }

            if (cnt <= this.min_count) {
                if (cnt < this.min_count) this.min_count_values.clear();
                this.min_count_values.add(value);
                this.min_count = cnt;
            }
            if (cnt >= this.max_count) {
                if (cnt > this.max_count) this.max_count_values.clear();
                this.max_count_values.add(value);
                this.max_count = cnt;
            }
        } // FOR
        this.dirty = false;
    }

    /**
     * Get the number of samples entered into the histogram using the put methods
     * @return
     */
    @Override
    public int getSampleCount() {
        return (this.num_samples);
    }

    /**
     * Get the number of unique values entered into the histogram
     * @return
     */
    @Override
    public int getValueCount() {
        return (this.histogram.size());
    }

    /**
     * Return the internal variable for what we "think" the type is for this
     * Histogram. Use this at your own risk
     *
     * @return
     */
    public VoltType getEstimatedType() {
        return (this.value_type);
    }

    /**
     * Return an unmodifiable view of the values stored in the histogram
     */
    @Override
    public Collection<X> values() {
        return (Collections.unmodifiableCollection(this.histogram.keySet()));
    }

    /**
     * Return a new set containing all of the values whose count is exactly
     * the given count
     */
    @Override
    public Collection<X> getValuesForCount(long count) {
        Set<X> ret = new HashSet<X>();
        for (Entry<X, Long> e : this.histogram.entrySet()) {
            if (e.getValue().longValue() == count) ret.add(e.getKey());
        } // FOR
        return (ret);
    }

    /**
     * Remove all of the entries and reset the sample count and min/max information
     */
    @Override
    public synchronized void clear() {
        this.histogram.clear();
        this.num_samples = 0;
        this.min_count = 0;
        if (this.min_count_values != null) this.min_count_values.clear();
        this.min_value = null;
        this.max_count = 0;
        if (this.max_count_values != null) this.max_count_values.clear();
        this.max_value = null;
        assert(this.histogram.isEmpty());
        this.dirty = true;
    }

    /**
     * Reset all of the counts. If zero entries are kept, then every existing
     * value stays in the histogram with a count of zero. Otherwise this is
     * the same as calling {@link #clear()}
     */
    @Override
    public synchronized void clearValues() {
        if (this.keep_zero_entries) {
            Long zero = Long.valueOf(0);
            for (Entry<X, Long> e : this.histogram.entrySet()) {
                e.setValue(zero);
            } // FOR
            this.num_samples = 0;
            this.min_count = 0;
            if (this.min_count_values != null) this.min_count_values.clear();
            this.min_value = null;
            this.max_count = 0;
            if (this.max_count_values != null) this.max_count_values.clear();
            this.max_value = null;
        } else {
            this.clear();
        }
        this.dirty = true;
    }

    @Override
    public boolean isEmpty() {
        return (this.histogram.isEmpty());
    }

    // ----------------------------------------------------------------------------
    // PUT METHODS
    // ----------------------------------------------------------------------------

    /**
     * Add the given delta to the count of the value
     * @return the new count for the value
     */
    @Override
    public synchronized long put(X value, long delta) {
        return this._put(value, delta);
    }

    /**
     * Increase the count of the value by one
     * @return the new count for the value
     */
    @Override
    public synchronized long put(X value) {
        return this._put(value, 1);
    }

    /**
     * Increase the count of every value that is currently in the histogram by one
     */
    @Override
    public void putAll() {
        this.put(this.histogram.keySet(), 1);
    }

    /**
     * Increase the count of each of the given values by one
     */
    @Override
    public void put(Collection<X> values) {
        this.put(values, 1);
    }

    /**
     * Add the given count to each of the given values
     */
    @Override
    public synchronized void put(Collection<X> values, long count) {
        for (X v : values) {
            this._put(v, count);
        } // FOR
    }

    /**
     * Add all of the counts from the other histogram to this histogram.
     * This is a no-op if the other histogram is null or is this histogram.
     * If the other histogram is an ObjectHistogram, then only its entries
     * with a positive count are added.
     */
    @Override
    public synchronized void put(Histogram<X> other) {
        if (other == this || other == null) return;
        if (other instanceof ObjectHistogram) {
            ObjectHistogram<X> objHistogram = (ObjectHistogram<X>)other;
            for (Entry<X, Long> e : objHistogram.histogram.entrySet()) {
                if (e.getValue().longValue() > 0)
                    this._put(e.getKey(), e.getValue());
            } // FOR
        } else {
            for (X value : other.values()) {
                this._put(value, other.get(value));
            } // FOR
        }
    }

    // ----------------------------------------------------------------------------
    // DECREMENT METHODS
    // ----------------------------------------------------------------------------

    /**
     * Subtract the given delta from the count of the value
     * @return the new count for the value
     * @throws IllegalArgumentException if the resulting count would be negative
     */
    @Override
    public synchronized long dec(X value, long delta) {
        assert(this.histogram.containsKey(value));
        return this._put(value, delta * -1);
    }

    /**
     * Decrease the count of the value by one
     * @return the new count for the value
     * @throws IllegalArgumentException if the resulting count would be negative
     */
    @Override
    public synchronized long dec(X value) {
        return this._put(value, -1);
    }

    /**
     * Decrease the count of each of the given values by one
     */
    @Override
    public synchronized void dec(Collection<X> values) {
        this.dec(values, 1);
    }

    /**
     * Subtract the given delta from the count of each of the given values
     */
    @Override
    public synchronized void dec(Collection<X> values, long delta) {
        for (X v : values) {
            this._put(v, -1 * delta);
        } // FOR
    }

    /**
     * Subtract all of the counts in the other histogram from this histogram.
     * This is a no-op if the other histogram is null. If the other histogram
     * is an ObjectHistogram, then only its entries with a positive count
     * are subtracted.
     * @throws IllegalArgumentException if any resulting count would be negative
     */
    @Override
    public synchronized void dec(Histogram<X> other) {
        if (other == null) return;
        if (other instanceof ObjectHistogram) {
            ObjectHistogram<X> objHistogram = (ObjectHistogram<X>)other;
            Map<X, Long> source = objHistogram.histogram;
            // When we are decrementing ourselves, _put() may remove entries from
            // the very map that we are iterating over. Use a snapshot instead
            if (other == this) source = new HashMap<X, Long>(source);
            for (Entry<X, Long> e : source.entrySet()) {
                if (e.getValue().longValue() > 0) {
                    this._put(e.getKey(), -1 * e.getValue().longValue());
                }
            } // FOR
        } else {
            for (X value : other.values()) {
                this._put(value, -1 * other.get(value));
            } // FOR
        }
    }

    // ----------------------------------------------------------------------------
    // MIN/MAX METHODS
    // ----------------------------------------------------------------------------

    /**
     * Return the smallest value in the histogram, or null if there is none
     */
    @SuppressWarnings("unchecked")
    @Override
    public X getMinValue() {
        this.calculateInternalValues();
        return ((X)this.min_value);
    }

    /**
     * Return the greatest value in the histogram, or null if there is none
     */
    @SuppressWarnings("unchecked")
    @Override
    public X getMaxValue() {
        this.calculateInternalValues();
        return ((X)this.max_value);
    }

    /**
     * Return the smallest count of any value in the histogram
     */
    @Override
    public long getMinCount() {
        this.calculateInternalValues();
        return (this.min_count);
    }

    /**
     * Return the values that have the smallest count.
     * The returned collection is the internal list and is reused the next
     * time that the min/max information is recomputed.
     */
    @Override
    public Collection<X> getMinCountValues() {
        this.calculateInternalValues();
        return (this.min_count_values);
    }

    /**
     * Return the greatest count of any value in the histogram
     */
    @Override
    public long getMaxCount() {
        this.calculateInternalValues();
        return (this.max_count);
    }

    /**
     * Return the values that have the greatest count.
     * The returned collection is the internal list and is reused the next
     * time that the min/max information is recomputed.
     */
    @Override
    public Collection<X> getMaxCountValues() {
        this.calculateInternalValues();
        return (this.max_count_values);
    }

    // ----------------------------------------------------------------------------
    // UTILITY METHODS
    // ----------------------------------------------------------------------------

    /**
     * Set the count of the value to exactly the given amount
     * @param value
     * @param i the new count for the value
     * @return the new count for the value
     * @throws IllegalArgumentException if the new count is negative
     */
    @Override
    public synchronized long set(X value, long i) {
        // _put() works with deltas, so convert the absolute count into the
        // difference from what we currently have (zero if it is already correct)
        Long orig = this.histogram.get(value);
        long delta = (orig == null ? i : i - orig.longValue());
        return this._put(value, delta);
    }

    /**
     * Subtract the entire current count of the value from the histogram.
     * The entry itself is only removed from the histogram if zero entries
     * are not kept.
     * @return the new count for the value, which is always zero
     */
    @Override
    public synchronized long remove(X value) {
        Long cnt = this.histogram.get(value);
        if (cnt != null && cnt.longValue() > 0) {
            return this._put(value, cnt * -1);
        }
        return 0L;
    }

    // ----------------------------------------------------------------------------
    // GET METHODS
    // ----------------------------------------------------------------------------

    /**
     * Returns the current count for the given value
     * If the value was never entered into the histogram, then the count will be null
     * @param value
     * @return
     */
    public Long get(X value) {
        return (this.histogram.get(value));
    }

    /**
     * Returns the current count for the given value.
     * If that value was never entered in the histogram, then the value returned will be value_if_null
     * @param value
     * @param value_if_null
     * @return
     */
    public long get(X value, long value_if_null) {
        Long count = this.histogram.get(value);
        return (count == null ? value_if_null : count.longValue());
    }

    /**
     * Returns true if this histogram contains the specified key.
     * @param value
     * @return
     */
    public boolean contains(X value) {
        return (this.histogram.containsKey(value));
    }

    // ----------------------------------------------------------------------------
    // DEBUG METHODS
    // ----------------------------------------------------------------------------

    /**
     * Add the given labels to the debug labels of this histogram.
     * Passing in null removes all of the debug labels.
     * @return this histogram
     */
    @Override
    public Histogram<X> setDebugLabels(Map<?, String> names_map) {
        if (names_map == null) {
            this.debug_names = null;
        } else {
            if (this.debug_names == null) {
                synchronized (this) {
                    if (this.debug_names == null) {
                        this.debug_names = new HashMap<Object, String>();
                    }
                } // SYNCH
            }
            this.debug_names.putAll(names_map);
        }
        return (this);
    }

    @Override
    public boolean hasDebugLabels() {
        return (this.debug_names != null && this.debug_names.isEmpty() == false);
    }

    @Override
    public Map<Object, String> getDebugLabels() {
        return (this.debug_names);
    }

    /**
     * Return the debug label for the given key, or null if there is no label
     * for the key or if no debug labels have been set at all
     */
    @Override
    public String getDebugLabel(Object key) {
        Map<Object, String> names = this.debug_names;
        return (names != null ? names.get(key) : null);
    }

    @Override
    public void enablePercentages() {
        this.debug_percentages = true;
    }

    @Override
    public boolean hasDebugPercentages() {
        return (this.debug_percentages);
    }

    @Override
    public String toString() {
        return HistogramUtil.toString(this);
    }

    @Override
    public String toString(int max_chars) {
        return HistogramUtil.toString(this, max_chars);
    }

    @Override
    public String toString(int max_chars, int max_len) {
        return HistogramUtil.toString(this, max_chars, max_len);
    }

    // ----------------------------------------------------------------------------
    // SERIALIZATION METHODS
    // ----------------------------------------------------------------------------

    public void load(File input_path) throws IOException {
        JSONUtil.load(this, null, input_path);
    }

    @Override
    public void load(File input_path, Database catalog_db) throws IOException {
        JSONUtil.load(this, catalog_db, input_path);
    }

    @Override
    public void save(File output_path) throws IOException {
        JSONUtil.save(this, output_path);
    }

    @Override
    public String toJSONString() {
        return (JSONUtil.toJSONString(this));
    }

    /**
     * Write each of the {@link Members} into the given stringer.
     * The HISTOGRAM member is skipped if the histogram is empty and the
     * KEEP_ZERO_ENTRIES member is skipped if the flag is not set.
     * @throws RuntimeException if any member fails to serialize
     */
    @Override
    public void toJSON(JSONStringer stringer) throws JSONException {
        for (Members element : ObjectHistogram.Members.values()) {
            try {
                switch (element) {
                    case VALUE_TYPE: {
                        stringer.key(element.name()).value(this.value_type.name());
                        break;
                    }
                    case HISTOGRAM: {
                        if (this.histogram.isEmpty() == false) {
                            stringer.key(element.name()).object();
                            synchronized (this) {
                                for (Entry<X, Long> e : this.histogram.entrySet()) {
                                    stringer.key(e.getKey().toString())
                                            .value(e.getValue());
                                } // FOR
                            } // SYNCH
                            stringer.endObject();
                        }
                        break;
                    }
                    case NUM_SAMPLES: {
                        stringer.key(element.name())
                                .value(Integer.valueOf(this.num_samples));
                        break;
                    }
                    case KEEP_ZERO_ENTRIES: {
                        if (this.keep_zero_entries) {
                            stringer.key(element.name())
                                    .value(this.keep_zero_entries);
                        }
                        break;
                    }
                } // SWITCH
            } catch (Exception ex) {
                throw new RuntimeException("Failed to serialize '" + element + "'", ex);
            }
        } // FOR
    }

    /**
     * Populate this histogram from the given JSON object.
     * The VALUE_TYPE and NUM_SAMPLES members are required, while the
     * HISTOGRAM and KEEP_ZERO_ENTRIES members are optional. The entries
     * found in the JSON object are added on top of whatever is already
     * stored in this histogram.
     * @param object
     * @param catalog_db not used by this implementation
     * @throws JSONException if a required member is missing or malformed
     * @throws RuntimeException if one of the histogram's keys cannot be
     *         converted into an object of the serialized value type
     */
    @Override
    public void fromJSON(JSONObject object, Database catalog_db) throws JSONException {
        this.value_type = VoltType.typeFromString(object.get(Members.VALUE_TYPE.name()).toString());
        assert (this.value_type != null);

        if (object.has(Members.KEEP_ZERO_ENTRIES.name())) {
            this.setKeepZeroEntries(object.getBoolean(Members.KEEP_ZERO_ENTRIES.name()));
        }

        try {
            if (object.has(Members.HISTOGRAM.name())) {
                JSONObject jsonObject = object.getJSONObject(Members.HISTOGRAM.name());
                for (String key_name : CollectionUtil.iterable(jsonObject.keys())) {
                    Object key_value = VoltTypeUtil.getObjectFromString(this.value_type, key_name);
                    Long count = Long.valueOf(jsonObject.getLong(key_name));
                    @SuppressWarnings("unchecked")
                    X x = (X)key_value;
                    this.histogram.put(x, count);
                } // FOR
            }
            this.num_samples = object.getInt(Members.NUM_SAMPLES.name());
        } catch (JSONException ex) {
            throw ex;
        } catch (Exception ex) {
            // Report the failure to our caller instead of killing the entire JVM
            throw new RuntimeException("Failed to deserialize histogram from JSON", ex);
        }

        this.dirty = true;
        this.calculateInternalValues();
    }
}