package edu.brown.statistics;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;

import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONStringer;
import org.voltdb.catalog.Database;

import edu.brown.utils.CollectionUtil;
import edu.brown.utils.JSONUtil;

/**
 * Array-backed histogram for non-negative integer values.
 * The value is used directly as the index into a long array that holds the
 * count for that value; a slot holding {@code NULL_COUNT} means "no entry".
 * The array automatically grows when a value that is greater than or equal
 * to the current capacity is added.
 * @author pavlo
 */
public class FastIntHistogram implements Histogram<Integer> {

    /** Keys used in the JSON representation of this histogram */
    public enum Members {
        HISTOGRAM,
        DEBUG,
    }

    /** Marker stored in a slot that has no entry */
    private static final int NULL_COUNT = -1;
    /** Extra head-room added every time the internal array grows */
    private static final int GROW_INCREMENT = 10;

    /** histogram[value] -> count, or NULL_COUNT if there is no entry */
    private long histogram[];
    /** Number of slots that are not NULL_COUNT */
    private int num_values = 0;
    /** Sum of all the counts */
    private int num_samples = 0;

    private transient Map<Object, String> debug_names;
    private transient boolean debug_percentages = false;
    private boolean keep_zero_entries = false;

    // ----------------------------------------------------------------------------
    // INITIALIZATION
    // ----------------------------------------------------------------------------

    public FastIntHistogram(boolean keepZeroEntries) {
        this(keepZeroEntries, GROW_INCREMENT); // HACK
    }

    public FastIntHistogram() {
        this(false, GROW_INCREMENT); // HACK
    }

    public FastIntHistogram(int size) {
        this(false, size);
    }

    /**
     * @param keepZeroEntries if true, entries whose count reaches zero are kept
     * @param size the initial capacity of the internal array
     */
    public FastIntHistogram(boolean keepZeroEntries, int size) {
        this.keep_zero_entries = keepZeroEntries;
        this.histogram = new long[size];
        this.clearValues();
    }

    /**
     * Copy Constructor.
     * The new histogram gets its own copy of the counts, so that it is
     * equal to the original but independent of it.
     * @param copy
     */
    public FastIntHistogram(FastIntHistogram copy) {
        // Clone the state directly instead of replaying put(): this keeps the
        // null slots, the counters and the capacity identical to the original
        this.keep_zero_entries = copy.keep_zero_entries;
        this.histogram = copy.histogram.clone();
        this.num_values = copy.num_values;
        this.num_samples = copy.num_samples;
        this.debug_percentages = copy.debug_percentages;
        if (copy.hasDebugLabels()) {
            this.setDebugLabels(copy.debug_names);
        }
    }

    // ----------------------------------------------------------------------------
    // INTERNAL METHODS
    // ----------------------------------------------------------------------------

    /**
     * Replace the internal array with one that has newSize + GROW_INCREMENT
     * slots. The existing counts are preserved and the new slots are empty.
     * @param newSize
     */
    private void grow(int newSize) {
        assert(newSize >= this.histogram.length);
        long temp[] = new long[newSize + GROW_INCREMENT];
        Arrays.fill(temp, this.histogram.length, temp.length, NULL_COUNT);
        System.arraycopy(this.histogram, 0, temp, 0, this.histogram.length);
        this.histogram = temp;
    }

    /**
     * Two histograms are equal if they have the same counters and the same
     * count in every slot. The internal capacity is not part of the
     * comparison: slots that only exist in the larger array must be empty.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) return (true);
        if ((obj instanceof FastIntHistogram) == false) return (false);

        FastIntHistogram other = (FastIntHistogram) obj;
        if (this.num_values != other.num_values ||
            this.num_samples != other.num_samples) return (false);

        int common = Math.min(this.histogram.length, other.histogram.length);
        for (int i = 0; i < common; i++) {
            if (this.histogram[i] != other.histogram[i]) return (false);
        } // FOR

        long longer[] = (this.histogram.length > other.histogram.length ?
                            this.histogram : other.histogram);
        for (int i = common; i < longer.length; i++) {
            if (longer[i] != NULL_COUNT) return (false);
        } // FOR
        return (true);
    }

    /**
     * Hash code that is consistent with equals(). Only the counters and the
     * non-empty slots contribute, so the internal capacity does not matter.
     */
    @Override
    public int hashCode() {
        int hash = 31 * this.num_values + this.num_samples;
        for (int i = 0; i < this.histogram.length; i++) {
            long cnt = this.histogram[i];
            if (cnt != NULL_COUNT) {
                hash = 31 * hash + i;
                hash = 31 * hash + (int)(cnt ^ (cnt >>> 32));
            }
        } // FOR
        return (hash);
    }

    /**
     * Make sure that the internal array has at least the given capacity
     * @param size
     */
    public void ensureSize(int size) {
        if (this.histogram.length < size) {
            this.grow(size);
        }
    }

    // ----------------------------------------------------------------------------
    // FAST ACCESS METHODS
    // ----------------------------------------------------------------------------

    /**
     * Return the internal capacity of this histogram.
     * You probably don't want this value and instead want getSampleCount()
     * @return
     */
    protected int size() {
        return (this.histogram.length);
    }

    /**
     * Return an array of the values in this histogram
     * @return the indexes of all non-empty slots in ascending order
     */
    public int[] fastValues() {
        int cnt = 0;
        for (int i = 0; i < this.histogram.length; i++) {
            if (this.histogram[i] != NULL_COUNT) cnt++;
        } // FOR

        int values[] = new int[cnt];
        int idx = 0;
        for (int i = 0; i < this.histogram.length; i++) {
            if (this.histogram[i] != NULL_COUNT) {
                values[idx++] = i;
            }
        } // FOR
        return (values);
    }

    // ----------------------------------------------------------------------------
    // INTERNAL DATA CONTROL METHODS
    // ----------------------------------------------------------------------------

    @Override
    public Histogram<Integer> setKeepZeroEntries(boolean flag) {
        this.keep_zero_entries = flag;
        return (this);
    }

    @Override
    public boolean isZeroEntriesEnabled() {
        return (this.keep_zero_entries);
    }

    @Override
    public int getSampleCount() {
        return (this.num_samples);
    }

    @Override
    public boolean isEmpty() {
        return (this.num_samples == 0);
    }

    // ----------------------------------------------------------------------------
    // VALUE METHODS
    // ----------------------------------------------------------------------------

    /**
     * Get the count for the given value
     * @param value
     * @return the count, or -1 if there is no entry for the value
     */
    public long get(int value) {
        if (value < 0 || value >= this.histogram.length) {
            return (NULL_COUNT);
        }
        return (this.histogram[value]);
    }

    /**
     * Get the count for the given value
     * @param idx
     * @param value_if_null what to return if there is no entry for the value
     * @return
     */
    public long get(int idx, long value_if_null) {
        if (idx < 0 || idx >= this.histogram.length) {
            return (value_if_null);
        }
        long cnt = this.histogram[idx];
        return (cnt == NULL_COUNT ? value_if_null : cnt);
    }

    /**
     * Boxed version of get(int). Note that a missing entry is reported
     * as -1 and not as null.
     */
    @Override
    public Long get(Integer value) {
        return Long.valueOf(this.get(value.intValue()));
    }

    @Override
    public long get(Integer value, long value_if_null) {
        return this.get(value.intValue(), value_if_null);
    }

    @Override
    public int getValueCount() {
        return this.num_values;
    }

    @Override
    public Collection<Integer> values() {
        List<Integer> values = new ArrayList<Integer>();
        for (int idx : this.fastValues()) {
            values.add(idx);
        } // FOR
        return (values);
    }

    @Override
    public Collection<Integer> getValuesForCount(long count) {
        List<Integer> values = new ArrayList<Integer>();
        for (int idx : this.fastValues()) {
            if (this.get(idx) == count) {
                values.add(idx);
            }
        } // FOR
        return (values);
    }

    // ----------------------------------------------------------------------------
    // PUT METHODS
    // ----------------------------------------------------------------------------

    public long put(int value) {
        return this.put(value, 1);
    }

    /**
     * Add delta to the count of the given value, growing the internal
     * array if needed. A negative value is used as an array index and
     * therefore throws ArrayIndexOutOfBoundsException.
     * @param value
     * @param delta
     * @return the new content of the slot. This is -1 if the count reached
     *         zero and the entry was dropped because zero entries are not kept
     */
    public long put(int value, long delta) {
        if (value >= this.histogram.length) {
            this.grow(value);
        }
        if (this.histogram[value] == NULL_COUNT) {
            this.histogram[value] = delta;
            this.num_values++;
        } else {
            this.histogram[value] += delta;
        }
        this.num_samples += delta;

        if (this.histogram[value] == 0 && this.keep_zero_entries == false) {
            this.histogram[value] = NULL_COUNT;
            this.num_values--;
        }
        return (this.histogram[value]);
    }

    /**
     * Add all of the counts of the given histogram to this histogram
     * @param fast
     */
    public void put(FastIntHistogram fast) {
        // Only grow when the other histogram really is larger. Growing on
        // equal capacity just wastes memory on every merge
        if (fast.histogram.length > this.histogram.length) {
            this.grow(fast.histogram.length);
        }
        for (int value = 0; value < fast.histogram.length; value++) {
            long cnt = fast.histogram[value];
            if (cnt == NULL_COUNT) continue;

            if (this.histogram[value] == NULL_COUNT) {
                this.histogram[value] = cnt;
                this.num_values++;
            } else {
                this.histogram[value] += cnt;
            }
            this.num_samples += cnt;
        } // FOR
    }

    @Override
    public long put(Integer value) {
        return this.put(value.intValue());
    }

    @Override
    public long put(Integer value, long delta) {
        return this.put(value.intValue(), delta);
    }

    @Override
    public void put(Collection<Integer> values) {
        for (Integer v : values) {
            this.put(v.intValue());
        } // FOR
    }

    public void put(int values[]) {
        for (int idx : values) {
            this.put(idx, 1);
        } // FOR
    }

    @Override
    public void put(Collection<Integer> values, long delta) {
        for (Integer value : values) {
            this.put(value.intValue(), delta);
        } // FOR
    }

    public void put(int values[], long delta) {
        for (int value : values) {
            this.put(value, delta);
        } // FOR
    }

    @Override
    public void put(Histogram<Integer> other) {
        if (other instanceof FastIntHistogram) {
            this.put((FastIntHistogram)other);
        } else {
            for (Integer value : other.values()) {
                Long cnt = other.get(value);
                if (cnt != null) {
                    this.put(value.intValue(), cnt.longValue());
                }
            } // FOR
        }
    }

    /**
     * Increase the count of every existing entry by one
     */
    @Override
    public void putAll() {
        for (int value = 0; value < this.histogram.length; value++) {
            if (this.histogram[value] != NULL_COUNT) {
                this.histogram[value]++;
                this.num_samples++;
            }
        } // FOR
    }

    // ----------------------------------------------------------------------------
    // DECREMENT METHODS
    // ----------------------------------------------------------------------------

    public long dec(int idx) {
        return this.dec(idx, 1);
    }

    /**
     * Subtract count from the entry of the given value
     * @param idx
     * @param count
     * @return the new content of the slot. This is -1 if the count reached
     *         zero and the entry was dropped because zero entries are not kept
     * @throws IllegalArgumentException if there is no entry for the value or
     *         if the entry is smaller than count
     */
    public long dec(int idx, long count) {
        // The range check has to come first, otherwise the array access
        // fails before we can report the missing entry
        if (idx < 0 || idx >= this.histogram.length || this.histogram[idx] == NULL_COUNT) {
            throw new IllegalArgumentException("No value exists for " + idx);
        } else if (this.histogram[idx] < count) {
            throw new IllegalArgumentException("Count for " + idx + " cannot be negative");
        }

        this.histogram[idx] -= count;
        if (this.histogram[idx] == 0 && this.keep_zero_entries == false) {
            this.histogram[idx] = NULL_COUNT;
            this.num_values--;
        }
        this.num_samples -= count;
        return (this.histogram[idx]);
    }

    @Override
    public long dec(Integer value) {
        return this.dec(value, 1);
    }

    @Override
    public long dec(Integer value, long count) {
        return this.dec(value.intValue(), count);
    }

    @Override
    public void dec(Collection<Integer> values) {
        this.dec(values, 1);
    }

    /**
     * Subtract delta from the entry of every value in the collection
     * @throws IllegalArgumentException see dec(int, long)
     */
    @Override
    public void dec(Collection<Integer> values, long delta) {
        for (Integer value : values) {
            this.dec(value.intValue(), delta);
        } // FOR
    }

    public void dec(Histogram<Integer> other) {
        if (other instanceof FastIntHistogram) {
            this.dec((FastIntHistogram)other);
        } else {
            for (Integer v : other.values()) {
                long cnt = other.get(v, NULL_COUNT);
                if (cnt != NULL_COUNT) this.dec(v.intValue(), cnt);
            } // FOR
        }
    }

    /**
     * Subtract the counts of the given histogram from this histogram.
     * Unlike dec(int, long) this never throws: values that only exist in the
     * other histogram are ignored and counts are clamped at zero.
     * @param fast
     */
    public void dec(FastIntHistogram fast) {
        // Slots past our own capacity cannot have an entry in this histogram
        int limit = Math.min(this.histogram.length, fast.histogram.length);
        for (int i = 0; i < limit; i++) {
            long theirs = fast.histogram[i];
            long ours = this.histogram[i];
            if (theirs == NULL_COUNT || ours == NULL_COUNT) continue;

            if (ours <= theirs) {
                this.num_samples -= ours;
                if (this.keep_zero_entries) {
                    this.histogram[i] = 0;
                } else {
                    this.histogram[i] = NULL_COUNT;
                    this.num_values--;
                }
            } else {
                this.num_samples -= theirs;
                this.histogram[i] = ours - theirs;
            }
        } // FOR
    }

    // ----------------------------------------------------------------------------
    // CLEAR METHODS
    // ----------------------------------------------------------------------------

    /**
     * Remove all of the entries
     */
    @Override
    public void clear() {
        Arrays.fill(this.histogram, NULL_COUNT);
        this.num_values = 0;
        this.num_samples = 0;
    }

    /**
     * Reset all of the counts. If zero entries are kept, then every slot of
     * the internal array becomes an entry with a count of zero. Otherwise
     * this is the same as clear()
     */
    @Override
    public void clearValues() {
        if (this.keep_zero_entries) {
            Arrays.fill(this.histogram, 0);
            // Every slot is an entry now, so the value counter has to agree
            // with what values() is going to return
            this.num_values = this.histogram.length;
        } else {
            this.clear();
        }
        this.num_samples = 0;
    }

    @Override
    public long remove(Integer value) {
        return this.remove(value.intValue());
    }

    /**
     * Remove the entry for the given value (if there is one) and take its
     * count out of the sample count
     * @param value
     * @return always zero
     */
    public long remove(int value) {
        if (value >= 0 && value < this.histogram.length) {
            long cnt = this.histogram[value];
            if (cnt != NULL_COUNT) {
                this.histogram[value] = NULL_COUNT;
                this.num_values--;
                this.num_samples -= cnt;
            }
        }
        return (0);
    }

    // ----------------------------------------------------------------------------
    // MIN/MAX METHODS
    // ----------------------------------------------------------------------------

    /**
     * @return the smallest value that has an entry, or null if there are none
     */
    @Override
    public Integer getMinValue() {
        for (int i = 0; i < this.histogram.length; i++) {
            if (this.histogram[i] != NULL_COUNT) return (i);
        } // FOR
        return (null);
    }

    /**
     * @return the smallest count of any entry. If there are no entries, then
     *         Integer.MAX_VALUE is returned
     */
    @Override
    public long getMinCount() {
        // Counts are longs, so the search cannot be seeded with an int limit
        long min_cnt = Long.MAX_VALUE;
        boolean found = false;
        for (int i = 0; i < this.histogram.length; i++) {
            long cnt = this.histogram[i];
            if (cnt != NULL_COUNT) {
                found = true;
                if (cnt < min_cnt) min_cnt = cnt;
            }
        } // FOR
        // Legacy return value for an empty histogram
        return (found ? min_cnt : Integer.MAX_VALUE);
    }

    /**
     * @return all of the values whose count is the smallest count
     */
    @Override
    public Collection<Integer> getMinCountValues() {
        List<Integer> min_values = new ArrayList<Integer>();
        long min_cnt = Long.MAX_VALUE;
        for (int i = 0; i < this.histogram.length; i++) {
            long cnt = this.histogram[i];
            if (cnt == NULL_COUNT) continue;

            if (cnt < min_cnt) {
                min_values.clear();
                min_values.add(i);
                min_cnt = cnt;
            } else if (cnt == min_cnt) {
                min_values.add(i);
            }
        } // FOR
        return (min_values);
    }

    /**
     * @return the largest value that has an entry, or null if there are none
     */
    @Override
    public Integer getMaxValue() {
        for (int i = this.histogram.length - 1; i >= 0; i--) {
            if (this.histogram[i] != NULL_COUNT) return (i);
        } // FOR
        return (null);
    }

    /**
     * @return the largest count of any entry, or zero if there are none
     */
    @Override
    public long getMaxCount() {
        long max_cnt = 0;
        for (int i = 0; i < this.histogram.length; i++) {
            long cnt = this.histogram[i];
            if (cnt != NULL_COUNT && cnt > max_cnt) {
                max_cnt = cnt;
            }
        } // FOR
        return (max_cnt);
    }

    /**
     * @return all of the values whose count is the largest count
     */
    @Override
    public Collection<Integer> getMaxCountValues() {
        List<Integer> max_values = new ArrayList<Integer>();
        long max_cnt = 0;
        for (int i = 0; i < this.histogram.length; i++) {
            long cnt = this.histogram[i];
            if (cnt == NULL_COUNT) continue;

            if (cnt == max_cnt) {
                max_values.add(i);
            } else if (cnt > max_cnt) {
                max_values.clear();
                max_values.add(i);
                max_cnt = cnt;
            }
        } // FOR
        return (max_values);
    }

    // ----------------------------------------------------------------------------
    // UTILITY METHODS
    // ----------------------------------------------------------------------------

    /**
     * Overwrite the count of the given value, growing the internal array if
     * needed. The value and sample counters are adjusted to match. Like
     * put(), a count of zero drops the entry unless zero entries are kept.
     * @param value
     * @param i the new count
     * @return the count that was passed in
     */
    @Override
    public long set(Integer value, long i) {
        int idx = value.intValue();
        if (idx >= this.histogram.length) {
            this.grow(idx);
        }

        // Take the old entry out of the counters...
        long prev = this.histogram[idx];
        if (prev != NULL_COUNT) {
            this.num_values--;
            this.num_samples -= prev;
        }

        // ...and then put the new one in
        if (i == NULL_COUNT || (i == 0 && this.keep_zero_entries == false)) {
            this.histogram[idx] = NULL_COUNT;
        } else {
            this.histogram[idx] = i;
            this.num_values++;
            this.num_samples += i;
        }
        return (i);
    }

    /**
     * @param idx
     * @return true if there is an entry for the given value
     */
    public boolean contains(int idx) {
        return (idx >= 0 &&
                idx < this.histogram.length &&
                this.histogram[idx] != NULL_COUNT);
    }

    @Override
    public boolean contains(Integer value) {
        return (this.contains(value.intValue()));
    }

    // ----------------------------------------------------------------------------
    // DEBUG METHODS
    // ----------------------------------------------------------------------------

    @Override
    public String toString() {
        return HistogramUtil.toString(this);
    }

    @Override
    public String toString(int max_chars) {
        return HistogramUtil.toString(this, max_chars);
    }

    @Override
    public String toString(int max_chars, int max_len) {
        return HistogramUtil.toString(this, max_chars, max_len);
    }

    /**
     * Add the given labels to the debug labels of this histogram.
     * Passing in null removes all of the labels.
     */
    @Override
    public Histogram<Integer> setDebugLabels(Map<?, String> names_map) {
        if (names_map == null) {
            this.debug_names = null;
        } else {
            if (this.debug_names == null) {
                synchronized (this) {
                    if (this.debug_names == null) {
                        this.debug_names = new HashMap<Object, String>();
                    }
                } // SYNCH
            }
            this.debug_names.putAll(names_map);
        }
        return (this);
    }

    @Override
    public boolean hasDebugLabels() {
        return (this.debug_names != null && this.debug_names.isEmpty() == false);
    }

    @Override
    public Map<Object, String> getDebugLabels() {
        return (this.debug_names);
    }

    /**
     * @return the label for the given key, or null if there is no label for
     *         it (including when no labels were ever set)
     */
    @Override
    public String getDebugLabel(Object key) {
        if (this.debug_names == null) {
            return (null);
        }
        return (this.debug_names.get(key));
    }

    @Override
    public void enablePercentages() {
        this.debug_percentages = true;
    }

    @Override
    public boolean hasDebugPercentages() {
        return (this.debug_percentages);
    }

    // ----------------------------------------------------------------------------
    // SERIALIZATION METHODS
    // ----------------------------------------------------------------------------

    public void load(File input_path) throws IOException {
        JSONUtil.load(this, null, input_path);
    }

    @Override
    public void load(File input_path, Database catalog_db) throws IOException {
        JSONUtil.load(this, catalog_db, input_path);
    }

    @Override
    public void save(File output_path) throws IOException {
        JSONUtil.save(this, output_path);
    }

    @Override
    public String toJSONString() {
        return (JSONUtil.toJSONString(this));
    }

    /**
     * Write the slots of the internal array up to (and including) the last
     * non-empty one, followed by the debug labels if there are any
     */
    @Override
    public void toJSON(JSONStringer stringer) throws JSONException {
        // Go through once and find the greatest position where
        // there are no more non-null values.
        // A histogram without entries still writes its first slot, unless
        // the internal array has no slots at all
        int maxSize = (this.histogram.length > 0 ? 0 : -1);
        for (int i = 0; i < this.histogram.length; i++) {
            if (this.histogram[i] != NULL_COUNT) {
                maxSize = i;
            }
        } // FOR

        stringer.key(Members.HISTOGRAM.name()).array();
        for (int i = 0; i <= maxSize; i++) {
            stringer.value(this.histogram[i]);
        } // FOR
        stringer.endArray();

        if (this.debug_names != null && this.debug_names.isEmpty() == false) {
            stringer.key(Members.DEBUG.name()).object();
            for (Entry<Object, String> e : this.debug_names.entrySet()) {
                stringer.key(e.getKey().toString())
                        .value(e.getValue().toString());
            } // FOR
            stringer.endObject();
        }
    }

    /**
     * Replace the contents of this histogram with the counts (and the debug
     * labels, if present) from the given JSON object
     */
    @Override
    public void fromJSON(JSONObject object, Database catalog_db) throws JSONException {
        JSONArray jsonArr = object.getJSONArray(Members.HISTOGRAM.name());
        this.histogram = new long[jsonArr.length()];
        this.clear();
        // Go through put() so that the counters are rebuilt as well
        for (int i = 0; i < this.histogram.length; i++) {
            long delta = jsonArr.getLong(i);
            if (delta != NULL_COUNT) this.put(i, delta);
        } // FOR

        if (object.has(Members.DEBUG.name())) {
            if (this.debug_names == null) {
                this.debug_names = new TreeMap<Object, String>();
            } else {
                this.debug_names.clear();
            }
            JSONObject jsonObj = object.getJSONObject(Members.DEBUG.name());
            for (String key : CollectionUtil.iterable(jsonObj.keys())) {
                String label = jsonObj.getString(key);
                this.debug_names.put(Integer.valueOf(key), label);
            } // FOR
        }
    }
}