/**
 * Licensed to Cloudera, Inc. under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  Cloudera, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.cloudera.util;

import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;

/**
 * This is a basic histogram class: it maps each observed value (a "bin") to
 * an integer count.
 *
 * Counts are kept in a plain {@link HashMap} and this class performs no
 * synchronization of its own.
 *
 * @param <T>
 *          type of the values being counted; used as a hash map key
 */
public class Histogram<T> {
  /** Bin value -> count. Package-visible, as in the original class. */
  Map<T, Integer> hist = new HashMap<T, Integer>();

  /**
   * Adds one to the count for val, creating the bin with a count of 1 if it
   * does not exist yet.
   */
  public void increment(T val) {
    add(val, 1);
  }

  /**
   * Create a histogram slot bin for value val, and give it a count of 0. If
   * the bin already exists its count is overwritten with 0.
   */
  public void reset(T val) {
    hist.put(val, 0);
  }

  /**
   * Adds j to the count for val, creating the bin with a count of j if it
   * does not exist yet.
   *
   * @param val
   *          bin to update
   * @param j
   *          amount to add; asserted to be positive when assertions are
   *          enabled
   */
  public void incrementN(T val, int j) {
    assert (j > 0);
    add(val, j);
  }

  /**
   * Merge values,counts from h2 into this histogram. A null h2 is ignored.
   *
   * Bins of h2 whose count is 0 (for example ones created by
   * {@link #reset(Object)}) are carried over as well: they are created here
   * with a count of 0 if missing and otherwise leave the existing count
   * untouched.
   */
  public void merge(Histogram<T> h2) {
    if (h2 == null) {
      return;
    }
    // Walk the entries and add directly instead of going through
    // incrementN()/get(): their assertions (j > 0, val != null) would reject
    // zero-count bins and null keys that h2 may legitimately contain.
    for (Map.Entry<T, Integer> ent : h2.hist.entrySet()) {
      add(ent.getKey(), ent.getValue());
    }
  }

  /**
   * Returns the count recorded for val, or 0 if there is no bin for it.
   *
   * @param val
   *          bin to look up; asserted to be non-null when assertions are
   *          enabled
   */
  public int get(T val) {
    assert (val != null);
    Integer count = hist.get(val);
    return (count == null) ? 0 : count;
  }

  /**
   * Returns the set of bins present in the histogram. This is the key set
   * view of the backing map, not a copy.
   */
  public Set<T> keys() {
    return hist.keySet();
  }

  /**
   * Returns the histogram in frequency sorted order: highest count first.
   * Bins with equal counts are ordered by the natural ordering of their
   * values when both values are {@link Comparable}, and by their string
   * representation otherwise.
   *
   * Because the result is a set ordered by that comparison, two bins that
   * have the same count and compare as equal occupy a single slot.
   *
   * @return a new sorted set holding one (value, count) pair per bin
   */
  public SortedSet<Pair<T, Integer>> sorted() {
    // TODO (jon) change to use an array and Arrays.sort();
    SortedSet<Pair<T, Integer>> sort = new TreeSet<Pair<T, Integer>>(
        new FrequencyOrder<T>());
    for (Map.Entry<T, Integer> ent : hist.entrySet()) {
      sort.add(new Pair<T, Integer>(ent.getKey(), ent.getValue()));
    }
    return sort;
  }

  /**
   * Return the number of keys present in the histogram
   */
  public int size() {
    return hist.size();
  }

  /**
   * Return the total sum of counts present in the histogram
   */
  public int total() {
    int sum = 0;
    for (int count : hist.values()) {
      sum += count;
    }
    return sum;
  }

  /**
   * Renders one line per bin in the form "count :: value," in the iteration
   * order of the backing map.
   */
  @Override
  public String toString() {
    StringBuilder b = new StringBuilder();
    for (Map.Entry<T, Integer> ent : hist.entrySet()) {
      b.append(String.format("%6d :: %s,\n", ent.getValue(), ent.getKey()));
    }
    return b.toString();
  }

  /**
   * Adds delta to the count stored for val with a single map lookup,
   * creating the bin with a count of delta when it is absent.
   */
  private void add(T val, int delta) {
    Integer current = hist.get(val);
    hist.put(val, (current == null) ? delta : current + delta);
  }

  /**
   * Orders (value, count) pairs by descending count, breaking ties on the
   * value. Static so that it holds no reference to a Histogram instance.
   */
  private static final class FrequencyOrder<K> implements
      Comparator<Pair<K, Integer>> {
    @Override
    public int compare(Pair<K, Integer> o1, Pair<K, Integer> o2) {
      int count1 = o1.getRight();
      int count2 = o2.getRight();
      if (count1 != count2) {
        // Explicit comparison rather than count2 - count1: the subtraction
        // can overflow and report the wrong sign.
        return (count1 > count2) ? -1 : 1;
      }
      return compareKeys(o1.getLeft(), o2.getLeft());
    }

    /**
     * Tie-break on the bin values themselves. The Comparable test is made on
     * the values (not on the enclosing pairs) because the values are what
     * gets cast and compared.
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    private static int compareKeys(Object k1, Object k2) {
      if (k1 instanceof Comparable && k2 instanceof Comparable) {
        return ((Comparable) k1).compareTo(k2);
      }
      // this is a performance killer for VBytes
      // String.valueOf keeps a null value from throwing here.
      return String.valueOf(k1).compareTo(String.valueOf(k2));
    }
  }
}