/*******************************************************************************
* Copyright (c) 2010 Haifeng Li
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
package smile.plot;
import smile.math.Math;
/**
 * A histogram is a graphical display of tabulated frequencies, shown as bars.
 * It shows what proportion of cases fall into each of several categories:
 * it is a form of data binning. The categories are usually specified as
 * non-overlapping intervals of some variable. The categories (bars) must
 * be adjacent. The intervals (or bands, or bins) are generally of the same
 * size, and are most easily interpreted if they are.
 * <p>
 * The bars are stored as an n x 2 array handed to the {@link BarPlot}
 * super-constructor: column 0 is the bin center and column 1 is the bar
 * height (a count, or a fraction of the sample size in probability scale).
 *
 * @author Haifeng Li
 */
public class Histogram extends BarPlot {
    /**
     * If true, the y-axis will be in the probability scale.
     * Otherwise, y-axis will be in the frequency scale.
     */
    private final boolean prob;

    /**
     * Constructor. The number of bins will be determined by square-root rule
     * and the y-axis will be in the probability scale.
     * @param data a sample set.
     */
    public Histogram(int[] data) {
        this(data, true);
    }

    /**
     * Constructor. The number of bins will be determined by square-root rule.
     * @param data a sample set.
     * @param prob if true, the y-axis will be in the probability scale.
     * Otherwise, y-axis will be in the frequency scale.
     */
    public Histogram(int[] data, boolean prob) {
        this(data, smile.math.Histogram.bins(data.length), prob);
    }

    /**
     * Constructor. The y-axis will be in the probability scale.
     * @param data a sample set.
     * @param k the number of bins.
     */
    public Histogram(int[] data, int k) {
        this(data, k, true);
    }

    /**
     * Constructor.
     * @param data a sample set.
     * @param k the number of bins.
     * @param prob if true, the y-axis will be in the probability scale.
     * Otherwise, y-axis will be in the frequency scale.
     */
    public Histogram(int[] data, int k, boolean prob) {
        super(histogram(data, k, prob));
        this.prob = prob;
    }

    /**
     * Constructor. The y-axis will be in the probability scale.
     * @param data a sample set.
     * @param breaks an array of size k+1 giving the breakpoints between
     * histogram cells. Must be in ascending order.
     * @throws IllegalArgumentException if breaks defines fewer than two bins.
     */
    public Histogram(int[] data, double[] breaks) {
        this(data, breaks, true);
    }

    /**
     * Constructor.
     * @param data a sample set.
     * @param breaks an array of size k+1 giving the breakpoints between
     * histogram cells. Must be in ascending order.
     * @param prob if true, the y-axis will be in the probability scale.
     * Otherwise, y-axis will be in the frequency scale.
     * @throws IllegalArgumentException if breaks defines fewer than two bins.
     */
    public Histogram(int[] data, double[] breaks, boolean prob) {
        super(histogram(data, breaks, prob));
        this.prob = prob;
    }

    /**
     * Constructor. The number of bins will be determined by square-root rule
     * and the y-axis will be in the probability scale.
     * @param data a sample set.
     */
    public Histogram(double[] data) {
        this(data, true);
    }

    /**
     * Constructor. The number of bins will be determined by square-root rule.
     * @param data a sample set.
     * @param prob if true, the y-axis will be in the probability scale.
     * Otherwise, y-axis will be in the frequency scale.
     */
    public Histogram(double[] data, boolean prob) {
        this(data, smile.math.Histogram.bins(data.length), prob);
    }

    /**
     * Constructor. The y-axis will be in the probability scale.
     * @param data a sample set.
     * @param k the number of bins.
     */
    public Histogram(double[] data, int k) {
        this(data, k, true);
    }

    /**
     * Constructor.
     * @param data a sample set.
     * @param k the number of bins.
     * @param prob if true, the y-axis will be in the probability scale.
     * Otherwise, y-axis will be in the frequency scale.
     */
    public Histogram(double[] data, int k, boolean prob) {
        super(histogram(data, k, prob));
        this.prob = prob;
    }

    /**
     * Constructor. The y-axis will be in the probability scale.
     * @param data a sample set.
     * @param breaks an array of size k+1 giving the breakpoints between
     * histogram cells. Must be in ascending order.
     * @throws IllegalArgumentException if breaks defines fewer than two bins.
     */
    public Histogram(double[] data, double[] breaks) {
        this(data, breaks, true);
    }

    /**
     * Constructor.
     * @param data a sample set.
     * @param breaks an array of size k+1 giving the breakpoints between
     * histogram cells. Must be in ascending order.
     * @param prob if true, the y-axis will be in the probability scale.
     * Otherwise, y-axis will be in the frequency scale.
     * @throws IllegalArgumentException if breaks defines fewer than two bins.
     */
    public Histogram(double[] data, double[] breaks, boolean prob) {
        super(histogram(data, breaks, prob));
        this.prob = prob;
    }

    /**
     * Returns the bin centers and frequencies/probabilities.
     * Note that the internal array itself is returned, not a copy.
     * @return a n x 2 array, where n is the number of bins. a[][0] are the
     * centers of bins and a[][1] are frequencies or probabilities.
     */
    public double[][] getHistogram() {
        return data;
    }

    /**
     * Returns the number of bins in the histogram.
     * @return the number of bins.
     */
    public int getNumBins() {
        return data.length;
    }

    /**
     * Returns a tool tip describing the bar under the given point.
     * The text is the bar height (a percentage with one decimal in the
     * probability scale, an integer count otherwise) followed by the
     * interval covered by the bar.
     * @param coord the coordinates of the point; coord[0] is compared with
     * the horizontal extent of each bar and coord[1] with its vertical extent.
     * @return the tool tip of the first bar that strictly contains the point,
     * or null if the point is not strictly inside any bar.
     */
    @Override
    public String getToolTip(double[] coord) {
        for (int i = 0; i < data.length; i++) {
            if (coord[0] < rightBottom[i][0] && coord[0] > leftBottom[i][0] && coord[1] < rightTop[i][1] && coord[1] > rightBottom[i][1]) {
                double lower = leftBottom[i][0];
                double upper = rightBottom[i][0];

                // Choose the number of decimals from the order of magnitude
                // of the bin width: none for wide bins, and one more digit
                // than the magnitude of the width for narrow bins.
                int precision = (int) Math.round(Math.log10(Math.abs(upper - lower)));
                if (precision > 0) {
                    precision = 0;
                } else {
                    precision = -precision + 1;
                }

                // Build the interval part of the format first; "%%" keeps a
                // literal '%' so the result is itself a format string.
                String format = String.format(" in [%%.%df, %%.%df]", precision, precision);
                if (prob) {
                    return String.format("%.1f%%" + format, 100.0 * data[i][1], lower, upper);
                } else {
                    return String.format("%d" + format, (int) data[i][1], lower, upper);
                }
            }
        }

        return null;
    }

    /**
     * Generate the histogram of k bins.
     *
     * @param data a sample set.
     * @param k the number of bins.
     * @param prob if true, the counts are divided by the sample size.
     * @return a n x 2 array of bin centers and bar heights.
     */
    private static double[][] histogram(int[] data, int k, boolean prob) {
        double[][] hist = smile.math.Histogram.histogram(data, k);
        // The number of bins may be extended to cover all data.
        return binCentersAndHeights(hist, hist[0].length, data.length, prob);
    }

    /**
     * Generate the histogram of k bins.
     *
     * @param data a sample set.
     * @param k the number of bins.
     * @param prob if true, the counts are divided by the sample size.
     * @return a n x 2 array of bin centers and bar heights.
     */
    private static double[][] histogram(double[] data, int k, boolean prob) {
        double[][] hist = smile.math.Histogram.histogram(data, k);
        // The number of bins may be extended to cover all data.
        return binCentersAndHeights(hist, hist[0].length, data.length, prob);
    }

    /**
     * Generate the histogram with the given breakpoints.
     *
     * @param data a sample set.
     * @param breaks an array of size k+1 giving the breakpoints between
     * histogram cells. Must be in ascending order.
     * @param prob if true, the counts are divided by the sample size.
     * @return a k x 2 array of bin centers and bar heights.
     * @throws IllegalArgumentException if breaks defines fewer than two bins.
     */
    private static double[][] histogram(int[] data, double[] breaks, boolean prob) {
        int k = requireBins(breaks);
        double[][] hist = smile.math.Histogram.histogram(data, breaks);
        return binCentersAndHeights(hist, k, data.length, prob);
    }

    /**
     * Generate the histogram with the given breakpoints.
     *
     * @param data a sample set.
     * @param breaks an array of size k+1 giving the breakpoints between
     * histogram cells. Must be in ascending order.
     * @param prob if true, the counts are divided by the sample size.
     * @return a k x 2 array of bin centers and bar heights.
     * @throws IllegalArgumentException if breaks defines fewer than two bins.
     */
    private static double[][] histogram(double[] data, double[] breaks, boolean prob) {
        int k = requireBins(breaks);
        double[][] hist = smile.math.Histogram.histogram(data, breaks);
        return binCentersAndHeights(hist, k, data.length, prob);
    }

    /**
     * Validates the breakpoints and returns the number of bins they define.
     *
     * @param breaks the breakpoints between histogram cells.
     * @return the number of bins, i.e. breaks.length - 1.
     * @throws IllegalArgumentException if the number of bins is not greater than 1.
     */
    private static int requireBins(double[] breaks) {
        int k = breaks.length - 1;
        if (k <= 1) {
            throw new IllegalArgumentException("Invalid number of bins: " + k);
        }
        return k;
    }

    /**
     * Converts the 3-row result of smile.math.Histogram into the n x 2
     * layout used by the bar plot.
     *
     * @param hist the raw histogram; rows 0 and 1 are averaged to obtain the
     * bin center (presumably the lower and upper bin edges) and row 2 is
     * used as the bin count.
     * @param k the number of bins to convert.
     * @param n the sample size used to normalize the counts.
     * @param prob if true, each count is divided by n.
     * @return a k x 2 array of bin centers and bar heights.
     */
    private static double[][] binCentersAndHeights(double[][] hist, int k, int n, boolean prob) {
        double[][] freq = new double[k][2];
        for (int i = 0; i < k; i++) {
            freq[i][0] = (hist[0][i] + hist[1][i]) / 2.0;
            // hist[2][i] is a double, so this is a floating point division.
            freq[i][1] = prob ? hist[2][i] / n : hist[2][i];
        }
        return freq;
    }

    /**
     * Creates a canvas sized to hold the given histogram and adds the
     * histogram to it. The y-axis starts at 0 and ends at the tallest bar.
     *
     * @param histogram the histogram to show.
     * @param id the id of the plot.
     * @param lower the lower bound of the x-axis.
     * @param upper the upper bound of the x-axis.
     * @return the canvas containing the histogram, with the vertical grid
     * lines of the x-axis hidden.
     */
    private static PlotCanvas newHistogramCanvas(Histogram histogram, String id, double lower, double upper) {
        histogram.setID(id);

        double[] lowerBound = {lower, 0};
        double[] upperBound = {upper, 0};
        for (double[] bin : histogram.getHistogram()) {
            if (bin[1] > upperBound[1]) {
                upperBound[1] = bin[1];
            }
        }

        PlotCanvas canvas = new PlotCanvas(lowerBound, upperBound);
        canvas.getAxis(0).setGridVisible(false);
        canvas.add(histogram);
        return canvas;
    }

    /**
     * Create a plot canvas with the histogram plot.
     * @param data a sample set.
     * @return the plot canvas.
     */
    public static PlotCanvas plot(double[] data) {
        return plot((String) null, data);
    }

    /**
     * Create a plot canvas with the histogram plot.
     * @param id the id of the plot.
     * @param data a sample set.
     * @return the plot canvas.
     */
    public static PlotCanvas plot(String id, double[] data) {
        return newHistogramCanvas(new Histogram(data), id, Math.min(data), Math.max(data));
    }

    /**
     * Create a plot canvas with the histogram plot of given data.
     * @param data a sample set.
     * @param k the number of bins.
     * @return the plot canvas.
     */
    public static PlotCanvas plot(double[] data, int k) {
        return plot(null, data, k);
    }

    /**
     * Create a plot canvas with the histogram plot of given data.
     * @param id the id of the plot.
     * @param data a sample set.
     * @param k the number of bins.
     * @return the plot canvas.
     */
    public static PlotCanvas plot(String id, double[] data, int k) {
        return newHistogramCanvas(new Histogram(data, k), id, Math.min(data), Math.max(data));
    }

    /**
     * Create a plot canvas with the histogram plot of given data.
     * @param data a sample set.
     * @param breaks an array of size k+1 giving the breakpoints between
     * histogram cells. Must be in ascending order.
     * @return the plot canvas.
     */
    public static PlotCanvas plot(double[] data, double[] breaks) {
        return plot(null, data, breaks);
    }

    /**
     * Create a plot canvas with the histogram plot of given data.
     * The y-axis will be in the probability scale.
     * @param id the id of the plot.
     * @param data a sample set.
     * @param breaks an array of size k+1 giving the breakpoints between
     * histogram cells. Must be in ascending order.
     * @return the plot canvas.
     */
    public static PlotCanvas plot(String id, double[] data, double[] breaks) {
        return plot(id, data, breaks, true);
    }

    /**
     * Create a plot canvas with the histogram plot of given data.
     * @param id the id of the plot.
     * @param data a sample set.
     * @param breaks an array of size k+1 giving the breakpoints between
     * histogram cells. Must be in ascending order.
     * @param prob if true, the y-axis will be in the probability scale.
     * Otherwise, y-axis will be in the frequency scale.
     * @return the plot canvas.
     */
    public static PlotCanvas plot(String id, double[] data, double[] breaks, boolean prob) {
        return newHistogramCanvas(new Histogram(data, breaks, prob), id, Math.min(data), Math.max(data));
    }

    /**
     * Create a plot canvas with the histogram plot.
     * @param data a sample set.
     * @return the plot canvas.
     */
    public static PlotCanvas plot(int[] data) {
        return plot((String) null, data);
    }

    /**
     * Create a plot canvas with the histogram plot.
     * The x-axis is padded by 0.5 on both sides of the data range.
     * @param id the id of the plot.
     * @param data a sample set.
     * @return the plot canvas.
     */
    public static PlotCanvas plot(String id, int[] data) {
        return newHistogramCanvas(new Histogram(data), id, Math.min(data) - 0.5, Math.max(data) + 0.5);
    }

    /**
     * Create a plot canvas with the histogram plot of given data.
     * @param data a sample set.
     * @param k the number of bins.
     * @return the plot canvas.
     */
    public static PlotCanvas plot(int[] data, int k) {
        return plot(null, data, k);
    }

    /**
     * Create a plot canvas with the histogram plot of given data.
     * The x-axis is padded by 0.5 on both sides of the data range.
     * @param id the id of the plot.
     * @param data a sample set.
     * @param k the number of bins.
     * @return the plot canvas.
     */
    public static PlotCanvas plot(String id, int[] data, int k) {
        return newHistogramCanvas(new Histogram(data, k), id, Math.min(data) - 0.5, Math.max(data) + 0.5);
    }

    /**
     * Create a plot canvas with the histogram plot of given data.
     * @param data a sample set.
     * @param breaks an array of size k+1 giving the breakpoints between
     * histogram cells. Must be in ascending order.
     * @return the plot canvas.
     */
    public static PlotCanvas plot(int[] data, double[] breaks) {
        return plot(null, data, breaks);
    }

    /**
     * Create a plot canvas with the histogram plot of given data.
     * The y-axis will be in the probability scale.
     * @param id the id of the plot.
     * @param data a sample set.
     * @param breaks an array of size k+1 giving the breakpoints between
     * histogram cells. Must be in ascending order.
     * @return the plot canvas.
     */
    public static PlotCanvas plot(String id, int[] data, double[] breaks) {
        return plot(id, data, breaks, true);
    }

    /**
     * Create a plot canvas with the histogram plot of given data.
     * The x-axis is padded by 0.5 on both sides of the data range.
     * @param id the id of the plot.
     * @param data a sample set.
     * @param breaks an array of size k+1 giving the breakpoints between
     * histogram cells. Must be in ascending order.
     * @param prob if true, the y-axis will be in the probability scale.
     * Otherwise, y-axis will be in the frequency scale.
     * @return the plot canvas.
     */
    public static PlotCanvas plot(String id, int[] data, double[] breaks, boolean prob) {
        return newHistogramCanvas(new Histogram(data, breaks, prob), id, Math.min(data) - 0.5, Math.max(data) + 0.5);
    }
}