/** * Copyright 2000-2009 DFKI GmbH. * All Rights Reserved. Use is subject to license terms. * * This file is part of MARY TTS. * * MARY TTS is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, version 3 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * */ package marytts.util.math; import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintWriter; import java.text.DecimalFormat; public class Histogram { // private data used internally by this class. private double[] m_hist; private double[] m_data; private double[] m_binCenters; private String m_name; private double m_min; private double m_max; private int m_nbins; private int m_entries; private double m_overflow; private double m_underflow; private boolean m_debug; private double m_bandwidth; public Histogram(double[] data) { double min = MathUtils.min(data); double max = MathUtils.max(data); setHistogram(data, 15, min, max); } /** * A simple constructor * * @param data * data * @param nbins * nbins */ public Histogram(double[] data, int nbins) { double min = MathUtils.min(data); double max = MathUtils.max(data); setHistogram(data, nbins, min, max); } /** * Constructor which sets name, number of bins, and range. * * @param data * samples * @param nbins * the number of bins the histogram should have. The range specified by min and max will be divided up into this * many bins. * @param min * the minimum of the range covered by the histogram bins * @param max * the maximum value of the range covered by the histogram bins */ public Histogram(double[] data, int nbins, double min, double max) { setHistogram(data, nbins, min, max); } /** * Settings to Histogram * * @param data * data * @param nbins * nbins * @param min * min * @param max * max */ public void setHistogram(double[] data, int nbins, double min, double max) { m_nbins = nbins; m_min = min; m_max = max; m_data = data; m_hist = new double[m_nbins]; m_binCenters = new double[m_nbins]; m_underflow = 0; m_overflow = 0; setBandWidth(); for (double x : m_data) { fill(x); } m_binCenters = this.setSampleArray(); } public void changeSettings(int nbins) { m_nbins = nbins; m_binCenters = new double[m_nbins]; m_hist = new double[m_nbins]; setBandWidth(); for (double x : m_data) { fill(x); } m_binCenters = this.setSampleArray(); } /** * Enter data into the histogram. The fill method takes the given value, works out which bin this corresponds to, and * increments this bin by one. * * @param x * is the value to add in to the histogram */ public void fill(double x) { // use findBin method to work out which bin x falls in BinInfo bin = findBin(x); // check the result of findBin in case it was an overflow or underflow if (bin.isUnderflow) { m_underflow++; } if (bin.isOverflow) { m_overflow++; } if (bin.isInRange) { m_hist[bin.index]++; } // print out some debug information if the flag is set if (m_debug) { System.out.println("debug: fill: value " + x + " # underflows " + m_underflow + " # overflows " + m_overflow + " bin index " + bin.index); } // count the number of entries made by the fill method m_entries++; } /** * Private class used internally to store info about which bin of the histogram to use for a number to be filled. */ private class BinInfo { public int index; public boolean isUnderflow; public boolean isOverflow; public boolean isInRange; } /** * Private internal utility method to figure out which bin of the histogram a number falls in. * * @return info on which bin x falls in. */ private BinInfo findBin(double x) { BinInfo bin = new BinInfo(); bin.isInRange = false; bin.isUnderflow = false; bin.isOverflow = false; // first check if x is outside the range of the normal histogram bins if (x < m_min) { bin.isUnderflow = true; } else if (x > m_max) { bin.isOverflow = true; } else { // search for histogram bin into which x falls double binWidth = this.getBandWidth(); // (m_max - m_min)/m_nbins; for (int i = 0; i < m_nbins; i++) { double highEdge = m_min + (i + 1) * binWidth; if (x <= highEdge) { bin.isInRange = true; bin.index = i; break; } } } return bin; } public void setBandWidth() { m_bandwidth = (m_max - m_min) / m_nbins; } public double getBandWidth() { return m_bandwidth; } /** * Save the histogram data to a file. The file format is very simple, human-readable text so it can be imported into Excel or * cut and pasted into other applications. * * @param fileName * name of the file to write the histogram to. Note this must be valid for your operating system, e.g. a unix * filename might not work under windows * @exception IOException * if file cannot be opened or written to. */ public void writeToFile(String fileName) throws IOException { PrintWriter outfile = new PrintWriter(new FileOutputStream(fileName)); outfile.println("// Output from Histogram class"); outfile.println("// metaData: "); // outfile.println("name \"" + m_name + "\""); outfile.println("bins " + m_nbins); outfile.println("min " + m_min); outfile.println("max " + m_max); outfile.println("totalEntries " + m_entries); outfile.println("underflow " + m_underflow); outfile.println("overflow " + m_overflow); outfile.println("// binData:"); for (int i = 0; i < m_nbins; i++) { outfile.println(i + " " + m_binCenters[i] + " " + m_hist[i]); } outfile.println("// end."); outfile.close(); } /** * Print the histogram data to the console. Output is only basic, intended for debugging purposes. A good example of formatted * output. */ public void show() { DecimalFormat df = new DecimalFormat(" ##0.00;-##0.00"); double binWidth = (m_max - m_min) / m_nbins; // System.out.println ("Histogram \"" + m_name + // "\", " + m_entries + " entries"); System.out.println(" bin range height"); for (int i = 0; i < m_nbins; i++) { double binLowEdge = m_min + i * binWidth; double binHighEdge = binLowEdge + binWidth; System.out.println(df.format(binLowEdge) + " to " + df.format(binHighEdge) + " " + df.format(m_hist[i])); } } public double[] setSampleArray() { double[] binCenters = new double[m_nbins]; double binWidth = (m_max - m_min) / m_nbins; for (int i = 0; i < m_nbins; i++) { double binLowEdge = m_min + i * binWidth; double binHighEdge = binLowEdge + binWidth; binCenters[i] = (binLowEdge + binHighEdge) / 2.0; } return binCenters; } /** * Get number of entries in the histogram. This should correspond to the number of times the fill method has been used. * * @return number of entries */ public int entries() { return m_entries; } /** * Get the name of the histogram. The name is an arbitrary label for the user, and is set by the constructor. * * @return histogram name */ public String name() { return m_name; } /** * Get the number of bins in the histogram. The range of the histogram defined by min and max is divided into this many bins. * * @return number of bins */ public int numberOfBins() { return m_nbins; } /** * Get lower end of histogram range * * @return minimum x value covered by histogram */ public double min() { return m_min; } public double mean() { return MathUtils.mean(m_data); } public double variance() { return MathUtils.variance(m_data); } public double stdDev() { return MathUtils.standardDeviation(m_data); } /** * Get upper end of histogram range * * @return maximum x value covered by histogram */ public double max() { return m_max; } /** * Get the height of the overflow bin. Any value passed to the fill method which falls above the range of the histogram will * be counted in the overflow bin. * * @return number of overflows */ public double overflow() { return m_overflow; } /** * Get the height of the underflow bin. Any value passed to the fill method which falls below the range of the histogram will * be counted in the underflow bin. * * @return number of underflows */ public double underflow() { return m_underflow; } /** * This method gives you the bin contents in the form of an array. It might be useful for example if you want to use the * histogram in some other way, for example to pass to a plotting package. * * @return array of bin heights */ public double[] getHistArray() { return m_hist; } public double[] getSampleArray() { return m_binCenters; } public double[] getDataArray() { return m_data; } /** * Set debug flag. * * @param flag * debug flag (true or false) */ public void setDebug(boolean flag) { m_debug = flag; } /** * Get debug flag. * * @return value of debug flag (true or false) */ public boolean getDebug() { return m_debug; } }