/*
* Copyright (c) 2012 Diamond Light Source Ltd.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*/
package uk.ac.diamond.scisoft.analysis.dataset.function;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.eclipse.dawnsci.analysis.dataset.impl.function.DatasetToDatasetFunction;
import org.eclipse.january.dataset.Comparisons;
import org.eclipse.january.dataset.Dataset;
import org.eclipse.january.dataset.DatasetFactory;
import org.eclipse.january.dataset.DatasetUtils;
import org.eclipse.january.dataset.DoubleDataset;
import org.eclipse.january.dataset.IDataset;
import org.eclipse.january.dataset.IndexIterator;
import org.eclipse.january.dataset.IntegerDataset;
import org.eclipse.january.dataset.Maths;
/**
 * Find histogram of each dataset and return pairs of 1D integer dataset of bin counts
 * and 1D double dataset of bin edges (including rightmost edge).
 * <p>
 * Every bin is the half-open interval {@code [edge[i], edge[i+1])}; a value exactly equal to the
 * rightmost edge is counted in the last bin. Values outside the edge range are outliers: they are
 * either ignored or accumulated into the first/last bin. NaN values are never counted.
 * <p>
 * By default, outliers are ignored.
 */
public class Histogram implements DatasetToDatasetFunction {
    private final int nbins;
    private boolean ignoreOutliers = true;
    // most recent explicitly requested range (null when the range is derived from the data)
    private Double min = null;
    private Double max = null;
    // fixed bin edges; null means edges are derived from each input dataset's own min/max
    private DoubleDataset bins = null;
    // true when bins are equally spaced so the bin index can be computed arithmetically
    private boolean useEqualSpanBins = true;

    /**
     * Constructor of the Histogram. Bin edges are derived from the minimum and maximum
     * of each dataset that is histogrammed.
     * @param numBins number of bins
     * @throws IllegalArgumentException if number of bins is not positive
     */
    public Histogram(int numBins) {
        if (numBins < 1) {
            throw new IllegalArgumentException("Number of bins must be positive");
        }
        nbins = numBins;
    }

    /**
     * Constructor of the Histogram
     * @param numBins number of bins
     * @param lower minimum value of histogram range
     * @param upper maximum value of histogram range
     * @throws IllegalArgumentException if number of bins is not positive or lower is greater than upper
     */
    public Histogram(int numBins, double lower, double upper) {
        this(numBins);
        if (lower > upper) {
            throw new IllegalArgumentException("Given lower bound was higher than upper bound");
        }
        min = lower;
        max = upper;
        bins = createEdges(lower, upper);
    }

    /**
     * Constructor of the Histogram
     * @param numBins number of bins
     * @param lower minimum value of histogram range
     * @param upper maximum value of histogram range
     * @param ignore if true, outliers will be ignored
     * @throws IllegalArgumentException if number of bins is not positive or lower is greater than upper
     */
    public Histogram(int numBins, double lower, double upper, boolean ignore) {
        this(numBins, lower, upper);
        ignoreOutliers = ignore;
    }

    /**
     * Constructor of the Histogram, ignoring outliers
     * @param edges bin edges including rightmost edge
     */
    public Histogram(IDataset edges) {
        this(edges, true);
    }

    /**
     * Constructor of the Histogram
     * @param edges bin edges including rightmost edge
     * @param ignore if true, outliers will be ignored
     * @throws IllegalArgumentException if edges are not a 1D dataset of at least two values
     * in increasing order
     */
    public Histogram(IDataset edges, boolean ignore) {
        if (edges.getRank() != 1) {
            throw new IllegalArgumentException("Bin edges should be given as 1D dataset");
        }
        if (edges.getSize() < 2) {
            throw new IllegalArgumentException("At least two bin edges are needed to define a bin");
        }
        bins = (DoubleDataset) DatasetUtils.cast(DatasetUtils.convertToDataset(edges), Dataset.FLOAT64);

        // check for increasing order
        Dataset sorted = DatasetUtils.sort(bins);
        if (!Comparisons.allTrue(Comparisons.almostEqualTo(bins, sorted, 1e-8, 1e-8))) {
            throw new IllegalArgumentException("Bin edges should be given in increasing order");
        }

        // check for equal spans: second differences of equally spaced edges are all (almost) zero
        Dataset diff = Maths.difference(bins, 2, 0);
        useEqualSpanBins = Comparisons.allTrue(Comparisons.almostEqualTo(diff, 0, 1e-8, 1e-8));

        nbins = edges.getSize() - 1;
        ignoreOutliers = ignore;
    }

    /**
     * Histogram each input dataset.
     * <p>
     * When no range or edges have been set, the edges are derived from each dataset's own
     * minimum and maximum; this does not alter the state of this object so repeated calls
     * are independent of one another.
     * @param datasets input datasets
     * @return a list of 1D datasets which are histograms and bins (one pair per input, in order),
     * or null if no datasets are given
     */
    @Override
    public List<Dataset> value(IDataset... datasets) {
        if (datasets == null || datasets.length == 0) {
            return null; // kept as null (rather than an empty list) for backward compatibility
        }

        final List<Dataset> result = new ArrayList<>(2 * datasets.length);
        for (IDataset ds : datasets) {
            final DoubleDataset binEdges = bins != null ? bins
                    : createEdges(ds.min().doubleValue(), ds.max().doubleValue());
            result.add(count(ds, binEdges));
            result.add(binEdges);
        }
        return result;
    }

    /**
     * Create equally spaced bin edges (including rightmost edge) for the configured number of bins.
     */
    private DoubleDataset createEdges(double lower, double upper) {
        return (DoubleDataset) DatasetFactory.createLinearSpace(lower, upper, nbins + 1, Dataset.FLOAT64);
    }

    /**
     * Count the values of a dataset that fall in each bin defined by the given edges.
     */
    private IntegerDataset count(IDataset ds, DoubleDataset binEdges) {
        final double[] edges = binEdges.getData();
        final double lo = edges[0];
        final double hi = edges[nbins];
        final double span = (hi - lo) / nbins;

        final IntegerDataset histo = DatasetFactory.zeros(IntegerDataset.class, nbins);
        final int[] h = histo.getData();

        // degenerate range (e.g. constant dataset): every element is assigned to the first bin
        final boolean degenerate = useEqualSpanBins ? span <= 0 : lo >= hi;
        if (degenerate) {
            h[0] = ds.getSize();
            return histo;
        }

        final int last = nbins - 1;
        final Dataset a = DatasetUtils.convertToDataset(ds);
        final IndexIterator iter = a.getIterator();
        while (iter.hasNext()) {
            final double val = a.getElementDoubleAbs(iter.index);
            if (Double.isNaN(val)) {
                // NaN belongs to no bin: a cast to int would yield 0 and a binary search
                // would place it beyond the last edge
                continue;
            }

            if (val < lo) {
                if (!ignoreOutliers) {
                    h[0]++;
                }
            } else if (val >= hi) {
                // the rightmost edge itself is part of the last bin
                if (val == hi || !ignoreOutliers) {
                    h[last]++;
                }
            } else if (useEqualSpanBins) {
                // rounding in the division can give nbins for values just below hi so clamp
                h[Math.min((int) ((val - lo) / span), last)]++;
            } else {
                // search for correct bin: exact match gives the bin starting at that edge,
                // otherwise the result encodes the insertion point as -(point) - 1
                final int b = Arrays.binarySearch(edges, val);
                h[b >= 0 ? b : -b - 2]++;
            }
        }
        return histo;
    }

    /**
     * Set minimum and maximum edges of histogram bins. This replaces any existing bin
     * edges with equally spaced ones.
     * @param min minimum value of histogram range
     * @param max maximum value of histogram range
     * @throws IllegalArgumentException if min is greater than max
     */
    public void setMinMax(double min, double max) {
        if (min > max) {
            throw new IllegalArgumentException("Given lower bound was higher than upper bound");
        }
        this.min = min;
        this.max = max;
        bins = createEdges(min, max);
        useEqualSpanBins = true; // new edges are a linear space regardless of earlier edges
    }

    /**
     * Set histogram's outliers handling
     * @param b if true, then ignore values that lie outside minimum and maximum bin edges
     */
    public void setIgnoreOutliers(boolean b) {
        ignoreOutliers = b;
    }
}