package com.compomics.util.math.statistics.linear_regression.filters;
import com.compomics.util.math.BasicMathFunctions;
import java.util.ArrayList;
import java.util.Collections;
/**
* Filters by binning.
*
* @author Marc Vaudel
*/
public class BinningFilter {
/**
* Returns a list containing first the filtered xs and then the filtered ys.
* The filtered values correspond to the median of the points grouped in the
* given number of bins.
*
* @param x x series
* @param y y series
* @param nBins the number of bins to create
*
* @return a filtered list of x and y
*/
public static ArrayList<ArrayList<Double>> getFilteredInputFixedBins(ArrayList<Double> x, ArrayList<Double> y, int nBins) {
if (nBins < 2) {
throw new IllegalArgumentException("The number of bins must be greater than 1.");
}
if (x == null) {
throw new IllegalArgumentException("null given as x for filtering.");
}
if (y == null) {
throw new IllegalArgumentException("null given as y for filtering.");
}
if (x.size() != y.size()) {
throw new IllegalArgumentException("Attempting to perform filtering of lists of different sizes.");
}
int n = x.size();
if (n <= nBins) {
throw new IllegalArgumentException("Vector size (" + n + ") smaller than number of bins (" + nBins + ").");
}
int binSize = n / nBins;
int rest = n - (binSize * nBins);
return getFilteredInput(x, y, binSize, rest);
}
/**
* Returns a list containing first the filtered xs and then the filtered ys.
* The filtered values correspond to the median of the points grouped in the
* given bin size.
*
* @param x x series
* @param y y series
* @param binSize the maximal bin size
*
* @return a filtered list of x and y
*/
public static ArrayList<ArrayList<Double>> getFilteredInputFixedBinsSize(ArrayList<Double> x, ArrayList<Double> y, int binSize) {
if (binSize < 1) {
throw new IllegalArgumentException("Bin size must be greater than 1.");
}
if (x == null) {
throw new IllegalArgumentException("null given as x for filtering.");
}
if (y == null) {
throw new IllegalArgumentException("null given as y for filtering.");
}
if (x.size() != y.size()) {
throw new IllegalArgumentException("Attempting to perform filtering of lists of different sizes.");
}
int n = x.size();
int nBins = n / binSize;
if (nBins < 2) {
throw new IllegalArgumentException("Vector size (" + n + ") does not allow having more than two bins of size (" + binSize + ").");
}
int rest = n - (binSize * nBins);
return getFilteredInput(x, y, binSize, rest);
}
/**
* Filters the input vectors x and y in bins of size binSize with the rest
* distributed in the first bins.
*
* @param x x series
* @param y y series
* @param binSize the bin size
* @param rest the rest to distribute
*
* @return a filtered list of x and y
*/
private static ArrayList<ArrayList<Double>> getFilteredInput(ArrayList<Double> x, ArrayList<Double> y, int binSize, int rest) {
int currentBin = 0;
ArrayList<Double> sortedX = new ArrayList<Double>(x);
Collections.sort(sortedX);
ArrayList<Double> currentX = new ArrayList<Double>(binSize + 1);
ArrayList<Double> currentY = new ArrayList<Double>(binSize + 1);
ArrayList<Double> filteredX = new ArrayList<Double>(x.size());
ArrayList<Double> filteredY = new ArrayList<Double>(y.size());
Double x0 = x.get(0);
boolean newX = false;
for (int i = 0; i < x.size(); i++) {
Double xi = x.get(i);
if (!newX && !xi.equals(x0)) {
newX = true;
}
Double yi = y.get(i);
currentX.add(xi);
currentY.add(yi);
int limit = binSize;
if (currentBin < rest) {
limit += 1;
}
if (currentX.size() == limit) {
Double xMedian = BasicMathFunctions.medianSorted(currentX);
Double yMedian = BasicMathFunctions.median(currentY);
filteredX.add(xMedian);
filteredY.add(yMedian);
currentX.clear();
currentY.clear();
currentBin++;
}
}
if (!newX) {
throw new IllegalArgumentException("Attempting to perform the linear regression of a vertical line or a point.");
}
if (!currentX.isEmpty()) {
throw new IllegalArgumentException("Not all points in bins.");
}
ArrayList<ArrayList<Double>> result = new ArrayList<ArrayList<Double>>(2);
result.add(filteredX);
result.add(filteredY);
return result;
}
}