package com.compomics.util.math.statistics.linear_regression.filters;
import com.compomics.util.math.BasicMathFunctions;
import com.compomics.util.math.statistics.distributions.NonSymmetricalNormalDistribution;
import java.util.ArrayList;
import java.util.HashMap;
/**
 * Filter removing points with outlying slope.
 *
 * @author Marc Vaudel
 */
public class ProbabilityFilter {

    /**
     * Returns a list containing first the filtered xs and then the filtered ys.
     * Doublets are filtered to the given (non cumulative) probability p of
     * belonging to the distribution of points based on the distance to the
     * median regression.
     *
     * For every point a slope is computed relative to an anchor: the first
     * quartiles of x and y for points at or above the median of x, the third
     * quartiles for points below it. The deviation of every point from the
     * median slope is then computed, a robust non symmetrical normal
     * distribution is fitted on these deviations, and only the points whose
     * deviation lies between the minimal and maximal values returned by the
     * distribution for the probability 1 - p are retained.
     *
     * Points whose x coordinate coincides with their anchor quartile have no
     * defined slope; they are excluded from the statistics and from the
     * result.
     *
     * @param x x series
     * @param y y series
     * @param p the (non cumulative) probability to use for filtering, e.g. 0.95
     * for 95%
     *
     * @return a filtered list of x and y
     *
     * @throws IllegalArgumentException if x or y is null, if they have
     * different sizes, if they contain 10 points or fewer, or if all x values
     * are identical
     */
    public static ArrayList<ArrayList<Double>> getFilteredInput(ArrayList<Double> x, ArrayList<Double> y, double p) {

        if (x == null) {
            throw new IllegalArgumentException("null given as x for filtering.");
        }
        if (y == null) {
            throw new IllegalArgumentException("null given as y for filtering.");
        }
        if (x.size() != y.size()) {
            throw new IllegalArgumentException("Attempting to perform filtering of lists of different sizes.");
        }
        int n = x.size();
        if (n <= 10) {
            throw new IllegalArgumentException("Attempting to perform filtering of a vectore of size " + n + ". 10 minimum, >100 advised.");
        }

        // Robust anchors used to compute the slope of every point
        double medianX = BasicMathFunctions.median(x);
        double quantileX1 = BasicMathFunctions.percentile(x, 0.25);
        double quantileX2 = BasicMathFunctions.percentile(x, 0.75);
        double quantileY1 = BasicMathFunctions.percentile(y, 0.25);
        double quantileY2 = BasicMathFunctions.percentile(y, 0.75);

        // The list feeds the median, the map keeps the link to the index of the point in x and y
        ArrayList<Double> slopes = new ArrayList<Double>(n);
        HashMap<Integer, Double> slopesMap = new HashMap<Integer, Double>(n);
        Double x0 = x.get(0);
        boolean newX = false;
        for (int i = 0; i < n; i++) {
            Double xi = x.get(i);
            if (!newX && !xi.equals(x0)) {
                newX = true;
            }
            double deltaX = getDeltaX(xi, medianX, quantileX1, quantileX2);
            // A null distance to the anchor would yield an infinite or NaN slope
            // corrupting the median and the fitted distribution: skip the point.
            if (deltaX > 0) {
                double yi = y.get(i);
                double deltaY = xi >= medianX ? yi - quantileY1 : quantileY2 - yi;
                double slope = deltaY / deltaX;
                slopes.add(slope);
                slopesMap.put(i, slope);
            }
        }
        if (!newX) {
            throw new IllegalArgumentException("Attempting to perform the linear regression of a vertical line or a point.");
        }

        double medianSlope = BasicMathFunctions.median(slopes);

        // Deviation of every point with a slope: difference between the squared
        // y offset of the point and the squared y offset given by the median slope
        ArrayList<Double> deviationsSquare = new ArrayList<Double>(slopes.size());
        HashMap<Integer, Double> deviationsSquareMap = new HashMap<Integer, Double>(slopes.size());
        for (int i = 0; i < n; i++) {
            Double slope = slopesMap.get(i);
            if (slope != null) {
                double deltaX = getDeltaX(x.get(i), medianX, quantileX1, quantileX2);
                double yMedian = medianSlope * deltaX;
                double yi = slope * deltaX;
                double deviationSquare = (yi * yi) - (yMedian * yMedian);
                deviationsSquare.add(deviationSquare);
                deviationsSquareMap.put(i, deviationSquare);
            }
        }

        NonSymmetricalNormalDistribution slopeDistribution = NonSymmetricalNormalDistribution.getRobustNonSymmetricalNormalDistribution(deviationsSquare);
        double threshold = 1 - p;
        double deviationMax = slopeDistribution.getMaxValueForProbability(threshold);
        double deviationMin = slopeDistribution.getMinValueForProbability(threshold);

        ArrayList<Double> filteredX = new ArrayList<Double>(n);
        ArrayList<Double> filteredY = new ArrayList<Double>(n);
        for (int i = 0; i < n; i++) {
            // The bounds are deviations, so the deviation of the point is the
            // value to compare, not its slope. Points without slope have no
            // deviation and are left out.
            Double deviation = deviationsSquareMap.get(i);
            if (deviation != null && deviation >= deviationMin && deviation <= deviationMax) {
                filteredX.add(x.get(i));
                filteredY.add(y.get(i));
            }
        }

        ArrayList<ArrayList<Double>> result = new ArrayList<ArrayList<Double>>(2);
        result.add(filteredX);
        result.add(filteredY);
        return result;
    }

    /**
     * Returns the x distance between a point and the quartile used as anchor
     * for its slope: the first quartile for points at or above the median,
     * the third quartile for points below the median.
     *
     * @param xi the x value of the point
     * @param medianX the median of the x series
     * @param quantileX1 the first quartile of the x series
     * @param quantileX2 the third quartile of the x series
     *
     * @return the x distance between the point and its anchor
     */
    private static double getDeltaX(double xi, double medianX, double quantileX1, double quantileX2) {
        return xi >= medianX ? xi - quantileX1 : quantileX2 - xi;
    }
}