package uk.ac.diamond.scisoft.analysis.peakfinding.peakfinders; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.TreeMap; import org.apache.commons.math3.stat.descriptive.rank.Percentile; import org.eclipse.dawnsci.analysis.dataset.impl.Signal; import org.eclipse.january.dataset.Comparisons; import org.eclipse.january.dataset.Dataset; import org.eclipse.january.dataset.DatasetFactory; import org.eclipse.january.dataset.DatasetUtils; import org.eclipse.january.dataset.IDataset; import org.eclipse.january.dataset.IndexIterator; import org.eclipse.january.dataset.Maths; import org.eclipse.january.dataset.Slice; import uk.ac.diamond.scisoft.analysis.peakfinding.MexicanHatWavelet; /** * * * Performs a continuous wavelet transformation peak finding algorithm. * * This requires a wavelet function to base of the peak finding. * In this a Ricker(MexicanHat) Wavelet is used. * * Increasing widths will comb for more peaks and decreasing tends to decrease peaks. * * Works best for finding peak with initial sharp area and irregular patterns. * For example mass spectrometry data or powder diffraction data. * Where a sharp initial peak area is present. * As oppose to most peak finding algorithms being adept to sinusoidal consistent market applications. * * NOTE: Peak finding is tricky. To find the RIGHT peaks in the RIGHT place is difficult to categorise. * These solutions are only estimates considering the incoming data. * * The defaults being used are based of python.scipy.waveletPeakFInd: * -gapThresh = first of widths[0] * -maxDistances = widths/ 4.0 * -wavelet -> ricket (because thats the one that has been implemented ) * * * References - * [1] Bioinformatics (2006) 22 (17): 2059-2065. * :doi:`10.1093/bioinformatics/btl355` * http://bioinformatics.oxfordjournals.org/content/22/17/2059.long * * @author Dean P. Ottewell * * * * TODO'S * TODO: this might be due to a ignore of the first unit... there is a initial added 0,0 that is passed that should not be analysed * TODO: fixed the algorithm is probably horribly ineffienct. Move to lazy dataset and point at problem areas. Perhaps functionalize to free up whats loaded. * TODO: FUNCTIONALISE REFACTOR - I say that lots, but what I mean is those functions for filtering and generating a matrix got out of hand. * */ public class WaveletTransformPeakFinds extends AbstractPeakFinder { public final static String NAME = "WaveletTransformPeaks"; /*PARAMETER NAMES*/ //TODO: because of the way the dynamcially loaded configurables exist a neater name should be set here private String WIDTHSIZENAME = "Conolve Width Size"; private String MINSNRATIO = "Minimum signal to noise ratio";// ; private String NOISEPERCNAME = "Noise percentile";//=10; private String MINLENGTHNAME = "Minimum ridge length"; private String GAPTHRESHNAME = "Gap threshold"; /* * The width size the convulation of signal data will step through. * * 1 to n. */ private double widthSzParam; private double minSNR;// =1.0; private double noisePerc;//=10; private double minLength; private double gapThresh; /* * Ridgeline class structure to help with manipulation understanding */ class RidgeLine { public List<Integer> row = new ArrayList<Integer>(); public List<Integer> col = new ArrayList<Integer>(); public double gapVal = 0; } public void setWidthParam(double widthParam) { this.widthSzParam = widthParam; } @Override protected void setName() { this.name = NAME; } public void loadParam() { try { widthSzParam = (double) getParameterValue(WIDTHSIZENAME); minSNR = (double) getParameterValue(MINSNRATIO); noisePerc = (double) getParameterValue(NOISEPERCNAME); minLength = (double) getParameterValue(MINLENGTHNAME); gapThresh = (double) getParameterValue(GAPTHRESHNAME); } catch (Exception e) { logger.error("Could not find specified peak finding parameters"); } } public WaveletTransformPeakFinds() { super(); try { initialiseParameter(WIDTHSIZENAME, false, 1.0); initialiseParameter(MINSNRATIO, false, 1.0); initialiseParameter(NOISEPERCNAME, false, 10.0); initialiseParameter(MINLENGTHNAME, false, 0.0); initialiseParameter(GAPTHRESHNAME, false, 2.0); } catch (Exception e) { System.out.println(e); logger.error("Problem initialising " + this.getName() + " peak finder: e"); } loadParam(); } /** * * Performs a continuous wavelet transform. Components being a conv of wavelet data dependent on width[i] with the original data. * * @param xData axis * @param yData axis (match xData shape) * @param widths to convolve over * * * @return Dataset of 2D (0 - col, 1 - row) matrix to id ridge lines on. */ public Dataset countinousWaveletTransform(IDataset yData, IDataset widths) { int dataSz = yData.getSize(); Dataset output = DatasetFactory.zeros(new int[] { widths.getSize(), dataSz }, Dataset.FLOAT64); for (int width = 0; width < widths.getSize(); ++width) { double w = 10 * widths.getDouble(width); //widths[width]; double nPoints = dataSz > w ? w : yData.getSize(); MexicanHatWavelet wave = new MexicanHatWavelet(nPoints, widths.getDouble(width));//widths[width]); Dataset points = DatasetFactory.createRange(nPoints); Dataset waveletData = wave.calculateValues(points); //TODO: were missing a initial point as seen in the python implementation. //This is a problem with the convoleTOSameShape function. Flat line should convolve... //However, zero points does work according to the unit tests... Dataset convl = Signal.convolveToSameShape((Dataset) yData, waveletData, null); // might get away with null.. output.setSlice(convl, new Slice(width,width+1)); } return output; } /** * Creates a 2D relative extreme matrix determining which points could be considered extremes * * @param mtx 2D dataset * @param axis to determine the relative extremes on * * @return boolean matrix that is True with each point classed as a extrema, Shape matches @param mtx */ private Dataset boolRelativeExtrema(Dataset mtx, int axis) { int order = 1; //The windows size to check against int size = mtx.getShape()[axis]; Dataset locs = DatasetFactory.createRange(size, Dataset.INT64); Dataset resultsMtx = DatasetFactory.ones(mtx, Dataset.INT64); Dataset main = DatasetUtils.take(mtx,locs, axis); for (int shift = 1; shift < order + 1; ++shift) { Dataset above = DatasetUtils.take(mtx, Maths.add(locs,shift), axis); Dataset below = DatasetUtils.take(mtx, Maths.subtract(locs, shift), axis); // TODO: // Compare values and set true results value true - Dataset cmp = null; cmp = Comparisons.greaterThan(main, above); resultsMtx = Maths.bitwiseAnd(resultsMtx, cmp.cast(Dataset.INT64)); cmp = Comparisons.greaterThan(main, below); resultsMtx = Maths.bitwiseAnd(resultsMtx, cmp.cast(Dataset.INT64)); // if (Maths.bitwiseInvert(resultsMtx).any()) // return resultsMtx; } return resultsMtx; } /** * @param convolveMtx * 2D (0 - col, 1 - row) matrix to id ridge lines on. THis is * typically the result of the convolution of wavelet and signal * data * @param maxDistances * between points to connect the dots in becoming lines. Must * have at least as many rows as input @param convoleMtx * @param gapThresh * the threshold by which a connection between ridge lines would not be made * * @return needs to return back a set of x,y coords but also there is a gap * number given that is then used in filterRidgesToPeaks * */ private List<IDataset> identifyRidgeLines(Dataset convolveMtx, IDataset maxDistances, double gapThresh) { /*Gather extrame based on neighbour values away from a given order*/ Dataset allRelMaxCols = boolRelativeExtrema(convolveMtx, 1); int startRow = allRelMaxCols.getShape()[0] -1; List<RidgeLine> ridgeLines = new ArrayList<RidgeLine>(); Dataset ridgeStartSlice = allRelMaxCols.getSlice(new Slice(startRow,startRow+1)); /* * Generate a slice based on the starting point. Based on these create starting ridges to then be adjusted and filtered on. * */ IndexIterator all = ridgeStartSlice.getIterator(); while (all.hasNext()) { int pos = all.index; if (ridgeStartSlice.getElementBooleanAbs(pos)) { RidgeLine ridge = new RidgeLine(); ridge.row.add(startRow); ridge.col.add(all.index); ridgeLines.add(ridge); } } List<RidgeLine> finalRidges = new ArrayList<RidgeLine>(); Dataset rows = (startRow > 1) ? DatasetFactory.createRange(startRow, -1, -1,Dataset.INT64) : DatasetFactory.createRange(0); Dataset cols = convolveMtx.all(0).getIndices(); /* * The bulk of the conditioning. Will go through each ridge line attempting to connect with neighbours or other rows. * */ IndexIterator rowItr = rows.getIterator(); while (rowItr.hasNext()) { int rowIdx = rows.getInt(rowItr.index); //Get all true max vol Dataset thisRow = allRelMaxCols.getSlice(new Slice(rowIdx, rowIdx+1)); Dataset thisMaxCols = cols.getByBoolean(thisRow); // Increment all gap values for (RidgeLine lineIt : ridgeLines) { lineIt.gapVal += 1; } // XXX these should always be allMaxCols[row] List<Integer> prevRidgesCols = new ArrayList<Integer>(); for (RidgeLine lineItr : ridgeLines) { List<Integer> ridgeCols = lineItr.col; int col = ridgeCols.get(ridgeCols.size()-1); prevRidgesCols.add(col); } // Look through every relative maximum found at current row // attempt to connect them with existing ridge lines IndexIterator colItr = thisMaxCols.getIterator(); while (colItr.hasNext()) { // if previous ridge line within the maxDistance to connect to, // do so otherwise start a new one RidgeLine line = null; int colIdx = thisMaxCols.getInt(colItr.index); if (prevRidgesCols.size() > 0) { List<Integer> diffsLst = new ArrayList<Integer>(); for (int i = 0; i < prevRidgesCols.size(); ++i){ int diff = Math.abs(colIdx - prevRidgesCols.get(i)); diffsLst.add(diff); } int closest = diffsLst.indexOf(Collections.min(diffsLst)); if (diffsLst.get(closest) <= maxDistances.getDouble(rowIdx)) { line = ridgeLines.get(closest); } } if (line != null) { // Found a point close enough extend current ridge lines line.col.add(colIdx); line.row.add(rowIdx); line.gapVal = 0; } else { // newLine=[[row] RidgeLine nLine = new RidgeLine(); nLine.col.add(colIdx); nLine.row.add(rowIdx); nLine.gapVal = 0; ridgeLines.add(nLine); } } // Remove the ridge lines with gapNumber too high // XXX modifying a list whilst iterating for (int i = ridgeLines.size()-1; i > 0; --i) { RidgeLine line = ridgeLines.get(i); if (line.gapVal > gapThresh) { finalRidges.add(line); ridgeLines.remove(i); } } } List<RidgeLine> entireLines = new ArrayList<RidgeLine>(); entireLines.addAll(finalRidges); entireLines.addAll(ridgeLines); List<IDataset> output = new ArrayList<IDataset>(); /* * Filter out low gapValue lines and * format these ridglines into output. * */ for(RidgeLine line : entireLines){ List<Integer> sortArgs = new ArrayList<Integer>(line.row); Collections.sort(sortArgs); Dataset ridgeLine = DatasetFactory.zeros(new int[]{sortArgs.size(),2}, Dataset.FLOAT64); //TODO: not the most efficent, just needed them to be ordered for filtering later for (int i = 0; i < sortArgs.size(); ++i){ int rVal = sortArgs.get(i); int idxOrg = line.row.indexOf(rVal); int cVal = line.col.get(idxOrg); ridgeLine.set(rVal, i, 0); ridgeLine.set(cVal, i, 1); } output.add(ridgeLine); } return output; } /** * * Take peaks from the ridges top value * * RidgeLines -> line -> Points -> x,y,gap val * * *@param mtx * 2D (0 - col, 1 - row) matrix to id ridge lines on. THis is * typically the result of the convolution of wavelet and signal * data. * * @param ridges * Sequence of ridges contains 2 sequences of the respective rows and columns that creates the ridge line respectively. * * @param windowSz * Size of window to use to calculate noise floor. * Default is col /20 -> TODO: why? * * @param minLength * min length a ridge line need to be acceptable. * * @param minSignalNoiseRatio * Min signal to noise ratio. The signal is a value of the cwt matrix at the shortest length scale (cwt[0.loc]) the * noise the 'noise perc'th percentiles of data points contained within a window of 'window_size' around 'cwt[0,loc]' * * @param noisePerc * When calculating the noise floor, percentile of data points examined below which to consider noise. * Calculated using HOPEFULLY apache math3 percentile functions. * * * * @return List of 2D dataset (0 - col, 1 - row) with respect to the original @mtx */ private List<IDataset> filterRidgesToPeaks(Dataset mtx, List<IDataset> ridges, int windowSz, double minLength, double minSignalNoiseRatio, double noisePerc) { //Conditions generated based on parameter input int numPoints = mtx.getShape()[1]; int odd = windowSz % 2; int hfWindow = windowSz / 2; Dataset startRow = mtx.getSlice(new Slice(0, 1)).flatten(); Dataset noises = DatasetFactory.zeros(startRow, Dataset.FLOAT64); /* * Generate noise at slices about rows and on the windowsSz given * */ IndexIterator itr= startRow.getIterator(); while(itr.hasNext()){ int idx = itr.index; int windowStart= Math.max(idx - hfWindow, 0); int windowEnd = Math.min(idx + hfWindow + odd, numPoints); Dataset sliceWindow = startRow.getSlice(new Slice(windowStart,windowEnd)); double[] args = (double[]) DatasetUtils.createJavaArray(sliceWindow); //Calculate percentile based on this slice. This can be compared later to see if this area was too low to be considered significant. double score = new Percentile().evaluate(args, noisePerc); noises.set(score, idx); } /* * Based on noises parameter filter out ridges. * */ List<IDataset> filteredRidges = new ArrayList<IDataset>(); for(int i = 0; i < ridges.size(); ++i){ IDataset ridgeLine = ridges.get(i); if (isFilteredRidgeAPeak(mtx, ridgeLine, noises, minLength, minSignalNoiseRatio)){ filteredRidges.add(ridgeLine); } } return filteredRidges; } /** * * Check against value and conditions to see if could consider point a peak. * * @param mtx * 2D (0 - col, 1 - row) matrix to id ridge lines on. This is * typically the result of the convolution of wavelet and signal data. * * @param ridgeLine * Sequence of ridges contains 2 sequences of the respective rows and columns that creates the ridge line respectively. * * * @param noises * 1D set of noise values produced based on @param mtx(1) * * @param minLength * min lenght a ridge line need to be acceptable. * * @param minSignalNoiseRatio * Min signal to noise ratio. The signal is a value of the cwt matrix at the shortest length scale (cwt[0.loc]) the * noise the 'noise perc'th percentiles of data points contained within a window of 'window_size' around 'cwt[0,loc]' * * * @return */ private Boolean isFilteredRidgeAPeak(IDataset mtx, IDataset ridgeLine, IDataset noises, double minLength, double minSignalNoiseRatio){ if ((ridgeLine.getShape()[0]) < minLength){ return false; } //TODO: need to take a better slice of the noise mtx int c = ridgeLine.getInt(0,0); int r = ridgeLine.getInt(0,1); double signal = mtx.getDouble(c,r); double noise = noises.getDouble(ridgeLine.getInt(0,1)); double snr = Math.abs( signal/noise); if (snr < minSignalNoiseRatio){ //Not a significant enough noise val to be considered a peak return false; } return true; } @Override public Map<Integer, Double> findPeaks(IDataset xData, IDataset yData, Integer maxPeaks) { //TODO: shouldnt be reloading params inside find peaks... loadParam(); //Generate width set to step through for convoluation IDataset widths = DatasetFactory.createRange(1.0, widthSzParam+1.0, 1.0, Dataset.FLOAT64); IDataset maxDis = Maths.divide(widths, 4.0); // Pass over data given width and produce waveletConvolve Dataset mtx = countinousWaveletTransform(yData, widths); List<IDataset> ridges = identifyRidgeLines(mtx, maxDis,gapThresh); if (minLength == 0) minLength = Math.ceil(mtx.getShape()[0] /4.0); int windowSz = (int) Math.ceil((double) mtx.getShape()[1] / 20.0); List<IDataset> ridgesFiltered = filterRidgesToPeaks(mtx, ridges, windowSz, minLength, minSNR, noisePerc); Map<Integer, Double> peaks = new TreeMap<Integer,Double>(); for(IDataset ridge : ridgesFiltered){ int peakPos = ridge.getInt(0,1); peaks.put(peakPos, yData.getDouble(peakPos)); } return peaks; } }