/* * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fhcrc.cpl.toolbox.proteomics.feature; import modwt.Filter; import modwt.Transform; import org.fhcrc.cpl.toolbox.CPUTimer; import org.fhcrc.cpl.toolbox.datastructure.Pair; import org.fhcrc.cpl.toolbox.datastructure.FloatRange; import org.fhcrc.cpl.toolbox.datastructure.IntegerArray; import org.fhcrc.cpl.toolbox.proteomics.feature.Feature; import javax.media.jai.JAI; import javax.media.jai.PlanarImage; import javax.media.jai.RasterFactory; import javax.media.jai.RenderedOp; import javax.media.jai.operator.DFTDescriptor; import java.awt.Point; import java.awt.image.DataBuffer; import java.awt.image.DataBufferFloat; import java.awt.image.Raster; import java.awt.image.WritableRaster; import java.awt.image.renderable.ParameterBlock; import java.io.IOException; import java.io.Writer; import java.util.*; /** * User: mbellew * Date: Jun 2, 2004 * Time: 1:57:21 PM */ public class Spectrum { //private static Logger _log = Logger.getLogger(Spectrum.class); public static final float HYDROGEN_ION_MASS = (float) (1.0078250 - 5.485e-4); //h - electron public static final double LN2 = Math.log(2.0); /** *for micromass, doesn't work for bruker public static float[][] ExpandZeroes(float[][] spectrum) { final float ZERO = 0F; // set to -2 for debugging final float ZEROFILL = 0F; // set to -1 for debugging final int notZero = 0; final int oneZero = 1; final int twoZero = 
2; float[] mzIN = spectrum[0]; float[] intIN = spectrum[1]; FloatArray mzOUT = new FloatArray(); FloatArray intOUT = new FloatArray(); int state = notZero; float interval = Math.min(0.5F, mzIN[1] - mzIN[0]); for (int i=0 ; i<mzIN.length ; i++) { float mz = mzIN[i]; float f = intIN[i]; if (f != 0) { state = notZero; mzOUT.add(mz); intOUT.add(f); } else if (state == notZero) { state = oneZero; mzOUT.add(mz); intOUT.add(ZERO); } else { state = twoZero; if (i >= 2) interval = mzIN[i-1] - mzIN[i-2]; float mzFrom = mzIN[i-1]; float mzTo = mzIN[i] - interval/2; for (float m = mzFrom+interval ; m < mzTo ; m+=interval) { mzOUT.add(m); intOUT.add(ZEROFILL); } mzOUT.add(mz); intOUT.add(ZERO); } } return new float[][] {mzOUT.toArray(null), intOUT.toArray(null)}; } */ /** * For both micromass and bruker, intervals between readings are proportional to sqrt(mz). * This function infers the sampling frequency from a spectrum. * * r.min should be an actual start value from a real spectrum (e.g. not just 400) * * This is expensive so it's done once per MSRun, see MSRun.getTemplateSpectrum(). * Since we do it once, don't try to optimize too much. 
*/ public static float[] GenerateSpectrumTemplate(float[] mzArray, FloatRange r) { // convert to sqrt(mz) domain double start_sqrt = Math.sqrt(r.min); double end_sqrt = Math.sqrt(r.max); double[] mzArray_sqrt = new double[mzArray.length]; for (int i=0 ; i<mzArray.length ; i++) mzArray_sqrt[i] = Math.sqrt((double)mzArray[i]); /* double gaps[] = new double[mzArray.length-1]; int lenGaps = 0; for (int i=1 ; i<mzArray.length ; i++) { double d = mzArray_sqrt[i] - mzArray_sqrt[i-1]; if (d > 0 && d < .025) gaps[lenGaps++] = d; } Arrays.sort(gaps, 0, lenGaps); */ // find smallest gap double min = 0.001; for (int i=1 ; i<mzArray_sqrt.length ; i++) { double d = mzArray_sqrt[i] - mzArray_sqrt[i-1]; if (d < min) min = d; } // find average of small gaps double sum = 0.0; int count = 0; double cutoff = min * 1.5; for (int i=1 ; i<mzArray_sqrt.length ; i++) { double d = mzArray_sqrt[i] - mzArray_sqrt[i-1]; if (d < cutoff) { sum += d; count++; } } double interval_sqrt = sum/count; //double interval_upper = interval_sqrt * 1.5; double interval_lower = interval_sqrt * 0.5; // We have a really good interval value, but we'll drift from reported mz values if we use it directly. 
// "merge" input data and calculate missing values int lenTemp = (int)Math.floor((end_sqrt-start_sqrt)/interval_sqrt) + 200; double[] template_sqrt = new double[lenTemp]; int dst = 0, src = 0; mzArray_sqrt[0] = start_sqrt; mzArray_sqrt[mzArray_sqrt.length-1] = end_sqrt; double prev = start_sqrt; for ( ; src<mzArray_sqrt.length ; src++) { double next = mzArray_sqrt[src]; double gap = next - prev; int c = (int)((gap + interval_lower) / interval_sqrt); assert c<2 || gap/c > interval_sqrt*0.9 && gap/c < interval_sqrt*1.1; for (int i=1 ; i<c ; i++) template_sqrt[dst++] = prev + i * gap/c; template_sqrt[dst++] = prev = next; } int len = dst; // convert back to mz domain float[] template = new float[len]; for (int i=0 ; i<len ; i++) { double mz = template_sqrt[i] * template_sqrt[i]; template[i] = (float)(Math.round(10000.0 * mz) / 10000.0); //System.out.println(template[i]); } return template; } static int find(float[] s, float f) { int i = Arrays.binarySearch(s, f); i = i<0 ? -(i+1) : i; i = Math.min(s.length-1, i); return i; } public static float[][] CombineRawSpectra(float[] template, List<float[][]> spectra, FloatRange rangeIN) { int from = find(template, rangeIN.min); if (from > 0 && template[from] > rangeIN.min) from--; int to = find(template, rangeIN.max); if (to == template.length) to--; float[] mzArray = new float[to-from+1]; System.arraycopy(template, from, mzArray, 0, mzArray.length); // now combine intensity values float[] iArray = new float[mzArray.length]; for (int s=0 ; s<spectra.size() ; s++) { float[][] spectrum = spectra.get(s); from = find(spectrum[0], rangeIN.min); to = find(spectrum[0], rangeIN.max); if (spectrum[0][to] > rangeIN.max) --to; for (int i=from, j=0 ; i<to ; i++) { float f = spectrum[0][i]; while (mzArray[j] < f) j++; // which is closer mzArray[j] or mzArray[j-1]; if (j == 0 || mzArray[j] - f < f - mzArray[j-1]) iArray[j] += spectrum[1][i]; else iArray[j-1] += spectrum[1][i]; } } int sz = spectra.size(); for (int i=0 ; i<iArray.length ; 
i++) iArray[i] /= sz; return new float[][] {mzArray, iArray}; } /** * resample and translate to zero charge domain * assumes the input is higher resolution than 1/scale*resolution * <p/> * CONSIDER: There's probably a better, curve estimating/fitting algorithm */ public static float[][] TranslateZeroCharge(float[][] spectrum, FloatRange r, int charge, int resolution) { if (spectrum.length != 2) throw new IllegalArgumentException(); if (spectrum[0].length != spectrum[1].length) throw new IllegalArgumentException(); float[][] out = new float[2][resolution * ((int) r.max - (int) r.min) + 1]; int lenOut = out[0].length; for (int i = 0; i < lenOut; i++) out[0][i] = r.min + i * 1.0F / resolution; int len = spectrum[0].length; int count = 0; float sum = 0.0F; int indexLast = -1; for (int i = 0; i < len; i++) { float m = spectrum[0][i]; m = (m - HYDROGEN_ION_MASS) * charge; if (m < r.min - 1 || m > r.max + 1) continue; float f = spectrum[1][i]; int index = Math.round((m - r.min) * resolution); if (index < 0 || index >= lenOut) continue; if (index != indexLast) { if (count > 0) out[1][indexLast] = sum / count; indexLast = index; sum = 0.0F; count = 0; } sum += f; count++; } if (count > 0) out[1][indexLast] = sum / count; return out; } public static float[][] ResampleSpectrum(float[][] spectrum, FloatRange r, int resolution, boolean zeroCharge) { float[] signal = Resample(spectrum, r, resolution); float[] mz = new float[signal.length]; double in = 1.0 / resolution; double start = r.min; if (zeroCharge) start -= HYDROGEN_ION_MASS; for (int i = 0; i < signal.length; i++) mz[i] = (float) (start + i * in); return new float[][]{mz, signal}; } static CPUTimer timerResample = new CPUTimer("Spectrum.Resample()"); public static float[] Resample(float[][] spectrum, FloatRange r, int resolution) { try { assert timerResample.start(); return Resample2(spectrum, r, resolution); } finally { assert timerResample.stop(); } } /** * resample * assumes the input is higher resolution than 
1/scale*resolution * <p/> * CONSIDER: There's probably a better, curve estimating/fitting algorithm */ private static float[] Resample1(float[][] spectrum, FloatRange r, int resolution) { if (spectrum.length != 2) throw new IllegalArgumentException(); if (spectrum[0].length != spectrum[1].length) throw new IllegalArgumentException(); // ExpandZeroes // It's probably correct to ExpandZeros here, however // a) it is expensive // b) it only affects small noise and edge of peaks // I don't think it actually affects results float[] out = new float[resolution * ((int) r.max - (int) r.min) + 1]; int lenOut = out.length; int count = 0; float sum = 0.0F; int indexLast = -1; int start = Arrays.binarySearch(spectrum[0], r.min-1); start = start < 0 ? -(start+1) : start; int end = Arrays.binarySearch(spectrum[0], r.max+1); end = end < 0 ? -(end+1) : end; for (int i=start ; i<end ; i++) { float m = spectrum[0][i]; float x = spectrum[1][i]; int index = Math.round((m - r.min) * resolution); if (index < 0 || index >= lenOut) continue; if (index != indexLast) { if (count > 0) out[indexLast] = sum / count; indexLast = index; sum = 0.0F; count = 0; } sum += x; count++; } if (count > 0) out[indexLast] = sum / count; return out; } private static float[] Resample2(float[][] spectrum, FloatRange r, int resolution) { if (spectrum.length != 2) throw new IllegalArgumentException(); if (spectrum[0].length != spectrum[1].length) throw new IllegalArgumentException(); // ExpandZeroes // It's probably correct to ExpandZeros here, however // a) it is expensive // b) it only affects small noise and edge of peaks // I don't think it actually affects results float[] out = new float[resolution * ((int) r.max - (int) r.min) + 1]; float[] weights = new float[out.length]; int start = Arrays.binarySearch(spectrum[0], r.min-1F/resolution); start = start < 0 ? -(start+1) : start; int end = Arrays.binarySearch(spectrum[0], r.max+1F/resolution); end = end < 0 ? 
-(end+1) : end; for (int i=start ; i<end ; i++) { float m = spectrum[0][i]; float x = spectrum[1][i]; float bucket = (m - r.min) * resolution; int index = (int)Math.floor(bucket); float frac = bucket-index; if (index >= 0 && index < out.length) { out[index] += x*(1-frac); weights[index] += (1-frac); } if (index+1 >= 0 &&index+1 < out.length) { out[index+1] += x*frac; weights[index+1] += frac; } } for (int i=0 ; i<out.length; i++) { assert weights[i] != 0 || out[i] == 0; out[i] /= Math.max(1.0,weights[i]); } // this algorithm does tend to smooth nicely // however, you sometimes get a largish peak right on // a bucket boundary, this won't smooth very well. SmoothALittle(out); return out; } /** * don't put too much effort in this yet, * Are there standard centroiding algorithms? * I think Tim Randolph has some ideas */ public static int[] PickPeakIndexes(float[] signal, double minFilter) { IntegerArray arr = new IntegerArray(); int len = signal.length; float prev = -Float.MIN_VALUE; float curr; for (int i = 0; i < len;) { for (; i < len && prev <= (curr = signal[i]); i++) prev = curr; if (prev > minFilter) arr.add(i - 1); for (; i < len && prev >= (curr = signal[i]); i++) prev = curr; assert i==len || !Float.isNaN(signal[i]); } return arr.toArray(null); } public static Peak[] PickPeaks(float[][] spectrum, double minFilter) { List<Peak> arr = new ArrayList<Peak>(); float[] signal = spectrum[1]; int len = spectrum[0].length; float prev = -Float.MAX_VALUE; float curr; for (int i = 0; i < len;) { for (; i < len && prev <= (curr = signal[i]); i++) prev = curr; if (prev > minFilter) arr.add(new CentroidedPeak(spectrum[0][i - 1], spectrum[1][i - 1])); for (; i < len && prev >= (curr = signal[i]); i++) prev = curr; } return (Peak[]) arr.toArray(new Peak[arr.size()]); } public static void SmoothALittle(float[] x) { float last = x[0]; float cur = x[0]; int stop = x.length - 1; float next; for (int i = 0; i < stop; i++) { next = x[i + 1]; x[i] = (last + cur + cur + next) / 4.0F; 
last = cur; cur = next;
        }
        // final sample: its own value stands in for the missing right neighbor
        next = x[stop];
        x[stop] = (last + cur + cur + next) / 4.0F;
    }

    /**
     * In-place (1,2,1)/4 weighted smoothing; double overload of
     * SmoothALittle(float[]) with identical edge handling.
     */
    public static void SmoothALittle(double[] x)
    {
        double last = x[0];
        double cur = x[0];
        int stop = x.length - 1;
        double next;
        for (int i = 0; i < stop; i++)
        {
            next = x[i + 1];
            // float literal 4.0F in double arithmetic is harmless (4.0 is exact)
            x[i] = (last + cur + cur + next) / 4.0F;
            last = cur;
            cur = next;
        }
        next = x[stop];
        x[stop] = (last + cur + cur + next) / 4.0F;
    }

    /** Orders peaks by descending intensity. */
    public static Comparator<Peak> comparePeakIntensityDesc = new Comparator<Peak>()
    {
        public int compare(Peak a, Peak b)
        {
            //if (a == null || b == null) return a==null ? 1 : -1;
            return _compareDesc(a.intensity, b.intensity);
        }
    };

    /** Orders peaks by ascending m/z. */
    public static Comparator<Peak> comparePeakMzAsc = new Comparator<Peak>()
    {
        public int compare(Peak a, Peak b)
        {
            return _compareAsc(a.mz, b.mz);
        }
    };

    /** Orders features by ascending mass. */
    public static Comparator<Feature> comparePeakMassAsc = new Comparator<Feature>()
    {
        public int compare(Feature a, Feature b)
        {
            return _compareAsc(a.mass, b.mass);
        }
    };

    /** Orders peaks by ascending scan number. */
    public static Comparator<Peak> comparePeakScanAsc = new Comparator<Peak>()
    {
        public int compare(Peak a, Peak b)
        {
            return _compareAsc(a.scan, b.scan);
        }
    };

    // NOTE(review): the name says "Desc" but this delegates to _compareAsc, so
    // it actually orders by ASCENDING scan count.  Callers may depend on the
    // actual order, so the behavior is left as-is -- confirm the intent before
    // renaming or flipping the comparison.
    public static Comparator<Feature> compareFeatureLengthDesc = new Comparator<Feature>()
    {
        public int compare(Feature a, Feature b)
        {
            return _compareAsc(a.getScanCount(), b.getScanCount());
        }
    };

    /**
     * Minimal single-tile PlanarImage wrapper around a float DataBuffer,
     * used to feed a 1 x N signal into the JAI DFT/IDFT operators below.
     * The whole image is one tile, so both accessors return the same raster.
     */
    static class FloatImage extends PlanarImage
    {
        WritableRaster raster;

        FloatImage(int w, int h, DataBuffer buf)
        {
            tileWidth = width = w;
            tileHeight = height = h;
            tileGridXOffset = tileGridYOffset = minX = minY = 0;
            assert buf.getSize() == w * h;
            sampleModel = RasterFactory.createBandedSampleModel(DataBuffer.TYPE_FLOAT, w, h, 1);
            raster = RasterFactory.createWritableRaster(sampleModel, buf, new Point(0, 0));
        }

        public Raster getData()
        {
            return raster;
        }

        public Raster getTile(int tileX, int tileY)
        {
            return raster;
        }
    }

    /**
     * Gaussian low-pass smoothing of x performed in the frequency domain via a
     * JAI forward DFT, per-coefficient scaling, and inverse DFT.
     */
    public static float[] FFTsmooth(float[] x, double smoothfactor, boolean cliff)
    {
        try
        {
            DataBuffer buf = new DataBufferFloat(x, x.length);
            FloatImage img = new FloatImage(x.length, 1, buf);
RenderedOp opDFT = JAI.create("DFT", new ParameterBlock() .add(DFTDescriptor.SCALING_UNITARY) .add(DFTDescriptor.REAL_TO_COMPLEX) .addSource(img)); float[] fft = ((DataBufferFloat) opDFT.getData().getDataBuffer()).getData(); int complexLen = fft.length / 2; // UNDONE cache, this seems overexpensive /* int len = complexLen / 2; float[] g = new float[len]; double s = len / smoothfactor; double s2 = -1.0 / (2.0 * s * s); int z = cliff ? (int) s : 0; for (int i = 0; i < z; i++) g[i] = 1F; for (int i = 0; i < len - z; i++) { double d = Math.exp((double)i * i * s2); if (d < Float.MIN_VALUE) // arbitrary cut-off for small.. break; g[z + i] = (float) d; } */ int len = complexLen / 2; float[] g = NormalP(len, 1/smoothfactor, false, false, null); int off = 2; for (int i = 0; i < len; i++) { fft[off++] *= g[i]; // Real fft[off++] *= g[i]; // Complex } for (int i = len - 2; i >= 0; i--) { fft[off++] *= g[i]; // Real fft[off++] *= g[i]; // Complex } assert off == fft.length; RenderedOp opIDFT = JAI.create("IDFT", new ParameterBlock() .add(DFTDescriptor.SCALING_UNITARY) .add(DFTDescriptor.COMPLEX_TO_REAL) .addSource(opDFT)); DataBufferFloat bufReturn = (DataBufferFloat) opIDFT.getData().getDataBuffer(); return bufReturn.getData(); } catch (Exception ex) { ex.printStackTrace(); return null; } } /** * Computes points in Gaussian distribution function, over range [0,1] * with mean=0 * * @param len number of points to compute in the distrubition (min 2) * @param sigma sqrt(variance) * @param g optional array to copy result into * @return */ private static float[] NormalP(int len, double sigma, boolean even, boolean normalize, float[] g) { g = realloc(g, len); double variance = sigma * sigma; double delta;// = 1.0/(len-1); double x; // = 0; if (even) { delta = 1.0/(len-0.5); x = delta/2.0; } else { delta = 1.0/(len-1.0); x = 0.0; } double s2 = -1.0 / (2.0 * variance); double scale = normalize ? 
1/(sigma * Math.sqrt(2 * Math.PI)) : 1; for ( int i=0 ; i<len ; i++, x+= delta) { double d = scale * Math.exp((double)x * x * s2); if (d < Float.MIN_VALUE) // arbitrary cut-off for small, avoid denormals break; g[i] = (float) d; } return g; } /* * Computes points in Gaussian distribution function, over range [0,1] * with mean=0.5 * * @param len number of points to compute in the distrubition * @param sigma sqrt(variance) * @param g optional array to copy result into * @return private static float[] NormalD(int len, double sigma, float[] g) { assert null == "NYI"; return null; } */ public static double HellingerDistance(float[] p, float[] q) { assert p.length == q.length; double d = 0.0; for (int i = 0; i < p.length; i++) { double x = Math.sqrt(p[i]) - Math.sqrt(q[i]); d += x * x; } return d; } public static float KLPoissonDistance(float mass, float[] q) { float[] p = Poisson(mass); assert q.length == p.length; double diff = 0.0; double sumP = 0.0; double sumQ = 0.0; int pqMinLength = Math.min(p.length, q.length); for (int k = 0; k < pqMinLength ; k++) { diff += p[k] * Math.log((double)p[k] / q[k]); sumP += p[k]; sumQ += q[k]; } double kl = diff / LN2; assert kl > -0.0001; assert Math.abs(sumP-1.0) < 0.001 && Math.abs(sumQ-1.0) < 0.001; kl = Math.max(kl,0); return (float)(diff / Math.log(2.0)); } public static float KLPoissonDistanceSymmetric(float mass, float[] signal) { float[] p = Poisson(mass); float diff = 0.0F; for (int k = 0; k < 4; k++) { float f0 = p[k]; float f1 = signal[k]; diff += f0 * Math.log(f0 / f1) + f1 * Math.log(f1 / f0); } return (float)(diff / Math.log(2.0)); } public static double KLDistanceSymmetric(float[] signal1, int off1, float[] signal2, int off2, int len) { double sum1 = 0.0, sum2 = 0.0; for (int i = 0; i < len; i++) { sum1 += signal1[off1 + i]; sum2 += signal2[off2 + i]; } double diff = 0.0; double min = 0.001 / len; for (int i = 0; i < len; i++) { double s1 = Math.max(signal1[off1 + i] / sum1, min); double s2 = Math.max(signal2[off2 + 
i] / sum2, min); diff += s1 * Math.log(s1 / s2) + s2 * Math.log(s2 / s1); } return diff / Math.log(2.0); } public static float KLGayDistance(float m, float[] signal) { float[] p = _gay(m); float diff = 0.0F; for (int k = 0; k < 4; k++) { float f0 = p[k]; float f1 = signal[k]; diff += f0 * Math.log(f0 / f1) + f1 * Math.log(f1 / f0); } return diff; } /* CONSIDER: Better mathematically justified ways to compute noise? */ public static float Noise(float[] signal, int start, int end) { int len = end - start; float[] copy = new float[len]; System.arraycopy(signal, start, copy, 0, len); Arrays.sort(copy); int quartile = len * 3 / 4; return copy[quartile]; } static float[][] _poisson = new float[6400 / 10][6]; static { for (int i = 0; i < _poisson.length; i++) { int mass = i * 10 + 5; float mu = mass * 0.0005556F; // martin's empircal lambda = 1/1800 (was mass * 0.000489F) double muExp = Math.exp(-mu); _poisson[i][0] = (float) (1 * muExp / 1); _poisson[i][1] = (float) (mu * muExp / 1); _poisson[i][2] = (float) (Math.pow(mu, 2) * muExp / 2); _poisson[i][3] = (float) (Math.pow(mu, 3) * muExp / 6); _poisson[i][4] = (float) (Math.pow(mu, 4) * muExp / 24); _poisson[i][5] = (float) (Math.pow(mu, 5) * muExp / 120); float s = _poisson[i][0] + _poisson[i][1] + _poisson[i][2] + _poisson[i][3] + _poisson[i][4] + _poisson[i][5]; _poisson[i][0] /= s; _poisson[i][1] /= s; _poisson[i][2] /= s; _poisson[i][3] /= s; _poisson[i][4] /= s; _poisson[i][5] /= s; } } public static float[] Poisson(float m) { int i = ((int) m - 5) / 10; i = Math.max(0, Math.min(_poisson.length - 1, i)); //System.err.println("**Poisson: m=" + m + ", i=" + i + ", p1=" + _poisson[i][0] + ", p2=" + _poisson[i][1]); return _poisson[i]; } /** * Return the index of the peak that, according to the Poisson distribution for this mass, should be most intense * @param mass * @return */ public static int calcMaxIdealPeakIndex(float mass) { //Compare intensities of the peak that has theoretical max intensity int 
theoreticalMaxIndex = 0; float[] p = Poisson(mass); float maxTheoreticalInt = p[0]; for (int i=1; i<p.length; i++) if (p[i] > maxTheoreticalInt) { maxTheoreticalInt = p[i]; theoreticalMaxIndex = i; } return theoreticalMaxIndex; } /** * Return the indexes of the peaks in descending order of theoretical intensity. * Relies on no exact ties from Poisson(); * @param mass * @return */ public static int[] calcIdealPeakIntensityOrderDesc(float mass) { //Compare intensities of the peak that has theoretical max intensity float[] p = Poisson(mass); float[] pSorted = new float[p.length]; System.arraycopy(p, 0, pSorted, 0, p.length); Arrays.sort(pSorted); //sorted in ASCENDING ORDER int[] result = new int[p.length]; for (int i=0; i<p.length; i++) { float match = pSorted[pSorted.length-i-1]; for (int j=0; j<p.length; j++) if (p[j] == match) { result[i] = j; break; } } return result; } // from article in Electrophoresis 1999 by S. Gay et al static double[] m0 = new double[]{0.0576, -0.2553, 0.5827, -0.8436, 0.6182}; static double[] m1 = new double[]{-0.1207, 0.4688, -0.7373, 0.3845, 0.2701}; static double[] m2 = new double[]{0.0569, -0.1173, -0.0838, 0.3068, 0.0865}; static double[] m3 = new double[]{0.0296, -0.1293, 0.1330, 0.1151, 0.0204}; static double[] m4 = new double[]{-0.0101, -0.0118, 0.0789, 0.0292, 0.0040}; static double[] m5 = new double[]{-0.0132, 0.0448, 0.0256, 0.0079, 0.0008}; static float[][] _gay = new float[6400 / 10][6]; static { for (int i = 0; i < _gay.length; i++) { int m = i * 10 + 5; double x = ((double) m - 800.0) / 2200.0; double x2 = x * x; double x3 = x2 * x; double x4 = x3 * x; _gay[i][0] = (float) (m0[0] * x4 + m0[1] * x3 + m0[2] * x2 + m0[3] * x + m0[4]); _gay[i][1] = (float) (m1[0] * x4 + m1[1] * x3 + m1[2] * x2 + m1[3] * x + m1[4]); _gay[i][2] = (float) (m2[0] * x4 + m2[1] * x3 + m2[2] * x2 + m2[3] * x + m2[4]); _gay[i][3] = (float) (m3[0] * x4 + m3[1] * x3 + m3[2] * x2 + m3[3] * x + m3[4]); _gay[i][4] = (float) (m4[0] * x4 + m4[1] * x3 + m4[2] * 
x2 + m4[3] * x + m4[4]); _gay[i][5] = (float) (m5[0] * x4 + m5[1] * x3 + m5[2] * x2 + m5[3] * x + m5[4]); float s = _gay[i][0] + _gay[i][1] + _gay[i][2] + _gay[i][3] + _gay[i][4] + _gay[i][5]; _gay[i][0] /= s; _gay[i][1] /= s; _gay[i][2] /= s; _gay[i][3] /= s; _gay[i][4] /= s; _gay[i][5] /= s; } } static float[] _gay(float m) { int i = ((int) m - 5) / 10; i = Math.max(0, Math.min(_gay.length - 1, i)); return _gay[i]; } public static float[][] Centroid(float[][] spectrum) { // this isn't exactly well defined, since spectrum is not evenly sampled float n = Spectrum.Noise(spectrum[1], 0, spectrum[1].length); Spectrum.SmoothALittle(spectrum[1]); Spectrum.SmoothALittle(spectrum[1]); Peak[] peaks = Spectrum.PickPeaks(spectrum, n); float[][] spectrumPeaks = new float[2][peaks.length]; for (int i = 0; i < peaks.length; i++) { Peak p = peaks[i]; spectrumPeaks[0][i] = p.mz; spectrumPeaks[1][i] = p.intensity; } return spectrumPeaks; } public static void CopyToTSV(float[][] spectrum, Writer out, boolean useHeader) throws IOException { if (useHeader) out.write("mz\tintensity\n"); for (int i = 0; i < spectrum[0].length; i++) { out.write(String.valueOf(spectrum[0][i])); out.write('\t'); out.write(String.valueOf(spectrum[1][i])); out.write('\n'); } } public static class Peak { public int scan = -1; public float mz; public float intensity; public float background = 0F; public float median = 0F; public boolean excluded = false; public Peak() { } public Peak(int scan, float mz, float intensity) { this.scan = scan; this.mz = mz; this.intensity = intensity; } public Peak(float mz, float intensity) { this.mz = mz; this.intensity = intensity; } public Peak(Peak p) { scan = p.scan; mz = p.mz; intensity = p.intensity; background = p.background; median = p.median; } public String toString() { return scan + "\t" + mz + "\t" + intensity; } public boolean equals(Object o) { if (null == o || o.getClass() != this.getClass()) return false; Peak p = (Peak) o; return p.scan == scan && p.mz == mz && 
p.intensity == intensity; } public int hashCode() { return scan ^ Float.floatToIntBits(mz) ^ Float.floatToIntBits(intensity); } public int getScan() { return scan; } public void setScan(int scan) { this.scan = scan; } public float getMz() { return mz; } public void setMz(float mz) { this.mz = mz; } public float getIntensity() { return intensity; } public void setIntensity(float intensity) { this.intensity = intensity; } public float getBackground() { return background; } public void setBackground(float background) { this.background = background; } public float getMedian() { return median; } public void setMedian(float median) { this.median = median; } } /* * this is just a marker class to distinguish between raw and centroided peaks */ public static class CentroidedPeak extends Peak { public CentroidedPeak(int scan, float mz, float intensity) { super(scan, mz, intensity); } public CentroidedPeak(float mz, float intensity) { super(mz, intensity); } } static final int _compareAsc(float a, float b) { return a == b ? 0 : a < b ? -1 : 1; } static final int _compareDesc(float a, float b) { return a == b ? 0 : a < b ? 
1 : -1; } /* public static boolean IsLockSpray(float[][] fullSpectrum) { float[][] spectrum = ResampleSpectrum(fullSpectrum, new FloatRange(780.0F, 7900.F), 36, false); spectrum[1] = Spectrum.FFTsmooth(spectrum[1], 8, false); Peak[] peaks = Spectrum.PickPeaks(spectrum, 20.0F); Arrays.sort(peaks, Spectrum.comparePeakIntensityDesc); if (peaks.length < 3) return false; if (Math.abs(peaks[0].mz - 785.8333) > .06) return false; if (Math.abs(peaks[1].mz - 786.3333) > .06) return false; if (Math.abs(peaks[2].mz - 786.8333) > .06) return false; return true; } */ public static void NormalizeSum(float[] x) { double sum = 0.0; for (int i=0 ; i<x.length ; i++) sum += x[i]; for (int i=0 ; i<x.length ; i++) x[i] /= sum; } public static void NormalizeSquares(float[] x) { double sum = 0.0; for (int i=0 ; i<x.length ; i++) sum += x[i] * x[i]; double sqr = Math.sqrt(sum); for (int i=0 ; i<x.length ; i++) x[i] /= sqr; } public static double Correlation(float[] X, float[] Y) { assert X.length == Y.length; if (X.length != Y.length) throw new IllegalArgumentException("Spectrum.Correlation(), arrays need be equal length"); int n = X.length; double Sx=0.0, Sx2=0.0, Sy=0.0, Sy2=0.0, Sxy=0.0; for (int i=0 ; i<n ; i++) { double x = X[i], y = Y[i]; Sx += x; Sx2 += x*x; Sy += y; Sy2 += y*y; Sxy += x*y; } return (n * Sxy - Sx*Sy) / Math.sqrt((n*Sx2 - Sx*Sx) * (n*Sy2 - Sy*Sy)); } /* static class array { float[] v; array(float[] v) { this.v = v; } float get(int i) { //return (i<0) ? v[0] : i>=v.length ? v[v.length-1] : v[i]; return (i < 0 || i >= v.length) ? 
0 : v[i]; } } public static float[] average(float[] signal, int scale) { int len = scale * 2 + 1; array s = new array(signal); float[] x = new float[signal.length]; double sum = 0.0; for (int i = -scale; i < signal.length; i++) { sum += s.get(i + scale) - s.get(i - scale); if (i > 0) x[i] = (float) (sum / len); } return x; } */ /* * assumes input has been preprocessed * lightly smoothed * removed background * * UNDONE: does not filter out small peaks in low signal area (median is zero or close to zero) * * UNDONE: CombinedStrategy uses WaveletD3(), NOT the same as WaveletPeaks() * * CONSIDER: rewrite as HaarPeaks() e.g. remove extraneous wavelet trappings. Has this already been done? */ public static Peak[] WaveletPeaks(float[][] spectrum) { float[] x = spectrum[1]; int K = 4; int N = spectrum[0].length; // // D1 // float[] mm0 = MedianWindow(x, N, 36 * 5, false); // Transform.thresholdSoft(x, mm0); float[][] modwt = Transform.decompose(x, N, K, new Filter("haar"), "modwt", "periodic", null); float[] D1 = modwt[2]; modwt[2] = new float[D1.length]; // so we can reuse modwt // // D2 // //double m = MedianSampled(D1, true); //Transform.thresholdSoft(D1, m); Transform.decompose(D1, N, K, new Filter("haar"), "modwt", "periodic", modwt); float[] D2 = modwt[2]; // threshold, smooth //double[] mm2 = MedianWindow(D2, N, 36 * 5, true); //Transform.thresholdSoft(D2, m); SmoothALittle(D2); // peaks are at D2 minima Rotate(D2, -7); int[] minima = FindMinimaIndexes(D2); List<Peak> list = new ArrayList<Peak>(); for (int i=0 ; i<minima.length ; i++) { int curr = minima[i]; if (x[curr] < 2*mm0[curr]) continue; float mz = spectrum[0][curr]; float in = (float)x[curr]; Peak p = new Peak(mz, in); list.add(p); } return (Peak[])list.toArray(new Peak[list.size()]); } public static Peak[] WaveletPeaksD3(float[][] spectrum) { float[] m = MedianWindow(spectrum[1], spectrum[0].length, 72, false); float[] d3 = Spectrum.WaveletD3(spectrum[1], null); int[] indexes = PickPeakIndexes(d3, .5); 
        // NOTE(review): this is the tail of a peak-finding method whose signature begins
        // before this chunk; 'indexes', 'spectrum' and 'm' are declared in the unseen part.
        // Keeps only peaks whose intensity is at least 3x the local estimate m[p] (floored at 1).
        List<Peak> l = new ArrayList<Peak>();
        for (int i=0 ; i<indexes.length; i++)
        {
            int p = indexes[i];
            if (spectrum[1][p] < 3 * Math.max(1,m[p]))
                continue;
            Peak peak = new Peak(0, spectrum[0][p], spectrum[1][p]);
            l.add(peak);
        }
        return (Peak[])l.toArray(new Peak[0]);
    }


    /**
     * Find the index of the smallest (most negative) value between each pair of
     * zero crossings in signal.  Non-negative runs are skipped; for each run of
     * values <= 0 the position of the minimum is recorded.
     *
     * @param signal input samples
     * @return indexes of the per-run minima, in ascending order
     */
    public static int[] FindMinimaIndexes(float[] signal)
    {
        IntegerArray arr = new IntegerArray();
        int len = signal.length;
        double min;
        int pos;
        for (int i = 0; i < len;)
        {
            // find falling zero crossing (skip the non-negative run)
            for (; i < len && signal[i] >= 0; i++)
                ;
            if (i == len)
                break;
            pos = i;
            min = signal[pos];
            // find rising zero crossing, tracking the minimum of the negative run
            for (; i < len && signal[i] <= 0; i++)
            {
                if (signal[i] < min)
                {
                    pos = i;
                    min = signal[pos];
                }
            }
            arr.add(pos);
        }
        return arr.toArray(null);
    }


    /**
     * Rotate x in place by d positions (negative d rotates the other way),
     * using the classic triple-reversal trick so no scratch array is needed.
     */
    public static void Rotate(float[] x, int d)
    {
        if (d < 0)
            d += x.length;
        // kinda slow, but don't need another array
        Reverse(x, 0, x.length);
        Reverse(x, 0, d);
        Reverse(x, d, x.length-d);
    }


    /** Reverse, in place, the len elements of x starting at start. */
    public static void Reverse(float[] x, int start, int len)
    {
        float t;
        for (int i=start, j=start+len-1 ; i<j ; i++, j--)
        {
            t = x[i];
            x[i] = x[j];
            x[j] = t;
        }
    }


    /**
     * Returns reconstruction of last L levels of wavelet decomposition
     * not including the S 'remainder'.
     *
     * This is not a real analysis method, just for visual examination.
     *
     * @param signalF input signal; padded by pow2(K) on each side before the MODWT
     * @param K       number of decomposition levels
     * @param L       number of detail levels (of the K) to sum into the result
     * @param bg      if non-null, receives the unpadded smooth/remainder m[K]
     * @return sum of detail levels K-L..K-1, unpadded back to signalF's length
     */
    public static float[] WaveletFilter(float[] signalF, int K, int L, float[] bg)
    {
        double[] x = PadToDouble(signalF, pow2(K));
        int N = x.length;
        double[][] t = Transform.decompose(x, N, K, new Filter("haar"), "modwt", "periodic", null);
        double[][] m = Transform.multiresolution(t, N, K, new Filter("haar"), "modwt", "periodic", null);
        Arrays.fill(x, 0F);
        for (int d = K - L; d < K; d++)
        {
            double[] D = m[d];
            for (int i = 0; i < N; i++)
                x[i] += D[i];
        }
        if (bg != null)
            UnpadToFloat(m[K], pow2(K), bg);
        return UnpadToFloat(x, pow2(K), null);
    }


    /**
     * Hardcoded K=3.
     * @param X   spectra
     * @param tmp Note!  This argument is ignored entirely (null is passed through to
     *            WaveletDX).  This preserves the legacy behavior of WaveletD3, which
     *            accepted but ignored the tmp argument.
     * @return transformed spectra (detail level K-1 = 2)
     */
    public static float[] WaveletD3(float[] X, Pair<float[][], float[][]> tmp)
    {
        return WaveletDX(X, null, 3);
    }


    /**
     * Hardcoded K=4.  Takes no tmp argument, for backward compatibility.
     * @param X spectra
     * @return transformed spectra (detail level K-1 = 3)
     */
    public static float[] WaveletD4(float[] X)
    {
        return WaveletDX(X, null, 4);
    }


    /**
     * Call WaveletDX and discard the intermediate decompose/multiresolution results.
     * @param X spectra
     * @param K number of decomposition levels
     * @return transformed spectra
     */
    public static float[] WaveletDX(float[] X, int K)
    {
        return WaveletDX(X, null, K);
    }


    /**
     * Perform wavelet transformation (Haar MODWT, periodic boundary).  Returns detail
     * level K-1 of the result of Transform.multiresolution().
     * @param X   spectra
     * @param tmp if supplied, populated with the results of Transform.decompose() (first)
     *            and Transform.multiresolution() (second); its arrays are also reused as
     *            scratch space across calls
     * @param K   number of decomposition levels
     * @return transformed spectra (alias into tmp.second — not a private copy)
     */
    public static float[] WaveletDX(float[] X, Pair<float[][], float[][]> tmp, int K)
    {
        if (null == tmp)
            tmp = new Pair<float[][], float[][]>(null, null);
        int N = X.length;
        tmp.first = Transform.decompose(X, N, K, new Filter("haar"), "modwt", "periodic", tmp.first);
        tmp.second = Transform.multiresolution(tmp.first, N, K, new Filter("haar"), "modwt", "periodic", tmp.second);
        return (tmp.second)[K-1];
    }


    /**
     * Choose a threshold as f times the (sampled) median of |x|.
     * NOTE(review): the else branch (mean-based threshold) is intentionally dead code,
     * kept behind the constant-true condition as a switchable alternative.
     */
    public static double ChooseThreshold(double[] x, double f)
    {
        if (true)
        {
            double m = MedianSampled(x, true);
            return m * f;
        }
        else
        {
            // mean
            double Sx = 0.0;
            for (int i=0 ; i<x.length ; i++)
                Sx += Math.abs(x[i]);
            return (Sx / x.length) * f;
        }
    }


    /**
     * Median of x[start..start+len-1] (upper median: sorted element len/2).
     * @param x     source data (not modified)
     * @param start first index to include
     * @param len   number of elements
     * @param fABS  if true, take the median of absolute values
     * @param t     optional scratch buffer, reused when t.length >= len
     * NOTE(review): when a reused t is longer than len, Arrays.sort(t) sorts the stale
     * tail too.  With assertions enabled realloc() scrubs the buffer to NaN (which sorts
     * last, so the result is still correct); with assertions DISABLED stale tail values
     * can shift the median — confirm callers never pass an oversized buffer in production.
     */
    public static double Median(double[] x, int start, int len, boolean fABS, double[] t)
    {
        assert null == t || t.length >= len;
        t = realloc(t, len);
        if (!fABS)
            System.arraycopy(x, start, t, 0, len);
        else
        {
            for (int i = 0 ; i<len ; i++)
                t[i] = Math.abs(x[i+start]);
        }
        Arrays.sort(t);
        return t[len/2];
    }


    /**
     * Median of x[start..start+len-1] (upper median: sorted element len/2).
     * float overload of the double version above; the same NOTE(review) about
     * oversized reused scratch buffers with assertions disabled applies here.
     */
    public static float Median(float[] x, int start, int len, boolean fABS, float[] t)
    {
        assert null == t || t.length >= len;
        t = realloc(t, len);
        if (!fABS)
            System.arraycopy(x, start, t, 0, len);
        else
        {
            for (int i = 0 ; i<len ; i++)
                t[i] = Math.abs(x[i+start]);
        }
        Arrays.sort(t);
        return t[len/2];
    }


    /** Median of three values, via explicit comparisons (no allocation, no sort). */
    public static final float Median(float a, float b, float c)
    {
        if (a < b)
            return c < a ? a : b < c ? b : c;
        else // b < a
            return a < c ? a : c < b ? b : c;
    }


    /** Median-of-three smooth over the whole array; see MedianSmooth(float[],int,float[]). */
    public static float[] MedianSmooth(float[] x)
    {
        return MedianSmooth(x, x.length, null);
    }


    /** @deprecated use MedianSmooth(float[], int, float[]) */
    public static float[] MedianSmooth(float[] x, int len)
    {
        return MedianSmooth(x, len, null);
    }


    /**
     * Sliding median-of-three smoothing of x[0..len-1]; endpoints copy their neighbor.
     * @param in optional output buffer, reused when large enough
     * NOTE(review): when len < 3 this returns a clone of ALL of x (ignoring both len and
     * the preallocated y) — looks inconsistent with the len-bounded contract; confirm
     * no caller relies on the short-input path.
     */
    public static float[] MedianSmooth(float[] x, int len, float[] in)
    {
        float[] y = Spectrum.realloc(in, len);
        if (len < 3)
            return (float[])x.clone();
        for (int i=1 ; i<len-1 ; i++)
            y[i] = Median(x[i-1],x[i],x[i+1]);
        y[0] = y[1];
        y[len-1] = y[len-2];
        return y;
    }


    /**
     * Estimate a lower envelope of x: per-window minima, widened by taking the min of
     * neighboring windows in both directions, lightly smoothed, then linearly
     * interpolated back to len samples.
     * @param x          input signal (not modified; copied into result first)
     * @param len        number of samples of x to process
     * @param windowSize bucket width in samples
     * @param result     optional output buffer, reused when large enough
     * NOTE(review): buckets is sized from x.length but the scan is bounded by len; when
     * x.length > len the trailing buckets keep Float.MAX_VALUE until the neighbor-min
     * passes pull them down — confirm callers always pass len == x.length.
     */
    public static float[] MinimaWindow(float[] x, int len, int windowSize, float[] result)
    {
        result = realloc(result, len);
        float[] buckets = new float[((x.length-1) / windowSize) + 1];
        System.arraycopy(x, 0, result, 0, len);
        for (int b = 0 ; b<buckets.length ; b++)
        {
            int start = b*windowSize;
            int end = Math.min(len, (b+1)*windowSize);
            float min = Float.MAX_VALUE;
            // early-out when a zero is seen: the minimum can't go lower than 0 here
            for (int i=start ; i<end && min > 0 ; i++)
                if (result[i] < min)
                    min = result[i];
            buckets[b] = min;
        }
        // widen minima: each bucket takes the min of itself and each neighbor
        for (int b=0 ; b<buckets.length-1 ; b++)
            buckets[b] = Math.min(buckets[b], buckets[b+1]);
        for (int b=buckets.length-1 ; b>0 ; b--)
            buckets[b] = Math.min(buckets[b-1], buckets[b]);
        SmoothALittle(buckets);
        result = Interpolate(buckets, len, windowSize, result);
        return result;
    }


    /**
     * Windowed median background: per-window medians, widened by neighbor minima in
     * both directions, lightly smoothed, then linearly interpolated back to len samples.
     * @param fABS if true, medians are taken over absolute values
     */
    public static float[] MedianWindow(float[] x, int len, int windowSize, boolean fABS)
    {
        int windows = ((len-1)/windowSize)+1;
        float[] buckets = new float[windows];
        for (int b=0 ; b<windows ; b++)
        {
            int windowStart = b * windowSize;
            int windowEnd = Math.min(x.length, (b+1) * windowSize);
            int windowLen = windowEnd - windowStart;
            buckets[b] = Median(x, windowStart, windowLen, fABS, null);
        }
        // widen: each bucket takes the min of itself and each neighbor
        for (int b=0 ; b<buckets.length-1 ; b++)
            buckets[b] = Math.min(buckets[b], buckets[b+1]);
        for (int b=buckets.length-1 ; b>0 ; b--)
            buckets[b] = Math.min(buckets[b-1], buckets[b]);
        SmoothALittle(buckets);
        float[] result = Interpolate(buckets, len, windowSize, null);
        return result;
    }


    /**
     * Expand the per-window values m back to a len-sample curve: the first and last
     * half-windows are held flat, the rest is piecewise-linear between window centers.
     * @param result optional output buffer, reused when large enough
     */
    private static float[] Interpolate(float[] m, int len, int windowSize, float[] result)
    {
        windowSize = Math.min(len, windowSize);
        try
        {
            result = realloc(result, len);
            // fill non-interpolated part
            Arrays.fill(result, 0, windowSize, m[0]);
            Arrays.fill(result, result.length-windowSize, result.length, m[m.length-1]);
            // fill interpolated part
            for (int w=0 ; w<m.length-1 ; w++)
            {
                int windowStart = len * w/m.length + windowSize/2;
                int windowEnd = len * (w+1)/m.length + windowSize/2;
                int windowLen = windowEnd - windowStart;
                double v = m[w];
                double d = (m[w+1]-m[w]) / windowLen;
                for (int i=0; i<windowLen ; i++)
                    result[windowStart+i] = (float)(v + d*i);
            }
            return result;
        }
        catch (RuntimeException x)
        {
            x.printStackTrace();
            throw x; // just a place to put a breakpoint
        }
    }


    /**
     * Approximate median (p=0.5) of x, optionally of absolute values.  For large arrays
     * (sample < len/4) a random sample of 100+sqrt(len) elements is used instead of the
     * full data, so the result is an estimate and NOT deterministic (uses Math.random()).
     */
    public static double MedianSampled(double[] x, boolean fABS)
    {
        double p = 0.5;
        int len = x.length;
        int sample = 100 + (int)Math.ceil(Math.sqrt(len));
        double[] copy = null;
        if (sample < len / 4)
        {
            // large input: estimate from a random sample (with replacement)
            copy = new double[sample];
            for (int i=0 ; i<sample ; i++)
            {
                int r = (int)Math.floor(Math.random() * len);
                copy[i] = fABS ? Math.abs(x[r]) : x[r];
            }
        }
        else
        {
            copy = new double[len];
            for (int i=0 ; i<len ; i++)
                copy[i] = fABS ? Math.abs(x[i]) : x[i];
        }
        Arrays.sort(copy);
        double l = (copy.length * p) - 1;
        int i = Math.max(0, (int)Math.floor(l));
        double m = copy[i];
        return m;
    }


    /**
     * Approximate median (p=0.5) of x, optionally of absolute values.
     * float overload of the double version above; same random-sampling caveat.
     */
    public static float MedianSampled(float[] x, boolean fABS)
    {
        double p = 0.5;
        int len = x.length;
        int sample = 100 + (int)Math.ceil(Math.sqrt(len));
        float[] copy = null;
        if (sample < len / 4)
        {
            // large input: estimate from a random sample (with replacement)
            copy = new float[sample];
            for (int i=0 ; i<sample ; i++)
            {
                int r = (int)Math.floor(Math.random() * len);
                copy[i] = fABS ? Math.abs(x[r]) : x[r];
            }
        }
        else
        {
            copy = new float[len];
            for (int i=0 ; i<len ; i++)
                copy[i] = fABS ? Math.abs(x[i]) : x[i];
        }
        Arrays.sort(copy);
        double l = (copy.length * p) - 1;
        int i = Math.max(0, (int)Math.floor(l));
        float m = copy[i];
        return m;
    }


    /**
     * Subtract background from spectra IN PLACE and return the background that was
     * removed.  Pass 1 removes a per-spectrum lower envelope (MinimaWindow, width 72);
     * pass 2 (skipped for a single spectrum) removes a cross-spectrum envelope per m/z
     * row (MinimaWindow, width 15).  Intensities are clamped at 0.
     * NOTE(review): assumes all rows spectra[s] have the same length as spectra[0] —
     * confirm against callers.
     * @param spectra [spectrum][mz-index] intensities; MODIFIED in place
     * @return background[spectrum][mz-index] — the total amount subtracted
     */
    public static float[][] RemoveBackground(float[][] spectra)
    {
        int imzMax = spectra[0].length;
        float[][] background = new float[spectra.length][];
        for (int s = 0; s < spectra.length; s++)
        {
            float[] S = spectra[s];
            background[s] = Spectrum.MinimaWindow(spectra[s], spectra[s].length, 72, null);
            //background[s] = Spectrum.MedianWindow(S, imzMax, 72, false);
            for (int i=0 ; i<imzMax ; i++)
                S[i] = Math.max(0, S[i]-background[s][i]);
        }
        if (spectra.length == 1)
            return background;
        float[] bg=null, row=null;
        for (int i=0 ; i<imzMax ; i++)
        {
            row = Spectrum.getRow(spectra,i,row);
            bg = Spectrum.MinimaWindow(row, row.length, 15, bg);
            //bg = Spectrum.MedianWindow(row, row.length, 10, false);
            for (int s=0 ; s<spectra.length ; s++)
            {
                float b = bg[s];
                background[s][i] += b;
                spectra[s][i] = Math.max(0, spectra[s][i]-b);
            }
        }
        return background;
    }


    /**
     * Experimental soft-threshold of wavelet coefficients IN PLACE, one threshold per
     * level.  t == 0 leaves a level untouched; t == Double.MAX_VALUE zeroes the level;
     * otherwise values with |d| <= t are zeroed and larger values are shrunk smoothly
     * by the factor max(0, 1 - e^(1 - |d|/t)).
     * @param Xin       [level][coefficient] data; MODIFIED in place
     * @param threshold per-level thresholds; must have at least Xin.length entries
     */
    public static void threshold(double[][] Xin, double[] threshold)
    {
        int K = Xin.length - 1;
        for (int k = 0; k <= K; k++)
        {
            double[] s = Xin[k];
            int N = s.length;
            double t = threshold[k];
            if (0.0 == t)
                continue;
            if (Double.MAX_VALUE == t)
            {
                Arrays.fill(s, 0, s.length, 0.0);
                continue;
            }
            for (int i = 0; i < N; i++)
            {
                double d = s[i];
                double dm = Math.abs(d);
                s[i] = dm <= t ? 0.0 : d * Math.max(0, 1 - Math.exp(1 - dm / t));
            }
        }
    }


    /** Convert all of x to double, padded by pad elements on each side. */
    public static double[] PadToDouble(float[] x, int pad)
    {
        return PadToDouble(x, x.length, pad, null);
    }


    /**
     * Convert x[0..length-1] to double into a buffer with pad extra elements on each
     * side.  Since we're copying anyway, we might as well pad with something: each pad
     * ramps away from the midpoint of the first and last samples.
     * @param y optional output buffer, reused when large enough
     * NOTE(review): 'd = x[length - 1];' clobbers the slope computed above before the
     * right-hand pad is built, so that pad ramps by the raw LAST SAMPLE VALUE per step
     * rather than by the slope — this looks like a bug (symmetric ramps were apparently
     * intended).  Left unchanged because downstream wavelet output depends on it;
     * confirm intent before fixing.
     */
    public static double[] PadToDouble(float[] x, int length, int pad, double[] y)
    {
        int N = length + 2 * pad;
        y = Transform.realloc(y, N);
        double first = x[0];
        double last = x[length-1];
        double middle = (first+last)/2;
        double d = (last-first)/(pad*2);
        // pad
        for (int i = 0; i < pad; i++)
            y[i] = middle - i * d;
        d = x[length - 1];
        for (int i = 0; i < pad; i++)
            y[y.length - i - 1] = middle + i * d;
        // copy/convert
        for (int i = 0; i < length; i++)
            y[i + pad] = x[i];
        return y;
    }


    /**
     * Inverse of PadToDouble: strip pad elements from each side of y and narrow to float.
     * @param x optional output buffer, reused only when exactly the right length
     */
    public static float[] UnpadToFloat(double[] y, int pad, float[] x)
    {
        int N = y.length - 2 * pad;
        if (null == x || x.length != N)
            x = new float[N];
        for (int i = 0; i < N; i++)
            x[i] = (float) y[i + pad];
        return x;
    }


    /** 2^k via shift. */
    static int pow2(int k)
    {
        return 1 << k;
    }


    /**
     * Return array if it is non-null and at least length long, else allocate a new one.
     * When assertions are enabled (-ea) the ENTIRE buffer is scrubbed to NaN so reads
     * of uninitialized/stale elements fail loudly; with assertions disabled stale
     * contents are kept as-is.
     */
    public static double[] realloc(double[] array, int length)
    {
        if (null == array || array.length < length)
            array = new double[length];
        boolean debug = false;
        assert true == (debug = true); // side-effecting assert: debug is true only under -ea
        if (debug)
            Arrays.fill(array, Double.NaN);
        return array;
    }


    /** float overload of realloc(double[],int); same NaN-scrub-under-assertions behavior. */
    public static float[] realloc(float[] array, int length)
    {
        if (null == array || array.length < length)
            array = new float[length];
        boolean debug = false;
        assert true == (debug = true); // side-effecting assert: debug is true only under -ea
        if (debug)
            Arrays.fill(array, Float.NaN);
        return array;
    }


    /**
     * Extract column r across all rows of m (out[s] = m[s][r]).
     * @param out optional output buffer, reused when large enough
     */
    public static float[] getRow(float[][] m, int r, float[] out)
    {
        out = realloc(out, m.length);
        for (int s = 0; s < m.length; s++)
            out[s] = m[s][r];
        return out;
    }


    /** Scatter in back into column r across all rows of m (m[s][r] = in[s]). */
    public static void setRow(float[][] m, int r, float[] in)
    {
        for (int s = 0; s < m.length; s++)
            m[s][r] = in[s];
    }


    /**
     * Ad-hoc smoke test: prints two NormalP curves (NormalP is defined elsewhere in
     * this class) and asserts the median-of-three helper.  Run with -ea for the asserts
     * to fire.
     */
    public static void main(String[] args)
    {
        float[] g10 = NormalP(11, .01, false, false, null);
        float[] g20 = NormalP(21, .01, false, false, null);
        for (int i=0 ; i<g10.length ; i++)
            System.out.println("" + (((double)i)/(g10.length-1)) + "\t" + g10[i]);
        System.out.println();
        for (int i=0 ; i<g20.length ; i++)
            System.out.println("" + (((double)i)/(g20.length-1)) + "\t" + g20[i]);

        assert 2F == Median(1, 2, 3);
        assert 2F == Median(1, 3, 2);
        assert 2F == Median(2, 1, 2);
        assert 2F == Median(2, 1, 3);
        assert 2F == Median(2, 3, 1);
        assert 2F == Median(3, 2, 1);
    }
}