/*
* Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.fhcrc.cpl.viewer.feature;
import org.apache.log4j.Logger;
import org.fhcrc.cpl.toolbox.CPUTimer;
import org.fhcrc.cpl.toolbox.proteomics.feature.Spectrum;
import org.fhcrc.cpl.toolbox.proteomics.feature.Feature;
import org.fhcrc.cpl.viewer.feature.ExtractEdgeFeatures;
import org.fhcrc.cpl.viewer.feature.ExtractMaxima2D;
import org.fhcrc.cpl.viewer.feature.extraction.SpectrumResampler;
import org.fhcrc.cpl.toolbox.proteomics.Scan;
import org.fhcrc.cpl.toolbox.proteomics.MSRun;
import org.fhcrc.cpl.toolbox.datastructure.Tree2D;
import org.fhcrc.cpl.toolbox.datastructure.FloatRange;
import java.io.InputStream;
import java.util.*;
/**
* User: mbellew
* Date: Nov 2, 2004
*/
public class FeatureStrategyCombined extends FeatureStrategyUsingWindow // extends FeatureExtractor
{
static Logger _log = Logger.getLogger(FeatureStrategyCombined.class);
static int _WindowMargin = 16;
static int _FeatureScanWindowStart;
static int _FeatureScanWindowWidth;
static float _FeatureMzWindowStart;
static float _FeatureMzWindowHeight;
static float _AverageWindowWidth;
static CPUTimer timerAnalyze = new CPUTimer("FeatureStrategyCombined.analyze");
static CPUTimer timerResample = new CPUTimer("FeatureStrategyCombined.get and resample");
static CPUTimer timerBackground = new CPUTimer("FeatureStrategyCombined.background");
static CPUTimer timerEdgeFeatures = new CPUTimer("FeatureStrategyCombined.edges");
static CPUTimer timerExtractPeaks = new CPUTimer("FeatureStrategyCombined.peaks");
static CPUTimer timerExtractPeptides = new CPUTimer("FeatureStrategyCombined.peptides");
static
{
initProps();
}
private static boolean bPropsInitialized = false;
int _startNum = 0;
int _endNum = 0;
Scan[] _scans = null;
private static void initProps()
{
assert false == (bPropsInitialized = false);
if (bPropsInitialized)
return;
try
{
InputStream is = ExtractEdgeFeatures.class.getResourceAsStream("features.properties");
Properties props = new Properties();
props.load(is);
_FeatureScanWindowStart = Integer.parseInt(((String)props.get("feature.ScanWindowStart")).trim());
_FeatureScanWindowWidth = Integer.parseInt(((String)props.get("feature.ScanWindowWidth")).trim());
_FeatureMzWindowStart = Float.parseFloat(((String)props.get("feature.MzWindowStart")).trim());
_FeatureMzWindowHeight = Float.parseFloat(((String)props.get("feature.MzWindowHeight")).trim());
_AverageWindowWidth = Integer.parseInt(((String)props.get("feature.AverageWindowWidth")).trim());
}
catch (java.io.IOException x)
{
x.printStackTrace();
}
}
public FeatureStrategyCombined(MSRun run, int scanIndex, int count, int maxCharge, FloatRange range, double sn)
{
super(run, scanIndex, count, maxCharge, range, sn);
int c2 = Math.max(128, count + 2 * _WindowMargin);
scanIndex = Math.max(0, scanIndex - (c2 - count) / 2);
int scanMax = Math.min(scanIndex + c2, run.getScanCount());
count = scanMax - scanIndex;
_scans = getScans(run, scanIndex, count);
_startNum = run.getScan(scanIndex).getNum();
_endNum = run.getScan(scanMax-1).getNum();
}
public int getType()
{
return TYPE_2D;
}
public Feature[] _analyze() throws InterruptedException
{
Feature[] features = analyzeWindow(_scans, 256, _WindowMargin);
ArrayList filtered = new ArrayList();
for (int i = 0; i < features.length; i++)
{
Feature feature = features[i];
if (feature.scan >= _startNum && feature.scan <= _endNum)
filtered.add(feature);
}
return (Feature[])filtered.toArray(new Feature[filtered.size()]);
}
/**
* THIS IS THE MAIN FEATURE FINDING ROUTINE
*/
protected Collection analyze2D(Scan[] scans)
{
_log.debug("analyze2D " + scans[0].getNum() + "-" + scans[scans.length - 1].getNum());
assert timerAnalyze.start();
//
// Convert data into 2D matrix
// we will do all processing on this data until the end and
// then process back to "scan" space
//
assert timerResample.start();
float[][] spectra = new float[scans.length][];
for (int i = 0; i < scans.length; i++)
spectra[i] = Spectrum.Resample(scans[i].getSpectrum(), _mzRange, SpectrumResampler.getResampleFrequency());
assert timerResample.stop();
int width = spectra.length;
int height = spectra[0].length;
_log.debug("analyze2D datasize = " + (width * height * 4));
// separate background and signal components
// we're pretty conservative about what we call background,
// we can use background and/or median later to filter peaks
// further.
timerBackground.start();
float[][] background = Spectrum.RemoveBackground(spectra);
float[][] median = new float[spectra.length][];
for (int s=0 ; s<scans.length ; s++)
median[s] = Spectrum.MedianWindow(spectra[s], height, 72, false);
timerBackground.stop();
//
// Perform a high-level 2D analysis to detect peptide elution
//
// This is done using edge detection technique, this seems to
// be effective at detecting even low intensity features, while
// ignoring noise
//
timerEdgeFeatures.start();
Spectrum.Peak[][] listPeaks = ExtractEdgeFeatures.analyze(spectra, _mzRange, 0);
Spectrum.Peak[] grossFeatures = listPeaks[2];
// UNDONE: can be make use of the edge matrices computed by ExtractEdgeFeatures
for (int i = 0; i < grossFeatures.length; i++)
{
Spectrum.Peak peak = grossFeatures[i];
int imz = (int)((peak.mz-_mzRange.min) * SpectrumResampler.getResampleFrequency());
// TODO intensity of charge 0+ is not comparable to other peaks
//peak.intensity = spectra[peak.scan][imz];
peak.background = background[peak.scan][imz];
peak.setMedian(median[peak.scan][imz]);
}
timerEdgeFeatures.stop();
//
// STEP 2
//
// Analyse these areas for isotopic distributions, and report
// peptide features.
//
// Ideally, we would do some sort of 2D deconvolution of the
// elution/isotopic distribution. However, I don't know how
// do that.
//
// see ExtractPeaks()
//
assert timerExtractPeaks.start();
// .analyze() is destructive, need to copy
float[][] spectraT = new float[spectra.length][];
for (int i = 0; i < spectraT.length; i++)
spectraT[i] = (float[])spectra[i].clone();
Spectrum.Peak[] peaksAll = ExtractMaxima2D.analyze(
spectraT, _mzRange.min, SpectrumResampler.getResampleInterval(),
new FeatureStrategyWavelet2D.SmoothWavelet(),
//new Smooth2D(),
-Float.MAX_VALUE);
//
// crude filtering, eliminate peaks not near grossFeatures
//
// OK go back through peaks and set intensity based on spectra
ArrayList list = new ArrayList();
for (int i = 0; i < peaksAll.length; i++)
{
Spectrum.Peak peak = peaksAll[i];
int imz = (int)((peak.mz-_mzRange.min) * SpectrumResampler.getResampleFrequency());
if (imz >= height || peak.scan >= width)
continue;
peak.intensity = spectra[peak.scan][imz];
peak.background = background[peak.scan][imz];
peak.setMedian(median[peak.scan][imz]);
// TODO: this is a made up cut-off
// TODO: still need to consider what a good value would be
if (peak.intensity < (0.5*peak.background + 2*peak.getMedian() + 3))
continue;
list.add(peaksAll[i]);
}
peaksAll = (Spectrum.Peak[])list.toArray(new Spectrum.Peak[list.size()]);
Arrays.sort(peaksAll, Spectrum.comparePeakIntensityDesc);// debug only
Spectrum.Peak[] peaks = FeatureStrategyUsingWindow2D.FilterPeakSet(grossFeatures, peaksAll, null, 4);
assert timerExtractPeaks.stop();
assert timerExtractPeptides.start();
for (int i = 0; i < peaks.length; i++)
{
Spectrum.Peak peak = peaks[i];
_logDebug(peak.toString());
}
ArrayList peptidesAll = ExtractPeptideFeatures(_run, peaks);
// we know that all peptides returned are near a grossFeature, because
// we filtered them. However, there may be gross features that we did not
// find a good match for. Add these back as 0+ features.
Tree2D treeAll = new Tree2D();
for (Iterator it = peptidesAll.iterator(); it.hasNext();)
{
Feature feature = (Feature)it.next();
treeAll.add(feature.scan, feature.mz, feature);
}
for (int i = 0; i < grossFeatures.length; i++)
{
Spectrum.Peak f = grossFeatures[i];
if (!treeAll.containsPoints(f.scan-4, f.mz-1, f.scan+4, f.mz+1))
peptidesAll.add(f);
}
ArrayList fitleredPeptides = new ArrayList();
// convert back to scanNum, and do a little more filtering
for (Iterator iterator = peptidesAll.iterator(); iterator.hasNext();)
{
Feature feature = (Feature)iterator.next();
if (feature.intensity < 2*feature.getMedian() + 0.5*feature.background + 1)
continue;
int index = feature.scan;
feature.scan = scans[index].getNum();
// fix up the retention time to match updated scan number
feature.setTime((float)scans[index].getDoubleRetentionTime());
fitleredPeptides.add(feature);
}
assert timerExtractPeptides.stop();
assert timerAnalyze.stop();
CPUTimer.DumpAllTimers();
return fitleredPeptides;
}
Collection PeaksAsFeatures(Spectrum.Peak[] peaks)
{
ArrayList l = new ArrayList();
for (int p = 0; p < peaks.length; p++)
l.add(new Feature(peaks[p]));
return l;
}
}