/*
* Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.fhcrc.cpl.toolbox.proteomics.feature;
import org.fhcrc.cpl.toolbox.gui.chart.PanelWithRPerspectivePlot;
import org.fhcrc.cpl.toolbox.proteomics.feature.Feature;
import org.fhcrc.cpl.toolbox.proteomics.feature.FeatureSet;
import org.fhcrc.cpl.toolbox.proteomics.feature.FeatureGrouper;
import org.fhcrc.cpl.toolbox.proteomics.feature.FeatureClusterer;
import org.fhcrc.cpl.toolbox.proteomics.feature.extraInfo.MS2ExtraInfoDef;
import org.fhcrc.cpl.toolbox.proteomics.Clusterer2D;
import java.util.*;
import org.apache.log4j.Logger;
import javax.swing.*;
/**
* This class handles deconvolution of features; that is, identifying features
* that we've found in multiple charge states and collapsing them down. Collapsed
* features contain the details of the original feature with highest intensity
* (except for the intensity itself, which is the sum of all collapsed features').
*
* Optimization attempts to find the best mass and time tolerance values for performing
* deconvolution. It does this by trying a large number of possible settings and,
* for each combination of parameters, assigning some kind of quality score. See the
* methods below for details.
*/
public class Deconvoluter implements Cloneable
{
static Logger _log = Logger.getLogger(Deconvoluter.class);
public static final double DEFAULT_DELTA_TIME = 30;
public static final double DEFAULT_DELTA_MASS = 15;
protected double deltaTime = DEFAULT_DELTA_TIME;
protected double deltaMass = DEFAULT_DELTA_MASS;
//the boundaries for the optimization parameters.
//ideally the MAXes should be multiples of the MINs.
public static final double DEFAULT_MAX_DELTA_MASS_PPM = 40;
public static final double DEFAULT_MAX_DELTA_TIME = 50;
public static final double DEFAULT_MIN_DELTA_MASS_PPM = 5;
public static final double DEFAULT_MIN_DELTA_TIME = 10;
//Increments between different values of the parameters to try
protected static final double MASS_INCREMENT_PPM = 2;
protected static final double TIME_INCREMENT_SECONDS = 5;
protected FeatureGrouper grouper;
protected FeatureSet featureSet;
protected boolean showCharts=false;
public Deconvoluter(FeatureSet featureSet)
{
this.featureSet = featureSet;
grouper = new FeatureGrouper();
grouper.addSet(featureSet);
grouper.setGroupByMass(true);
setMassType(FeatureClusterer.DELTA_MASS_TYPE_PPM);
setDeltaMass(DEFAULT_DELTA_MASS);
setDeltaTime(DEFAULT_DELTA_TIME);
setElutionMode(FeatureClusterer.ELUTION_MODE_TIME);
}
/**
* Optimize both the mass and time matching tolerances. As described above,
* this is based on the ratio of multi-to-1 deconvolutions to 2-to-1 deconvolutions.
*/
public void optimizeParameters()
{
int numMassIncrements = (int) ((DEFAULT_MAX_DELTA_MASS_PPM - DEFAULT_MIN_DELTA_MASS_PPM)
/ MASS_INCREMENT_PPM) + 1;
int numTimeIncrements = (int) ((DEFAULT_MAX_DELTA_TIME - DEFAULT_MIN_DELTA_TIME)
/ TIME_INCREMENT_SECONDS) + 1;
double[] massSettings = new double[numMassIncrements];
double[] timeSettings = new double[numTimeIncrements];
double[][] ratioMultiTo2EachSetting = new double[numMassIncrements][numTimeIncrements];
for (int timeIndex = 0; timeIndex < numTimeIncrements; timeIndex++)
{
timeSettings[timeIndex] = timeIndex * TIME_INCREMENT_SECONDS +
DEFAULT_MIN_DELTA_TIME;
}
for (int massIndex = 0; massIndex < numMassIncrements; massIndex++)
{
massSettings[massIndex] = massIndex * MASS_INCREMENT_PPM +
DEFAULT_MIN_DELTA_MASS_PPM;
}
double bestDeltaMass = 0;
double bestDeltaTime = 0;
double highestMultiTo2Ratio = -1;
//System.err.println("deltaMass\tdeltaTime\tnumFeatures\tnumBuckets\tnum2\tsameCharge2\tdiffCharge2\tnum3\tbucketsWith3with3SameCharge\tbucketsWith3with2SameCharge\tnumMore\tbucketsWithMoreWith4OrMoreSameCharge\tbucketsWithMoreWith3SameCharge\tbucketsWithMoreWith2SameCharge");
for (int massIndex = 0; massIndex < numMassIncrements; massIndex++)
{
for (int timeIndex = 0; timeIndex < numTimeIncrements; timeIndex++)
{
setDeltaMass(massSettings[massIndex]);
setDeltaTime(timeSettings[timeIndex]);
ratioMultiTo2EachSetting[massIndex][timeIndex] =
//this is for debugging
// collectBucketInfo();
//this is the best one
calcMultiToSingleChargeStateDifference();
//this doesn't work well
// calcMultiTo2ratio();
//this is downright silly
//deconvolute().getFeatures().length;
if (ratioMultiTo2EachSetting[massIndex][timeIndex] > highestMultiTo2Ratio)
{
highestMultiTo2Ratio = ratioMultiTo2EachSetting[massIndex][timeIndex];
bestDeltaMass = massSettings[massIndex];
bestDeltaTime = timeSettings[timeIndex];
}
}
}
setDeltaMass(bestDeltaMass);
setDeltaTime(bestDeltaTime);
_log.debug("Optimal deconvolution parameters: deltaMass=" + bestDeltaMass + ", deltaTime=" + bestDeltaTime);
//This plot is pretty obscure to read; mainly for development purposes
if (showCharts)
{
PanelWithRPerspectivePlot plotPanel =
new PanelWithRPerspectivePlot();
plotPanel.setRotationAngle(225);
plotPanel.plot(massSettings, timeSettings,
ratioMultiTo2EachSetting);
JDialog imageDialog = new JDialog();
imageDialog.setTitle("X = deltaMass, Y = deltaTime, Z = Ratio of 3-feature collapses to 2-feature");
imageDialog.setDefaultCloseOperation(JDialog.DISPOSE_ON_CLOSE);
imageDialog.add(plotPanel);
plotPanel.repaint();
imageDialog.setSize(PanelWithRPerspectivePlot.DEFAULT_CHART_DIALOG_WIDTH,
PanelWithRPerspectivePlot.DEFAULT_CHART_DIALOG_HEIGHT);
imageDialog.setVisible(true);
}
}
/**
* Here's where we calculate the ratio of multiple-features-collapsed-to-one to
* 2-features-collapsed-to-one, for a given set of matching parameters. We do this
* by performing the relevant bits of deconvolution and then analyzing the clusters.
* @deprecated
* @return
*/
// protected double calcMultiTo2ratio()
// {
// grouper.split2D(deltaMass, deltaTime);
// Clusterer2D.BucketSummary[] buckets = grouper.summarize();
// int numMorethan2s=0;
// int num2s=0;
//
// for (int i = 0; i < buckets.length; i++)
// {
// Clusterer2D.BucketSummary bucket = buckets[i];
// if (bucket.featureCount > 1)
// {
// if (bucket.featureCount > 2)
// numMorethan2s++;
// else if (bucket.featureCount ==2)
// num2s++;
// }
// }
//
// return (double) numMorethan2s / (double) num2s;
// }
/**
*
* @return
*/
protected double collectBucketInfo()
{
grouper.split2D(deltaMass, deltaTime);
Clusterer2D.BucketSummary[] buckets = grouper.summarize();
int numFeatures = deconvolute().getFeatures().length;
int numBuckets = buckets.length;
int bucketsWith2 = 0;
int bucketsWith3 = 0;
int bucketsWithMoreThan3 = 0;
int bucketsWith2SameCharge = 0;
int bucketsWith2DiffCharge = 0;
int bucketsWith3AllSameCharge = 0;
int bucketsWith3TwoSameCharge = 0;
int bucketsWithMoreFourOrMoreSameCharge = 0;
int bucketsWithMoreThreeSameCharge = 0;
int bucketsWithMoreTwoSameCharge = 0;
for (Clusterer2D.BucketSummary bucket : buckets)
{
Feature[] featuresInBucket = FeatureGrouper.getFeatures(bucket);
int[] numInEachCharge = new int[10];
for (int j=0; j<featuresInBucket.length; j++)
{
numInEachCharge[featuresInBucket[j].getCharge()]++;
}
switch(bucket.featureCount)
{
case 0:
break;
case 1:
break;
case 2:
bucketsWith2++;
boolean sameCharge = false;
for (int i=0; i<10; i++)
if (numInEachCharge[i] == 2)
{
sameCharge = true;
break;
}
if (sameCharge)
bucketsWith2SameCharge++;
else
bucketsWith2DiffCharge++;
break;
case 3:
bucketsWith3++;
boolean threeSameCharge = false;
boolean twoSameCharge = false;
for (int i=0; i<10; i++)
{
if (numInEachCharge[i] == 3)
{
threeSameCharge = true;
break;
}
else if (numInEachCharge[i] == 2)
{
twoSameCharge = true;
break;
}
}
if (twoSameCharge)
bucketsWith3TwoSameCharge++;
else if (threeSameCharge)
bucketsWith3AllSameCharge++;
break;
default:
bucketsWith3++;
boolean moreThanThreeSameCharge = false;
boolean threeSameChargeAgain = false;
boolean twoSameChargeAgain = false;
for (int i=0; i<10; i++)
{
if (numInEachCharge[i] == 2)
{
twoSameCharge = true;
}
else if (numInEachCharge[i] == 3)
{
threeSameCharge = true;
twoSameCharge = false;
}
else if (numInEachCharge[i] > 3)
{
moreThanThreeSameCharge = true;
twoSameCharge = false;
threeSameCharge = false;
break;
}
}
if (moreThanThreeSameCharge)
bucketsWithMoreFourOrMoreSameCharge++;
else if (threeSameChargeAgain)
bucketsWithMoreThreeSameCharge++;
else if (twoSameChargeAgain)
bucketsWithMoreTwoSameCharge++;
break;
}
}
System.err.println(deltaMass + "\t" + deltaTime + "\t" + numFeatures + "\t" + numBuckets + "\t" +
bucketsWith2 + "\t" + bucketsWith2SameCharge + "\t" + bucketsWith2DiffCharge + "\t" +
bucketsWith3 + "\t" + bucketsWith3AllSameCharge + "\t" + bucketsWith3TwoSameCharge + "\t" +
bucketsWithMoreThan3 + "\t" + bucketsWithMoreFourOrMoreSameCharge + "\t" + bucketsWithMoreThreeSameCharge + "\t" + bucketsWithMoreTwoSameCharge);
return 0;
}
/**
* Here's where we calculate the number of multi-feature buckets for which
* there are no features with the same charge, and subract the number of buckets
* for which there are two or more features with the same charge (even if there
* are other features with different charges. This gives us
* an estimate of how good deconvolution looks.
*
* We do this
* by performing the relevant bits of deconvolution and then analyzing the clusters.
* @return
*/
protected double calcMultiToSingleChargeStateDifference()
{
grouper.split2D(deltaMass, deltaTime);
Clusterer2D.BucketSummary[] buckets = grouper.summarize();
int numMultipleCharges=0;
int numSameCharges=0;
for (Clusterer2D.BucketSummary bucket : buckets)
{
if (bucket.featureCount > 1)
{
Feature[] featuresInBucket = FeatureGrouper.getFeatures(bucket);
int firstChargeState = featuresInBucket[0].getCharge();
boolean sameChargesInBucket = false;
for (int j=1; j<featuresInBucket.length; j++)
{
if (featuresInBucket[j].getCharge() == firstChargeState)
{
sameChargesInBucket = true;
break;
}
}
if (sameChargesInBucket)
numSameCharges++;
else
numMultipleCharges++;
}
}
return (double) numMultipleCharges - (double) numSameCharges;
}
/**
* Combine features that represent the same peptide. Features
* must be within a deltaMass, deltaTime window to be considered same.
* <p/>
* This removes redunancy caused by multiple charge states. It
* also combines features of the same mass/charge that are close
* together (e.g. a small feature that is split by ion competition)
* <p/>
* CONSIDER(mbellew) the mass (mz) tolerance could be tighter
* for features w/ same charge
*
* @return
*/
public FeatureSet deconvolute()
{
_log.debug("Deconvoluting. Delta Mass: " + deltaMass + "ppm, delta time: " + deltaTime);
_log.debug("Before deconvolution, #features: " + featureSet.getFeatures().length);
grouper.split2D(deltaMass, deltaTime);
Clusterer2D.BucketSummary[] buckets = grouper.summarize();
Feature[] deconvoluted = new Feature[buckets.length];
int numPeptideConflicts = 0;
int numPreservedPeptides = 0;
for (int i = 0; i < buckets.length; i++)
{
Clusterer2D.BucketSummary bucket = buckets[i];
Feature deconvolutedFeature = null;
if (bucket.featureCount == 1)
{
deconvolutedFeature = (Feature) FeatureGrouper.getFeatures(bucket)[0].clone();
deconvolutedFeature.setChargeStates(1);
}
else
{
Feature[] bucketFeatures = FeatureGrouper.getFeatures(bucket);
Feature best = bucketFeatures[0];
float totalIntensity = 0.0f;
String description = "";
for (Feature f : bucketFeatures)
{
if (description.length() > 0)
description += ", ";
description += f.charge;
if (null != f.getDescription())
description += " (" + f.getDescription() + ")";
totalIntensity += f.intensity;
if (f.totalIntensity > best.totalIntensity)
best = f;
}
deconvolutedFeature = (Feature) best.clone();
deconvolutedFeature.setIntensity(totalIntensity);
deconvolutedFeature.setChargeStates(bucket.featureCount);
deconvolutedFeature.setDescription(description);
//if there's MS2 data in this FeatureSet, then we need to make sure
//that the collapsed feature contains the peptide and protein ID carried
//by its components.
//If there are conflicts, we leave the existing ID on the collapsed feature
//alone, or if it had none, don't assign
//TODO: somehow move this to MS2ExtraInfoDef?
if (featureSet.hasExtraInformationType(MS2ExtraInfoDef.getSingletonInstance()))
{
Set<String> featurePeptides = new HashSet<String>();
Set<String> featureProteins = new HashSet<String>();
for (Feature f : bucketFeatures)
{
String featurePeptide = MS2ExtraInfoDef.getFirstPeptide(f);
if (featurePeptide != null)
{
featurePeptides.add(featurePeptide);
String featureProtein = MS2ExtraInfoDef.getFirstProtein(f);
if (featureProtein != null)
featureProteins.add(featureProtein);
}
}
if (featurePeptides.size() == 1 &&
MS2ExtraInfoDef.getFirstPeptide(deconvolutedFeature) == null)
{
MS2ExtraInfoDef.setSinglePeptide(deconvolutedFeature,
featurePeptides.iterator().next());
numPreservedPeptides++;
if (featureProteins.size() == 1 &&
MS2ExtraInfoDef.getFirstProtein(deconvolutedFeature) == null)
MS2ExtraInfoDef.addProtein(deconvolutedFeature,
featureProteins.iterator().next());
}
else
{
if (featurePeptides.size() > 1)
numPeptideConflicts++;
}
}
}
deconvolutedFeature.comprised = FeatureGrouper.getFeatures(bucket);
deconvoluted[i] = deconvolutedFeature;
}
//reporting on peptides preserved and conflicts
if (featureSet.hasExtraInformationType(MS2ExtraInfoDef.getSingletonInstance()))
{
_log.debug("deconvolute: peptides actively preserved: " + numPreservedPeptides);
_log.debug("deconvolute: peptide conflicts: " + numPeptideConflicts);
}
FeatureSet fs = (FeatureSet) featureSet.clone();
//Make map modifiable & set properties.
Map props = new HashMap();
props.putAll(featureSet.getProperties());
if (null != featureSet.getSourceFile())
props.put("origSourceFile", featureSet.getSourceFile());
props.put("deconvoluteScanDiff", String.valueOf(deltaTime));
props.put("deconvoluteMassDiff", String.valueOf(deltaMass));
fs.setFeatures(deconvoluted);
fs.setProperties(props);
_log.debug("After deconvolution, #features: " + deconvoluted.length);
return fs;
}
public int getMassType()
{
return grouper.getMassType();
}
public void setMassType(int massType)
{
grouper.setMassType(massType);
}
public double getDeltaTime()
{
return deltaTime;
}
public void setDeltaTime(double deltaTime)
{
this.deltaTime = deltaTime;
}
public double getDeltaMass()
{
return deltaMass;
}
public void setDeltaMass(double deltaMass)
{
this.deltaMass = deltaMass;
}
public int getElutionMode()
{
return grouper.getElutionMode();
}
public void setElutionMode(int _elutionMode)
{
grouper.setElutionMode(_elutionMode);
}
public boolean shouldShowCharts()
{
return showCharts;
}
public void setShowCharts(boolean showCharts)
{
this.showCharts = showCharts;
}
}