/*
* Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.fhcrc.cpl.toolbox.proteomics.feature;
import org.apache.log4j.Logger;
import org.fhcrc.cpl.toolbox.gui.chart.PanelWithScatterPlot;
import org.fhcrc.cpl.toolbox.*;
import org.fhcrc.cpl.toolbox.datastructure.Pair;
import org.fhcrc.cpl.toolbox.proteomics.MassCalibrationUtilities;
import org.fhcrc.cpl.toolbox.proteomics.feature.Feature;
import org.fhcrc.cpl.toolbox.proteomics.feature.FeatureSet;
import java.util.List;
import java.util.ArrayList;
import java.util.Arrays;
/**
* Utilities for recalibrating and characterizing the mass calibration of sets of
* features, based on an approach that uses theoretical peptide mass clusters.
*
* Peptides should have masses with certain predictable qualities. In particular, the
* distances between masses should cluster around multiples of a certain number, called
* the "mass wavelength". This number would be 1 (the mass of a proton in a Carbon atom),
* except that different elements' nucleon masses are different, due to the effect
* of the mass defect. So the clustering isn't perfect, but it is pretty good. You can
* determine how the deviation from predicted cluster changes as the distance between
* masses increases.
*
* This deviation can be used to calibrate masses in a given run, and it can be used to
* toss out wildly deviating masses from runs known to be calibrated correctly.
*
* For more information, and for the theoretical basis of pretty much everything in here,
* check out:
* "Analytical model of peptide mass cluster centres with applications"
* Wolski, Farrow, et al.
* http://www.proteomesci.com/content/4/1/18
*/
public class FeatureMassCalibrationUtilities
{
protected static Logger _log = Logger.getLogger(FeatureMassCalibrationUtilities.class);
public FeatureMassCalibrationUtilities()
{
}
/**
* Convenience method for a single feature set
* @param features
* @return
*/
public static PanelWithScatterPlot plotMassDefectDeviation(Feature[] features,
double theoreticalMassWavelength,
boolean shouldDisplayPPMLine,
double ppmForLine)
{
List<Feature[]> featureList = new ArrayList<Feature[]>(1);
featureList.add(features);
return plotMassDefectDeviation(featureList, theoreticalMassWavelength,
shouldDisplayPPMLine, ppmForLine,true);
}
/**
* Plot the mass defect deviations of multiple featuresets, in different colors.
* Optionally displays lines indicating deviations of ppmForLine parts per million
* from theoretical cluster center.
* @param featureArrays
* @return
*/
public static PanelWithScatterPlot plotMassDefectDeviation(List<Feature[]> featureArrays,
double theoreticalMassWavelength,
boolean shouldDisplayPPMLine,
double ppmForLine,
boolean useMz)
{
List<float[]> massArrays = new ArrayList();
for (Feature[] featureArray : featureArrays)
{
float[] massArray = new float[featureArray.length];
for (int i=0; i<featureArray.length; i++)
{
massArray[i] = useMz ? featureArray[i].getMz() : featureArray[i].getMass();
}
massArrays.add(massArray);
}
return MassCalibrationUtilities.plotMassDefectDeviation(massArrays,
theoreticalMassWavelength, shouldDisplayPPMLine, ppmForLine);
}
/**
* master method for mass correction.
*
* Masses can be corrected separately for different ranges of scans within the file,
* since calibration can drift throughout the run.
*
* @param features
* @param maxPairsForLeverageCalc
* @param numPartitions
* @param theoreticalMassWavelength
* @param showCharts
*/
public static void calibrateMasses(Feature[] features,
int maxPairsForLeverageCalc,
int numPartitions,
double theoreticalMassWavelength,
int initialMassFilterPPM,
boolean showCharts)
{
Feature[] featuresByScan = new Feature[features.length];
System.arraycopy(features, 0, featuresByScan, 0, features.length);
Arrays.sort(featuresByScan, new Feature.ScanAscComparator());
//if features for mass calibration calculation must first undergo an initial mass filter:
//1. do the initial filter
//2. calculate all the partition parameters
//3. apply those parameters to ALL the features
if (initialMassFilterPPM > 0)
{
Feature[] initialFilteredFeatures =
FeatureMassCalibrationUtilities.filterFeaturesByMassDefectDeviation(features,
initialMassFilterPPM);
Feature[] initialFilteredFeaturesByScan = new Feature[initialFilteredFeatures.length];
System.arraycopy(initialFilteredFeatures, 0, initialFilteredFeaturesByScan, 0,
initialFilteredFeatures.length);
Arrays.sort(initialFilteredFeaturesByScan, new Feature.ScanAscComparator());
Pair<Integer, Pair<Double, Double>>[] partitionParameters =
FeatureMassCalibrationUtilities.calculateWavelengthsAndOffsetsMultiplePartitions(
initialFilteredFeaturesByScan, maxPairsForLeverageCalc, numPartitions,
theoreticalMassWavelength, showCharts);
calibrateMasses(featuresByScan, partitionParameters, theoreticalMassWavelength, showCharts);
}
else
{
//Calculate the calibration parameters for all partitions at the outset
Pair<Integer, Pair<Double, Double>>[] partitionParameters =
calculateWavelengthsAndOffsetsMultiplePartitions(
featuresByScan, maxPairsForLeverageCalc, numPartitions,
theoreticalMassWavelength, showCharts);
calibrateMasses(featuresByScan, partitionParameters, theoreticalMassWavelength, showCharts);
}
}
public static void calibrateMasses(Feature[] featuresByScan,
Pair<Integer, Pair<Double, Double>>[] partitionParameters,
double theoreticalMassWavelength,
boolean showCharts)
{
int numPartitions = partitionParameters.length;
int paramIndex = 0;
Pair<Double,Double> currentParameters = null;
double currentWavelength =0;
double currentOffset =0;
int nextPartitionStartScan = -1;
List<Feature> currentPartitionFeatureList = new ArrayList<Feature>();
List<Feature[]> partitionFeaturesList = new ArrayList<Feature[]>();
for (Feature feature : featuresByScan)
{
if (feature.getScan() >= nextPartitionStartScan)
{
currentParameters = partitionParameters[paramIndex++].getValue();
currentWavelength = currentParameters.first;
currentOffset = currentParameters.second;
ApplicationContext.infoMessage("Partition " + (paramIndex) + ": wavelength=" +
currentWavelength + ", offset=" + currentOffset);
if (numPartitions > paramIndex)
nextPartitionStartScan = partitionParameters[paramIndex].getKey();
else
{
//last partition
nextPartitionStartScan = Integer.MAX_VALUE;
}
partitionFeaturesList.add(currentPartitionFeatureList.toArray(new Feature[currentPartitionFeatureList.size()]));
currentPartitionFeatureList = new ArrayList<Feature>();
}
float newMass = feature.getMass();
newMass += feature.getMass() * (theoreticalMassWavelength - currentWavelength) - currentOffset;
feature.setMass(newMass);
if (feature.getCharge() > 0)
feature.updateMz();
currentPartitionFeatureList.add(feature);
}
//last partition
partitionFeaturesList.add(currentPartitionFeatureList.toArray(new Feature[currentPartitionFeatureList.size()]));
if (showCharts)
{
PanelWithScatterPlot pwsp =
plotMassDefectDeviation(partitionFeaturesList, theoreticalMassWavelength, false, 0,true);
pwsp.setName("after");
pwsp.displayInTab();
}
}
/**
* Returns an array of pairs. The first item in the pair is a _scan number_ indicating
* where the region begins. The second item is a Double,Double pair containing the
* wavelength and intercept to be applied in the region.
* @param featuresByScan
* @param maxPairsForLeverageCalc
* @param numPartitions
* @param theoreticalMassWavelength
* @param showCharts
* @return
*/
public static Pair<Integer, Pair<Double,Double>>[]
calculateWavelengthsAndOffsetsMultiplePartitions(Feature[] featuresByScan,
int maxPairsForLeverageCalc,
int numPartitions,
double theoreticalMassWavelength,
boolean showCharts)
{
int partitionSize = featuresByScan.length / numPartitions;
List<Feature[]> partitionFeaturesList = new ArrayList<Feature[]>();
Pair<Integer, Pair<Double,Double>>[] result =
(Pair<Integer, Pair<Double,Double>>[]) new Pair[numPartitions];
for (int i=0; i<numPartitions; i++)
{
int partitionStart = i * partitionSize;
int partitionEnd = ((i+1) * partitionSize) - 1;
if (i==numPartitions-1)
partitionEnd = featuresByScan.length-1;
int thisPartitionSize = (partitionEnd - partitionStart) - 1;
Feature[] featuresInPartition = new Feature[thisPartitionSize];
System.arraycopy(featuresByScan, partitionStart,
featuresInPartition, 0, thisPartitionSize);
_log.debug("Partition " + (i+1) + ": scans " + partitionStart + "-" + partitionEnd +
". " + featuresInPartition.length + " features");
partitionFeaturesList.add(featuresInPartition);
}
if (showCharts)
{
PanelWithScatterPlot pwsp =
plotMassDefectDeviation(partitionFeaturesList, theoreticalMassWavelength, false, 0,true);
pwsp.setName("Before");
pwsp.displayInTab();
}
for (int i=0; i<partitionFeaturesList.size(); i++)
{
Feature[] featuresInPartition = partitionFeaturesList.get(i);
Pair<Double,Double> wavelengthAndOffset =
calculateWavelengthAndOffset(featuresInPartition, maxPairsForLeverageCalc,
theoreticalMassWavelength);
result[i] =
new Pair<Integer, Pair<Double,Double>>(
featuresByScan[i * partitionSize].getScan(),
wavelengthAndOffset);
}
return result;
}
/**
* Calibrate masses in a single partition.
*
* First we calculate the mass wavelength, and the offset from theoretical mass
* intercept. Then we adjust each mass.
* @param features
* @param maxPairsForLeverageCalc
* @param theoreticalMassWavelength
*/
protected static void calibrateMassesSinglePartition(Feature[] features,
int maxPairsForLeverageCalc,
double theoreticalMassWavelength)
{
float[] masses = new float[features.length];
for (int i=0; i<features.length; i++)
masses[i] = features[i].getMass();
float[] newMasses = MassCalibrationUtilities.calibrateMassesSinglePartition(masses, maxPairsForLeverageCalc,
theoreticalMassWavelength);
for (int i=0; i<features.length; i++)
{
features[i].setMass(masses[i]);
}
}
/**
* Use robust regression techniques to map mass to mass offset from theoretical cluster
* @param features
* @param maxPairsForLeverageCalc more is better, but more costs more time
* @return a Pair containing slope, intercept of the regression line.
*/
public static Pair<Double,Double> calculateWavelengthAndOffset(Feature[] features,
int maxPairsForLeverageCalc,
double theoreticalMassWavelength)
{
float[] allMasses = new float[features.length];
for (int i=0; i<features.length; i++)
allMasses[i] = features[i].getMass();
return MassCalibrationUtilities.calculateWavelengthAndOffset(allMasses, maxPairsForLeverageCalc,
theoreticalMassWavelength);
}
/**
* Not the entry point for the --filter command
* @param featureSet
*/
public static void filterFeatureSetByMassDefectDeviation(FeatureSet featureSet,
double theoreticalMassWavelength)
{
featureSet.setFeatures(filterFeaturesByMassDefectDeviation(featureSet.getFeatures(),
theoreticalMassWavelength));
}
/**
* Not the entry point for the --filter command
* @param featuresList
* @return
*/
public static List<Feature> filterFeaturesByMassDefectDeviation(List<Feature> featuresList,
double maxDeviationPPM)
{
Feature[] features = featuresList.toArray(new Feature[featuresList.size()]);
Feature[] filtered = filterFeaturesByMassDefectDeviation(features,
maxDeviationPPM,
MassCalibrationUtilities.DEFAULT_THEORETICAL_MASS_WAVELENGTH);
List<Feature> result = new ArrayList<Feature>(filtered.length);
for (Feature feature : filtered)
result.add(feature);
return result;
}
/**
* Not the entry point for the --filter command
* @param features
* @return
*/
public static Feature[] filterFeaturesByMassDefectDeviation(Feature[] features,
double maxDeviationPPM)
{
return filterFeaturesByMassDefectDeviation(features,
maxDeviationPPM,
MassCalibrationUtilities.DEFAULT_THEORETICAL_MASS_WAVELENGTH);
}
/**
* Not the entry point for the --filter command
* @param features
* @param maxDeviationPPM
* @return
*/
public static Feature[] filterFeaturesByMassDefectDeviation(Feature[] features,
double maxDeviationPPM,
double theoreticalMassWavelength)
{
List<Feature> resultList = new ArrayList<Feature>();
for (Feature feature : features)
{
if (Math.abs(MassCalibrationUtilities.calculateMassDefectDeviationPPM(feature.getMass(),
theoreticalMassWavelength)) <= maxDeviationPPM)
resultList.add(feature);
}
return resultList.toArray(new Feature[resultList.size()]);
}
}