/*
* Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.fhcrc.cpl.toolbox.proteomics.feature;
import org.fhcrc.cpl.toolbox.datastructure.Pair;
import org.fhcrc.cpl.toolbox.proteomics.MSRun;
import org.fhcrc.cpl.toolbox.proteomics.feature.extraInfo.*;
import org.fhcrc.cpl.toolbox.proteomics.feature.FeatureGrouper;
import org.fhcrc.cpl.toolbox.proteomics.Clusterer2D;
import org.fhcrc.cpl.toolbox.ApplicationContext;
import org.fhcrc.cpl.toolbox.commandline.arguments.BooleanArgumentDefinition;
import org.fhcrc.cpl.toolbox.commandline.arguments.ArgumentValidationException;
import org.fhcrc.cpl.toolbox.proteomics.MassCalibrationUtilities;
import org.fhcrc.cpl.toolbox.proteomics.feature.filehandler.*;
import org.apache.commons.beanutils.BeanUtils;
import org.apache.commons.lang.math.IntRange;
import java.awt.*;
import java.io.*;
import java.util.*;
import java.util.List;
import org.apache.log4j.Logger;
/**
* User: migra
* Date: Sep 13, 2004
* Time: 3:05:56 PM
*/
public class FeatureSet implements Cloneable
{
static Logger _log = Logger.getLogger(FeatureSet.class);
public static final int FEATURE_FILE_FORMAT_MSINSPECTTSV = 0;
public static final int FEATURE_FILE_FORMAT_APML = 1;
public static final int FEATURE_FILE_FORMAT_HARDKLOR = 2;
public static final int DEFAULT_FEATURE_FILE_FORMAT = FEATURE_FILE_FORMAT_MSINSPECTTSV;
//maintain loading status
protected int _loadStatus = FEATURESET_LOAD_NOT_LOADED;
protected String _loadStatusMessage = null;
protected Feature[] _features;
private Map<String, Object> _properties = new HashMap<String, Object>();
private String _tag; // optional tag for this feature set
private Color _color;
private int _style = 0;
private File _sourceFile;
private boolean _displayed = true;
//loading status codes
public static final int FEATURESET_LOAD_NOT_LOADED = -1;
public static final int FEATURESET_LOAD_SUCCESS = 0;
public static final int FEATURESET_LOAD_ERROR_FILE_NOT_FOUND = 1;
public static final int FEATURESET_LOAD_ERROR_BAD_FILE_FORMAT = 2;
public static final int FEATURESET_LOAD_ERROR_NO_FEATURES_FOUND = 3;
public static final int FEATURESET_LOAD_ERROR_UNKNOWN = 4;
//different types of intensities to use in quantitation
public static final int TOTAL_INTENSITY=0;
public static final int MAX_INTENSITY=1;
public static final int RECALCULATED_INTENSITY=2;
//default to totalIntensity
public static final int DEFAULT_INTENSITY_TYPE=TOTAL_INTENSITY;
// How many daltons to extract from run on either side of each feature,
// when comparing features
static final int MZ_WINDOW_SIZE = 3;
// Each scan contains a list of (mz, intensity) pairs. The m/z values are typically not sampled
// regularly, so we often resample onto a regular grid before doing other processing. This value
// sets the number of samples to provide per one m/z unit. The value 36 is typical throughout
// msInspect (changing it could actually be a little tricky).
static final int RESAMPLING_FREQUENCY = 36;
//track the known extra information types for this FeatureSet
protected List<FeatureExtraInformationDef> extraInformationTypes;
//no-arg constructor used by feature-file handlers
public FeatureSet()
{
}
public FeatureSet(File file, Color color)
{
//initialize loading status values
setLoadStatus(FEATURESET_LOAD_NOT_LOADED);
setLoadStatusMessage("Features not yet loaded");
//this used to throw a NullPointerException
if (file == null)
{
setLoadStatus(FEATURESET_LOAD_ERROR_FILE_NOT_FOUND);
setLoadStatusMessage("Error loading features: no file specified");
return;
}
try
{
_sourceFile = file;
// ApplicationContext.setMessage("Loading file " + file);
loadFeatureFile(file);
//check for load success before continuing
if (getLoadStatus() == FEATURESET_LOAD_SUCCESS)
{
Arrays.sort(_features, new Feature.MzScanAscComparator());
_color = color;
}
}
catch (Exception e)
{
setLoadStatus(FEATURESET_LOAD_ERROR_UNKNOWN);
_log.error("Feature-loading exception",e);
setLoadStatusMessage("Unknown error loading features from file ");
}
if (getLoadStatus() != FEATURESET_LOAD_SUCCESS)
{
//all the non-success statuses require the filename appended
setLoadStatusMessage(getLoadStatusMessage() + file.getName());
}
}
public FeatureSet(File file) throws Exception
{
this(file, null);
}
public FeatureSet(Feature[] features)
{
_features = features;
inferExtraInformationTypesFromFeatures();
}
/**
* For each feature, determine its set of extra information types,
* using the property names set on the feature.
*
* Set the full list of information types on this FeatureSet to
* that set.
*
* Return value is null so as to avoid confusion -- this does affect the set
*/
public void inferExtraInformationTypesFromFeatures()
{
Set<FeatureExtraInformationDef> extraInfoSet =
new HashSet<FeatureExtraInformationDef>();
for (Feature feature : _features)
{
for (FeatureExtraInformationDef featureInfoDef :
feature.determineExtraInformationTypes())
{
extraInfoSet.add(featureInfoDef);
}
}
removeAllExtraInformationTypes();
for (FeatureExtraInformationDef infoDef : extraInfoSet)
{
addExtraInformationType(infoDef);
}
}
public FeatureSet(Feature[] features, Color color)
{
this(features);
_color = color;
}
public FeatureSet(Spectrum.Peak[] peaks, Color color)
{
_color = color;
_features = new Feature[peaks.length];
for (int i = 0; i < peaks.length; i++)
{
Spectrum.Peak p = peaks[i];
_features[i] = new Feature(p.scan, p.scan, p.scan, p.mz, p.intensity, 1, 0.0F, p.intensity);
}
}
//dmay adding 12/19/2005
public int getLoadStatus()
{
return _loadStatus;
}
public void setLoadStatus(int loadStatus)
{
_loadStatus = loadStatus;
}
public String getLoadStatusMessage()
{
return _loadStatusMessage;
}
public void setLoadStatusMessage(String loadStatusMessage)
{
_loadStatusMessage = loadStatusMessage;
}
/**
* Populate the Time attribute of every Feature in a FeatureSet
* with the time corresponding to its scan.
*
* This is rather inefficient. It would be much, much better if MSRun
* kept ahold of a map of scan numbers to times.
* //TODO: when we start getting pepXml files with retention times
//TODO: populated, need to check and see if they're populated
//TODO: before going back to run
* @param run
*/
public void populateTimesForMS2Features(MSRun run)
{
// Feature[] featuresCopy = new Feature[featureSet.getFeatures().length];
// System.arraycopy(featureSet.getFeatures(), 0, featuresCopy,
// 0, featuresCopy.length);
// Arrays.sort(featuresCopy, new Feature.ScanAscComparator());
for (Feature feature : getFeatures())
{
//first hoping that time is populated.
//If not (if time <= 0), recalculating from scan
//if this throws an NPE, so be it
if (feature.getTime() <= 0)
{
int ms2ScanIndex =
run.getIndexForMS2ScanNum(feature.getScan());
if (ms2ScanIndex < 0)
ms2ScanIndex = -ms2ScanIndex;
MSRun.MSScan scan = run.getMS2Scan(ms2ScanIndex);
feature.setTime((float)
scan.getDoubleRetentionTime());
}
}
}
/**
* Populate the startTime and endTime properties of every Feature in a FeatureSet
* with the times corresponding to its start and end scans
* @param run
*/
public void populateTimesForMS1Features(MSRun run)
{
for (Feature feature : getFeatures())
{
//first, set the time appropriately (if it's already set, believe that setting)
//Then, check whether scanFirst and scanLast are the same as scan. If so, set the
//start and end times the same. Otherwise, check the run to find the real ones out.
//All this complexity is to avoid hitting the run, which is expensive.
if (feature.getTime() <= 0)
feature.setTime((float) run.getScan(run.getIndexForScanNum(feature.getScan())).getDoubleRetentionTime());
if (feature.scanFirst == feature.scan)
TimeExtraInfoDef.setStartTime(feature, feature.getTime());
else
{
int fScanFirstIndex = run.getIndexForScanNum(feature.scanFirst);
TimeExtraInfoDef.setStartTime(feature,
run.getScan(fScanFirstIndex).getDoubleRetentionTime());
}
if (feature.scanLast == feature.scan)
TimeExtraInfoDef.setEndTime(feature, feature.getTime());
else
{
int fScanLastIndex = run.getIndexForScanNum(feature.scanLast);
TimeExtraInfoDef.setEndTime(feature,
run.getScan(fScanLastIndex).getDoubleRetentionTime());
}
}
}
/**
*
* Select a subset of a feature array.
* Input array MUST be sorted with the Spectrum.Feature.MzScanAscComparator
*
* @param features
* @param sel What features to select. Features will not be merged
* @return
*/
public static Feature[] selectFeatures(Feature[] features, FeatureSelector sel)
{
ArrayList<Feature> list = new ArrayList<Feature>();
for (Feature f : features)
{
if (f.intensity >= sel.getMinIntensity() &&
f.charge <= sel.getMaxCharge() && f.charge >= sel.getMinCharge() &&
f.mz >= sel.getMinMz() && f.mz <= sel.getMaxMz() &&
f.mass >= sel.getMinMass() && f.mass <= sel.getMaxMass() &&
f.peaks >= sel.getMinPeaks() && f.peaks <= sel.getMaxPeaks() &&
f.scan >= sel.getScanFirst() && f.scan <= sel.getScanLast() &&
f.kl <= sel.getMaxKL() && f.scanCount >= sel.getMinScans() &&
f.totalIntensity >= sel.getMinTotalIntensity() &&
f.time >= sel.getMinTime() && f.time <= sel.getMaxTime() &&
MS2ExtraInfoDef.getPeptideProphet(f) >= sel.getMinPProphet() &&
(sel.getMaxAMTFDR() == 1 || (AmtExtraInfoDef.hasMatchFDR(f) &&
AmtExtraInfoDef.getMatchFDR(f) < sel.getMaxAMTFDR())) &&
(sel.getMaxMassDeviationPPM() == Integer.MAX_VALUE ||
Math.abs(MassCalibrationUtilities.calculateMassDefectDeviationPPM(f.getMass(),
MassCalibrationUtilities.DEFAULT_THEORETICAL_MASS_WAVELENGTH)) <=
sel.getMaxMassDeviationPPM()) &&
f.getSumSquaresDist() <= sel.getMaxSumSquaresDist() &&
(!sel.isAccurateMzOnly() || f.isAccurateMZ())
)
{
list.add(f);
}
}
return list.toArray(new Feature[list.size()]);
}
public static Feature[] mergeFeatures(Feature[] features, FeatureSelector sel)
{
if (null == features)
return null;
ArrayList<Feature> featureRangeList;
while (true) //Loop until we can't compress any more
{
int trail = 0;
featureRangeList = new ArrayList<Feature>();
for (int i = 0; i < features.length; i++)
{
Feature f = features[i];
if (f.intensity >= sel.getMinIntensity() && f.charge <= sel.getMaxCharge() &&
f.mz >= sel.getMinMz() && f.mz <= sel.getMaxMz() && f.kl <= sel.getMaxKL()
&& f.peaks >= sel.getMinPeaks()
&& f.scan >= sel.getScanFirst() && f.scan <= sel.getScanLast()
//TODO: properly this should be handled in such a way that I don't have to
//reference this column name
&& f.getIntProperty("peptideprophet",0) >= sel.getMinPProphet())
{
boolean newRange = true;
//Scan all the feature ranges & merge.
int j;
for (j = trail; j < featureRangeList.size(); j++)
{
Feature fr = (Feature) featureRangeList.get(j);
float ppmGap = (f.mz - fr.mz);
if (ppmGap > sel.maxMzGap)
{
//Don't need to look at this next time since we're sorted.
trail = j + 1;
continue;
}
if (fr.isFeatureInRange(f, sel.maxScanGap, sel.maxMzGap))
{
fr.addFeatureToRange(f);
newRange = false;
break;
}
if (-ppmGap > sel.maxMzGap)
break;
}
if (newRange)
featureRangeList.add(new Feature(f));
}
}
if (featureRangeList.size() == features.length)
break; //OK, no more compressing can be done
features = (Feature[]) featureRangeList.toArray(new Feature[featureRangeList.size()]);
Arrays.sort(features, new Feature.MzScanAscComparator());
}
//Now filter by length
ArrayList<Feature> filteredByLength = new ArrayList<Feature>();
for (Feature fr : featureRangeList)
{
if (fr.getScanCount() >= sel.getMinScans())
filteredByLength.add(fr);
}
//Re-sort because merging might lead to subtly out-of-order peaks
Feature[] ranges = (Feature[]) filteredByLength.toArray(new Feature[filteredByLength.size()]);
Arrays.sort(ranges, new Feature.MzScanAscComparator());
return ranges;
}
public Feature[] getFeatures(FeatureSelector sel)
{
if (null == _features)
return null;
return selectFeatures(_features, sel);
}
public Feature[] getFeatures()
{
return _features;
}
public void setFeatures(Feature[] features)
{
_features = features;
}
public String getTag()
{
return _tag;
}
public void setTag(String tag)
{
_tag = tag;
}
public FeatureSet filter(FeatureSelector sel)
{
FeatureSet fs = (FeatureSet)this.clone();
Feature[] features = getFeatures(sel);
fs.setFeatures(features);
fs.setColor(this.getColor());
Map<String,Object> properties = new HashMap<String,Object>();
properties.putAll(this.getProperties());
if (null != this.getSourceFile())
properties.put("origSourceFile", this.getSourceFile().getPath());
properties.put("filter", sel.toString());
fs.setProperties(properties);
return fs;
}
public Feature findNearestFeature(int scan, float mz)
{
return findNearestFeature(scan, mz, Integer.MAX_VALUE, Float.MAX_VALUE);
}
public int findNearestFeatureIndex(int scan, float mz, int maxScanDistance, float maxMzDistance)
{
Feature feature = new Feature(scan, mz, 1);
int index = Arrays.binarySearch(_features, feature, new Feature.MzScanAscComparator());
double minDistance = Float.MAX_VALUE;
int nearestFeature = -1;
if (index >= 0)
return index;
int pos = -index - 1;
//Didn't find an exact match. Search for nearest feature in 2 dimensions
for (int i = pos - 1; i >= 0; i--)
{
Feature f = _features[i];
float mzDist = Math.abs(mz - f.mz);
if (mzDist > maxMzDistance || mzDist > minDistance)
break;
int scanDist = Math.abs(scan - f.scan);
if (scanDist > maxScanDistance)
continue;
double distance = Math.sqrt(scanDist * scanDist + mzDist * mzDist);
if (distance < minDistance)
{
minDistance = distance;
nearestFeature = i;
}
}
for (int i = pos; i < _features.length; i++)
{
Feature f = _features[i];
float mzDist = Math.abs(mz - f.mz);
if (mzDist > minDistance || mzDist > maxMzDistance) //Not going to get any closer since sorted by Mz
return nearestFeature;
int scanDist = Math.abs(scan - f.scan);
if (scanDist > maxScanDistance)
continue;
double distance = Math.sqrt(scanDist * scanDist + mzDist * mzDist);
if (distance < minDistance)
{
minDistance = distance;
nearestFeature = i;
}
}
return nearestFeature;
}
public Feature findNearestFeature(int scan, float mz, int maxScanDistance, float maxMzDistance)
{
int index = findNearestFeatureIndex(scan, mz, maxScanDistance, maxMzDistance);
return index < 0 ? null : _features[index];
}
public Object clone()
{
try
{
FeatureSet fs = (FeatureSet) super.clone();
fs.setProperties(this._properties);
fs.setTag(this._tag);
fs.setSourceFile(this._sourceFile); // Used for column headers
return fs;
}
catch (CloneNotSupportedException x)
{
return null;
}
}
/**
* Deep-copies everything except the properties array. The properties array
* is shallow-copied. This is done because we've done nothing to make sure that
* the items in the properties array will clone() nicely. Sheer laziness, really.
* @return
*/
public FeatureSet deepCopy()
{
Feature[] features = new Feature[_features.length];
for (int i = 0; i < _features.length; i++)
features[i] = (Feature) _features[i].clone();
FeatureSet fs = new FeatureSet(features, _color);
fs.setDisplayed(_displayed);
fs.setSourceFile(new File(_sourceFile.getAbsolutePath()));
//shallow copy of the properties array.
HashMap propertiesCopy = new HashMap();
for (Object propertyKey : _properties.keySet())
propertiesCopy.put(propertyKey, _properties.get(propertyKey));
fs.setProperties(propertiesCopy);
return fs;
}
/**
* Combine features that represent the same peptide. Features
* must be within a scanDiff,massDiff window to be considered same.
* <p/>
* This removes redunancy caused by multiple charge states. It
* also combines features of the same mass/charge that are close
* together (e.g. a small feature that is split by ion competition)
* <p/>
* CONSIDER(mbellew) the mass (mz) tolerance could be tighter
* for features w/ same charge
*
* @param scanDiff
* @param massDiff
* @param sumIntensities If this is true, resulting features have intensity and
* totalIntensity values that are the sums of component features. If false,
* resulting features retain their original intensities
* charge state
* @return
*/
public FeatureSet deconvolute(int scanDiff, double massDiff, boolean sumIntensities)
{
FeatureGrouper grouper = new FeatureGrouper();
grouper.addSet(this);
grouper.setGroupByMass(true);
grouper.split2D(massDiff, scanDiff);
Clusterer2D.BucketSummary[] buckets = grouper.summarize();
Feature[] deconvoluted = new Feature[buckets.length];
int numPeptideConflicts = 0;
int numPreservedPeptides = 0;
for (int i = 0; i < buckets.length; i++)
{
Clusterer2D.BucketSummary bucket = buckets[i];
Feature deconvolutedFeature = null;
if (bucket.featureCount == 1)
{
deconvolutedFeature = (Feature) FeatureGrouper.getFeatures(bucket)[0].clone();
deconvolutedFeature.setChargeStates(1);
}
else
{
Feature[] bucketFeatures = FeatureGrouper.getFeatures(bucket);
Feature best = bucketFeatures[0];
float sumIntensity = 0.0f;
float sumTotalIntensity = 0.0f;
//NOTE: this won't work with stuff of charge > 10. Then again, what will?
int[] chargeStateCounts = new int[10];
String description = "";
for (Feature f : bucketFeatures)
{
if (description.length() > 0)
description += ", ";
chargeStateCounts[f.charge]++;
description += f.charge;
if (null != f.getDescription())
description += " (" + f.getDescription() + ")";
sumIntensity += f.intensity;
sumTotalIntensity += f.totalIntensity;
if (f.totalIntensity > best.totalIntensity)
best = f;
}
deconvolutedFeature = (Feature) best.clone();
int numChargeStates = 0;
for (int j=0; j<chargeStateCounts.length; j++)
if (chargeStateCounts[j] > 0)
numChargeStates++;
if (sumIntensities)
{
deconvolutedFeature.setIntensity(sumIntensity);
deconvolutedFeature.setTotalIntensity(sumTotalIntensity);
}
deconvolutedFeature.setChargeStates(numChargeStates);
deconvolutedFeature.setDescription(description);
//if there's MS2 data in this FeatureSet, then we need to make sure
//that the collapsed feature contains the peptide and protein ID carried
//by its components.
//If there are conflicts, we leave the existing ID on the collapsed feature
//alone, or if it had none, don't assign
//TODO: somehow move this to MS2ExtraInfoDef?
if (this.hasExtraInformationType(MS2ExtraInfoDef.getSingletonInstance()))
{
Set<String> featurePeptides = new HashSet<String>();
Set<String> featureProteins = new HashSet<String>();
for (Feature f : bucketFeatures)
{
String featurePeptide = MS2ExtraInfoDef.getFirstPeptide(f);
if (featurePeptide != null)
{
featurePeptides.add(featurePeptide);
String featureProtein = MS2ExtraInfoDef.getFirstProtein(f);
if (featureProtein != null)
featureProteins.add(featureProtein);
}
}
if (featurePeptides.size() == 1 &&
MS2ExtraInfoDef.getFirstPeptide(deconvolutedFeature) == null)
{
MS2ExtraInfoDef.setSinglePeptide(deconvolutedFeature,
featurePeptides.iterator().next());
numPreservedPeptides++;
if (featureProteins.size() == 1 &&
MS2ExtraInfoDef.getFirstProtein(deconvolutedFeature) == null)
MS2ExtraInfoDef.addProtein(deconvolutedFeature,
featureProteins.iterator().next());
}
else
{
if (featurePeptides.size() > 1)
numPeptideConflicts++;
}
}
}
deconvolutedFeature.comprised = FeatureGrouper.getFeatures(bucket);
deconvoluted[i] = deconvolutedFeature;
}
//reporting on peptides preserved and conflicts
if (this.hasExtraInformationType(MS2ExtraInfoDef.getSingletonInstance()))
{
_log.debug("deconvolute: peptides actively preserved: " + numPreservedPeptides);
_log.debug("deconvolute: peptide conflicts: " + numPeptideConflicts);
}
FeatureSet fs = (FeatureSet) this.clone();
//Make map modifiable & set properties.
Map props = new HashMap();
props.putAll(this.getProperties());
if (null != this.getSourceFile())
props.put("origSourceFile", this.getSourceFile());
props.put("deconvoluteScanDiff", String.valueOf(scanDiff));
props.put("deconvoluteMassDiff", String.valueOf(massDiff));
fs.setFeatures(deconvoluted);
fs.setProperties(props);
return fs;
}
/**
* Relative quantitation using ICAT label defaults...
*/
public FeatureSet icat()
{
return quant(AnalyzeICAT.icatLabel);
}
/**
* Relative quantitiation using explicit label...
*/
public FeatureSet quant(float light, float heavy, char residue, int maxLabelCount)
{
return quant(light, heavy, residue, maxLabelCount, null);
}
/**
* Relative quantitiation using explicit label...
*/
public FeatureSet quant(float light, float heavy, char residue, int maxLabelCount, MSRun run)
{
float delta = heavy - light;
AnalyzeICAT.IsotopicLabel label = new AnalyzeICAT.IsotopicLabel(light, delta, residue, maxLabelCount);
return quant(label, run);
}
public FeatureSet quant(AnalyzeICAT.IsotopicLabel label)
{
return quant(label, null);
}
//This comparator is used to sort pairs in increasing order of first scan.
//This makes accessing the MS1 data faster, since data access is more localized
static Comparator comparePairScanAsc = new Comparator()
{
public int compare(Object a, Object b)
{
Feature lightA = (Feature) ((Pair) a).first;
Feature heavyA = (Feature) ((Pair) a).second;
Feature lightB = (Feature) ((Pair) b).first;
Feature heavyB = (Feature) ((Pair) b).second;
IntRange aRange = Feature.findOverlappingScanRange(lightA, heavyA);
IntRange bRange = Feature.findOverlappingScanRange(lightB, heavyB);
int aScan = (aRange == null ? 0 : aRange.getMinimumInteger());
int bScan = (bRange == null ? 0 : bRange.getMinimumInteger());
return aScan == bScan ? 0 : aScan < bScan ? -1 : 1;
}
};
//by default, use totalIntensity of each partner
public FeatureSet quant(AnalyzeICAT.IsotopicLabel label, MSRun run)
{
return quant(label, TOTAL_INTENSITY, run);
}
/**
* Relative quantitiation using explicit label...
*/
public FeatureSet quant(AnalyzeICAT.IsotopicLabel label, int intensityType, MSRun run)
{
return quant(label, intensityType, run, AnalyzeICAT.DEFAULT_DELTA_MASS,
AnalyzeICAT.DEFAULT_DELTA_MASS_TYPE, AnalyzeICAT.DEFAULT_DELTA_TIME);
}
public FeatureSet quant(AnalyzeICAT.IsotopicLabel label, int intensityType, MSRun run,
float massTolerance, int massToleranceType, float timeTolerance)
{
boolean pairsOnly = false;
ArrayList pairs = AnalyzeICAT.analyze(getFeatures(), label, massTolerance,
massToleranceType, timeTolerance);
// TODO: option to output unpaired features
Map<Feature,Feature> icatFeatures = new IdentityHashMap<Feature,Feature>(3 * pairs.size());
ArrayList<Feature> list = new ArrayList<Feature>(_features.length);
//
// output paired features
//
// Consider May want to make ratio MAX_VALUE or MIN_VALUE when heavy is zero.
// +/- Inf (or Nan) may confuse downstream tools.
// Give us the option of using either the total, recalculated, or maximum intensity
// of each partner. Total has problems when one partner shows longer
// elution time (or runs into a different co-eluting peptide).
// TODO: This is just a surrogate for using the same range of scans
// for each partner (max intensity has problems too).
// Recalculated intensity requires access to the MS1 run
if (intensityType == TOTAL_INTENSITY)
Collections.sort(pairs,comparePairScanAsc);
for (int i = 0; i < pairs.size(); i++)
{
Pair p = (Pair)pairs.get(i);
Feature light = (Feature)p.first;
Feature heavy = (Feature)p.second;
Feature f = new Feature(light);
f.setTotalIntensity(heavy.totalIntensity + light.totalIntensity);
if (intensityType == TOTAL_INTENSITY)
{
IsotopicLabelExtraInfoDef.setHeavyIntensity(f,heavy.totalIntensity);
IsotopicLabelExtraInfoDef.setLightIntensity(f,light.totalIntensity);
}
else if (intensityType == RECALCULATED_INTENSITY)
{
if (run == null)
{
_log.error("No run specified, unable to recalculate intensities.");
return null;
}
IntRange overlappingScanRange = Feature.findOverlappingScanRange(light, heavy);
IsotopicLabelExtraInfoDef.setLightIntensity(f,
light.calculateFeatureIntensityInRange(run, MZ_WINDOW_SIZE, overlappingScanRange,
RESAMPLING_FREQUENCY));
IsotopicLabelExtraInfoDef.setHeavyIntensity(f,
heavy.calculateFeatureIntensityInRange(run, MZ_WINDOW_SIZE, overlappingScanRange,
RESAMPLING_FREQUENCY));
}
else if (intensityType == MAX_INTENSITY)
{
IsotopicLabelExtraInfoDef.setHeavyIntensity(f,heavy.intensity);
IsotopicLabelExtraInfoDef.setLightIntensity(f,light.intensity);
// smearing the total out doesn't seem to work better than just using the max
// f.setHeavyIntensity(heavy.totalIntensity/heavy.scanCount);
// f.setLightIntensity(light.totalIntensity/light.scanCount);
}
IsotopicLabelExtraInfoDef.setRatio(f,
IsotopicLabelExtraInfoDef.getLightIntensity(f) /
IsotopicLabelExtraInfoDef.getHeavyIntensity(f));
IsotopicLabelExtraInfoDef.setLabelCount(f,
Math.round((heavy.mass - light.mass) / label.getHeavy()));
f.setProperty("label", label);
f.setChargeStates(Math.max(light.getChargeStates(), heavy.getChargeStates()));
//we used to do this in order to subtract out the light label. Not doing that any more
//(mass will be consistent with m/z and charge), so no need to update mass at all
// f.updateMass();
//deal with any peptide identifications, likely supplied by AMT.
//If both light and heavy have the same ID, or any in common, or only one has an ID,
//keep it. If light and heavy have different IDs, toss them both out
List<String> heavyPeptides = MS2ExtraInfoDef.getPeptideList(heavy);
List<String> lightPeptides = MS2ExtraInfoDef.getPeptideList(heavy);
if (heavyPeptides != null || lightPeptides != null)
{
if (heavyPeptides == null)
{
MS2ExtraInfoDef.setPeptideList(f, lightPeptides.get(0));
}
else if (lightPeptides == null)
{
MS2ExtraInfoDef.setPeptideList(f, heavyPeptides.get(0));
}
else
{
//both heavy and light peptides exist.
if (heavyPeptides.size() == 1 && lightPeptides.size() == 1)
{
if (heavyPeptides.get(0).equals(lightPeptides.get(0)))
MS2ExtraInfoDef.setPeptideList(f, heavyPeptides.get(0));
else
MS2ExtraInfoDef.removeAllPeptides(f);
}
else
{
Set<String> commonPeptides = new HashSet<String>();
for (String heavyPeptide : heavyPeptides)
if (lightPeptides.contains(heavyPeptide))
commonPeptides.add(heavyPeptide);
if (commonPeptides.size() == 0)
MS2ExtraInfoDef.removeAllPeptides(f);
else
MS2ExtraInfoDef.setPeptideList(f, commonPeptides.iterator().next());
}
}
//now that we've figured out what peptide to assign, make sure it has the
//right number of labeled residues. If not, unset.
if (MS2ExtraInfoDef.getFirstPeptide(f) != null)
{
int numLabeledResidues = 0;
String featurePeptide = MS2ExtraInfoDef.getFirstPeptide(f);
for (int j=0; j<featurePeptide.length(); j++)
if (featurePeptide.charAt(j) == label.getResidue())
numLabeledResidues++;
if (numLabeledResidues != IsotopicLabelExtraInfoDef.getLabelCount(f))
{
// if (numLabeledResidues > 0) System.err.println("Tossing: " + featurePeptide + ", " + numLabeledResidues + ", " + IsotopicLabelExtraInfoDef.getLabelCount(f));
MS2ExtraInfoDef.removeAllPeptides(f);
}
// else
// System.err.println("Saving: " + featurePeptide + ", " + numLabeledResidues);
}
}
list.add(f);
icatFeatures.put(light, light);
icatFeatures.put(heavy, heavy);
}
//
// output remaining features
//
if (!pairsOnly)
{
for (int i=0 ; i<_features.length ; i++)
{
Feature f = _features[i];
if (icatFeatures.containsKey(f))
continue;
list.add(new Feature(f));
}
}
FeatureSet fs = (FeatureSet)this.clone();
fs.addExtraInformationType(new IsotopicLabelExtraInfoDef());
fs.getProperties().put("label", label.toString());
fs.setFeatures(list.toArray(new Feature[0]));
return fs;
}
/**
* Return the best hit from a List of FeatureSets
*/
public static Feature hitTest(java.util.List featureSets, int x, float y, int maxScan, float maxMz)
{
Feature feature = null;
double minDistance = Double.MAX_VALUE;
if (null == featureSets)
return null;
for (int i = 0; i < featureSets.size(); i++)
{
FeatureSet fs = (FeatureSet)featureSets.get(i);
Feature feat = fs.findNearestFeature(x, (float)y, maxScan, maxMz);
if (null == feat)
continue;
double distance = Math.sqrt(Math.pow(feat.scan - x, 2) + Math.pow(feat.mz - y, 2));
if (distance < minDistance)
feature = feat;
}
return feature;
}
/*
* @param file the file containing features to load
*/
public void loadFeatureFile(File file) throws Exception
{
//first check if the file exists
if (file == null || !file.exists())
{
setLoadStatus(FEATURESET_LOAD_ERROR_FILE_NOT_FOUND);
setLoadStatusMessage("Error loading features: unable to find file ");
return;
}
FeatureSetFileHandler fileHandler = null;
if (PepXMLFeatureFileHandler.getSingletonInstance().canHandleFile(file))
{
//try loading it as a pepXML file
_log.debug("Loading as PepXML file");
fileHandler = PepXMLFeatureFileHandler.getSingletonInstance();
}
else if (APMLFeatureFileHandler.getSingletonInstance().canHandleFile(file))
{
//try loading it as an APML file
_log.debug("Loading as APML file");
fileHandler = APMLFeatureFileHandler.getSingletonInstance();
}
else if (HardklorFeatureFileHandler.getSingletonInstance().canHandleFile(file))
{
//try loading it as a Hardklor file
_log.debug("Loading as Hardklor file");
fileHandler = HardklorFeatureFileHandler.getSingletonInstance();
}
else if (NativeTSVFeatureFileHandler.getSingletonInstance().canHandleFile(file))
{
//if not an xml file, assume tab-separated value file
_log.debug("Loading as msInspect .tsv file");
fileHandler = NativeTSVFeatureFileHandler.getSingletonInstance();
}
else
{
throw new IllegalArgumentException("Unknown feature file type. Doesn't seem to be APML, pepXML or msInspect TSV file. Quitting.");
}
try
{
FeatureSet loadedFeatureSet = fileHandler.loadFeatureSet(file);
//This is a bit cumbersome: load up the file in the handler, then take
//the resulting FeatureSet and copy all the important stuff here.
_features = loadedFeatureSet.getFeatures();
_log.debug("Loaded " + _features.length + " features from file");
setProperties(loadedFeatureSet._properties);
for(FeatureExtraInformationDef infoType : loadedFeatureSet.getExtraInformationTypes())
addExtraInformationType(infoType);
setTag(loadedFeatureSet._tag);
setSourceFile(file);
//if we got here, load was successful
setLoadStatus(FeatureSet.FEATURESET_LOAD_SUCCESS);
setLoadStatusMessage(_features.length + " Features loaded successfully");
}
catch (IOException e)
{
//in case of Exceptions, assume no features found,
//problem with file format
_log.error("User attempted to load bad feature file, filename = " + file.getName() +
", exception message = " + e.getMessage(),e);
setLoadStatus(FeatureSet.FEATURESET_LOAD_ERROR_BAD_FILE_FORMAT);
setLoadStatusMessage("Error loading features: bad file format in file ");
}
if (getLoadStatus() != FEATURESET_LOAD_SUCCESS)
return;
//"success" case
//if no features found, report
if (_features == null || _features.length == 0)
{
_log.info("User attempted to load file with no features");
setLoadStatus(FEATURESET_LOAD_ERROR_NO_FEATURES_FOUND);
setLoadStatusMessage("Error loading features: no features found in file ");
return;
}
}
public Color getColor()
{
return null == _color ? Color.RED : _color;
}
public void setColor(Color color)
{
_color = color;
}
public int getStyle()
{
return _style;
}
public void setStyle(int style)
{
_style = style;
}
public File getSourceFile()
{
return _sourceFile;
}
public void setSourceFile(File sourceFile)
{
_sourceFile = sourceFile;
}
public boolean isDisplayed()
{
return _displayed;
}
public void setDisplayed(boolean displayed)
{
_displayed = displayed;
}
public Map<String,Object> getProperties()
{
return _properties;
}
public Object getProperty(String propertyName)
{
if (null == _properties)
return null;
return _properties.get(propertyName);
}
public void setProperties(Map<String,Object> properties)
{
if (null == properties)
_properties = new HashMap<String,Object>();
else
_properties = new HashMap<String,Object>(properties);
}
public void setProperty(String propertyName, Object propertyValue)
{
if (null == _properties)
_properties = new HashMap<String,Object>();
_properties.put(propertyName, propertyValue);
if (_log.isDebugEnabled())
{
String className = null;
if (propertyValue != null)
className = propertyValue.getClass().getName();
_log.debug("setProperty: " + propertyName + ", class " + className);
}
}
public void save() throws IOException
{
save(getSourceFile());
}
public void save(File file) throws IOException
{
save(file, false);
}
public void save(File file, boolean dumpWindow) throws IOException
{
save(file, dumpWindow, NativeTSVFeatureFileHandler.FILE_TYPE_NAME);
}
public void save(PrintWriter outPW, boolean dumpWindow, String fileType) throws IOException
{
FeatureSetFileHandler fileHandler = null;
if (APMLFeatureFileHandler.FILE_TYPE_NAME.equals(fileType))
{
fileHandler = new APMLFeatureFileHandler();
}
else if (HardklorFeatureFileHandler.FILE_TYPE_NAME.equals(fileType))
{
fileHandler = new HardklorFeatureFileHandler();
}
else
{
fileHandler = new NativeTSVFeatureFileHandler();
}
fileHandler.setDumpWindow(dumpWindow);
fileHandler.saveFeatureSet(this, outPW);
}
public void save(File outFile, boolean dumpWindow, String fileType) throws IOException
{
FeatureSetFileHandler fileHandler = null;
if (APMLFeatureFileHandler.FILE_TYPE_NAME.equals(fileType))
{
fileHandler = new APMLFeatureFileHandler();
}
else if (HardklorFeatureFileHandler.FILE_TYPE_NAME.equals(fileType))
{
fileHandler = new HardklorFeatureFileHandler();
}
else
{
fileHandler = new NativeTSVFeatureFileHandler();
}
fileHandler.setDumpWindow(dumpWindow);
fileHandler.saveFeatureSet(this, outFile);
}
public void save(PrintWriter out)
{
save(out, false);
}
public void save(PrintWriter out, boolean dumpWindow)
{
NativeTSVFeatureFileHandler tsvFileHandler = new NativeTSVFeatureFileHandler();
tsvFileHandler.setDumpWindow(dumpWindow);
tsvFileHandler.saveFeatureSet(this, out);
}
public void savePepXml(File outFile)
throws IOException
{
savePepXml(outFile, 1);
}
/**
* For saving in pepXml format
* @param outFile
*/
public void savePepXml(File outFile, int firstSpectrumQueryIndex)
throws IOException
{
PepXMLFeatureFileHandler pepXmlFileHandler = new PepXMLFeatureFileHandler();
pepXmlFileHandler.setFirstSpectrumQueryIndex(firstSpectrumQueryIndex);
pepXmlFileHandler.saveFeatureSet(this, outFile);
}
public static class FeatureSelector implements Cloneable
{
int minCharge = -10;
int maxCharge = 10;
// UNDONE: default these ranges based on getLowMz(), getHighMz()
float maxMz = 10000;
float minMz = 0f;
float minIntensity = 0f;
private float minTotalIntensity = 0f;
int minScans = 0;
int scanFirst = 0;
int scanLast = Integer.MAX_VALUE;
double maxKL = Double.MAX_VALUE;
int minPeaks = 0;
int maxPeaks = Integer.MAX_VALUE;
float minMass = 0f;
float maxMass = Float.MAX_VALUE;
float minTime = 0f;
float maxTime = Float.MAX_VALUE;
int maxMassDeviationPPM = Integer.MAX_VALUE;
//dhmay adding 2/25/2007
float maxSumSquaresDist = Float.MAX_VALUE;
float minPProphet = 0;
float maxAMTFDR = 1f;
int maxScanGap = 3;
float maxMzGap = .12f;
//dhmay adding 2/25/2009
boolean accurateMzOnly = false;
public boolean equals(Object o)
{
if (null == o || !(o instanceof FeatureSelector))
return false;
FeatureSelector fs = (FeatureSelector) o;
return getMinCharge() == fs.getMinCharge() && getMaxCharge() == fs.getMaxCharge() &&
getMaxMz() == fs.getMaxMz() && getMinMz() == fs.getMinMz() &&
getMinIntensity() == fs.getMinIntensity() && getMinScans() == fs.getMinScans() &&
maxScanGap == fs.maxScanGap && maxMzGap == fs.maxMzGap && getScanFirst() == fs.getScanFirst() &&
getScanLast() == fs.getScanLast() &&
getMaxKL() == fs.getMaxKL() && getMinPeaks() == fs.getMinPeaks()
&& getMaxPeaks() == fs.getMaxPeaks()
&& getMinTotalIntensity() == fs.getMinTotalIntensity()
&& getMinMass() == fs.getMinMass() && getMaxMass() == fs.getMaxMass()
&& getMinTime() == fs.getMinTime() && getMaxTime() == fs.getMaxTime()
&& getMinPProphet() == fs.getMinPProphet() &&
getMaxMassDeviationPPM() == fs.getMaxMassDeviationPPM() &&
getMaxSumSquaresDist() == fs.getMaxSumSquaresDist() &&
isAccurateMzOnly() == fs.isAccurateMzOnly() &&
fs.getMaxAMTFDR() == getMaxAMTFDR();
}
public String toString()
{
StringBuffer sb = new StringBuffer();
FeatureSelector unchanged = new FeatureSelector();
String[] props = new String[] {"minCharge", "maxCharge", "minMz", "maxMz", "minMass", "maxMass", "minIntensity", "minTotalIntensity", "maxKL",
"minPeaks", "maxPeaks", "scanFirst", "scanLast", "minTime", "maxTime", "minScans", "minPProphet",
"maxMassDeviationPPM","maxSumSquaresDist","accurateMzOnly","maxAMTFDR"};
try
{
for (String prop : props)
{
Object val = BeanUtils.getSimpleProperty(this, prop);
Object orig = BeanUtils.getSimpleProperty(unchanged, prop);
if (!val.equals(orig))
sb.append(" --" + prop + "=" + val.toString());
}
}
catch (Exception x)
{
ApplicationContext.errorMessage("FeatureSelector: ", x);
}
return sb.toString();
}
public boolean setFilterParam(String paramName, String paramVal)
{
//TODO: Should use reflection here since all names match
if ("--minMz".equalsIgnoreCase(paramName))
setMinMz(Float.parseFloat(paramVal));
else if ("--maxMz".equalsIgnoreCase(paramName))
setMaxMz(Float.parseFloat(paramVal));
else if ("--minMass".equalsIgnoreCase(paramName))
setMinMass(Float.parseFloat(paramVal));
else if ("--maxMass".equalsIgnoreCase(paramName))
setMaxMass(Float.parseFloat(paramVal));
else if ("--minCharge".equalsIgnoreCase(paramName))
setMinCharge(Integer.parseInt(paramVal));
else if ("--maxCharge".equalsIgnoreCase(paramName))
setMaxCharge(Integer.parseInt(paramVal));
else if ("--minPeaks".equalsIgnoreCase(paramName))
setMinPeaks(Integer.parseInt(paramVal));
else if ("--maxPeaks".equalsIgnoreCase(paramName))
setMaxPeaks(Integer.parseInt(paramVal));
else if ("--minScanCount".equalsIgnoreCase(paramName))
setMinScans(Integer.parseInt(paramVal));
else if ("--scanFirst".equalsIgnoreCase(paramName))
setScanFirst(Integer.parseInt(paramVal));
else if ("--scanLast".equalsIgnoreCase(paramName))
setScanLast(Integer.parseInt(paramVal));
else if ("--minScans".equalsIgnoreCase(paramName))
setMinScans(Integer.parseInt(paramVal));
else if ("--maxKL".equalsIgnoreCase(paramName))
setMaxKL(Double.parseDouble(paramVal));
else if ("--minIntensity".equalsIgnoreCase(paramName))
setMinIntensity(Float.parseFloat(paramVal));
else if ("--minTime".equalsIgnoreCase(paramName))
setMinTime(Float.parseFloat(paramVal));
else if ("--maxTime".equalsIgnoreCase(paramName))
setMaxTime(Float.parseFloat(paramVal));
else if ("--minTotalIntensity".equalsIgnoreCase(paramName))
setMinTotalIntensity(Float.parseFloat(paramVal));
else if ("--minPProphet".equalsIgnoreCase(paramName))
setMinPProphet(Float.parseFloat(paramVal));
else if ("--maxMassDeviationPPM".equalsIgnoreCase(paramName))
setMaxMassDeviationPPM(Integer.parseInt(paramVal));
else if ("--maxSumSquaresDist".equalsIgnoreCase(paramName))
setMaxSumSquaresDist(Integer.parseInt(paramVal));
else if ("--accMzOnly".equalsIgnoreCase(paramName))
try
{
setAccurateMzOnly((Boolean) new BooleanArgumentDefinition("dummy").convertArgumentValue(paramVal));
}
catch (ArgumentValidationException e)
{
}
else if ("--maxamtfdr".equalsIgnoreCase(paramName))
setMaxAMTFDR(Float.parseFloat(paramVal));
else
return false;
return true;
}
public Object clone()
{
try
{
return super.clone();
}
catch (Exception x)
{
//Impossible
return null;
}
}
public int getMaxMassDeviationPPM()
{
return maxMassDeviationPPM;
}
public void setMaxMassDeviationPPM(int maxMassDeviationPPM)
{
this.maxMassDeviationPPM = maxMassDeviationPPM;
}
public int getMinCharge()
{
return minCharge;
}
public void setMinCharge(int minCharge)
{
this.minCharge = minCharge;
}
public int getMaxCharge()
{
return maxCharge;
}
public void setMaxCharge(int maxCharge)
{
this.maxCharge = maxCharge;
}
public float getMaxMz()
{
return maxMz;
}
public void setMaxMz(float maxMz)
{
this.maxMz = maxMz;
}
public float getMinMz()
{
return minMz;
}
public void setMinMz(float minMz)
{
this.minMz = minMz;
}
public float getMinPProphet()
{
return minPProphet;
}
public void setMinPProphet(float minPProphet)
{
this.minPProphet = minPProphet;
}
public float getMinIntensity()
{
return minIntensity;
}
public void setMinIntensity(float minIntensity)
{
this.minIntensity = minIntensity;
}
public int getMinScans()
{
return minScans;
}
public void setMinScans(int minScans)
{
this.minScans = minScans;
}
public int getScanFirst()
{
return scanFirst;
}
public void setScanFirst(int scanFirst)
{
this.scanFirst = scanFirst;
}
public int getScanLast()
{
return scanLast;
}
public void setScanLast(int scanLast)
{
this.scanLast = scanLast;
}
public double getMaxKL()
{
return maxKL;
}
public void setMaxKL(double maxKL)
{
this.maxKL = maxKL;
}
public int getMinPeaks()
{
return minPeaks;
}
public void setMinPeaks(int minPeaks)
{
this.minPeaks = minPeaks;
}
public int getMaxPeaks()
{
return maxPeaks;
}
public void setMaxPeaks(int maxPeaks)
{
this.maxPeaks = maxPeaks;
}
public float getMinTotalIntensity()
{
return minTotalIntensity;
}
public void setMinTotalIntensity(float minTotalIntensity)
{
this.minTotalIntensity = minTotalIntensity;
}
public float getMinMass()
{
return minMass;
}
public void setMinMass(float minMass)
{
this.minMass = minMass;
}
public float getMaxMass()
{
return maxMass;
}
public void setMaxMass(float maxMass)
{
this.maxMass = maxMass;
}
public float getMinTime()
{
return minTime;
}
public void setMinTime(float minTime)
{
this.minTime = minTime;
}
public float getMaxTime()
{
return maxTime;
}
public void setMaxTime(float maxTime)
{
this.maxTime = maxTime;
}
public float getMaxSumSquaresDist()
{
return maxSumSquaresDist;
}
public void setMaxSumSquaresDist(float maxSumSquaresDistance)
{
this.maxSumSquaresDist = maxSumSquaresDistance;
}
public boolean isAccurateMzOnly()
{
return accurateMzOnly;
}
public void setAccurateMzOnly(boolean accurateMzOnly)
{
this.accurateMzOnly = accurateMzOnly;
}
public float getMaxAMTFDR()
{
return maxAMTFDR;
}
public void setMaxAMTFDR(float maxAMTFDR)
{
this.maxAMTFDR = maxAMTFDR;
}
}
public List<FeatureExtraInformationDef> getExtraInformationTypes()
{
if (extraInformationTypes == null)
{
extraInformationTypes = new ArrayList<FeatureExtraInformationDef>();
}
return extraInformationTypes;
}
public FeatureExtraInformationDef[] getExtraInformationTypesArray()
{
return getExtraInformationTypes().toArray(new FeatureExtraInformationDef[0]);
}
public void addExtraInformationType(FeatureExtraInformationDef infoType)
{
if (!getExtraInformationTypes().contains(infoType))
getExtraInformationTypes().add(infoType);
}
public boolean hasExtraInformationType(FeatureExtraInformationDef infoType)
{
return getExtraInformationTypes().contains(infoType);
}
public void removeAllExtraInformationTypes()
{
extraInformationTypes = new ArrayList<FeatureExtraInformationDef>();
}
}