/*
* Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.fhcrc.cpl.toolbox.proteomics.feature.matching;
import org.fhcrc.cpl.toolbox.datastructure.Pair;
import org.fhcrc.cpl.toolbox.proteomics.Clusterer2D;
import org.fhcrc.cpl.toolbox.proteomics.MassUtilities;
import org.fhcrc.cpl.toolbox.proteomics.feature.FeatureClusterer;
import org.fhcrc.cpl.toolbox.proteomics.feature.FeatureGrouper;
import org.fhcrc.cpl.toolbox.proteomics.feature.*;
import org.apache.log4j.Logger;
import java.util.*;
/**
* Attempts to match one feature set against another using clustering
*/
public class ClusteringFeatureSetMatcher extends BaseFeatureSetMatcherImpl
implements FeatureSetMatcher
{
private static Logger _log = Logger.getLogger(ClusteringFeatureSetMatcher.class);
public static final double DEFAULT_HYDRO_ELUTION_BUCKET_INCREMENT = 0.05;
protected static final double ABSOLUTE_MASS_BUCKET_INCREMENT = 0.05;
protected static final double PPM_MASS_BUCKET_INCREMENT = 1;
protected double massBucketIncrement = ABSOLUTE_MASS_BUCKET_INCREMENT;
protected double elutionBucketIncrement = DEFAULT_HYDRO_ELUTION_BUCKET_INCREMENT;
//Up these values only with caution! The complexity of this feature-matching grows with
//numMassBuckets * numElutionBuckets
protected int numMassBuckets = 4;
protected int numElutionBuckets = 4;
protected double bestMassBucketSize = 0;
protected double bestElutionBucketSize = 0;
protected boolean useMassInsteadOfMz = true;
public ClusteringFeatureSetMatcher()
{
}
public ClusteringFeatureSetMatcher(float deltaMass, int deltaMassType,
float deltaElution)
{
super();
init(deltaMass, deltaMassType, deltaElution);
}
public void init(float deltaMass, int deltaMassType, float deltaElution)
{
super.init(deltaMass, deltaMassType, deltaElution);
if (deltaMassType == FeatureSetMatcher.DELTA_MASS_TYPE_PPM)
massBucketIncrement = PPM_MASS_BUCKET_INCREMENT;
}
public String toString()
{
return "ClusteringFeatureSetMatcher: deltaMass=" + deltaMass +
", deltaElution=" + deltaElution;
}
/**
* Calculate the buckets to use for either mass or hydro clustering
* @param numBuckets
* @param bucketIncrement
* @param maxBucketSize
* @return
*/
protected double[] calculateBuckets(int numBuckets, double bucketIncrement,
double maxBucketSize)
{
_log.debug("Buckets:");
ArrayList<Double> resultList = new ArrayList<Double>();
double lowBucket = maxBucketSize - (bucketIncrement * (numBuckets - 1));
for (int i=0; i<numBuckets; i++)
{
double bucketSize = lowBucket + (i * bucketIncrement);
if (bucketSize > 0)
{
resultList.add(bucketSize);
_log.debug(" " + resultList.get(resultList.size()-1));
}
}
double[] result = new double[resultList.size()];
for (int i=0; i<resultList.size(); i++)
result[i] = resultList.get(i);
return result;
}
/**
* Match two feature sets using clustering
* @param featureSet1
* @param featureSet2
* @return
*/
public FeatureMatchingResult
matchFeatures(FeatureSet featureSet1, FeatureSet featureSet2)
{
//for (Feature feature : featureSet1.getFeatures())
//{
// System.err.println(AmtExtraInfoDef.getObservedHydrophobicity(feature));
//}
FeatureClusterer clusterer =
new FeatureClusterer(useMassInsteadOfMz ? FeatureClusterer.MASS_MZ_MODE_MASS :
FeatureClusterer.MASS_MZ_MODE_MZ,
elutionMode, featureSet1);
clusterer.addSet(featureSet2);
//System.err.println("matcF 1, ms1Features: " + featureSet1.getFeatures().length + ", ms2Features: " + featureSet2.getFeatures().length);
//System.err.println("mass? " + useMassInsteadOfMz + ", elut: " + elutionMode);
//System.err.println("deltaMass: " + deltaMass + ", deltaElut: " + deltaElution);
//for (Feature feature : featureSet1.getFeatures()) System.err.println(" H: " + AmtExtraInfoDef.getObservedHydrophobicity(feature));
//System.err.println("set 1 features: " + featureSet1.getFeatures().length);
// In the PPM case, we have to convert from PPM to Da before figuring out whether we can split.
// This is done by converting maxBucket based on this bucket's getMin() value. This will
// ensure that a bucket will be split if any subregion of the bucket would be split for the
// given PPM value.
// This should be reasonable behavior, perhaps splitting slightly more than necessary.
if (deltaMassType == FeatureSetMatcher.DELTA_MASS_TYPE_PPM)
{
clusterer.setDimensionSplitCalculator(new Clusterer2D.ClusterDimensionSplitCalculator()
{
public double calculateDimension1ForSplit(double referenceValue, double dimensionValue)
{
return MassUtilities.calculateAbsoluteDeltaMass((float) referenceValue,
(float) dimensionValue,
FeatureSetMatcher.DELTA_MASS_TYPE_PPM);
}
public double calculateDimension2ForSplit(double referenceValue, double dimensionValue)
{
return dimensionValue;
}
});
}
FeatureMatchingResult result = new FeatureMatchingResult();
//maximum number of perfect matches we could possibly have
// int maxPossiblePerfectMatches = Math.min(featureSet1.getFeatures().length,
// featureSet2.getFeatures().length);
double[] massBuckets = calculateBuckets(numMassBuckets, massBucketIncrement, getDeltaMass());
double[] elutionBuckets = calculateBuckets(numElutionBuckets,
elutionBucketIncrement,
getDeltaElution());
setBestElutionBucketSize(0);
setBestMassBucketSize(0);
Pair<Double,Double> bestBucketSizes = clusterer.calculateBestBuckets(massBuckets, elutionBuckets, false);
clusterer.split2D(bestBucketSizes.first, bestBucketSizes.second);
_log.debug("Splitting on buckets " + bestBucketSizes.first + " and " +
bestBucketSizes.second);
Clusterer2D.BucketSummary[] bucketSummaries = clusterer.summarize();
int numConflictBuckets = 0;
for (Clusterer2D.BucketSummary bucketSummary : bucketSummaries)
{
//for each bucket, if it has at least one pair, do something with it
if (bucketSummary.setCount == 2)
{
if (bucketSummary.entries().length > 2)
{
List<Feature> slaveSetFeatures =
FeatureGrouper.getFeaturesFromSet(1, bucketSummary);
for (Feature masterSetFeature :
FeatureGrouper.getFeaturesFromSet(0, bucketSummary))
{
//order slave set features in the order determined by
//variable settings at the superclass level
List<Feature> orderedSlaveSetFeatures =
orderSlaveSetFeatures(slaveSetFeatures, masterSetFeature);
result.put(masterSetFeature, orderedSlaveSetFeatures);
}
numConflictBuckets++;
// Feature[] oneFromEachSet =
// FeatureGrouper.resolveClusterConflicts(bucketSummary,
// new FeatureSet[] {featureSet1, featureSet2},
// FeatureGrouper.CONFLICT_RESOLVER_BEST);
}
else
{
//exactly one from each, no-brainer
Feature masterSetFeature = ((FeatureClusterer.FeatureClusterable)
bucketSummary.getParentListForSetIndex(0).get(0)).getParentFeature();
Feature slaveSetFeature = ((FeatureClusterer.FeatureClusterable)
bucketSummary.getParentListForSetIndex(1).get(0)).getParentFeature();
result.add(masterSetFeature, slaveSetFeature);
//System.err.println("first scan: " + firstOne.getParentFeature().scan + ", second scan: " + secondOne.getParentFeature().scan);
}
/*
List<Clusterer2D.Clusterable> set2Clusterables = bucketSummary.getParentListForSetIndex(1);
List<Clusterer2D.Clusterable> set1Clusterables = bucketSummary.getParentListForSetIndex(0);
List<Feature> set2Features = getFeatureListForClusterableList(set2Clusterables);
List<Feature> set1Features = getFeatureListForClusterableList(set1Clusterables);
//if there are only two features in the bucket, then obviously we've got one from each
if (bucketSummary.featureCount == 2)
{
result.add(new Pair<Feature, Feature>(set2Features.get(0),
set1Features.get(0)));
}
else if (bucketSummary.featureCount > 2)
{
numConflictBuckets++;
result.add(resolvePairingConflicts(set2Features, set1Features));
}
*/
}
}
//System.err.println("Perfect buckets: " + clusterer.rowsWithOneFromEach(bucketSummaries) + ", conflicts: " + numConflictBuckets);
return result;
}
/**
* Convert a list of Clusterables, assumed to be FeatureMassHydroClusterables, to a list of
* Features
* @param clusterableList
* @return
*/
protected List<Feature>
getFeatureListForClusterableList(List<Clusterer2D.Clusterable> clusterableList)
{
List<Feature> result = new ArrayList<Feature>(clusterableList.size());
for (Clusterer2D.Clusterable clusterable : clusterableList)
{
result.add(((FeatureClusterer.FeatureClusterable) clusterable).getParentFeature());
}
return result;
}
/**
* Pick the closest feature to a given feature by mass, from a list.
* @param baseFeature
* @param featuresToMatch
* @return
*/
protected Feature pickClosestByMass(Feature baseFeature, List<Feature> featuresToMatch)
{
Feature result = null;
//worst value possible
double bestMassDistance = Double.MAX_VALUE;
for (Feature featureToMatch : featuresToMatch)
{
double currentMassDistance = Math.abs(baseFeature.getMass() - featureToMatch.getMass());
if (currentMassDistance < bestMassDistance)
{
bestMassDistance = currentMassDistance;
result = featureToMatch;
}
}
return result;
}
public double getBestMassBucketSize()
{
return bestMassBucketSize;
}
public void setBestMassBucketSize(double bestMassBucketSize)
{
this.bestMassBucketSize = bestMassBucketSize;
}
public double getBestElutionBucketSize()
{
return bestElutionBucketSize;
}
public void setBestElutionBucketSize(double bestElutionBucketSize)
{
this.bestElutionBucketSize = bestElutionBucketSize;
}
public int getNumMassBuckets()
{
return numMassBuckets;
}
public void setNumMassBuckets(int numMassBuckets)
{
this.numMassBuckets = numMassBuckets;
}
public int getNumElutionBuckets()
{
return numElutionBuckets;
}
public void setNumElutionBuckets(int numElutionBuckets)
{
this.numElutionBuckets = numElutionBuckets;
}
public double getMassBucketIncrement()
{
return massBucketIncrement;
}
public void setMassBucketIncrement(double massBucketIncrement)
{
this.massBucketIncrement = massBucketIncrement;
}
public double getElutionBucketIncrement()
{
return elutionBucketIncrement;
}
public void setElutionBucketIncrement(double elutionBucketIncrement)
{
this.elutionBucketIncrement = elutionBucketIncrement;
}
public void setUseMassInsteadOfMz(boolean value)
{
useMassInsteadOfMz = value;
}
}