package com.compomics.util.experiment.identification.psm_scoring.psm_scores;
import com.compomics.util.Util;
import com.compomics.util.experiment.biology.Ion;
import com.compomics.util.experiment.biology.Peptide;
import com.compomics.util.experiment.biology.ions.PeptideFragmentIon;
import com.compomics.util.experiment.identification.matches.IonMatch;
import com.compomics.util.experiment.identification.peptide_fragmentation.PeptideFragmentationModel;
import com.compomics.util.experiment.identification.spectrum_annotation.AnnotationSettings;
import com.compomics.util.experiment.identification.spectrum_annotation.SpecificAnnotationSettings;
import com.compomics.util.experiment.identification.spectrum_annotation.spectrum_annotators.PeptideSpectrumAnnotator;
import com.compomics.util.experiment.massspectrometry.MSnSpectrum;
import com.compomics.util.experiment.massspectrometry.Peak;
import com.compomics.util.experiment.massspectrometry.indexes.SpectrumIndex;
import com.compomics.util.math.BasicMathFunctions;
import com.compomics.util.math.HistogramUtils;
import com.compomics.util.math.statistics.linear_regression.LinearRegression;
import com.compomics.util.math.statistics.linear_regression.RegressionStatistics;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import org.apache.commons.math.util.FastMath;
/**
* Hyperscore as variation of the score implemented in X!Tandem
* www.thegpm.org/tandem.
*
* The original X!Tandem score at the link above is governed by the Artistic
* license (https://opensource.org/licenses/artistic-license-1.0). X! tandem is
* a component of the X! proteomics software development project. Copyright
* Ronald C Beavis, all rights reserved.
*
* The code below does not use or reuse any of the X!Tandem code, but the
* scoring approach is the same. No copyright infringement intended.
*
* @author Marc Vaudel
*/
public class HyperScore {
/**
* The peptide fragmentation model to use.
*/
private PeptideFragmentationModel peptideFragmentationModel;
/**
* Histogram of the values found for a in the fitting.
*/
private HashMap<Double, Integer> as = new HashMap<Double, Integer>();
/**
* Histogram of the values found for b in the fitting.
*/
private HashMap<Double, Integer> bs = new HashMap<Double, Integer>();
/**
* Constructor.
*
* @param peptideFragmentationModel the peptide fragmentation model to use
*/
public HyperScore(PeptideFragmentationModel peptideFragmentationModel) {
this.peptideFragmentationModel = peptideFragmentationModel;
}
/**
* Constructor using a uniform fragmentation.
*/
public HyperScore() {
this(PeptideFragmentationModel.uniform);
}
/**
* Returns the hyperscore.
*
* @param peptide the peptide of interest
* @param spectrum the spectrum of interest
* @param annotationSettings the general spectrum annotation settings
* @param specificAnnotationSettings the annotation settings specific to
* this PSM
* @param peptideSpectrumAnnotator the spectrum annotator to use
*
* @return the score of the match
*/
public double getScore(Peptide peptide, MSnSpectrum spectrum, AnnotationSettings annotationSettings, SpecificAnnotationSettings specificAnnotationSettings, PeptideSpectrumAnnotator peptideSpectrumAnnotator) {
ArrayList<IonMatch> matches = peptideSpectrumAnnotator.getSpectrumAnnotation(annotationSettings, specificAnnotationSettings, spectrum, peptide);
boolean peak = false;
Double precursorIntensity = 0.0;
for (IonMatch ionMatch : matches) {
Ion ion = ionMatch.ion;
switch (ion.getType()) {
case PRECURSOR_ION:
precursorIntensity += ionMatch.peak.intensity;
break;
case PEPTIDE_FRAGMENT_ION:
peak = true;
}
}
if (!peak) {
return 0.0;
}
SpectrumIndex spectrumIndex = new SpectrumIndex();
spectrumIndex = (SpectrumIndex) spectrum.getUrParam(spectrumIndex);
if (spectrumIndex == null) {
// Create new index
spectrumIndex = new SpectrumIndex(spectrum.getPeakMap(), spectrum.getIntensityLimit(annotationSettings.getAnnotationIntensityLimit()),
annotationSettings.getFragmentIonAccuracy(), annotationSettings.isFragmentIonPpm());
spectrum.addUrParam(spectrumIndex);
}
Double totalIntensity = spectrumIndex.getTotalIntensity() - precursorIntensity;
double xCorr = 0;
HashSet<Integer> ionsForward = new HashSet<Integer>(1);
HashSet<Integer> ionsRewind = new HashSet<Integer>(1);
HashSet<Double> accountedFor = new HashSet<Double>(matches.size());
for (IonMatch ionMatch : matches) {
Peak peakI = ionMatch.peak;
Double mz = peakI.mz;
Ion ion = ionMatch.ion;
if (ion.getType() != Ion.IonType.PRECURSOR_ION && !accountedFor.contains(mz)) {
accountedFor.add(mz);
Double x0I = peakI.intensity / totalIntensity;
xCorr += x0I;
if (ion.getType() == Ion.IonType.PEPTIDE_FRAGMENT_ION && !ion.hasNeutralLosses()) {
PeptideFragmentIon peptideFragmentIon = (PeptideFragmentIon) ion;
int number = peptideFragmentIon.getNumber();
if (number > 1) {
if (ion.getSubType() == PeptideFragmentIon.X_ION
|| ion.getSubType() == PeptideFragmentIon.Y_ION
|| ion.getSubType() == PeptideFragmentIon.Z_ION) {
ionsForward.add(number);
} else if (ion.getSubType() == PeptideFragmentIon.A_ION
|| ion.getSubType() == PeptideFragmentIon.B_ION
|| ion.getSubType() == PeptideFragmentIon.C_ION) {
ionsRewind.add(number);
}
}
}
}
}
int nForward = ionsForward.size() / (Math.max(specificAnnotationSettings.getPrecursorCharge()-1, 1));
int nRewind = ionsRewind.size() / (Math.max(specificAnnotationSettings.getPrecursorCharge()-1, 1));
nForward = nForward > 20 ? 20 : nForward;
nRewind = nRewind > 20 ? 20 : nRewind;
long forwardFactorial = BasicMathFunctions.factorial(nForward);
long rewindFactorial = BasicMathFunctions.factorial(nRewind);
return xCorr * forwardFactorial * rewindFactorial;
}
/**
* Returns the e-value corresponding to a list of scores in a map. If not
* enough scores are present or if they are not spread the method returns
* null.
*
* @param hyperScores the different scores
*
* @return the e-values corresponding to the given scores
*/
public HashMap<Double, Double> getEValueHistogram(ArrayList<Double> hyperScores) {
HashMap<Integer, Integer> histogram = new HashMap<Integer, Integer>();
Double maxScore = 0.0;
Double minScore = Double.MAX_VALUE;
for (Double score : hyperScores) {
Integer intValue = score.intValue();
if (intValue > 0) {
Integer nScores = histogram.get(intValue);
if (nScores == null) {
nScores = 1;
} else {
nScores++;
}
histogram.put(intValue, nScores);
if (score > maxScore) {
maxScore = score;
}
if (score < minScore) {
minScore = score;
}
}
}
Integer lowestBin = minScore.intValue();
Integer highestBin = maxScore.intValue();
Integer secondEmptybin = highestBin;
Integer firstEmptybin = highestBin;
boolean emptyBin = false;
for (Integer bin = lowestBin; bin <= highestBin; bin++) {
if (!histogram.containsKey(bin)) {
if (!emptyBin) {
emptyBin = true;
firstEmptybin = bin;
} else {
secondEmptybin = bin;
break;
}
}
}
ArrayList<Integer> bins = new ArrayList<Integer>(histogram.keySet());
for (Integer bin : bins) {
if (bin > secondEmptybin) {
histogram.remove(bin);
} else if (bin > firstEmptybin) {
histogram.put(bin, 1);
}
}
bins = new ArrayList<Integer>(histogram.keySet());
Collections.sort(bins, Collections.reverseOrder());
ArrayList<Double> evalueFunctionX = new ArrayList<Double>(histogram.size());
ArrayList<Double> evalueFunctionY = new ArrayList<Double>(histogram.size());
Integer currentSum = 0;
for (Integer bin : bins) {
Integer nInBin = histogram.get(bin);
if (nInBin != null) {
currentSum += nInBin;
}
if (currentSum > 0) {
Double xValue = new Double(bin);
xValue = FastMath.log10(xValue);
evalueFunctionX.add(xValue);
Double yValue = new Double(currentSum);
yValue = FastMath.log10(yValue);
evalueFunctionY.add(yValue);
}
}
if (evalueFunctionX.size() <= 1) {
return null;
}
RegressionStatistics regressionStatistics = LinearRegression.getSimpleLinearRegression(evalueFunctionX, evalueFunctionY);
Double roundedA = Util.roundDouble(regressionStatistics.a, 2);
Double roundedB = Util.roundDouble(regressionStatistics.b, 2);
Integer nA = as.get(roundedA);
if (nA == null) {
as.put(roundedA, 1);
} else {
as.put(roundedA, nA + 1);
}
Integer nB = bs.get(roundedB);
if (nB == null) {
bs.put(roundedB, 1);
} else {
bs.put(roundedB, nB + 1);
}
return getInterpolation(hyperScores, regressionStatistics.a, regressionStatistics.b);
}
/**
* Returns the interpolation of a list of hyperscores using a linear
* interpolation of the form result = a * log(score) + b. If the score is
* null, returns the number of hyperscores. The value at every score is
* returned in a map.
*
* @param hyperScores a list of hyperscores
* @param a the slope of the interpolation
* @param b the offset of the interpolation
*
* @return the interpolation for every score in a map.
*/
public HashMap<Double, Double> getInterpolation(ArrayList<Double> hyperScores, Double a, Double b) {
HashMap<Double, Double> result = new HashMap<Double, Double>();
for (Double hyperScore : hyperScores) {
if (!result.containsKey(hyperScore)) {
if (hyperScore > 0) {
Double logScore = FastMath.log10(hyperScore);
Double eValue = getInterpolation(logScore, a, b);
result.put(hyperScore, eValue);
} else {
Double eValue = new Double(hyperScores.size());
result.put(hyperScore, eValue);
}
}
}
return result;
}
/**
* Returns the interpolated value for a given score in log. result = a *
* logScore + b.
*
* @param logScore the log of the score
* @param a the slope of the interpolation
* @param b the offset of the interpolation
*
* @return the interpolated value
*/
public static Double getInterpolation(Double logScore, Double a, Double b) {
return b + a * logScore;
}
/**
* Returns the rounded median of the as found in the previously interpolated
* scores. Null if none found.
*
* @return the rounded median of the as found in the previously interpolated
* scores
*/
public Double getMendianA() {
if (as.isEmpty()) {
return null;
}
return HistogramUtils.getMedianValue(as);
}
/**
* Returns the rounded median of the bs found in the previously interpolated
* scores. Null if none found.
*
* @return the rounded median of the bs found in the previously interpolated
* scores
*/
public Double getMendianB() {
if (bs.isEmpty()) {
return null;
}
return HistogramUtils.getMedianValue(bs);
}
/**
* Returns a histogram of the as found in the previously interpolated
* scores.
*
* @return a histogram of the as found in the previously interpolated scores
*/
public HashMap<Double, Integer> getAs() {
return as;
}
/**
* Returns a histogram of the bs found in the previously interpolated
* scores.
*
* @return a histogram of the bs found in the previously interpolated scores
*/
public HashMap<Double, Integer> getBs() {
return bs;
}
}