/*
* Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.fhcrc.cpl.viewer.amt;
import org.apache.log4j.Logger;
import org.fhcrc.cpl.toolbox.datastructure.Pair;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
/**
* Converts hydrophobicity values from various algorithms to normalized
* values. Stores information about all known hydrophobicity algorithms.
*
* Normalized values have a mean of 0 and a standard deviation of 1 when
* calculated for the entire IPI human database.
*
* Normalization of an algorithm is performed by calculating the mean and
* standard deviation of that algorithm on all peptides in the entire
* IPI human database. Minimum peptide length 6, one missed cleavage allowed.
*
* Individual values are normalized by scaling and shifting the value
* appropriately.
*
* The IPI human database used for these calculations is the human IPI
* database released on July 13, 2006, filename ipi.HUMAN.fasta.20060713
*
*/
public class HydrophobicityNormalizer
{
    private static final Logger _log = Logger.getLogger(HydrophobicityNormalizer.class);

    //constants for easy reference

    //Krokhin's algorithm, versions 1 and 3
    public static final double KROKHIN_1_MEAN=30.763700485229492;
    public static final double KROKHIN_1_STDDEV=21.886646343829856;
    public static final double KROKHIN_3_MEAN=29.575716018676758;
    public static final double KROKHIN_3_STDDEV=17.596638381015442;
    //Alternative KROKHIN_3 values, redone with no missed cleavages (not in use):
    //  mean=27.470834732055664, stddev=17.091617040482113

    //predictNET
    public static final double PREDICTNET_1_MEAN=0.337;
    public static final double PREDICTNET_1_STDDEV=0.210;

    //Key is based on algorithm name and version, as built by constructKey().
    //Value is a pair containing mean and standard deviation, in that order.
    //This is a plain HashMap with no synchronization around reads or writes.
    protected static Map<String, Pair<Double,Double>> _algStatsMap =
            new HashMap<String, Pair<Double,Double>>();

    static
    {
        //register the statistics for the built-in algorithms
        addAlgorithmStatistics("krokhin", 1.0,
                KROKHIN_1_MEAN, KROKHIN_1_STDDEV);
        addAlgorithmStatistics("krokhin", 3.0,
                KROKHIN_3_MEAN, KROKHIN_3_STDDEV);
        addAlgorithmStatistics("predictnet", 1.0,
                PREDICTNET_1_MEAN, PREDICTNET_1_STDDEV);
    }

    /**
     * For adding stats on new algorithms. Public, so it can be added by any
     * standard or custom piece of code. An existing entry for the same
     * name and version is replaced.
     * @param algorithmName name of the algorithm; matched case-insensitively.
     *        Must not be null
     * @param algorithmVersion version of the algorithm
     * @param mean mean of the algorithm's values, subtracted by normalize()
     * @param stddev standard deviation of the algorithm's values, used as the
     *        divisor by normalize()
     * @throws NullPointerException if algorithmName is null
     */
    public static void addAlgorithmStatistics(String algorithmName,
                                              double algorithmVersion,
                                              double mean, double stddev)
    {
        _algStatsMap.put(constructKey(algorithmName, algorithmVersion),
                new Pair<Double,Double>(mean, stddev));
    }

    /**
     * Build the map key for an algorithm name and version: the lower-cased
     * name, an underscore, and the version as rendered by string
     * concatenation of a double (e.g. "krokhin_1.0").
     * @param algorithmName name of the algorithm. Must not be null
     * @param algorithmVersion version of the algorithm
     * @return the key used in _algStatsMap
     * @throws NullPointerException if algorithmName is null
     */
    protected static String constructKey(String algorithmName,
                                         double algorithmVersion)
    {
        //Locale.ROOT keeps the key independent of the JVM default locale.
        //With the no-argument toLowerCase(), a Turkish default locale maps
        //'I' to a dotless 'i', so "KROKHIN" would not match "krokhin".
        return algorithmName.toLowerCase(Locale.ROOT) + "_" +
                algorithmVersion;
    }

    /**
     * Check whether statistics are registered for an algorithm name and version.
     * @param algorithmName name of the algorithm; matched case-insensitively
     * @param algorithmVersion version of the algorithm
     * @return true if statistics are registered; false otherwise, including
     *         when algorithmName is null
     */
    public static boolean algorithmVersionKnown(String algorithmName,
                                                double algorithmVersion)
    {
        if (algorithmName == null)
            return false;
        return _algStatsMap.containsKey(constructKey(algorithmName, algorithmVersion));
    }

    /**
     * Normalize a hydrophobicity value calculated using the specified
     * algorithm name and version: subtract the mean, divide by the
     * standard deviation.
     *
     * @param inputHydrophobicity the raw value to normalize
     * @param algorithmName name of the algorithm; matched case-insensitively
     * @param algorithmVersion version of the algorithm
     * @return (inputHydrophobicity - mean) / stddev, using the statistics
     *         registered for the algorithm
     * @throws NullPointerException if algorithmName is null or no statistics
     *         are registered for the algorithm name and version. The failure
     *         is logged at error level before the exception is thrown
     */
    public static double normalize(double inputHydrophobicity,
                                   String algorithmName,
                                   double algorithmVersion)
    {
        Pair<Double,Double> algInfo = null;
        if (algorithmName != null)
            algInfo = _algStatsMap.get(constructKey(algorithmName, algorithmVersion));

        if (algInfo == null)
        {
            //explicit check instead of catching an NPE from the dereference;
            //the exception type is kept so existing callers still work
            String message =
                    "normalize: Failed to find algorithm information for algorithm " +
                    algorithmName + ", version " + algorithmVersion;
            _log.error(message);
            throw new NullPointerException(message);
        }

        //shift by the mean, to center on 0
        double result = inputHydrophobicity - algInfo.first;
        //divide by the standard deviation, so that the standard deviation of
        //normalized values will be 1
        result = result / algInfo.second;
        return result;
    }
}