/* * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fhcrc.cpl.viewer.commandline.modules; import org.fhcrc.cpl.toolbox.commandline.arguments.ArgumentValidationException; import org.fhcrc.cpl.toolbox.commandline.arguments.CommandLineArgumentDefinition; import org.fhcrc.cpl.toolbox.commandline.arguments.FileToReadArgumentDefinition; import org.fhcrc.cpl.toolbox.proteomics.ProteinUtilities; import org.fhcrc.cpl.toolbox.ApplicationContext; import org.fhcrc.cpl.toolbox.statistics.BasicStatistics; import org.fhcrc.cpl.toolbox.commandline.CommandLineModuleExecutionException; import org.fhcrc.cpl.toolbox.commandline.CommandLineModule; import org.fhcrc.cpl.toolbox.proteomics.Protein; import org.fhcrc.cpl.toolbox.proteomics.PeptideGenerator; import org.fhcrc.cpl.toolbox.proteomics.Peptide; import org.apache.log4j.Logger; import java.util.*; import java.io.File; /** * Command line module for analyzing a hydrophobicity algorithm. * This implementation analyzes Krokhin's v3 algorithm. To analyze another algorithm, * make that algorithm available as a Java routine and call it from * calculateHydrophobicityWithAlgorithm */ public class HydrophobicityAlgorithmAnalyzerCLM extends BaseViewerCommandLineModuleImpl implements CommandLineModule { protected static Logger _log = Logger.getLogger(HydrophobicityAlgorithmAnalyzerCLM.class); protected File fastaFile; public HydrophobicityAlgorithmAnalyzerCLM() { init(); } /** * This method does the actual calling of the hydrophobicity algorithm. * * To change the algorithm analyzed by this class, edit this method. * @param peptideSequence * @return */ protected double calculateHydrophobicityWithAlgorithm(String peptideSequence) { Protein fakeProtein = new Protein("fake",peptideSequence.getBytes()); Peptide peptide = new Peptide(fakeProtein,0,fakeProtein.getBytes().length); return peptide.getHydrophobicity3(); } protected void init() { mCommandName = "analyzehydroalg"; mShortDescription = "Template command for analyzing a hydrophobicity algorithm"; mHelpMessage = "This command analyzes the Krokhin peptide hydrophobicity prediction algorithm (v3). It runs the algorithm on all tryptic peptides from a given FASTA file, and returns the mean and standard deviation.\n" + "It also serves as a template for analyzing other algorithms."; CommandLineArgumentDefinition[] argDefs = { new FileToReadArgumentDefinition("fasta",true,"fasta file containing database to digest") }; addArgumentDefinitions(argDefs); } public void assignArgumentValues() throws ArgumentValidationException { fastaFile = getFileArgumentValue("fasta"); } /** * do the actual work */ public void execute() throws CommandLineModuleExecutionException { List<Protein> proteins = ProteinUtilities.loadProteinsFromFasta(fastaFile); Set<String> peptideStrings = new HashSet<String>(); ApplicationContext.infoMessage("FASTA file contains " + proteins.size() + " proteins. Digesting..."); Map<String,Double> peptideHydrophobicityScores = new HashMap<String,Double>(); int i=0; for (Protein protein : proteins) { i++; if (i > 0 && (i % (proteins.size()/5) == 0)) System.err.println(""+ (i*100/proteins.size()) + " % complete"); PeptideGenerator peptideGenerator = new PeptideGenerator(); //use tryptic digest peptideGenerator.setDigest(PeptideGenerator.DIGEST_TRYPTIC); //allow one missed cleavage peptideGenerator.setMaxMissedCleavages(0); //set minimum residues peptideGenerator.setMinResidues(6 - 1); Peptide[] thisProteinPeptides = peptideGenerator.digestProtein(protein); for (Peptide peptide : thisProteinPeptides) peptideStrings.add(new String(peptide.getChars())); } Date startTime = new Date(); i=0; int numPeptides = peptideStrings.size(); ApplicationContext.infoMessage("Found " + numPeptides + " peptides in FASTA file"); for (String peptideString : peptideStrings) { //if (numPeptides > 25000) //{ // System.err.println("Hit peptide limit"); // break; //} i++; if (i > 0 && (i % (numPeptides/20) == 0)) ApplicationContext.infoMessage("***** "+ (i*100/numPeptides) + " % complete *****"); peptideHydrophobicityScores.put(peptideString, calculateHydrophobicityWithAlgorithm(peptideString)); } long deltaMS = (new Date().getTime() - startTime.getTime()); ApplicationContext.infoMessage("Elapsed time: " + ( deltaMS/ 1000) + " seconds, " + deltaMS + " milliseconds"); ApplicationContext.infoMessage("database contains " + peptideHydrophobicityScores.size() + " distinct peptides"); double[] hydroValues = new double[peptideHydrophobicityScores.size()]; i=0; for (Double hydro : peptideHydrophobicityScores.values()) hydroValues[i++] = hydro; double meanHydro = BasicStatistics.mean(hydroValues); double stddevHydro = BasicStatistics.standardDeviation(hydroValues); ApplicationContext.infoMessage("Mean hydrophobicity: " + meanHydro); ApplicationContext.infoMessage("Hydrophobicity standard deviation: " + stddevHydro); } }