/* * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fhcrc.cpl.viewer.ms2.commandline; import org.fhcrc.cpl.viewer.commandline.modules.BaseViewerCommandLineModuleImpl; import org.fhcrc.cpl.toolbox.commandline.arguments.*; import org.fhcrc.cpl.toolbox.proteomics.ProteinUtilities; import org.fhcrc.cpl.toolbox.ApplicationContext; import org.fhcrc.cpl.toolbox.commandline.CommandLineModuleExecutionException; import org.fhcrc.cpl.toolbox.commandline.CommandLineModule; import org.apache.log4j.Logger; import org.fhcrc.cpl.toolbox.proteomics.Protein; import org.fhcrc.cpl.toolbox.proteomics.PeptideGenerator; import org.fhcrc.cpl.toolbox.proteomics.Peptide; import java.io.File; import java.util.Set; import java.util.HashSet; /** * Command linemodule for plotting the mass calibration of a feature file */ public class CompareFastasCLM extends BaseViewerCommandLineModuleImpl implements CommandLineModule { protected static Logger _log = Logger.getLogger(CompareFastasCLM.class); protected File fastaFile1 = null; protected File fastaFile2 = null; protected File outFile = null; int minPeptideLength = 1; public CompareFastasCLM() { init(); } protected void init() { mCommandName = "comparefastas"; mShortDescription = "compare fastas"; mHelpMessage = "compare fastas"; CommandLineArgumentDefinition[] argDefs = { new FileToReadArgumentDefinition("fasta1",true, "FASTA file 1"), new FileToReadArgumentDefinition("fasta2",true, "FASTA file 2"), new FileToWriteArgumentDefinition("out",true,"output file"), new IntegerArgumentDefinition("minpeptidelength", false, "minimum peptide length to consider", minPeptideLength), }; addArgumentDefinitions(argDefs); } public void assignArgumentValues() throws ArgumentValidationException { fastaFile1 = getFileArgumentValue("fasta1"); fastaFile2 = getFileArgumentValue("fasta2"); outFile = getFileArgumentValue("out"); minPeptideLength = getIntegerArgumentValue("minpeptidelength"); } /** * do the actual work */ public void execute() throws CommandLineModuleExecutionException { Set<String> peptides1 = loadPeptidesFromFasta(fastaFile1); Set<String> peptides2 = loadPeptidesFromFasta(fastaFile2); Set<String> peptidesUnique1 = new HashSet<String>(); Set<String> peptidesUnique2 = new HashSet<String>(); Set<String> commonPeptides = new HashSet<String>(); Set<String> allPeptides = new HashSet<String>(); for (String peptide1 : peptides1) { allPeptides.add(peptide1); if (peptides2.contains(peptide1)) commonPeptides.add(peptide1); else peptidesUnique1.add(peptide1); } for (String peptide2 : peptides2) { allPeptides.add(peptide2); if (!peptides1.contains(peptide2)) peptidesUnique2.add(peptide2); } ApplicationContext.infoMessage("Total peptides: " + allPeptides.size()); ApplicationContext.infoMessage("Unique peptides in fasta 1: " + peptidesUnique1.size() + " (" + (100 * peptidesUnique1.size() / allPeptides.size()) + "%)"); ApplicationContext.infoMessage("Unique peptides in fasta 2: " + peptidesUnique2.size()+ " (" + (100 * peptidesUnique2.size() / allPeptides.size()) + "%)"); ApplicationContext.infoMessage("Common peptides: " + commonPeptides.size()+ " (" + (100 * commonPeptides.size() / allPeptides.size()) + "%)"); } protected Set<String> loadPeptidesFromFasta(File fastaFile) { Protein[] fastaProteins = ProteinUtilities.loadProteinsFromFasta(fastaFile).toArray(new Protein[0]); PeptideGenerator pg = new PeptideGenerator(); Set<String> result = new HashSet<String>(); for (Protein protein : fastaProteins) { Peptide[] peptidesThisProtein = pg.digestProtein(protein); for (Peptide peptideThisProtein : peptidesThisProtein) { if (peptideThisProtein.getChars().length >= minPeptideLength) result.add(new String(peptideThisProtein.getChars())); } } return result; } }