/* * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fhcrc.cpl.viewer.ms2.commandline; import org.fhcrc.cpl.viewer.commandline.modules.BaseViewerCommandLineModuleImpl; import org.fhcrc.cpl.toolbox.commandline.arguments.*; import org.fhcrc.cpl.toolbox.proteomics.feature.FeatureSet; import org.fhcrc.cpl.toolbox.proteomics.feature.Feature; import org.fhcrc.cpl.toolbox.proteomics.feature.extraInfo.MS2ExtraInfoDef; import org.fhcrc.cpl.toolbox.proteomics.ProteinUtilities; import org.fhcrc.cpl.viewer.ms2.GeneMappingUtilities; import org.fhcrc.cpl.toolbox.filehandler.TabWriter; import org.fhcrc.cpl.toolbox.commandline.CommandLineModuleExecutionException; import org.fhcrc.cpl.toolbox.commandline.CommandLineModule; import org.fhcrc.cpl.toolbox.gui.chart.PanelWithHistogram; import org.fhcrc.cpl.toolbox.gui.chart.ChartDialog; import org.apache.log4j.Logger; import java.io.*; import java.util.*; /** * */ public class SpectralCountCLM extends BaseViewerCommandLineModuleImpl implements CommandLineModule { protected static Logger _log = Logger.getLogger(SpectralCountCLM.class); protected File protXmlFile = null; protected FeatureSet[] ms2FeatureSets = null; protected File geneLookupFile = null; protected File outFile = null; protected boolean showCharts = false; protected int mode; protected final static String[] modeStrings = { "peptide", "gene", "proteingroup", "protein" }; protected final static String[] modeExplanations = { "Peptide-level counts", "Gene-level counts", "protein group-level counts", "protein-level counts" }; protected static final int MODE_PEPTIDE = 0; protected static final int MODE_GENE = 1; protected static final int MODE_PROTEIN_GROUP = 2; protected static final int MODE_PROTEIN = 3; public SpectralCountCLM() { init(); } protected void init() { mCommandName = "spectralcount"; mShortDescription = "Create a spreadsheet with spectral count information"; mHelpMessage = "Create a spreadsheet with spectral count information"; CommandLineArgumentDefinition[] argDefs = { new EnumeratedValuesArgumentDefinition("mode",true,modeStrings, modeExplanations), new FileToReadArgumentDefinition("protxml", false, "ProtXML file"), createUnnamedSeriesFileArgumentDefinition(true, "MS2 Feature file(s)"), new FileToReadArgumentDefinition("genelookupfile", false, "Gene lookup file for IPI numbers"), new FileToWriteArgumentDefinition("out", false, "output file"), new BooleanArgumentDefinition("showcharts", false, "show charts?", showCharts), }; addArgumentDefinitions(argDefs); } public void assignArgumentValues() throws ArgumentValidationException { protXmlFile = getFileArgumentValue("protxml"); File[] ms2FeatureFiles = this.getUnnamedSeriesFileArgumentValues(); ms2FeatureSets = new FeatureSet[ms2FeatureFiles.length]; try { for (int i=0; i<ms2FeatureFiles.length; i++) ms2FeatureSets[i] = new FeatureSet(ms2FeatureFiles[i]); } catch (Exception e) { throw new ArgumentValidationException(e); } geneLookupFile = getFileArgumentValue("genelookupfile"); mode = ((EnumeratedValuesArgumentDefinition) getArgumentDefinition("mode")).getIndexForArgumentValue(getStringArgumentValue("mode")); switch(mode) { case MODE_PEPTIDE: assertArgumentAbsent("protxml","mode"); assertArgumentAbsent("genelookupfile","mode"); break; case MODE_GENE: assertArgumentPresent("protxml","mode"); assertArgumentPresent("genelookupfile","mode"); break; case MODE_PROTEIN_GROUP: case MODE_PROTEIN: assertArgumentPresent("protxml","mode"); break; } outFile = getFileArgumentValue("out"); showCharts = getBooleanArgumentValue("showcharts"); } /** * do the actual work */ public void execute() throws CommandLineModuleExecutionException { Map<String,Integer>[] peptideSpectralCountMaps = new Map[ms2FeatureSets.length]; for (int i=0; i<ms2FeatureSets.length; i++) { Map<String,Integer> peptideSpectralCountMap = new HashMap<String,Integer>(); for (Feature feature : ms2FeatureSets[i].getFeatures()) { String featurePeptide = MS2ExtraInfoDef.getFirstPeptide(feature); if (featurePeptide == null) continue; Integer currentCount = peptideSpectralCountMap.get(featurePeptide); if (currentCount == null) { currentCount = 0; } peptideSpectralCountMap.put(featurePeptide, currentCount+1); } peptideSpectralCountMaps[i] = peptideSpectralCountMap; } switch(mode) { case MODE_PEPTIDE: doPeptides(peptideSpectralCountMaps); break; case MODE_GENE: doGenes(peptideSpectralCountMaps); break; case MODE_PROTEIN_GROUP: doProteinGroups(peptideSpectralCountMaps); break; case MODE_PROTEIN: doProteins(peptideSpectralCountMaps); break; } } protected void doPeptides(Map<String,Integer>[] peptideSpectralCountMaps) throws CommandLineModuleExecutionException { if (outFile != null) { Map<String,Integer> peptideSpectralCountMap = peptideSpectralCountMaps[0]; String[] columns = new String[] {"peptide","spectra"}; List<Map<String,Object>> rowsList = new ArrayList<Map<String,Object>>(peptideSpectralCountMap.size()); for (String peptide : peptideSpectralCountMap.keySet()) { Map<String,Object> row = new HashMap<String,Object>(); row.put("peptide",peptide); row.put("spectra",peptideSpectralCountMap.get(peptide)); rowsList.add(row); } TabWriter tabWriter = new TabWriter(columns, rowsList, outFile); try { tabWriter.write(); } catch (IOException e) { throw new CommandLineModuleExecutionException(e); } } if (showCharts) { PanelWithHistogram pwh = new PanelWithHistogram("peptide spectral counts"); pwh.setOffsetSeries(true); for (int i=0; i<peptideSpectralCountMaps.length; i++) { List<Double> specCounts = new ArrayList<Double>(); for (Integer specCount : peptideSpectralCountMaps[i].values()) specCounts.add((double) specCount); pwh.addData(specCounts, ms2FeatureSets[i].getSourceFile().getName()); } ChartDialog cd = new ChartDialog(pwh); cd.setVisible(true); } } protected void doProteins(Map<String,Integer>[] peptideSpectralCountMaps) throws CommandLineModuleExecutionException { Map<String,Integer> peptideSpectralCountMap = peptideSpectralCountMaps[0]; Map<String, Set<String>> peptideProteinMap; try { peptideProteinMap = ProteinUtilities.loadPeptideProteinMapFromProtXML(protXmlFile,0); } catch (Exception e) { throw new CommandLineModuleExecutionException("Error parsing protxml file",e); } Map<String, Integer> proteinSpectralCountMap = new HashMap<String,Integer>(); for (String peptide : peptideProteinMap.keySet()) { if (!peptideSpectralCountMap.containsKey(peptide)) continue; Set<String> proteinsThisPeptide = peptideProteinMap.get(peptide); for (String protein : proteinsThisPeptide) { Integer proteinGroupSpectralCount = proteinSpectralCountMap.get(protein); if (proteinGroupSpectralCount == null) { proteinGroupSpectralCount = 0; } proteinSpectralCountMap.put(protein, proteinGroupSpectralCount + peptideSpectralCountMap.get(peptide)); } } String[] columns = new String[] {"protein","spectra"}; List<Map<String,Object>> rowsList = new ArrayList<Map<String,Object>>(proteinSpectralCountMap.size()); for (String protein : proteinSpectralCountMap.keySet()) { Map<String,Object> row = new HashMap<String,Object>(); row.put("protein",protein); row.put("spectra",proteinSpectralCountMap.get(protein)); rowsList.add(row); } if (outFile != null) { TabWriter tabWriter = new TabWriter(columns, rowsList, outFile); try { tabWriter.write(); } catch (IOException e) { throw new CommandLineModuleExecutionException(e); } } if (showCharts) { List<Float> specCounts = new ArrayList<Float>(); for (Integer specCount : proteinSpectralCountMap.values()) specCounts.add((float) specCount); PanelWithHistogram pwh = new PanelWithHistogram(specCounts, "protein spectral counts"); pwh.displayInTab(); } } protected void doProteinGroups(Map<String,Integer>[] peptideSpectralCountMaps) throws CommandLineModuleExecutionException { Map<String,Integer> peptideSpectralCountMap = peptideSpectralCountMaps[0]; Map<String, Set<Integer>> peptideGroupMap; try { peptideGroupMap = ProteinUtilities.loadPeptideProteinGroupMapFromProtXML(protXmlFile,0); } catch (Exception e) { throw new CommandLineModuleExecutionException("Error parsing protxml file",e); } Map<Integer, Integer> proteinGroupSpectralCountMap = new HashMap<Integer,Integer>(); for (String peptide : peptideGroupMap.keySet()) { if (!peptideSpectralCountMap.containsKey(peptide)) continue; Set<Integer> proteinGroupsThisPeptide = peptideGroupMap.get(peptide); for (Integer proteinGroup : proteinGroupsThisPeptide) { Integer proteinGroupSpectralCount = proteinGroupSpectralCountMap.get(proteinGroup); if (proteinGroupSpectralCount == null) { proteinGroupSpectralCount = 0; } proteinGroupSpectralCountMap.put(proteinGroup, proteinGroupSpectralCount + peptideSpectralCountMap.get(peptide)); } } String[] columns = new String[] {"proteingroup","spectra"}; List<Map<String,Object>> rowsList = new ArrayList<Map<String,Object>>(proteinGroupSpectralCountMap.size()); for (Integer proteinGroup : proteinGroupSpectralCountMap.keySet()) { Map<String,Object> row = new HashMap<String,Object>(); row.put("proteingroup",proteinGroup); row.put("spectra",proteinGroupSpectralCountMap.get(proteinGroup)); rowsList.add(row); } if (outFile != null) { TabWriter tabWriter = new TabWriter(columns, rowsList, outFile); try { tabWriter.write(); } catch (IOException e) { throw new CommandLineModuleExecutionException(e); } } if (showCharts) { List<Float> specCounts = new ArrayList<Float>(); for (Integer specCount : proteinGroupSpectralCountMap.values()) specCounts.add((float) specCount); PanelWithHistogram pwh = new PanelWithHistogram(specCounts, "protein spectral counts"); pwh.displayInTab(); } } protected void doGenes(Map<String,Integer>[] peptideSpectralCountMaps) throws CommandLineModuleExecutionException { Map<String,Integer> peptideSpectralCountMap = peptideSpectralCountMaps[0]; Map<String, Set<String>> peptideIPIMap; try { peptideIPIMap = ProteinUtilities.loadPeptideProteinMapFromProtXML(protXmlFile,0); } catch (Exception e) { throw new CommandLineModuleExecutionException("Error parsing protxml file",e); } Map<String,List<String>> ipiGeneArrayMap = GeneMappingUtilities.loadIPIGeneMap(geneLookupFile); Set<String> geneSet = new HashSet<String>(); for (List<String> genearray : ipiGeneArrayMap.values()) for (String gene : genearray) geneSet.add(gene); Map<String, Integer> geneSpectralCountMap = new HashMap<String,Integer>(); for (String peptide : peptideSpectralCountMap.keySet()) { Set<String> genesAlreadyUpdated = new HashSet<String>(); Set<String> proteinsThisPeptide = peptideIPIMap.get(peptide); if (proteinsThisPeptide == null) continue; for (String ipi : proteinsThisPeptide) { List<String> genesThisIPI = ipiGeneArrayMap.get(ipi); if (genesThisIPI == null) continue; for (String gene : genesThisIPI) { if (genesAlreadyUpdated.contains(gene)) continue; Integer geneSpectralCount = geneSpectralCountMap.get(gene); if (geneSpectralCount == null) geneSpectralCount = 0; geneSpectralCountMap.put(gene, geneSpectralCount + peptideSpectralCountMap.get(peptide)); genesAlreadyUpdated.add(gene); } } } String[] columns = new String[] {"gene","spectra"}; List<Map<String,Object>> rowsList = new ArrayList<Map<String,Object>>(geneSpectralCountMap.size()); for (String peptide : peptideSpectralCountMap.keySet()) { Map<String,Object> row = new HashMap<String,Object>(); row.put("gene",peptide); row.put("spectra",peptideSpectralCountMap.get(peptide)); rowsList.add(row); } if (outFile != null) { TabWriter tabWriter = new TabWriter(columns, rowsList, outFile); try { tabWriter.write(); } catch (IOException e) { throw new CommandLineModuleExecutionException(e); } } } }