/*
* Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.fhcrc.cpl.viewer.ms2.commandline;
import org.fhcrc.cpl.viewer.commandline.modules.BaseViewerCommandLineModuleImpl;
import org.fhcrc.cpl.toolbox.commandline.arguments.*;
import org.fhcrc.cpl.toolbox.proteomics.feature.FeatureSet;
import org.fhcrc.cpl.toolbox.proteomics.feature.Feature;
import org.fhcrc.cpl.toolbox.proteomics.feature.extraInfo.MS2ExtraInfoDef;
import org.fhcrc.cpl.toolbox.proteomics.feature.extraInfo.IsotopicLabelExtraInfoDef;
import org.fhcrc.cpl.toolbox.proteomics.ProteinUtilities;
import org.fhcrc.cpl.toolbox.filehandler.TabWriter;
import org.fhcrc.cpl.toolbox.statistics.BasicStatistics;
import org.fhcrc.cpl.toolbox.ApplicationContext;
import org.fhcrc.cpl.toolbox.commandline.CommandLineModuleExecutionException;
import org.fhcrc.cpl.toolbox.commandline.CommandLineModule;
import org.apache.log4j.Logger;
import java.io.File;
import java.util.*;
/**
 * Command line module that builds a tab-delimited spreadsheet showing which
 * proteins (or protein groups) were seen in which MS2 feature files.
 *
 * The output has one row per protein identifier and one column per feature
 * file; a cell holds {@link #PRESENT_MARKER} when at least one peptide in that
 * file maps to the protein according to the supplied protXML file. Summary
 * means (proteins per file, quantitated proteins per file, files per protein)
 * are reported through {@link ApplicationContext#infoMessage}.
 */
public class ProteinFractionsSpreadsheetCLM extends BaseViewerCommandLineModuleImpl
        implements CommandLineModule
{
    protected static Logger _log = Logger.getLogger(ProteinFractionsSpreadsheetCLM.class);

    /** Name of the first output column, which holds the protein identifier. */
    protected static final String PROTEIN_COLUMN = "protein";

    /** Value written under a file's column when the protein was seen in that file. */
    protected static final String PRESENT_MARKER = "X";

    protected File[] featureFiles;
    protected File outFile;
    protected File protXmlFile;
    // Features scoring below this PeptideProphet value are skipped; 0 disables the filter.
    protected double minPeptideProphet = 0.0;
    // Passed through to the protXML loaders as their minimum ProteinProphet cutoff.
    protected double minProteinProphet = 0.0;
    // When true, rows are keyed by protein group number rather than by protein name.
    protected boolean groupLevel = false;

    public ProteinFractionsSpreadsheetCLM()
    {
        init();
    }

    /**
     * Sets the command name, descriptions and argument definitions.
     */
    protected void init()
    {
        mCommandName = "proteinfractionsspreadsheet";
        mShortDescription = "Create a spreadsheet assigning proteins to fractions";
        mHelpMessage = "Creates a tab-delimited spreadsheet with one row per protein (or protein group, " +
                "with 'grouplevel') and one column per MS2 feature file. A cell is marked 'X' when a " +
                "peptide identified in that file maps to the protein in the protXML file.";
        CommandLineArgumentDefinition[] argDefs =
                {
                        createUnnamedSeriesFileArgumentDefinition(true, "MS2 feature files"),
                        new FileToReadArgumentDefinition("protxml", true, "ProtXML File"),
                        new DecimalArgumentDefinition("minpprophet", false,
                                "Minimum peptideprophet", minPeptideProphet),
                        new DecimalArgumentDefinition("minproteinprophet", false,
                                "Minimum proteinprophet", minProteinProphet),
                        new BooleanArgumentDefinition("grouplevel", false,
                                "Group-level? (default is accession-number level)", groupLevel),
                        new FileToWriteArgumentDefinition("out", true, "Output spreadsheet file"),
                };
        addArgumentDefinitions(argDefs);
    }

    /**
     * Copies the parsed command-line argument values into this module's fields.
     *
     * @throws ArgumentValidationException if an argument value cannot be retrieved
     */
    public void assignArgumentValues()
            throws ArgumentValidationException
    {
        featureFiles = this.getUnnamedSeriesFileArgumentValues();
        protXmlFile = getFileArgumentValue("protxml");
        minPeptideProphet = getDoubleArgumentValue("minpprophet");
        minProteinProphet = getDoubleArgumentValue("minproteinprophet");
        outFile = getFileArgumentValue("out");
        groupLevel = getBooleanArgumentValue("grouplevel");
    }

    /**
     * Loads the peptide-to-protein mapping, reads every feature file, writes the
     * protein-by-file spreadsheet and reports summary means.
     *
     * @throws CommandLineModuleExecutionException wrapping any exception raised
     *         while loading inputs or writing the output
     */
    public void execute() throws CommandLineModuleExecutionException
    {
        try
        {
            Map<String, ? extends Set<?>> peptideProteinMap = loadPeptideProteinMap();

            // protein identifier (as a string) -> files in which it was seen
            Map<String, Set<File>> proteinFileMap = new HashMap<String, Set<File>>();
            List<String> columnNamesList = new ArrayList<String>();
            columnNamesList.add(PROTEIN_COLUMN);
            List<Float> numProteinsPerFile = new ArrayList<Float>();
            List<Float> numQuantProteinsPerFile = new ArrayList<Float>();

            for (File featureFile : featureFiles)
            {
                // NOTE: columns are keyed by bare file name, so two inputs with the
                // same name in different directories would share a column.
                columnNamesList.add(featureFile.getName());

                Set<String> proteinsThisFile = new HashSet<String>();
                Set<String> quantProteinsThisFile = new HashSet<String>();
                collectProteins(new FeatureSet(featureFile), peptideProteinMap,
                        proteinsThisFile, quantProteinsThisFile);

                for (String protein : proteinsThisFile)
                {
                    Set<File> filesThisProtein = proteinFileMap.get(protein);
                    if (filesThisProtein == null)
                    {
                        filesThisProtein = new HashSet<File>();
                        proteinFileMap.put(protein, filesThisProtein);
                    }
                    filesThisProtein.add(featureFile);
                }
                numProteinsPerFile.add((float) proteinsThisFile.size());
                numQuantProteinsPerFile.add((float) quantProteinsThisFile.size());
            }
            ApplicationContext.infoMessage("Mean proteins per file: " +
                    BasicStatistics.mean(numProteinsPerFile));
            ApplicationContext.infoMessage("Mean QUANT proteins per file: " +
                    BasicStatistics.mean(numQuantProteinsPerFile));

            List<Float> filesPerProtein = new ArrayList<Float>();
            TabWriter tw = new TabWriter(columnNamesList.toArray(new String[columnNamesList.size()]));
            tw.setOutFile(outFile);
            for (Map.Entry<String, Set<File>> entry : proteinFileMap.entrySet())
            {
                Set<File> filesThisProtein = entry.getValue();
                Map<String, Object> row = new HashMap<String, Object>();
                row.put(PROTEIN_COLUMN, entry.getKey());
                filesPerProtein.add((float) filesThisProtein.size());
                for (File file : filesThisProtein)
                {
                    row.put(file.getName(), PRESENT_MARKER);
                }
                tw.addRow(row);
            }
            tw.write();
            ApplicationContext.infoMessage("Mean files per protein: " +
                    BasicStatistics.mean(filesPerProtein));
        }
        catch (Exception e)
        {
            throw new CommandLineModuleExecutionException(e);
        }
    }

    /**
     * Loads the peptide-to-protein mapping from the protXML file at the
     * requested level.
     *
     * With {@code groupLevel} set, peptides map to protein group numbers;
     * otherwise (the default) they map to protein names. The original code had
     * these two branches swapped, contradicting the argument's help text.
     *
     * @return map from peptide sequence to the set of protein identifiers
     * @throws Exception whatever the underlying protXML loader throws
     */
    private Map<String, ? extends Set<?>> loadPeptideProteinMap() throws Exception
    {
        if (groupLevel)
        {
            Map<String, Set<Integer>> peptideProteinGroupMap =
                    ProteinUtilities.loadPeptideProteinGroupMapFromProtXML(protXmlFile, minProteinProphet);
            return peptideProteinGroupMap;
        }
        Map<String, Set<String>> peptideProteinNameMap =
                ProteinUtilities.loadPeptideProteinMapFromProtXML(protXmlFile, minProteinProphet);
        return peptideProteinNameMap;
    }

    /**
     * Adds to the given sets the identifiers of every protein that a passing
     * feature's first peptide maps to.
     *
     * @param featureSet features read from one file
     * @param peptideProteinMap peptide sequence to protein identifiers
     * @param proteinsThisFile receives all matched protein identifiers
     * @param quantProteinsThisFile receives identifiers matched by features that have a ratio
     */
    private void collectProteins(FeatureSet featureSet,
                                 Map<String, ? extends Set<?>> peptideProteinMap,
                                 Set<String> proteinsThisFile,
                                 Set<String> quantProteinsThisFile)
    {
        for (Feature feature : featureSet.getFeatures())
        {
            // The cutoff is only applied when it is positive.
            if (minPeptideProphet > 0 &&
                    (MS2ExtraInfoDef.getPeptideProphet(feature) < minPeptideProphet))
                continue;
            String peptide = MS2ExtraInfoDef.getFirstPeptide(feature);
            if (peptide == null)
                continue;
            Set<?> proteinsThisPeptide = peptideProteinMap.get(peptide);
            if (proteinsThisPeptide == null)
                continue;

            boolean hasRatio = IsotopicLabelExtraInfoDef.hasRatio(feature);
            for (Object protein : proteinsThisPeptide)
            {
                // Identifiers are stored as strings so names and group numbers
                // share one key type in the output.
                String proteinKey = protein.toString();
                proteinsThisFile.add(proteinKey);
                if (hasRatio)
                    quantProteinsThisFile.add(proteinKey);
            }
        }
    }
}