/* * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fhcrc.cpl.viewer.commandline.modules; import org.fhcrc.cpl.toolbox.ApplicationContext; import org.fhcrc.cpl.toolbox.commandline.CommandLineModuleExecutionException; import org.fhcrc.cpl.toolbox.commandline.arguments.*; import org.fhcrc.cpl.toolbox.commandline.CommandLineModule; import org.apache.log4j.Logger; import org.fhcrc.cpl.toolbox.filehandler.TempFileManager; import org.fhcrc.cpl.toolbox.proteomics.feature.Feature; import org.fhcrc.cpl.toolbox.proteomics.feature.FeatureSet; import org.fhcrc.cpl.viewer.align.commandline.PeptideArrayCommandLineModule; import org.fhcrc.cpl.viewer.feature.extraction.strategy.FeatureStrategySmallMolecule; import org.fhcrc.cpl.viewer.feature.extraction.strategy.FeatureStrategySmallMoleculeNeg; import java.io.File; import java.util.*; /** * Command Module for feature finding, small molecule version */ public class FindSmallMoleculesCLM extends FindPeptidesCommandLineModule implements CommandLineModule { //used for multiple levels of log messages protected static Logger _log = Logger.getLogger(FindSmallMoleculesCLM.class); //more appropriate default than the smaller default for peptides public static final int DEFAULT_SCAN_WINDOW=10000; public static final int DEFAULT_STITCH_ELUTION_SECONDS = 600; protected int stitchElutionSeconds = DEFAULT_STITCH_ELUTION_SECONDS; protected boolean shouldMassFilter = true; protected boolean shouldStitch = false; public static final int SM_MOL_MINPEAKS = 1; public static final float SM_MOL_MAXMZ = 1000; public static final float SM_MOL_MAXMASS = 1000; public static final float SM_MOL_MINMZ = 100; public static final float SM_MOL_MAXKL = 5; public static final int SM_MOL_MAXCHARGE = 1; public static final float SM_MOL_MININTENSITY = 30; protected float stitchMassTolPPM = 1; protected float mfDivideBy = 1000f; protected float mfAdd = 0.1f; public void init() { super.init(); //override default from superclass scanWindowSize = FindSmallMoleculesCLM.DEFAULT_SCAN_WINDOW; //command name mCommandName = "findsmallmolecules"; //A longer help message mHelpMessage = "find small-molecule features, in positive ion mode. This command adjusts several of the " + "default parameters from findpeptides and uses the appropriate small-molecule-finding feature strategy " + "for positive or negative ion mode"; //A short (single-sentence) description of this command mShortDescription = "Find small-molecule features in an mzXML file, from a positive ion mode run"; addArgumentDefinition(new BooleanArgumentDefinition("posionmode", false, "Positive ion mode? (false: negative ion mode)", true)); addArgumentDefinition(new BooleanArgumentDefinition("stitch", false, "Stitch features together if they have the same mass and drop below threshold?", true)); addArgumentDefinition(new IntegerArgumentDefinition("stitchseconds", false, "Number of seconds over which to stitch together serially-eluting features with the same mass", DEFAULT_STITCH_ELUTION_SECONDS)); addArgumentDefinition(new BooleanArgumentDefinition("massfilter", false, "Apply mass filter? (d <= .0001m + 0.1, where d is the decimal part of the mass and m is the mass)", true)); addArgumentDefinition(new DecimalArgumentDefinition("maxtime", false, "Maximum retention time for features (seconds)", Float.MAX_VALUE)); addArgumentDefinition(new DecimalArgumentDefinition("maxmass", false, "Maximum mass for features", SM_MOL_MAXMASS)); } public void assignArgumentValues() throws ArgumentValidationException { super.assignArgumentValues(); shouldStitch = getBooleanArgumentValue("stitch"); //this is a bit of a hack for backward-compatibility. Include charge-2 features in the first //stage of feature-finding. They'll be filtered out later based on the maxcharge argument maxCharge = 2; //we only want metabolites if they have accurate mzs known featureSelector.setAccurateMzOnly(true); if (!hasArgumentValue("scanwindow")) { scanWindowSize = DEFAULT_SCAN_WINDOW; } if (!hasArgumentValue("minpeaks")) featureSelector.setMinPeaks(SM_MOL_MINPEAKS); if (!hasArgumentValue("maxmz")) featureSelector.setMaxMz(SM_MOL_MAXMZ); if (!hasArgumentValue("minmz")) featureSelector.setMinMz(SM_MOL_MINMZ); if (!hasArgumentValue("maxkl")) featureSelector.setMaxKL(SM_MOL_MAXKL); if (!hasArgumentValue("maxcharge")) { featureSelector.setMaxCharge(SM_MOL_MAXCHARGE); } else featureSelector.setMaxCharge(getIntegerArgumentValue("maxcharge")); if (!hasArgumentValue("minintensity")) { featureSelector.setMinIntensity(SM_MOL_MININTENSITY); } featureSelector.setMaxTime(getFloatArgumentValue("maxtime")); featureSelector.setMaxMass(getFloatArgumentValue("maxmass")); //This is hacky. Properly I should remove that argument definition, but superclass code will fail if it's //not there if (hasArgumentValue("strategy")) throw new ArgumentValidationException("'strategy' argument not allowed for this command. " + "Apologies for the confusion."); if (getBooleanArgumentValue("posionmode")) featureStrategyClass = FeatureStrategySmallMolecule.class; else featureStrategyClass = FeatureStrategySmallMoleculeNeg.class; stitchElutionSeconds = getIntegerArgumentValue("stitchseconds"); shouldMassFilter = getBooleanArgumentValue("massfilter"); } /** * Cosmetic override for superclass function. Names files ".features.tsv" instead of ".peptides.tsv" * @param mzXmlFile * @return */ protected File calcOutputFile(File mzXmlFile) { String mzXmlFileName = mzXmlFile.getName(); int mzXmlFileNameLength = mzXmlFileName.length(); String outputFileName; if (mzXmlFileName.toLowerCase().endsWith(".mzxml")) outputFileName = mzXmlFileName.substring(0, mzXmlFileNameLength - ".mzxml".length()); else if (mzXmlFileName.endsWith(".xml")) outputFileName = mzXmlFileName.substring(0, mzXmlFileNameLength - ".xml".length()); else outputFileName = mzXmlFileName; //This is the only bit that's different String outputExtension = ".features.tsv"; if ("APML".equalsIgnoreCase(featureFileFormat)) outputExtension = ".apml.xml"; outputFileName = outputFileName + outputExtension; return new File(outDir, outputFileName); } /** * workflow */ public void execute() throws CommandLineModuleExecutionException { System.err.println("Finding features with strategy " + featureStrategyClass.getName() + "..."); super.execute(); System.err.println("Features found. Post-processing..."); List<File> origOutputFiles = new ArrayList<File>(); if (outFile != null) origOutputFiles.add(outFile); else { for (File file : mzXmlFiles) { origOutputFiles.add(calcOutputFile(file)); } } if (shouldMassFilter) { System.err.println("Applying mass filter..."); for (File file : origOutputFiles) { try { FeatureSet featureSet = new FeatureSet(file); List<Feature> features = Arrays.asList(featureSet.getFeatures()); List<Feature> newFeatures = new ArrayList<Feature>(); for (Feature feature : features) { double d = feature.mz - Math.floor(feature.mz); if (d <= (feature.mz / mfDivideBy + mfAdd)) newFeatures.add(feature); } featureSet.setFeatures(newFeatures.toArray(new Feature[newFeatures.size()])); featureSet.save(file); _log.debug("Removed " + (features.size() - newFeatures.size()) + " features."); } catch (Exception e) { throw new CommandLineModuleExecutionException("Error filtering masses for output file " + file.getAbsolutePath(), e); } } } if (shouldStitch) { for (File origOutputFile : origOutputFiles) { File arrFile = TempFileManager.createTempFile(origOutputFile.getName() + ".peparray.tsv", this); PeptideArrayCommandLineModule pepCLM = new PeptideArrayCommandLineModule(); pepCLM.setInputFiles(new File[] { arrFile }); pepCLM.setOutFile(arrFile); Map<String, String> arrayCLMArgMap = new HashMap<String, String>(); arrayCLMArgMap.put("align", "false"); //hardcoded tolerance but doesn't matter -- this is just identity arrayCLMArgMap.put("masswindow", "" + stitchMassTolPPM); arrayCLMArgMap.put("masstype", "ppm"); arrayCLMArgMap.put("elutionwindow", "" + stitchElutionSeconds); arrayCLMArgMap.put("elutionmode", "time"); arrayCLMArgMap.put("normalize", "false"); arrayCLMArgMap.put(CommandLineArgumentDefinition.UNNAMED_PARAMETER_VALUE_SERIES_ARGUMENT, origOutputFile.getAbsolutePath()); arrayCLMArgMap.put("out",arrFile.getAbsolutePath()); try { pepCLM.digestArguments(arrayCLMArgMap); } catch (Exception e) { throw new CommandLineModuleExecutionException(e); } ApplicationContext.infoMessage("Building ion array " + arrFile.getAbsolutePath() + "..."); pepCLM.execute(); ApplicationContext.infoMessage("Built ion array " + arrFile.getAbsolutePath() ); ConsensusFeatureFileCLM consensusCLM = new ConsensusFeatureFileCLM(); Map<String, String> consensusCLMArgMap = new HashMap<String, String>(); consensusCLMArgMap.put("peparray", arrFile.getAbsolutePath()); consensusCLMArgMap.put("out", origOutputFile.getAbsolutePath()); consensusCLMArgMap.put("intensitymode", "max"); consensusCLMArgMap.put("minfeatureruns", "1"); try { consensusCLM.digestArguments(consensusCLMArgMap); } catch (Exception e) { throw new CommandLineModuleExecutionException(e); } ApplicationContext.infoMessage("Building stitched feature file " + origOutputFile.getAbsolutePath() + "..."); consensusCLM.execute(); ApplicationContext.infoMessage("Built stitched feature file " + origOutputFile.getAbsolutePath()); } } ApplicationContext.infoMessage("Done."); } }