/* * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fhcrc.cpl.viewer.commandline.modules; import org.fhcrc.cpl.toolbox.commandline.arguments.*; import org.fhcrc.cpl.toolbox.commandline.CommandLineModule; import org.fhcrc.cpl.toolbox.commandline.CommandLineModuleExecutionException; import org.fhcrc.cpl.toolbox.filehandler.TabLoader; import org.fhcrc.cpl.toolbox.statistics.BasicStatistics; import org.fhcrc.cpl.toolbox.gui.chart.PanelWithHistogram; import org.fhcrc.cpl.toolbox.gui.chart.PanelWithScatterPlot; import org.fhcrc.cpl.toolbox.proteomics.MS2Modification; import org.fhcrc.cpl.toolbox.proteomics.MassUtilities; import org.fhcrc.cpl.toolbox.proteomics.PeptideUtilities; import org.fhcrc.cpl.toolbox.proteomics.ModifiedAminoAcid; import org.fhcrc.cpl.toolbox.proteomics.feature.Feature; import org.fhcrc.cpl.toolbox.proteomics.feature.FeatureSet; import org.fhcrc.cpl.toolbox.proteomics.feature.extraInfo.MS2ExtraInfoDef; import org.fhcrc.cpl.toolbox.proteomics.commandline.arguments.ModificationListArgumentDefinition; import org.fhcrc.cpl.toolbox.datastructure.Pair; import org.fhcrc.cpl.toolbox.proteomics.feature.matching.FeatureSetMatcher; import org.fhcrc.cpl.viewer.commandline.modules.BaseViewerCommandLineModuleImpl; import org.fhcrc.cpl.viewer.amt.AmtPeptideEntry; import org.fhcrc.cpl.viewer.amt.AmtDatabaseFeatureSetGenerator; import org.apache.log4j.Logger; import java.io.*; import java.util.*; /** * Calculates mz values for peptides for a targeted MS/MS experiment */ public class MatchFeatureMassesCLM extends BaseViewerCommandLineModuleImpl implements CommandLineModule { protected static Logger _log = Logger.getLogger(MatchFeatureMassesCLM.class); protected File[] inputFiles = null; protected File outFile = null; protected File outDir = null; public static final float DEFAULT_DELTA_PPM = 3; public static final float DEFAULT_CALIBRATE_DELTA_PPM = 10; protected float deltaPPM = DEFAULT_DELTA_PPM; protected float calibrateDeltaPPM = DEFAULT_CALIBRATE_DELTA_PPM; List<MapWithMass> mapsWithMassesAsc = new ArrayList<MapWithMass>(); List<String> otherMassFileColNames = new ArrayList<String>(); protected boolean showCharts = false; protected boolean shouldCalibrate = false; public MatchFeatureMassesCLM() { init(); } protected void init() { mCommandName = "matchfeaturemasses"; mShortDescription = "Match a feature set(s) by mass to a specified list of masses"; mHelpMessage = mShortDescription; CommandLineArgumentDefinition[] argDefs = { createUnnamedSeriesFileArgumentDefinition(true, "Feature file(s)"), new FileToReadArgumentDefinition("massesfile", true, "File of masses to match, in column 'mass'. " + "Any other columns in the file will be carried forward into output"), new FileToWriteArgumentDefinition("out", false, null), new DirectoryToWriteArgumentDefinition("outdir", false, null), new DecimalArgumentDefinition("deltappm", false, "Mass tolerance (ppm)", DEFAULT_DELTA_PPM), new DecimalArgumentDefinition("calibratedeltappm", false, "Mass tolerance (ppm) for calibration", DEFAULT_CALIBRATE_DELTA_PPM), new BooleanArgumentDefinition("showcharts", false, "show charts?", showCharts), new BooleanArgumentDefinition("calibrate", false, "calibrate masses based on matches? Beware, this will increase bad matches if " + "there isn't a strong signal of good matches. Calibration is very simple, " + "a ppm linear transformation based on median of uncalibrated match delta mass ppm.", false) }; addArgumentDefinitions(argDefs); } public void assignArgumentValues() throws ArgumentValidationException { inputFiles = getUnnamedSeriesFileArgumentValues(); outFile = getFileArgumentValue("out"); outDir = getFileArgumentValue("outdir"); if (!hasArgumentValue("out") && !hasArgumentValue("outdir")) throw new ArgumentValidationException("No output location specified"); if (hasArgumentValue("out") && hasArgumentValue("outdir")) throw new ArgumentValidationException("out and outdir both specified"); if (inputFiles.length > 1) { assertArgumentPresent("outdir"); } deltaPPM = getFloatArgumentValue("deltappm"); calibrateDeltaPPM = getFloatArgumentValue("calibratedeltappm"); File massesFile = getFileArgumentValue("massesfile"); try { TabLoader loader = new TabLoader(massesFile); boolean foundMass = false; for (TabLoader.ColumnDescriptor column : loader.getColumns()) { if ("mass".equals(column.name)) foundMass = true; else otherMassFileColNames.add(column.name); } if (!foundMass) throw new ArgumentValidationException("masses file doesn't have 'mass' column."); for (Object mapObj : loader.load()) mapsWithMassesAsc.add(new MapWithMass( (Map<String, Object>) mapObj)); } catch (IOException e) { throw new ArgumentValidationException("Failed to load file " + massesFile.getAbsolutePath(),e); } Collections.sort(mapsWithMassesAsc, new MapWithMassComparatorAsc()); _log.info("Loaded " + mapsWithMassesAsc.size() + ". Min=" + mapsWithMassesAsc.get(0).mass + ", Max=" + mapsWithMassesAsc.get(mapsWithMassesAsc.size()-1).mass); showCharts = getBooleanArgumentValue("showcharts"); shouldCalibrate = getBooleanArgumentValue("calibrate"); } /** * do the actual work */ public void execute() throws CommandLineModuleExecutionException { for (File file : inputFiles) processFile(file); } public void processFile(File inputFile) throws CommandLineModuleExecutionException { _log.info("Processing file " + inputFile.getAbsolutePath()); FeatureSet featureSet = null; try { featureSet = new FeatureSet(inputFile); } catch (Exception e) { throw new CommandLineModuleExecutionException(e); } List<Feature> featuresMassAsc = Arrays.asList(featureSet.getFeatures()); Collections.sort(featuresMassAsc, new Feature.MassAscComparator()); if (shouldCalibrate) { _log.info("Calibrating..."); Map<MapWithMass, List<Feature>> matchingResult = massMatchFull(featuresMassAsc, mapsWithMassesAsc, calibrateDeltaPPM); List<Float> matchDeltaMassesPPM = new ArrayList<Float>(); for (MapWithMass map : matchingResult.keySet()) { for (Feature feature : matchingResult.get(map)) { float deltaMass = feature.mass - map.mass; float deltaMassPPM = MassUtilities.convertDaToPPM(deltaMass, map.mass); matchDeltaMassesPPM.add(deltaMassPPM); } } float meanDeltaMassPPM = (float) BasicStatistics.median(matchDeltaMassesPPM); for (Feature feature : featuresMassAsc) { float adjustmentThisFeature = MassUtilities.convertPPMToDa(meanDeltaMassPPM, feature.getMass()); feature.setMass(feature.getMass() - adjustmentThisFeature); feature.updateMz(); } _log.info("Done calibrating. Changed all feature masses by " + -meanDeltaMassPPM + "ppm."); if (showCharts && !matchDeltaMassesPPM.isEmpty()) { new PanelWithHistogram(matchDeltaMassesPPM, "deltaMassPPM preCalibrate").displayInTab(); } } Map<MapWithMass, List<Feature>> matchingResult = massMatchFull(featuresMassAsc, mapsWithMassesAsc, deltaPPM); _log.info("Masses matched: " + matchingResult.size()); File outputFile; if (outFile != null) outputFile = outFile; else { outputFile = new File(outDir, inputFile.getName() + ".matched.tsv"); } List<Float> matchDeltaMassesPPM = new ArrayList<Float>(); List<Float> matchMasses = new ArrayList<Float>(); List<Float> matchLogInts = new ArrayList<Float>(); List<Integer> massMatchedFeatureCounts = new ArrayList<Integer>(); List<Integer> massMatchedChargeStateCounts = new ArrayList<Integer>(); Map<Feature, Integer> featureMatchcountMap = new HashMap<Feature, Integer>(); try { PrintWriter outPW = new PrintWriter(outputFile); StringBuffer headerLineBuf = new StringBuffer("targetmass"); for (String colName : otherMassFileColNames) headerLineBuf.append("\t" + colName); headerLineBuf.append("\tdeltamass\tdeltappm\t"); headerLineBuf.append(Feature.getFeatureHeader(featureSet.getExtraInformationTypesArray())); outPW.println(headerLineBuf.toString()); outPW.flush(); for (MapWithMass map : matchingResult.keySet()) { massMatchedFeatureCounts.add(matchingResult.get(map).size()); Set<Integer> chargeStatesMatched = new HashSet<Integer>(); for (Feature feature : matchingResult.get(map)) { float deltaMass = feature.mass - map.mass; float deltaMassPPM = MassUtilities.convertDaToPPM(deltaMass, map.mass); matchDeltaMassesPPM.add(deltaMassPPM); matchMasses.add(map.mass); matchLogInts.add((float) Math.log(feature.intensity)); chargeStatesMatched.add(feature.charge); int matchCountThisFeature = 0; if (featureMatchcountMap.containsKey(feature)) matchCountThisFeature = featureMatchcountMap.get(feature); featureMatchcountMap.put(feature, matchCountThisFeature + 1); List<String> chunks = new ArrayList<String>(); chunks.add("" + map.mass); for (String otherCol : otherMassFileColNames) chunks.add("" + map.map.get(otherCol)); chunks.add("" + deltaMass); chunks.add("" + deltaMassPPM); String lineStart = MS2ExtraInfoDef.convertStringListToString(chunks, "\t"); String line = lineStart + "\t" + feature.toString(featureSet.getExtraInformationTypesArray()); outPW.println(line); outPW.flush(); } massMatchedChargeStateCounts.add(chargeStatesMatched.size()); } outPW.close(); _log.info("Wrote output file " + outputFile.getAbsolutePath()); } catch (Exception e) { throw new CommandLineModuleExecutionException("Failed to write output file " + outFile.getAbsolutePath()); } if (showCharts && !matchDeltaMassesPPM.isEmpty()) { new PanelWithHistogram(matchDeltaMassesPPM, "deltaMassPPM").displayInTab(); new PanelWithScatterPlot(matchMasses, matchDeltaMassesPPM, "mass_v_deltappm").displayInTab(); new PanelWithHistogram(new ArrayList<Integer>(featureMatchcountMap.values()), "MatchesPerFeature").displayInTab(); new PanelWithHistogram(new ArrayList<Integer>(massMatchedFeatureCounts), "MatchesPerMass").displayInTab(); new PanelWithHistogram(new ArrayList<Integer>(massMatchedChargeStateCounts), "ChargeStatesPerMass").displayInTab(); new PanelWithScatterPlot(matchDeltaMassesPPM, matchLogInts, "deltappm_v_logint").displayInTab(); } } protected static class MapWithMass { public float mass; public Map<String, Object> map; public MapWithMass(float mass, Map<String, Object> map) { this.mass = mass; this.map = map; } public MapWithMass(Map<String, Object> map) { float mass = ((Double) map.get("mass")).floatValue(); this.mass = mass; this.map = map; } } public static class MapWithMassComparatorAsc implements Comparator<MapWithMass> { public int compare(MapWithMass o1, MapWithMass o2) { return o1.mass == o2.mass ? 0 : o1.mass < o2.mass ? -1 : 1; } } /** * This version takes a map from formulas to adducts * @param featuresSortedMassAsc * * @return */ public Map<MapWithMass, List<Feature>> massMatchFull(List<Feature> featuresSortedMassAsc, List<MapWithMass> mapsSortedMassAsc, float deltaPPMThisMatch) { int minPossibleMassIndex = 0; Map<MapWithMass, List<Feature>> result = new HashMap<MapWithMass, List<Feature>>(); for (int featureIndex = 0; featureIndex<featuresSortedMassAsc.size(); featureIndex++) { Feature feature = featuresSortedMassAsc.get(featureIndex); //not using calculated feature mass, because that presumes M+H and subtracts the H float featureMass = feature.getMass(); float massToleranceDa = MassUtilities.calculateAbsoluteDeltaMass(featureMass, deltaPPMThisMatch, FeatureSetMatcher.DELTA_MASS_TYPE_PPM); float minMatchMass = featureMass - massToleranceDa; float maxMatchMass = featureMass + massToleranceDa; while (mapsSortedMassAsc.get(minPossibleMassIndex).mass < minMatchMass && minPossibleMassIndex < mapsSortedMassAsc.size()-1) { minPossibleMassIndex++; } for (int i=minPossibleMassIndex ; i< mapsSortedMassAsc.size(); i++) { MapWithMass mapWithMass = mapsSortedMassAsc.get(i); float mass = mapsSortedMassAsc.get(i).mass; if (mass < minMatchMass) continue; else if (maxMatchMass < mass) break; else { List<Feature> featuresMatchedThisMap = result.get(mapWithMass); if (featuresMatchedThisMap == null) { featuresMatchedThisMap = new ArrayList<Feature>(); result.put(mapWithMass, featuresMatchedThisMap); } featuresMatchedThisMap.add(feature); } } } return result; } }