/*
 * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.fhcrc.cpl.viewer.metabologna.commandline;

import org.fhcrc.cpl.toolbox.commandline.arguments.*;
import org.fhcrc.cpl.toolbox.filehandler.TabLoader;
import org.fhcrc.cpl.toolbox.ApplicationContext;
import org.fhcrc.cpl.toolbox.Rounder;
import org.fhcrc.cpl.toolbox.proteomics.feature.Feature;
import org.fhcrc.cpl.toolbox.chem.*;
import org.fhcrc.cpl.toolbox.commandline.CommandLineModuleExecutionException;
import org.fhcrc.cpl.toolbox.commandline.CommandLineModule;
import org.fhcrc.cpl.toolbox.commandline.CommandLineModuleUtilities;
import org.fhcrc.cpl.viewer.commandline.modules.BaseViewerCommandLineModuleImpl;
import org.fhcrc.cpl.viewer.metabologna.MetaboliteDatabaseMatcher;
import org.fhcrc.cpl.viewer.metabologna.ReduceDoubleBondAdd2HMod;
import org.fhcrc.cpl.viewer.metabologna.ReduceDoubleBondAddWaterMod;
import org.apache.log4j.Logger;
import org.fhcrc.cpl.viewer.metabologna.UnknownHAdditionMod;

import java.io.*;
import java.util.*;

/**
 * Command-line module ("matchmetmasses") that reads one or more tab-delimited
 * spreadsheets containing a "mass" column, matches each mass against a
 * metabolite database within a ppm tolerance, and writes one output row per
 * (input row, formula, adduct) match.
 *
 * The output repeats the input columns and appends the columns
 * formula, compound, iontype, deltamass and SMILES.
 */
public class MatchMetMassesCLM extends BaseViewerCommandLineModuleImpl
        implements CommandLineModule
{
    protected static Logger _log = Logger.getLogger(MatchMetMassesCLM.class);

    protected File[] inputFiles;
    protected MetaboliteDatabaseMatcher metDBMatcher;
    protected File outFile;
    protected File outDir;
    protected float massTolerancePPM = 2f;
    protected List<ChemicalCompound> databaseCompoundsByMass = null;
    //Loader for the file most recently handed to processFile(); its column list drives the output
    protected TabLoader loader;

    protected boolean shouldCollapseByMax = false;
    protected boolean shouldFixFeatureMasses = false;
    protected boolean shouldUseBaseMod = false;

    public MatchMetMassesCLM()
    {
        init();
    }

    /**
     * Sets the command name and help text and registers the argument definitions.
     */
    protected void init()
    {
        mCommandName = "matchmetmasses";
        mShortDescription = "Match metabolite masses";
        mHelpMessage = mShortDescription;

        CommandLineArgumentDefinition[] argDefs =
                {
                        this.createUnnamedSeriesFileArgumentDefinition(true, "Input masses spreadsheet file"),
                        new FileToReadArgumentDefinition("metdb", true, "Metabolite database file"),
                        new FileToWriteArgumentDefinition("out", false, "out"),
                        new DirectoryToWriteArgumentDefinition("outdir", false, "outdir"),
                        new BooleanArgumentDefinition("fixfeaturemasses", false, "Should fix feature masses so that " +
                                "they are mz * z? (different from peptide mass)", shouldFixFeatureMasses),
                        new DecimalArgumentDefinition("deltappm", false, "Delta mass (ppm)", massTolerancePPM),
                        new BooleanArgumentDefinition("usebasemod", false, "Use base mass, i.e., [M]?",
                                shouldUseBaseMod),
                };
        addArgumentDefinitions(argDefs);
    }

    /**
     * Reads the parsed argument values, loads the metabolite database sorted by
     * ascending mass, and configures the matcher.
     *
     * @throws ArgumentValidationException if the database file cannot be loaded, or if
     *         several input files are given without an output directory
     */
    public void assignArgumentValues()
            throws ArgumentValidationException
    {
        inputFiles = this.getUnnamedSeriesFileArgumentValues();

        shouldFixFeatureMasses = getBooleanArgumentValue("fixfeaturemasses");
        shouldUseBaseMod = getBooleanArgumentValue("usebasemod");

        metDBMatcher = new MetaboliteDatabaseMatcher();
        List<ChemicalModification> mods = new ArrayList<ChemicalModification>();
        //todo: parameterize
        //mods.add(new ReduceDoubleBondAdd2HMod());
        //mods.add(new ReduceDoubleBondAddWaterMod());
        mods.add(new UnknownHAdditionMod());
        metDBMatcher.setChemicalModifications(mods);

        try
        {
            //NOTE(review): meaning of the second argument (2) is not visible here -- confirm against
            //ChemicalCompound.loadCompoundsFromFile
            databaseCompoundsByMass = ChemicalCompound.loadCompoundsFromFile(getFileArgumentValue("metdb"), 2);
            Collections.sort(databaseCompoundsByMass, new ChemicalCompound.ComparatorMassAsc());
        }
        catch (IOException e)
        {
            throw new ArgumentValidationException("Failed to load metabolite database",e);
        }
        metDBMatcher.setDatabaseCompounds(databaseCompoundsByMass);
        metDBMatcher.setUseUnmodifiedAdduct(shouldUseBaseMod);

        massTolerancePPM = getFloatArgumentValue("deltappm");

        outFile = getFileArgumentValue("out");
        outDir= getFileArgumentValue("outdir");

        if (inputFiles.length > 1 && outDir == null)
            throw new ArgumentValidationException("multiple input files, no output directory specified");
    }

    /**
     * Processes every input file in turn.
     *
     * With a single input the explicit "out" file is used when given. With several
     * inputs a separate ".matched.tsv" file is always derived per input in the output
     * directory; otherwise every input would be written to the same "out" file and
     * only the last one would survive.
     *
     * @throws CommandLineModuleExecutionException if any file fails to load, match or write
     */
    public void execute() throws CommandLineModuleExecutionException
    {
        boolean multipleInputs = inputFiles.length > 1;
        for (File file : inputFiles)
        {
            File outputFile = outFile;
            if (outputFile == null || multipleInputs)
                outputFile = CommandLineModuleUtilities.createOutputFile(file, ".matched.tsv", outDir);
            processFile(file, outputFile);
        }
    }

    /**
     * Loads the rows of one masses file, matches them against the database and
     * writes the matches to the given output file.
     *
     * @param massesFile tab-delimited input; every row must have a "mass" value, and also
     *                   "mz" and "charge" values when fixfeaturemasses is on
     * @param outputFile file the matches are written to
     * @throws CommandLineModuleExecutionException if the input cannot be read, a required
     *         value is missing, or the output cannot be written
     */
    public void processFile(File massesFile, File outputFile) throws CommandLineModuleExecutionException
    {
        Map<String,Object>[] rows = loadRows(massesFile);
        sortRowsByMass(rows);

        List<Feature> featuresForMasses = buildFeatures(rows);

        ApplicationContext.infoMessage("Matching " + rows.length + " masses...");
        Map<Feature, Map<ChemicalFormula, List<Adduct>>> matchingResult =
                metDBMatcher.massMatchFull(featuresForMasses.toArray(new Feature[featuresForMasses.size()]),
                        massTolerancePPM, 1);
        ApplicationContext.infoMessage("Done matching");

        writeMatches(rows, featuresForMasses, matchingResult, outputFile);
    }

    /**
     * Loads all rows of the masses file into the loader field, validates required
     * values, and optionally replaces each mass with mz * charge.
     */
    @SuppressWarnings("unchecked")
    private Map<String,Object>[] loadRows(File massesFile) throws CommandLineModuleExecutionException
    {
        Map<String,Object>[] rows;
        try
        {
            loader = new TabLoader(massesFile);
            rows = (Map<String,Object>[]) loader.load();

            for (int i=0; i<rows.length; i++)
            {
                Map<String,Object> row = rows[i];
                if (row.get("mass") == null)
                    throw new CommandLineModuleExecutionException(
                            "ERROR! Missing mass values in masses file, row " + (i+1));
                if (shouldFixFeatureMasses)
                {
                    Object mz = row.get("mz");
                    Object charge = row.get("charge");
                    if (mz == null || charge == null)
                        throw new CommandLineModuleExecutionException(
                                "ERROR! Missing mz or charge values in masses file, row " + (i+1));
                    //fix masses: overwrite with mz * z
                    row.put("mass", Double.parseDouble(mz.toString()) * Integer.parseInt(charge.toString()));
                }
            }
            ApplicationContext.infoMessage("Loaded " + rows.length + " rows from masses file");
        }
        catch (IOException e)
        {
            throw new CommandLineModuleExecutionException(e);
        }
        return rows;
    }

    /**
     * Sorts the rows in place by ascending numeric value of their "mass" entry.
     */
    private void sortRowsByMass(Map<String,Object>[] rows)
    {
        Arrays.sort(rows, new Comparator<Map<String, Object>>()
        {
            public int compare(Map<String, Object> o1, Map<String, Object> o2)
            {
                //Double.compare gives a consistent total order, including for NaN
                return Double.compare(Double.parseDouble(o1.get("mass").toString()),
                        Double.parseDouble(o2.get("mass").toString()));
            }
        });
    }

    /**
     * Builds one Feature per row, in row order, so that index i of the returned
     * list corresponds to rows[i].
     */
    private List<Feature> buildFeatures(Map<String,Object>[] rows)
    {
        List<Feature> features = new ArrayList<Feature>(rows.length);
        for (Map<String, Object> row : rows)
        {
            float mass = (float) Double.parseDouble(row.get("mass").toString());
            //mass and mz are both set to the row's mass, with charge 1
            Feature feature = new Feature(0, mass, 200);
            feature.setMass(mass);
            feature.setMz(mass);
            feature.setCharge(1);
            features.add(feature);
        }
        return features;
    }

    /**
     * Writes the header and one line per (row, formula, adduct) match. Rows with
     * no entry in matchingResult are skipped. The writer is always closed, and
     * write errors that PrintWriter would otherwise swallow are reported.
     */
    private void writeMatches(Map<String,Object>[] rows, List<Feature> featuresForMasses,
                              Map<Feature, Map<ChemicalFormula, List<Adduct>>> matchingResult,
                              File outputFile) throws CommandLineModuleExecutionException
    {
        PrintWriter outPW = null;
        try
        {
            outPW = new PrintWriter(outputFile);
            TabLoader.ColumnDescriptor[] columns = loader.getColumns();

            StringBuilder headerLineBuf = new StringBuilder("id\t");
            for (int i=0; i<columns.length; i++)
            {
                if (i>0)
                    headerLineBuf.append("\t");
                headerLineBuf.append(columns[i].name);
            }
            headerLineBuf.append("\tformula\tcompound\tiontype\tdeltamass\tSMILES");
            outPW.println(headerLineBuf);

            int matchId=0;
            for (int i=0; i<rows.length; i++)
            {
                Feature feature = featuresForMasses.get(i);
                Map<ChemicalFormula, List<Adduct>> featureMatchingResult = matchingResult.get(feature);
                if (featureMatchingResult == null)
                    continue;
                matchId++;

                Map<String, Object> row = rows[i];
                //Use the row's own id when it has one, whatever its type; otherwise the running match count
                Object idFromRow = row.get("id");
                String matchIdForOutput = idFromRow != null ? idFromRow.toString() : String.valueOf(matchId);

                StringBuilder lineBufOrigCols = new StringBuilder(matchIdForOutput);
                for (TabLoader.ColumnDescriptor column : columns)
                {
                    lineBufOrigCols.append("\t");
                    Object val = row.get(column.name);
                    if (val != null)
                        lineBufOrigCols.append(val);
                }
                String origColsPrefix = lineBufOrigCols.toString();

                for (Map.Entry<ChemicalFormula, List<Adduct>> formulaEntry : featureMatchingResult.entrySet())
                {
                    ChemicalFormula formula = formulaEntry.getKey();
                    for (Adduct adduct : formulaEntry.getValue())
                    {
                        outPW.println(origColsPrefix + "\t" + formula + "\t" +
                                adduct.getCompound().getName() + "\t" + adduct.getIonTypeString() + "\t" +
                                Rounder.round(feature.getMass() - adduct.getCommonestIsotopeMass(), 5) + "\t" +
                                ChemCalcs.createSMILESString(adduct.getMolecule()));
                    }
                }
            }

            //PrintWriter never throws on write failure; checkError() flushes and reports it
            if (outPW.checkError())
                throw new IOException("Error writing to " + outputFile.getAbsolutePath());

            ApplicationContext.infoMessage("Matched " + matchId + " out of " + rows.length + " rows");
        }
        catch (Exception e)
        {
            throw new CommandLineModuleExecutionException("Failed writing output file",e);
        }
        finally
        {
            if (outPW != null)
                outPW.close();
        }
    }
}