/* * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fhcrc.cpl.viewer.metabologna.commandline; import org.fhcrc.cpl.toolbox.commandline.arguments.*; import org.fhcrc.cpl.toolbox.filehandler.TabLoader; import org.fhcrc.cpl.toolbox.ApplicationContext; import org.fhcrc.cpl.toolbox.Rounder; import org.fhcrc.cpl.toolbox.proteomics.MassUtilities; import org.fhcrc.cpl.toolbox.proteomics.feature.Feature; import org.fhcrc.cpl.toolbox.chem.*; import org.fhcrc.cpl.toolbox.commandline.CommandLineModuleExecutionException; import org.fhcrc.cpl.toolbox.commandline.CommandLineModule; import org.fhcrc.cpl.toolbox.commandline.CommandLineModuleUtilities; import org.fhcrc.cpl.toolbox.proteomics.feature.Spectrum; import org.fhcrc.cpl.viewer.align.PeptideArrayAnalyzer; import org.fhcrc.cpl.viewer.commandline.modules.BaseViewerCommandLineModuleImpl; import org.fhcrc.cpl.viewer.metabologna.*; import org.apache.log4j.Logger; import java.io.*; import java.util.*; /** * Module for matching the masses in a 'peptide' array to a metabolite database. Uses the mass in the middle * of each cell. */ public class MatchArrayMetMassesCLM extends BaseViewerCommandLineModuleImpl implements CommandLineModule { protected static Logger _log = Logger.getLogger(MatchArrayMetMassesCLM.class); protected File arrayFile; protected MetaboliteDatabaseMatcher metDBMatcher; protected File outFile; protected float massTolerancePPM = 2f; protected List<ChemicalCompound> databaseCompoundsByMass = null; protected boolean shouldFixArrayMasses = true; protected boolean shouldUseBaseMod = false; protected String[] annotationColumnNames = new String[] { "class","Accession_code","cas_number","kegg_id","subclass","species","pathway1","pathway2"}; Map<String,Map<String,Object>> annotations = null; public MatchArrayMetMassesCLM() { init(); } protected void init() { mCommandName = "matcharrmetmasses"; mShortDescription = "Match array metabolite masses to a metabolite database"; mHelpMessage = "Metabolite database should be a .tsv file with (at least) the following columns " + "(all but the first 3 of which may be empty): " + "name, formula, smiles, class, Accession_code, cas_number, kegg_id, subclass, species, pathway1, pathway2"; CommandLineArgumentDefinition[] argDefs = { this.createUnnamedFileArgumentDefinition(true, "Input array file"), new FileToReadArgumentDefinition("metdb", true, "Metabolite database file"), new FileToWriteArgumentDefinition("out", false, "out"), new DirectoryToWriteArgumentDefinition("outdir", false, "outdir"), new BooleanArgumentDefinition("fixarraymasses", false, "Should fix array masses so that " + "they are mz * z? (this is a 'correction' for arrays whose masses represent " + "M+H ion masses)", shouldFixArrayMasses), new DecimalArgumentDefinition("deltappm", false, "Delta mass (ppm) between the center of the array cell and the mass of the metabolite", massTolerancePPM), new BooleanArgumentDefinition("usebasemod", false, "Include base mass, i.e., [M]?", shouldUseBaseMod), new StringListArgumentDefinition("mods", true, "modifications to use on every compound"), new BooleanArgumentDefinition("showcharts", false, "show charts?", false), }; addArgumentDefinitions(argDefs); } public static ChemicalModification createModForString(String modString) { if (modString.equals("M+H")) return new UnknownHAdditionMod(); if (modString.equals("M+Na")) return new UnknownFormulaAdditionMod(new ChemicalFormula("Na1"), "M+Na","UnknownNaAdditionMod"); if (modString.equals("2M+H")) return new DoubleFormulaPlusSomethingMod(new ChemicalFormula("H"), "2M+H","DoubleMassPlusHMod"); if (modString.equals("2M+Na")) return new DoubleFormulaPlusSomethingMod(new ChemicalFormula("Na"), "2M+Na","DoubleMassPlusNaMod"); return null; } public void assignArgumentValues() throws ArgumentValidationException { arrayFile = this.getUnnamedFileArgumentValue(); shouldFixArrayMasses = getBooleanArgumentValue("fixarraymasses"); if (shouldFixArrayMasses) System.err.println("'Fixing' feature masses by making them 1Da higher than would be the case in proteomics."); shouldUseBaseMod = getBooleanArgumentValue("usebasemod"); metDBMatcher = new MetaboliteDatabaseMatcher(); List<ChemicalModification> mods = new ArrayList<ChemicalModification>(); for (String modString : getStringListArgumentValue("mods")) { mods.add(createModForString(modString)); } metDBMatcher.setChemicalModifications(mods); try { databaseCompoundsByMass = ChemicalCompound.loadCompoundsFromFile(getFileArgumentValue("metdb"), 2); Collections.sort(databaseCompoundsByMass, new ChemicalCompound.ComparatorMassAsc()); annotations = loadAnnotations(getFileArgumentValue("metdb")); } catch (IOException e) { throw new ArgumentValidationException("Failed to load metabolite database",e); } metDBMatcher.setDatabaseCompounds(databaseCompoundsByMass); metDBMatcher.setUseUnmodifiedAdduct(shouldUseBaseMod); massTolerancePPM = getFloatArgumentValue("deltappm"); outFile = getFileArgumentValue("out"); List<ChemicalModification> chemicalModsToAdd = new ArrayList<ChemicalModification>(); for (String modString : getStringListArgumentValue("mods")) { ChemicalModification mod = createModForString(modString); if (mod == null) throw new ArgumentValidationException("Failed to process modification string: " + modString); chemicalModsToAdd.add(mod); ApplicationContext.infoMessage("Adding mod: " + mod.getSymbol()); } metDBMatcher.setChemicalModifications(chemicalModsToAdd); metDBMatcher.setShowCharts(getBooleanArgumentValue("showcharts")); } public void execute() throws CommandLineModuleExecutionException { // PeptideArrayAnalyzer arrayAnalyzer = null; Map<String, Object>[] arrayRows; List<Feature> rowFeatures = new ArrayList<Feature>(); TabLoader loader ; String headerLine; try { // arrayAnalyzer = new PeptideArrayAnalyzer(arrayFile); // Map<Integer, Map<String, List<Feature>>> detailsRowMap = arrayAnalyzer.loadDetailsRowMapsById(); loader = new TabLoader(arrayFile); StringBuffer headerLineBuf = new StringBuffer(""); try { for (TabLoader.ColumnDescriptor column : loader.getColumns()) headerLineBuf.append(column.name + "\t"); headerLineBuf.append("formula\tmatchcount\tcompound\tppm\thmdb_id\tcas_number\tkegg_id\tiontype\tdeltamass\tSMILES\tclass\tsubclass\tspecies\tpathway1\tpathway2"); } catch (IOException e) { throw new CommandLineModuleExecutionException(e); } headerLine = headerLineBuf.toString(); arrayRows = (Map<String, Object>[]) loader.load(); for (Map<String, Object> row : arrayRows) { int id = Integer.parseInt(row.get("id").toString()); // Map<String, List<Feature>> runFeaturesMap = detailsRowMap.get(id); int charge = (Integer) row.get("charge"); double minMass = (Double) row.get("minMass"); double maxMass = (Double) row.get("maxMass"); if (shouldFixArrayMasses) { minMass += (charge * Spectrum.HYDROGEN_ION_MASS); maxMass += (charge * Spectrum.HYDROGEN_ION_MASS); row.put("minMass", minMass); row.put("maxMass", minMass); } double meanMass = (minMass + maxMass) / 2; Feature feature = new Feature(0, (float) meanMass, 200); feature.setMass((float) meanMass); feature.setMz((float) meanMass / charge); feature.setCharge(charge); rowFeatures.add(feature); } } catch (IOException e) { throw new CommandLineModuleExecutionException(e); } ApplicationContext.infoMessage("Matching " + rowFeatures.size() + " masses..."); Map<Feature, Map<ChemicalFormula, List<Adduct>>> matchingResult = metDBMatcher.massMatchFull(rowFeatures.toArray(new Feature[rowFeatures.size()]), massTolerancePPM, 1); ApplicationContext.infoMessage("Done matching"); try { PrintWriter outPW = new PrintWriter(outFile); outPW.println(headerLine); outPW.flush(); int matchCount = 0; ApplicationContext.infoMessage("Writing output array..."); for (int i=0; i<arrayRows.length; i++) { int matchCountThisRow = 0; StringBuffer lineBuf = new StringBuffer(); for (TabLoader.ColumnDescriptor column : loader.getColumns()) { Object val = arrayRows[i].get(column.name); if (val != null) lineBuf.append(val.toString()); lineBuf.append( "\t"); } Feature feature = rowFeatures.get(i); Map<ChemicalFormula, List<Adduct>> featureMatchingResult = matchingResult.get(feature); StringBuffer formulasBuf = new StringBuffer(); StringBuffer namesBuf = new StringBuffer(); StringBuffer ionTypesBuf = new StringBuffer(); StringBuffer massDiffsBuf = new StringBuffer(); StringBuffer smilesBuf = new StringBuffer(); StringBuffer classBuf = new StringBuffer(); StringBuffer subclassBuf = new StringBuffer(); StringBuffer speciesBuf = new StringBuffer(); StringBuffer pathway1Buf = new StringBuffer(); StringBuffer pathway2Buf = new StringBuffer(); StringBuffer hmdbIdBuf = new StringBuffer(); StringBuffer casNumberBuf = new StringBuffer(); StringBuffer ppmBuf = new StringBuffer(); StringBuffer keggIdBuf = new StringBuffer(); List<StringBuffer> allBufs = Arrays.asList( new StringBuffer[] {formulasBuf, namesBuf, ppmBuf, hmdbIdBuf, casNumberBuf, keggIdBuf, ionTypesBuf,massDiffsBuf,smilesBuf,classBuf,subclassBuf, speciesBuf,pathway1Buf,pathway2Buf}); if (featureMatchingResult != null) { matchCount++; boolean first = true; for (ChemicalFormula formula : featureMatchingResult.keySet()) { for (Adduct adduct : featureMatchingResult.get(formula)) { matchCountThisRow++; if (!first) { for (StringBuffer buf : allBufs) buf.append(";"); } formulasBuf.append(formula); namesBuf.append(adduct.getCompound().getName()); ionTypesBuf.append(adduct.getIonTypeString()); massDiffsBuf.append(Rounder.round(feature.getMass() - adduct.getCommonestIsotopeMass(), 5)); if (adduct.getMolecule() != null) smilesBuf.append(ChemCalcs.createSMILESString(adduct.getMolecule())); String name = adduct.getCompound().getName(); classBuf.append(annotations.get("class").get(name)); subclassBuf.append(annotations.get("subclass").get(name)); speciesBuf.append(annotations.get("species").get(name)); pathway1Buf.append(annotations.get("pathway1").get(name)); pathway2Buf.append(annotations.get("pathway2").get(name)); hmdbIdBuf.append(annotations.get("Accession_code").get(name)); casNumberBuf.append(annotations.get("cas_number").get(name)); keggIdBuf.append(annotations.get("kegg_id").get(name)); float ppmDiff = (float) MassUtilities.convertDaToPPM( feature.getMz() - (float) adduct.getCommonestIsotopeMass(), (float) adduct.getCommonestIsotopeMass()); ppmBuf.append(Rounder.round(ppmDiff,1)); first = false; } } } lineBuf.append("\t" + matchCountThisRow); boolean firstBuf = true; for (StringBuffer buf : allBufs) { if (!firstBuf) lineBuf.append("\t" + buf.toString()); firstBuf = false; } outPW.println(lineBuf.toString()); outPW.flush(); } ApplicationContext.infoMessage("Matched " + matchCount + " out of " + arrayRows.length + " array rows"); outPW.close(); } catch (Exception e) { throw new CommandLineModuleExecutionException("Failed writing output file",e); } } protected Map<String,Map<String,Object>> loadAnnotations(File file) throws IOException { ApplicationContext.infoMessage("Loading annotations from file " + file.getAbsolutePath()); TabLoader loader = new TabLoader(new FileReader(file),true); ApplicationContext.infoMessage("Annotation file columns:"); for (TabLoader.ColumnDescriptor col : loader.getColumns()) ApplicationContext.infoMessage("\t" + col.name); //a separate map for each annotation column Map<String,Map<String,Object>> result = new HashMap<String,Map<String,Object>>(); //System.err.println("Loading annotations..."); //Set<String> allElements = new HashSet<String>(); for (Map row : (Map[]) loader.load()) { //try{ //allElements.addAll(ChemCalcs.emp2atomCount((String)row.get("formula")).keySet()); //}catch(Exception e){} //System.err.println("ROW"); //for (Object key1 : row.keySet()) System.err.println("\t" + key1.toString()); Object key = row.get("name"); if (key != null) { //System.err.println("CHECK"); String keyString = key.toString(); for (String annotationColumnName : annotationColumnNames) { //System.err.println("Checking " + annotationColumnName + "..."); if (row.get(annotationColumnName) == null) continue; Map<String, Object> mapThisAnnotColumn = result.get(annotationColumnName); if (mapThisAnnotColumn == null) { mapThisAnnotColumn = new HashMap<String, Object>(); result.put(annotationColumnName, mapThisAnnotColumn); } mapThisAnnotColumn.put(keyString,row.get(annotationColumnName)); //System.err.println("\t found " +row.get(annotationColumnName) ); } } } ApplicationContext.infoMessage("Loaded annotation values for " + result.size() + " columns:"); for (String annot : result.keySet()) { ApplicationContext.infoMessage("\t" + annot + ": " + result.get(annot).size()); } //for (String element : allElements) System.err.println(element); return result; } }