/* * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fhcrc.cpl.viewer.metabologna.commandline; import org.fhcrc.cpl.toolbox.commandline.arguments.*; import org.fhcrc.cpl.toolbox.ApplicationContext; import org.fhcrc.cpl.toolbox.statistics.RegressionUtilities; import org.fhcrc.cpl.toolbox.statistics.BasicStatistics; import org.fhcrc.cpl.toolbox.chem.ChemicalCompound; import org.fhcrc.cpl.toolbox.chem.ChemCalcs; import org.fhcrc.cpl.toolbox.gui.chart.PanelWithScatterPlot; import org.fhcrc.cpl.toolbox.gui.chart.PanelWithHistogram; import org.fhcrc.cpl.toolbox.commandline.CommandLineModuleExecutionException; import org.fhcrc.cpl.toolbox.commandline.CommandLineModule; import org.fhcrc.cpl.viewer.commandline.modules.BaseViewerCommandLineModuleImpl; import org.fhcrc.cpl.viewer.metabologna.MetaboliteUtilities; import org.apache.log4j.Logger; import org.openscience.cdk.reaction.type.*; import org.openscience.cdk.reaction.type.parameters.IParameterReact; import org.openscience.cdk.reaction.type.parameters.SetReactionCenter; import org.openscience.cdk.reaction.ReactionEngine; import org.openscience.cdk.reaction.IReactionProcess; import org.openscience.cdk.interfaces.IMolecule; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IMolecularFormula; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.Molecule; import org.openscience.cdk.Atom; import org.openscience.cdk.AtomContainer; import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; import org.openscience.cdk.dict.DictionaryDatabase; import java.io.*; import java.util.*; /** */ public class AnalyzeFormulasCLM extends BaseViewerCommandLineModuleImpl implements CommandLineModule { protected static Logger _log = Logger.getLogger(AnalyzeFormulasCLM.class); protected File file; protected boolean shouldCollapseByMax = false; protected String compoundClass = null; public AnalyzeFormulasCLM() { init(); } protected void init() { mCommandName = "analyzeformulas"; mShortDescription = "Analyze chemical formulas"; mHelpMessage = mShortDescription; CommandLineArgumentDefinition[] argDefs = { this.createUnnamedFileArgumentDefinition(true, "Input file"), new StringArgumentDefinition("class", false, "Class of compounds to consider") }; addArgumentDefinitions(argDefs); } public void assignArgumentValues() throws ArgumentValidationException { file = this.getUnnamedFileArgumentValue(); compoundClass = getStringArgumentValue("class"); } public void execute() throws CommandLineModuleExecutionException { List<ChemicalCompound> compounds = null; try { compounds = ChemicalCompound.loadCompoundsFromFile(file, 2); // System.err.println(formula + ", " + numOfElement); } catch (Exception e) { throw new CommandLineModuleExecutionException("Failed to load file " + file.getAbsolutePath(),e); } if (compoundClass != null) { List<ChemicalCompound> newCompounds = new ArrayList<ChemicalCompound>(); for (ChemicalCompound compound: compounds) { if (compound.getCompoundClass().equals(compoundClass)) newCompounds.add(compound); } compounds = newCompounds; } ApplicationContext.setMessage("Loaded " + compounds.size() + " compounds"); List<Float> log12PeakRatios = new ArrayList<Float>(); List<Float> masses = new ArrayList<Float>(); List<Float> unitMassDefects = new ArrayList<Float>(); List<Float> totalMassDefects = new ArrayList<Float>(); List<Float> totalMassOffsets = new ArrayList<Float>(); List<Float> unitMassOffsets = new ArrayList<Float>(); List<IParameterReact> paramList1 = new ArrayList<IParameterReact>(); //Reaction is not localized IParameterReact param = new SetReactionCenter(); param.setParameter(Boolean.FALSE); paramList1.add(param); IReactionProcess reduceDoubleBondReaction = new AdductionProtonPBReaction(); IReactionProcess addHydrogenReaction = new AdductionProtonLPReaction(); List<IReactionProcess> allReactions = new ArrayList<IReactionProcess>(); allReactions.add(reduceDoubleBondReaction); // allReactions.add(addHydrogenReaction); IMolecule oneHydrogenMol = new Molecule(); oneHydrogenMol.addAtom(new Atom("H")); List<IMolecule> otherReactants = new ArrayList<IMolecule>(); // otherReactants.add(oneHydrogenMol); try { for (IReactionProcess reaction : allReactions) reaction.setParameterList(paramList1); } catch (CDKException e) { throw new CommandLineModuleExecutionException(e); } for (ChemicalCompound compound : compounds) { try { IMolecularFormula cdkFormula = MolecularFormulaManipulator.getMolecularFormula(compound.getCdkMolecule()); ApplicationContext.infoMessage(compound.toString() + ", " + compound.getCommonestIsotopeMass() + ", " + MolecularFormulaManipulator.getString(cdkFormula) + ", " + MolecularFormulaManipulator.getMajorIsotopeMass(cdkFormula)); List<ChemicalCompound> products = compound.applyReactions(allReactions, otherReactants); System.err.println("\tProducts: " + products.size()); for (ChemicalCompound productCompound : products) { ApplicationContext.infoMessage("\t\t" + productCompound.getFormula()); } } catch (Exception e) { throw new CommandLineModuleExecutionException(e); } if (compound.getPeakMasses().length > 1) { float mass = (float)compound.getCommonestIsotopeMass(); if (compound.getPeakFrequencies()[0] == 0 || compound.getPeakFrequencies()[1] == 0) continue; float peakRatio = (float) (compound.getPeakFrequencies()[0] / compound.getPeakFrequencies()[1]); float peakLogRatio = (float) Math.log(peakRatio); if (peakLogRatio < 0) continue; if (mass < 100 && peakLogRatio < 1.5) continue; if (mass < 50 || mass > 1000) continue; // System.err.println(peakLogRatio); if (Float.isInfinite(peakLogRatio)) System.err.println(compound.getPeakFrequencies()[0] + "," + compound.getPeakFrequencies()[1]); log12PeakRatios.add(peakLogRatio); masses.add(mass); float totalMassDefect = (float) compound.getFormula().calcTotalMassDefect(); float unitMassDefect = (float) compound.getFormula().calcUnitMassDefect(); unitMassDefects.add(unitMassDefect); totalMassDefects.add(totalMassDefect); float totalMassOffset = (float) MetaboliteUtilities.calcTotalMassDefectOffset(mass); totalMassOffsets.add(totalMassOffset); unitMassOffsets.add((float) MetaboliteUtilities.calcUnitMassDefectOffset(mass)); } } new PanelWithHistogram(log12PeakRatios, "log peak ratios").displayInTab(); new PanelWithHistogram(unitMassDefects, "unit defects").displayInTab(); System.err.println("Mean unit mass defect: " + BasicStatistics.mean(unitMassDefects)); new PanelWithScatterPlot(masses, unitMassDefects, "mass vs unit mass defect").displayInTab(); new PanelWithScatterPlot(masses, totalMassDefects, "mass vs total mass defect").displayInTab(); new PanelWithScatterPlot(masses, totalMassOffsets, "mass vs total mass offset").displayInTab(); new PanelWithHistogram(totalMassOffsets, "total mass offsets").displayInTab(); new PanelWithHistogram(unitMassOffsets, "unit mass offsets").displayInTab(); new PanelWithScatterPlot(masses, unitMassOffsets, "mass vs unit mass offset").displayInTab(); PanelWithScatterPlot pwsp = new PanelWithScatterPlot(masses, log12PeakRatios, "mass v logratio"); try { double[] coeffs = RegressionUtilities.modalRegression(masses, log12PeakRatios, 4); pwsp.addLineOrCurve(coeffs, BasicStatistics.min(masses), BasicStatistics.max(masses)); pwsp.displayInTab(); System.err.print("Coefficients: "); for (int i=0; i<coeffs.length; i++) System.err.print(", " + coeffs[i]); System.err.println(); List<Double> logRatioResiduals = new ArrayList<Double>(); for (int i=0; i<masses.size(); i++) { logRatioResiduals.add(log12PeakRatios.get(i) - RegressionUtilities.mapValueUsingCoefficients(coeffs, masses.get(i))); } double sd = BasicStatistics.standardDeviation(logRatioResiduals); System.err.println("SD residual: " + sd); coeffs[0] += sd; pwsp.addLineOrCurve(coeffs, BasicStatistics.min(masses), BasicStatistics.max(masses)); coeffs[0] -= 2*sd; pwsp.addLineOrCurve(coeffs, BasicStatistics.min(masses), BasicStatistics.max(masses)); new PanelWithHistogram(logRatioResiduals, "SD errors").displayInTab(); } catch (Exception e) { e.printStackTrace(System.err); throw new CommandLineModuleExecutionException(e); } // PanelWithScatterPlot pwsp = new PanelWithScatterPlot(); // pwsp.setName("mass vs " + element + "s"); // pwsp.addData(massesInRange, numsInRange, "in range"); // pwsp.addData(masses, numsOfElements, "mass vs " + element + "s"); // pwsp.displayInTab(); // new PanelWithHistogram(numsOfElements, "num " + element).displayInTab(); } }