/* * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fhcrc.cpl.toolbox.chem; import org.fhcrc.cpl.toolbox.datastructure.Pair; import org.openscience.cdk.interfaces.IChemObjectBuilder; import org.openscience.cdk.config.IsotopeFactory; import org.openscience.cdk.formula.MassToFormulaTool; import org.openscience.cdk.nonotify.NoNotificationChemObjectBuilder; import org.openscience.cdk.interfaces.IMolecularFormula; import org.openscience.cdk.interfaces.IMolecularFormulaSet; import org.openscience.cdk.interfaces.IMolecule; import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; import org.openscience.cdk.formula.MolecularFormulaRange; import org.openscience.cdk.formula.rules.*; import org.openscience.cdk.AtomContainer; import org.openscience.cdk.smiles.SmilesGenerator; import java.util.*; import java.util.regex.Pattern; import java.util.regex.Matcher; /** * Created by IntelliJ IDEA. * User: tholzman * Date: Mar 25, 2010 * Time: 12:39:19 PM * To change this template use File | Settings | File Templates. */ public final class ChemCalcs { protected static final Pattern atomCounter = Pattern.compile("[A-Z][a-z]?[0-9]*"); protected static final IChemObjectBuilder CDKObjBuilder = NoNotificationChemObjectBuilder.getInstance(); protected static IsotopeFactory ifac= null; protected static final MassToFormulaTool mfTool = new MassToFormulaTool(NoNotificationChemObjectBuilder.getInstance()); //Default constraints on numbers of each atom in formulas. //todo: is this redundant with Seven Golden Rules? protected static IRule _defaultMFRangeRule = null; //this will contain default IRules that are appropriate for the following ranges of masses, in order: //0-500 //500-1000 //1000-2000 //2000-3000 //3000+ protected static List<IRule>[] _defaultRulesForMasses = null; //Initialize CDK isotope factory and default IRules static { try { ifac = IsotopeFactory.getInstance(CDKObjBuilder); } catch (Exception e) { System.err.println("CDK initialization failure: "+e); } MolecularFormulaRange mfRange = new MolecularFormulaRange(); mfRange.addIsotope( ifac.getMajorIsotope("C"), 1, 50); mfRange.addIsotope( ifac.getMajorIsotope("H"), 1, 100); mfRange.addIsotope( ifac.getMajorIsotope("O"), 0, 50); mfRange.addIsotope( ifac.getMajorIsotope("N"), 0, 50); mfRange.addIsotope( ifac.getMajorIsotope("S"), 0, 5); mfRange.addIsotope( ifac.getMajorIsotope("P"), 0, 5); mfRange.addIsotope( ifac.getMajorIsotope("Cl"), 0, 2); _defaultMFRangeRule = new ElementRule(); Object[] params = new Object[1]; params[0] = mfRange; try { _defaultMFRangeRule.setParameters(params); } catch (Exception e) { throw new RuntimeException("Failed to create default MolecularFormulaRange rule",e); } Object[] mmParams = new Object[] { MMElementRule.RangeMass.Minus500, MMElementRule.RangeMass.Minus1000, MMElementRule.RangeMass.Minus2000, MMElementRule.RangeMass.Minus3000 }; _defaultRulesForMasses = (List<IRule>[]) new ArrayList[mmParams.length + 1]; for (int i=0; i<mmParams.length; i++) { MMElementRule sevenGolden = new MMElementRule(); Object[] paramsMM = new Object[2]; paramsMM[0] = MMElementRule.Database.WILEY; paramsMM[1] = mmParams[i]; try { sevenGolden.setParameters(paramsMM); } catch (Exception e) { throw new RuntimeException("Failed to create default SevenGolden rule",e); } List<IRule> rules = new ArrayList<IRule>(); rules.add(_defaultMFRangeRule); rules.add(sevenGolden); _defaultRulesForMasses[i] = rules; } _defaultRulesForMasses[_defaultRulesForMasses.length-1] = new ArrayList<IRule>(); _defaultRulesForMasses[_defaultRulesForMasses.length-1].add(_defaultMFRangeRule); } /** * Calculate the "commonest isotope" (i.e., "monoisotope" mass for a chemical formula * @param atomCountMap * @return * @throws IllegalArgumentException */ public static double calcCommonestIsotopeMass(Map<String, Integer> atomCountMap) throws IllegalArgumentException { Double retval = 0D; for (String atom : atomCountMap.keySet()) { Element curEl = Elements.get(atom); if (curEl == null) throw new IllegalArgumentException("Bad formula " + atomCount2FormulaString(atomCountMap) + " contains unknown element " + curEl); retval += (curEl.getCommonestIsotopeMass() * atomCountMap.get(atom)); } return retval; } public static String atomCount2FormulaString(Map<String, Integer> atomCountMap) { List<String> atoms = new ArrayList<String>(atomCountMap.keySet()); Collections.sort(atoms); StringBuffer resultBuf = new StringBuffer(); for (String atom : atoms) resultBuf.append(atom + atomCountMap.get(atom)); return resultBuf.toString(); } /** * Take a formula and return a map from element symbols to counts of atoms. * AtomCounts will have CommonestIsotopeMass * @param emp * @return * @throws IllegalArgumentException */ public static HashMap<String, Integer> chemicalFormula2AtomCount(String emp) throws IllegalArgumentException { HashMap<String, Integer> retVal = new HashMap<String,Integer>(); try { Matcher formulaSplit = atomCounter.matcher(emp); while(formulaSplit.find()) { String curAtomCount = formulaSplit.group(); if(!Character.isDigit(curAtomCount.charAt(curAtomCount.length()-1))) curAtomCount += "1"; String atom = curAtomCount.split("[0-9]")[0]; Element curEl = Elements.get(atom); if (curEl == null) throw new IllegalArgumentException("Bad formula " + emp + " contains unknown element " + curEl); int curRep = Integer.parseInt(curAtomCount.split("[A-Za-z]+")[1]); retVal.put(atom, curRep); } } catch (IllegalArgumentException iae) { throw iae; } catch (Exception e) { throw new IllegalArgumentException ("Bad formula " + emp); } return retVal; } public static ChemicalFormula CDKMolForm2ChemForm(IMolecularFormula f, boolean shouldPopulatePeaks) { return new ChemicalFormula(MolecularFormulaManipulator.getString(f),shouldPopulatePeaks); } public static ChemicalFormula CDKMolForm2ChemForm(IMolecularFormula f) { return new ChemicalFormula(MolecularFormulaManipulator.getString(f),false); } public static IMolecularFormula ChemForm2CDKMolForm(ChemicalFormula f){ return MolecularFormulaManipulator.getMajorIsotopeMolecularFormula(f.toString(),CDKObjBuilder); } /** * Create an IRule that constrains formula results to a PPM tolerance around a mass * @param mass * @param ppmerr * @return */ public static IRule createIRuleForMassAndTolerance(double mass, double ppmerr) { ToleranceRangeRule ruleToleran = new ToleranceRangeRule(); Object[] paramsT = new Object[2]; double ppm = ppmerr*mass/1.0E6; paramsT[0] = mass; paramsT[1] = ppm; try { ruleToleran.setParameters(paramsT); } catch (Exception e) { throw new RuntimeException(e); } return ruleToleran; } /** * Get the default IRules for a given mass. Note: two of these are defaults that are used over and over * again, and the third (Seven Golden Rules) is generated uniquely for this call. So don't do anything * to change the first two rules of this result. * @param mass * @param ppmerr * @return * @throws Exception */ public static List<IRule> defaultRules(double mass, double ppmerr) throws Exception { List<IRule> basicRules = null; int masscode = (int) Math.floor(mass/500); switch (masscode) { case 0: basicRules = _defaultRulesForMasses[0]; break; case 1: basicRules = _defaultRulesForMasses[1]; break; case 2: case 3: basicRules = _defaultRulesForMasses[2]; break; case 4: case 5: basicRules = _defaultRulesForMasses[3]; break; default: basicRules = _defaultRulesForMasses[4]; } List<IRule> result = new ArrayList<IRule>(basicRules); result.add(createIRuleForMassAndTolerance(mass, ppmerr)); return result; } /** * Generate a set of formulas for a given mass and set of constraints. Note: time-consuming * @param mass * @param constraints * @return * @throws Exception */ public static IMolecularFormulaSet calcMass2Formulas(double mass, ArrayList<IRule> constraints) throws Exception { mfTool.setRestrictions(constraints); return mfTool.generate(mass); } /** * Generate a default set of constraints for the specified mass and tolerance, and return all formulas * that satisfy those constraints. Note: time-consuming * @param mass * @param ppm * @return * @throws Exception */ public static IMolecularFormulaSet calcMass2Formulas(double mass, double ppm) throws Exception { mfTool.setRestrictions(defaultRules(mass,ppm)); return mfTool.generate(mass); } public static List<ChemicalFormula> CDKFormulaSet2ChemFormList(IMolecularFormulaSet imfs) { ArrayList<ChemicalFormula> retVal = new ArrayList<ChemicalFormula>(); if(imfs == null) return retVal; for(IMolecularFormula imf: imfs.molecularFormulas()) { retVal.add(CDKMolForm2ChemForm(imf)); } return retVal; } public static void main(String argv[]) { System.out.print(argv[0]); Double d = calcCommonestIsotopeMass(chemicalFormula2AtomCount(argv[0])); System.out.println("\t"+d); HashMap<String, Integer> formula = chemicalFormula2AtomCount(argv[0]); System.err.println(formula); ChemicalFormula cf = new ChemicalFormula(argv[0]); System.out.println("A new chemical formula: "+cf); IMolecularFormula imf = ChemForm2CDKMolForm(cf); System.out.println("A new ImolForm: "+imf); try { IMolecularFormulaSet imfs = calcMass2Formulas(MolecularFormulaManipulator.getMajorIsotopeMass(imf),2.0); System.out.println("Formulas calculated for natural mass of "+argv[0]+" ("+MolecularFormulaManipulator.getMajorIsotopeMass(imf)+")"); for(IMolecularFormula i: imfs.molecularFormulas()) { System.out.println(MolecularFormulaManipulator.getString(i)); } } catch (Exception e) { System.err.println("Formula calculation from mass failed: "+e); } } public static double[] calcPeakProbabilities(String formula, int maxPeaksToCalc) throws IllegalArgumentException { return calcPeakMassesAndProbabilities(formula, maxPeaksToCalc).second; } public static double[] calcPeakMasses(String formula, int maxPeaksToCalc) throws IllegalArgumentException { return calcPeakMassesAndProbabilities(formula, maxPeaksToCalc).first; } /** * This method calculates the relative abundance of isotopic peaks within the specified * chemical formula. It uses the algorithm described here: * * Efficient Calculation of Accurate Masses of Isotopic Peaks * Alan L. Rockwood, Perttu Haimi * Journal of the American Society for Mass Spectrometry * Volume 17, Issue 3, March 2006, Pages 415-419 * * Specifically, I begin with one single atom from the chemical formula and then * add each remaining atom, one at a time. Adding each atom involves creating a * 'super-atom' from the cumulative peak frequencies and the new atom's peak frequencies. * * Throws an IllegalArgumentException if the formula isn't valid * @param formula * @param maxPeaksToCalc * @return */ public static Pair<double[],double[]> calcPeakMassesAndProbabilities(String formula, int maxPeaksToCalc) throws IllegalArgumentException { HashMap<String, Integer> elementAtomCountMap = chemicalFormula2AtomCount(formula); return calcPeakMassesAndProbabilities(elementAtomCountMap, maxPeaksToCalc); } public static Pair<double[],double[]> calcPeakMassesAndProbabilities(Map<String, Integer> elementAtomCountMap, int maxPeaksToCalc) { //parse the formula into a map of atoms to counts //This array holds our peak probabilities. It changes with each atom added double[] peakMassesCum = new double[maxPeaksToCalc]; double[] peakProbsCum = new double[maxPeaksToCalc]; //special handling for first atom considered boolean firstAtom = true; for (String element : elementAtomCountMap.keySet()) { double[] elementFrequencies = Elements.get(element).getIsotopicPeakFrequenciesWithMissing(); double[] elementMasses = Elements.get(element).getIsotopicPeakMassesWithMissing(); //how many of this element do we have? int elementCount = elementAtomCountMap.get(element); //one by one, add the atoms of this element for (int elemCount=0; elemCount < elementCount; elemCount++) { //If first atom, start off with the frequencies for that atom if (firstAtom) { //simply set the first elements of peakProbsCum to that atom's freqs System.arraycopy(elementFrequencies, 0, peakProbsCum, 0, Math.min(elementFrequencies.length, maxPeaksToCalc)); System.arraycopy(elementMasses, 0, peakMassesCum, 0, Math.min(elementMasses.length, maxPeaksToCalc)); firstAtom = false; } else { //new peak probabilities array values will replace old ones double[] newPeakProbsCum = new double[maxPeaksToCalc]; double[] newPeakMassesCum = new double[maxPeaksToCalc]; //for every atom after the first, "combine" it with the accumulated 'super-atom'. //Define gp(i) as the new atom's probability for peak i. //Define fp(i) as the super-atom's probability for peak i. //The formula for each peak's probability hp(k) is: //hp(k) = sum over all i (gp(i) * fp(k-i)) // //masses: //Define gm(i) as the new atom's mass for peak i. //Define fm(i) as the super-atom's mass for peak i. //formula for each peak's mass hm(k) is: // (sum over all i (gp(i) * fp(k-i) * (gm(i) + fm(k-i)) ) ) / hp(k) for (int k=0; k<maxPeaksToCalc; k++) { //begin with 0, sum up components double hpk = 0; double massFormulaNumerator = 0; for (int i=0; i<Math.min(maxPeaksToCalc,elementFrequencies.length); i++) { int kminusi = k-i; //check indexes both valid if (kminusi < 0 || kminusi >= maxPeaksToCalc) continue; double gpi = elementFrequencies[i]; double fpkminusi = peakProbsCum[kminusi]; hpk += gpi * fpkminusi; massFormulaNumerator += gpi * fpkminusi * (elementMasses[i] + peakMassesCum[kminusi]); } newPeakProbsCum[k] = hpk; newPeakMassesCum[k] = (hpk == 0) ? peakMassesCum[k] : massFormulaNumerator / hpk; } peakProbsCum = newPeakProbsCum; peakMassesCum = newPeakMassesCum; } } } int lastNonzeroMassPeak = 0; for (int i=0; i<maxPeaksToCalc; i++) { if (peakMassesCum[i] != 0) lastNonzeroMassPeak = i; } if (lastNonzeroMassPeak < maxPeaksToCalc-1) { double[] peakProbsCumShorter = new double[lastNonzeroMassPeak+1]; double[] peakMassesCumShorter = new double[lastNonzeroMassPeak+1]; System.arraycopy(peakMassesCum, 0, peakMassesCumShorter, 0, lastNonzeroMassPeak+1); System.arraycopy(peakProbsCum, 0, peakProbsCumShorter, 0, lastNonzeroMassPeak+1); peakMassesCum = peakMassesCumShorter; peakProbsCum = peakProbsCumShorter; } //if (peakMassesCum.length > 1 && (peakMassesCum[1] - peakMassesCum[0] > 1.5)) //{ // for (String elem : elementAtomCountMap.keySet()) System.err.print(elem); // System.err.println("******" + peakMassesCum[0] + ", " + peakMassesCum[1]); //} return new Pair<double[],double[]>(peakMassesCum, peakProbsCum); } public static String createSMILESString(IMolecule molecule) { SmilesGenerator sg = new SmilesGenerator(); return sg.createSMILES(molecule); } }