/* $Revision$ $Author$ $Date$ * * Copyright (C) 2007 Miguel Rojasch <miguelrojasch@users.sf.net> * * Contact: cdk-devel@lists.sourceforge.net * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * All we ask is that proper credit is given for our work, which includes * - but is not limited to - adding the above copyright notice to the beginning * of your source code files, and to any copyright notice that you may distribute * with programs based on this work. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. */ package org.openscience.cdk.formula; import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import org.openscience.cdk.annotations.TestClass; import org.openscience.cdk.annotations.TestMethod; import org.openscience.cdk.config.AtomTypeFactory; import org.openscience.cdk.config.IsotopeFactory; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.formula.rules.ChargeRule; import org.openscience.cdk.formula.rules.ElementRule; import org.openscience.cdk.formula.rules.IRule; import org.openscience.cdk.formula.rules.ToleranceRangeRule; import org.openscience.cdk.interfaces.IChemObjectBuilder; import org.openscience.cdk.interfaces.IElement; import org.openscience.cdk.interfaces.IIsotope; import org.openscience.cdk.interfaces.IMolecularFormula; import org.openscience.cdk.interfaces.IMolecularFormulaSet; import org.openscience.cdk.tools.ILoggingTool; import org.openscience.cdk.tools.LoggingToolFactory; import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator; import org.openscience.cdk.tools.manipulator.MolecularFormulaRangeManipulator; /** * <p>Tool to determine molecular formula consistent with a given accurate mass. The * molecular formulas are not validate. It only consist in generate combination according * object (see MolecularFormulaChecker). * * <pre> * MassToFormulaTool mf = new MassToFormulaTool(); * double myMass = 133.004242; * IMolecularFormulaSet mfSet = mf.generate(myMass); * </pre> * * <p>The elements are listed according on difference with the proposed mass. * * * @cdk.module formula * @author miguelrojasch * @cdk.created 2007-03-01 * @cdk.githash */ @TestClass("org.openscience.cdk.formula.MassToFormulaToolTest") public class MassToFormulaTool { private ILoggingTool logger = LoggingToolFactory.createLoggingTool(MassToFormulaTool.class); private IChemObjectBuilder builder; /** */ AtomTypeFactory factory; /** matrix to follow for the permutations.*/ private int[][] matrix_Base; /** Array listing the order of the elements to be shown according probability occurrence.*/ private String[] orderElements; /** A List with all rules to be applied. see IRule.*/ private List<IRule> rules; private MolecularFormulaRange mfRange; private Double charge; private Double tolerance; /** * Construct an instance of MassToFormulaTool. It is necessary because different * matrix have to build. Furthermore the default restrictions are initiated. * * @see #setDefaultRestrictions() */ public MassToFormulaTool(IChemObjectBuilder builder) { this.builder = builder; logger.info("Initiate MassToForumlaTool"); factory = AtomTypeFactory.getInstance(builder); this.orderElements = generateOrderE(); setDefaultRestrictions(); } /** * Set the restrictions that must be presents in the molecular formula. * * @param rulesNew The restrictions to impose * * @see #getRestrictions() * @see #setDefaultRestrictions() * @see IRule */ @TestMethod("testSetRestrictions_List") public void setRestrictions(List<IRule> rulesNew) throws CDKException { Iterator<IRule> itRules = rulesNew.iterator(); while(itRules.hasNext()){ IRule rule = itRules.next(); if(rule instanceof ElementRule){ mfRange = (MolecularFormulaRange) ((Object[])rule.getParameters())[0]; //removing the rule Iterator<IRule> oldRuleIt = rules.iterator(); while(oldRuleIt.hasNext()){ IRule oldRule = oldRuleIt.next(); if(oldRule instanceof ElementRule){ rules.remove(oldRule); rules.add(rule); break; } } this.matrix_Base = getMatrix(mfRange.getIsotopeCount()); }else if(rule instanceof ChargeRule){ this.charge = (Double) ((Object[])rule.getParameters())[0]; //removing the rule Iterator<IRule> oldRuleIt = rules.iterator(); while(oldRuleIt.hasNext()){ IRule oldRule = oldRuleIt.next(); if(oldRule instanceof ChargeRule){ rules.remove(oldRule); rules.add(rule); break; } } }else if(rule instanceof ToleranceRangeRule){ this.tolerance = (Double) ((Object[])rule.getParameters())[1]; //removing the rule Iterator<IRule> oldRuleIt = rules.iterator(); while(oldRuleIt.hasNext()){ IRule oldRule = oldRuleIt.next(); if(oldRule instanceof ToleranceRangeRule){ rules.remove(oldRule); rules.add(rule); break; } } }else{ rules.add(rule); } } } /** * Get the restrictions that must be presents in the molecular formula. * * @return The restrictions to be imposed * * @see #setDefaultRestrictions() */ @TestMethod("testGetRestrictions") public List<IRule> getRestrictions(){ return this.rules; } /** * Set the default restrictions that must be presents in the molecular formula. * * @see #getRestrictions() */ @TestMethod("testSetDefaultRestrictions") public void setDefaultRestrictions(){ try { callDefaultRestrictions(); } catch (CDKException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } /** * Create the default restrictions. They are:<p> * * The major isotopes = C, H, O and N<p> * Charge = 0.0, indicating neutral compound<p> * Tolerance = 0.05 amu<p> * @throws ClassNotFoundException * @throws IOException * @throws CDKException * @throws IOException * */ private void callDefaultRestrictions() throws CDKException, IOException { List<IRule> rules1 = new ArrayList<IRule>(); IsotopeFactory ifac = IsotopeFactory.getInstance(builder); // restriction for occurrence elements MolecularFormulaRange mfRange1 = new MolecularFormulaRange(); mfRange1.addIsotope( ifac.getMajorIsotope("C"), 0, 15); mfRange1.addIsotope( ifac.getMajorIsotope("H"), 0, 15); mfRange1.addIsotope( ifac.getMajorIsotope("N"), 0, 15); mfRange1.addIsotope( ifac.getMajorIsotope("O"), 0, 15); IRule rule = new ElementRule(); Object[] params = new Object[1]; params[0] = mfRange1; rule.setParameters(params); rules1.add(rule); // occurrence for charge rule = new ChargeRule(); // default 0.0 neutral rules1.add(rule); charge = (Double) ((Object[])rule.getParameters())[0]; // occurrence for tolerance rule = new ToleranceRangeRule(); // default 0.05 rules1.add(rule); this.tolerance = (Double) ((Object[])rule.getParameters())[1]; this.matrix_Base = getMatrix(mfRange1.getIsotopeCount()); this.mfRange = mfRange1; this.rules = rules1; } /** * Method that actually does the work of extracting the molecular formula. * * @param mass molecular formula to create from the mass * @return the filled molecular formulas as IMolecularFormulaSet */ @TestMethod("testGenerate_double") public IMolecularFormulaSet generate(double mass) { if(mass <= 0.0){ logger.error("Proposed mass is not valid: ",mass); return null; } IMolecularFormula minimalMF = MolecularFormulaRangeManipulator.getMinimalFormula(mfRange,builder); IMolecularFormula maximalMF = MolecularFormulaRangeManipulator.getMaximalFormula(mfRange,builder); double massMim = MolecularFormulaManipulator.getTotalExactMass(minimalMF)-tolerance; double massMap = MolecularFormulaManipulator.getTotalExactMass(maximalMF)+tolerance; if(massMim > mass || massMap < mass){ logger.error("Proposed mass is out of the range: ",mass); return null; } IMolecularFormulaSet molecularFormulaSet = builder.newMolecularFormulaSet(); int[][] matrix = this.matrix_Base; int numberElements = mfRange.getIsotopeCount(); // put IIsotope into a list List<IIsotope> isotopes_TO = new ArrayList<IIsotope>(); Iterator<IIsotope> isIt = mfRange.isotopes().iterator(); while(isIt.hasNext()) isotopes_TO.add(isIt.next()); isotopes_TO = orderList(isotopes_TO); for(int i = 0; i < matrix.length ; i++){ /*constructing initial combinations*/ int[] value_In = new int[numberElements]; for(int j= 0; j < numberElements ; j++){ if(matrix[i][j] == 0) value_In[j] = 0; else value_In[j] = 1; } /*find number of element to combine*/ int count_E = 0; ArrayList<Integer> elem_Pos = new ArrayList<Integer>(); for(int j= 0 ; j< matrix[1].length; j++) if(value_In[j] != 0){ count_E++; elem_Pos.add(j); } boolean flag = true; /*first position those first starting at the left*/ int possChan = 0; String lastMFString = ""; while(flag){ // // print all combinations -------------------------------------------------- // System.out.print(elem_Pos.get(possChan).intValue()+">"); // for(int j= 0 ; j< matrix[1].length; j++) // System.out.print(isotopes_TO.get(j).getSymbol()+value_In[j]+"-"); // System.out.println(); // // print all combinations -------------------------------------------------- // control if some of the element is contained. E.g. C(1-3)H(1-3) // the matrix 01 or 10 can not exist boolean flagBreak = false; for(int j= 0 ; j< matrix[1].length; j++){ int min = mfRange.getIsotopeCountMin(isotopes_TO.get(j)); if(value_In[j] == 0) if(min != 0) flagBreak = true; } if(flagBreak) break; /*Find max occurence given a mass for a element with minimal elements*/ int occurence = getMaxOccurence(mass, elem_Pos.get(possChan).intValue(),value_In,isotopes_TO); /*at least one*/ if (occurence == 0) break; int maxx = mfRange.getIsotopeCountMax(isotopes_TO.get(elem_Pos.get(possChan).intValue())); int minn = mfRange.getIsotopeCountMin(isotopes_TO.get(elem_Pos.get(possChan).intValue())); /*restriction of the number of max and min number for a element*/ if(occurence < minn | maxx < occurence){ /* when is not in the occurrence that means that we have to * restart one value to the predecessor.*/ if (possChan < elem_Pos.size()-1){ /*Means that is possible to fit the next*/ if (maxx < occurence) value_In[elem_Pos.get(possChan).intValue()] = maxx; possChan++; }else{ boolean foundZ = false; for(int z= possChan-1; z >= 0 ; z--){ if (value_In[elem_Pos.get(z).intValue()] != 1){ possChan = z; foundZ = true; int newValue = value_In[elem_Pos.get(possChan).intValue()]-1; value_In[elem_Pos.get(possChan).intValue()] = newValue; for(int j= possChan+1; j < elem_Pos.size() ; j++){ int p = elem_Pos.get(j).intValue(); value_In[p] = 1; } possChan++; break; } } if(!foundZ) break; } continue; } /*final not occurrence*/ /*set the occurrence into the matrix*/ value_In[elem_Pos.get(possChan).intValue()] = occurence; double massT = calculateMassT(isotopes_TO,value_In); double diff_new = Math.abs(mass - (massT)); if(diff_new < tolerance){ IMolecularFormula myMF = getFormula(isotopes_TO,value_In); String newMFString = MolecularFormulaManipulator.getString(myMF); if(!newMFString.equals(lastMFString)){ molecularFormulaSet.addMolecularFormula(myMF); lastMFString = newMFString; } } if(count_E == 1)/*only valid for the first random 1000*/ break; if (possChan < elem_Pos.size()-1){ /*Means that is possible to fit the next*/ // value_In[elem_Pos.get(possChan).intValue()] = maxx; possChan++; }else{ boolean foundZ = false; for(int z= possChan-1; z >= 0 ; z--){ if (value_In[elem_Pos.get(z).intValue()] != 1){ possChan = z; foundZ = true; int newValue = value_In[elem_Pos.get(possChan).intValue()]-1; value_In[elem_Pos.get(possChan).intValue()] = newValue; for(int j= possChan+1; j < elem_Pos.size() ; j++){ int p = elem_Pos.get(j).intValue(); value_In[p] = 1; } possChan++; break; } } if(!foundZ) break; } } } return returnOrdered(mass, molecularFormulaSet); } /** * Put the order the List of IIsotope according the probability occurrence. * * @param isotopes_TO The List of IIsotope * @return The list of IIsotope ordered */ private List<IIsotope> orderList(List<IIsotope> isotopes_TO) { List<IIsotope> newOrderList = new ArrayList<IIsotope>(); for(int i = 0 ; i < orderElements.length; i++){ String symbol = orderElements[i]; Iterator<IIsotope> itIso = isotopes_TO.iterator(); while(itIso.hasNext()){ IIsotope isotopeToCo = itIso.next(); if(isotopeToCo.getSymbol().equals(symbol)){ newOrderList.add(isotopeToCo); } } } return newOrderList; } /** * generate the order of the Elements according probability occurrence., * beginning the C, H, O, N, Si, P, S, F, Cl, Br, I, Sn, B, Pb, Tl, Ba, In, Pd, * Pt, Os, Ag, Zr, Se, Zn, Cu, Ni, Co, Fe, Cr, Ti, Ca, K, Al, Mg, Na, Ce, * Hg, Au, Ir, Re, W, Ta, Hf, Lu, Yb, Tm, Er, Ho, Dy, Tb, Gd, Eu, Sm, Pm, * Nd, Pr, La, Cs, Xe, Te, Sb, Cd, Rh, Ru, Tc, Mo, Nb, Y, Sr, Rb, Kr, As, * Ge, Ga, Mn, V, Sc, Ar, Ne, Be, Li, Tl, Pb, Bi, Po, At, Rn, Fr, Ra, Ac, * Th, Pa, U, Np, Pu. * * @return Array with the elements ordered. * */ private String[] generateOrderE(){ String[] listElements = new String[]{ "C", "H", "O", "N", "Si", "P", "S", "F", "Cl", "Br", "I", "Sn", "B", "Pb", "Tl", "Ba", "In", "Pd", "Pt", "Os", "Ag", "Zr", "Se", "Zn", "Cu", "Ni", "Co", "Fe", "Cr", "Ti", "Ca", "K", "Al", "Mg", "Na", "Ce", "Hg", "Au", "Ir", "Re", "W", "Ta", "Hf", "Lu", "Yb", "Tm", "Er", "Ho", "Dy", "Tb", "Gd", "Eu", "Sm", "Pm", "Nd", "Pr", "La", "Cs", "Xe", "Te", "Sb", "Cd", "Rh", "Ru", "Tc", "Mo", "Nb", "Y", "Sr", "Rb", "Kr", "As", "Ge", "Ga", "Mn", "V", "Sc", "Ar", "Ne", "Be", "Li", "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th", "Pa", "U", "Np", "Pu"}; return listElements; } /** * Get the maximal occurrence of this List. * * @param massTo * @param element_pos * @param matrix * @param elemToCond_new * @return The occurrence value */ private int getMaxOccurence(double massTo, int element_pos, int[] matrix,List<IIsotope> isoToCond_new) { double massIn = isoToCond_new.get(element_pos).getExactMass(); double massToM = massTo; for(int i = 0; i < matrix.length ; i++) if (i != element_pos) if(matrix[i] != 0) massToM -= isoToCond_new.get(i).getExactMass()*matrix[i]; int value = (int)((massToM+1)/massIn); return value; } /** * Set the formula molecular as IMolecularFormula object. * * @param elemToCond_new List with IIsotope * @param value_In Array matrix with occurrences * @return The IMolecularFormula */ private IMolecularFormula getFormula(List<IIsotope> isoToCond_new, int[] value_In) { IMolecularFormula mf = builder.newMolecularFormula();; for(int i = 0; i < isoToCond_new.size() ; i++){ if(value_In[i] != 0){ for(int j = 0 ; j < value_In[i] ; j ++) mf.addIsotope(isoToCond_new.get(i)); } } mf = putInOrder(mf); return mf; } /** * Put in order the elements of the molecular formula. * * @param formula The IMolecularFormula to put in order * @return IMolecularFormula object */ private IMolecularFormula putInOrder(IMolecularFormula formula) { IMolecularFormula new_formula = formula.getBuilder().newMolecularFormula(); for(int i = 0 ; i < orderElements.length; i++){ IElement element = builder.newElement(orderElements[i]); if(MolecularFormulaManipulator.containsElement(formula,element)){ Iterator<IIsotope> isotopes = MolecularFormulaManipulator.getIsotopes(formula, element).iterator(); while(isotopes.hasNext()){ IIsotope isotope = isotopes.next(); new_formula.addIsotope(isotope,formula.getIsotopeCount(isotope)); } } } // new_mf.setCharge(charge); return new_formula; } /** * Calculate the mass total given the elements and their respective occurrences. * * @param elemToCond_new The IIsotope to calculate * @param value_In Array matrix with occurrences * @return The sum total */ private double calculateMassT(List<IIsotope> isoToCond_new, int[] value_In) { double result = 0; for(int i = 0; i < isoToCond_new.size() ; i++){ if(value_In[i] != 0){ result += isoToCond_new.get(i).getExactMass()*value_In[i]; } } return result; } /** * Return all molecular formulas but ordered according the tolerance difference between masses. * * @param mass The mass to analyze * @param formulaSet The IMolecularFormulaSet to order * @return The IMolecularFormulaSet ordered */ private IMolecularFormulaSet returnOrdered(double mass, IMolecularFormulaSet formulaSet){ IMolecularFormulaSet solutions_new = null; if(formulaSet.size() != 0){ double valueMin = 100; int i_final = 0; solutions_new = formulaSet.getBuilder().newMolecularFormulaSet(); List<Integer> listI = new ArrayList<Integer>(); for (int j = 0; j < formulaSet.size() ; j++){ for (int i = 0; i < formulaSet.size() ; i++){ if(listI.contains(i)) continue; double value = MolecularFormulaManipulator.getTotalExactMass(formulaSet.getMolecularFormula(i)); double diff = Math.abs(mass - Math.abs(value)); if (valueMin > diff){ valueMin = diff; i_final = i; } } valueMin = 100; solutions_new.addMolecularFormula(formulaSet.getMolecularFormula(i_final)); listI.add(i_final); } } return solutions_new; } /** * Get the corresponding matrix and create it. * * @param size Size of the matrix to be created * @return the matrix with the permutations */ private int[][] getMatrix(int size){ logger.info("Creating matrix for isotopes combination"); int lengthM = (int) Math.pow(2, size); lengthM--;// less 1 because the matrix 00000 we don't need int[][] matrix = new int[lengthM][size]; int[] combi = new int[size]; for(int j = 0 ; j < size ; j ++){ combi[j] = 0; } int posChang = size - 1; int posRemov = size - 1; for(int i = 0 ; i < lengthM; i++){ // cleaning to zeros for(int j = posRemov ; j < size ; j ++){ combi[j] = 0; } combi[posChang] = 1; for(int j = 0 ; j < size ; j ++) matrix[i][j] = combi[j]; if(posChang == size - 1){ //find where is zero position, place to change for(int j = posChang ; j >= 0 ; j --){ if(combi[j] == 0){ posChang = j; posRemov = j+1; break; } } }else{ //look for the last zero for(int j = posChang ; j < size ; j ++){ if(combi[j] == 0){ posChang = j; } } } } return matrix; } }