/* $RCSfile$
* $Author$
* $Date$
* $Revision$
*
* Copyright (C) 2007 Miguel Rojasch <miguelrojasch@users.sf.net>
*
* Contact: cdk-devel@lists.sourceforge.net
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*/
package org.openscience.cdk.formula.rules;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.IElement;
import org.openscience.cdk.interfaces.IMolecularFormula;
import org.openscience.cdk.tools.ILoggingTool;
import org.openscience.cdk.tools.LoggingToolFactory;
import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator;
/**
 * Validates that the number of occurrences of each IElement in an
 * IMolecularFormula (intended for metabolites) does not exceed a maximal
 * limit. The limits were derived from two different mass spectral databases
 * and are tabulated per mass range of the metabolites. Elements that are not
 * contained in the selected limit table are not taken into account; they are
 * skipped during validation.
 *
 * <p>The rule is based on the paper by Tobias Kind with the title "Seven Golden
 * Rules for heuristic filtering of molecular formula" {@cdk.cite kind2007}.
 *
 * <p>This rule uses these parameters:
 * <table border="1">
 *   <tr>
 *     <td>Name</td>
 *     <td>Default</td>
 *     <td>Description</td>
 *   </tr>
 *   <tr>
 *     <td>database</td>
 *     <td>{@link Database#WILEY}</td>
 *     <td>Mass spectral database the limits were extracted from</td>
 *   </tr>
 *   <tr>
 *     <td>massRange</td>
 *     <td>{@link RangeMass#Minus500} (&lt; 500)</td>
 *     <td>Mass range to take into account</td>
 *   </tr>
 * </table>
 *
 * @cdk.module formula
 * @author     miguelrojasch
 * @cdk.created 2007-11-20
 */
public class MMElementRule implements IRule{

    /** Database used. The default is Wiley. */
    private Database databaseUsed = Database.WILEY;

    /** Mass range used. The default is lower than 500. */
    private RangeMass rangeMassUsed = RangeMass.Minus500;

    /** Maximum number of occurrences per element symbol for the active database / mass range. */
    private HashMap<String, Integer> hashMap;

    private static final ILoggingTool logger =
        LoggingToolFactory.createLoggingTool(MMElementRule.class);

    /** An enumeration of the possible mass ranges
     * according to the rules. */
    public static enum RangeMass {
        /** IMolecularFormula from a metabolite with a mass lower than 500 Da. */
        Minus500,
        /** IMolecularFormula from a metabolite with a mass lower than 1000 Da. */
        Minus1000,
        /** IMolecularFormula from a metabolite with a mass lower than 2000 Da. */
        Minus2000,
        /** IMolecularFormula from a metabolite with a mass lower than 3000 Da. */
        Minus3000
    }

    /** An enumeration of the possible databases
     * according to the rules. */
    public static enum Database {
        /** Wiley mass spectral database. */
        WILEY,
        /** Dictionary of Natural Products Online mass spectral database. */
        DNP
    }

    /**
     * Constructor for the MMElementRule object. The rule starts with the
     * limits of the Wiley database for masses lower than 500 Da.
     */
    public MMElementRule(){
        // initiate the limit table with the defaults (WILEY, Minus500)
        this.hashMap = getWisley_500();
    }

    /**
     * Sets the parameters attribute of the MMElementRule object.
     *
     * <p>The first entry, if present and not null, must be a {@link Database};
     * the second entry, if present and not null, must be a {@link RangeMass}.
     * Missing or null entries keep the current value. All entries are checked
     * before anything is stored, so a failing call leaves the rule unchanged.
     *
     * @param params          The new parameters value
     * @throws CDKException   if the array is null, holds more than two entries,
     *                        holds an entry of the wrong type, or selects a
     *                        database / mass range combination for which no
     *                        limits are available (WILEY with Minus3000)
     *
     * @see                   #getParameters
     */
    public void setParameters(Object[] params) throws CDKException {
        if (params == null)
            throw new CDKException("MMElementRule expects a parameter array but got null");
        if (params.length > 2)
            throw new CDKException("MMElementRule only expects maximal two parameters");

        // Work on local copies so that a failure does not leave half-updated state.
        Database database = databaseUsed;
        RangeMass rangeMass = rangeMassUsed;

        if (params.length > 0 && params[0] != null) {
            if (!(params[0] instanceof Database))
                throw new CDKException("The parameter must be of type Database enum");
            database = (Database) params[0];
        }

        if (params.length > 1 && params[1] != null) {
            if (!(params[1] instanceof RangeMass))
                throw new CDKException("The parameter must be of type RangeMass enum");
            rangeMass = (RangeMass) params[1];
        }

        HashMap<String, Integer> limits = selectLimits(database, rangeMass);
        if (limits == null)
            throw new CDKException("MMElementRule has no element limits for database "
                    + database + " and mass range " + rangeMass);

        this.databaseUsed = database;
        this.rangeMassUsed = rangeMass;
        this.hashMap = limits;
    }

    /**
     * Gets the parameters attribute of the MMElementRule object.
     *
     * @return The parameters value: the database at index 0 and the mass range at index 1
     * @see    #setParameters
     */
    public Object[] getParameters() {
        // return the parameters as used for the rule validation
        return new Object[]{databaseUsed, rangeMassUsed};
    }

    /**
     * Validate the occurrence of the elements of this IMolecularFormula.
     * Elements without an entry in the active limit table are skipped.
     *
     * @param formula   Parameter is the IMolecularFormula
     * @return          1.0 if no element exceeds its maximum occurrence, 0.0 otherwise
     */
    public double validate(IMolecularFormula formula) throws CDKException {
        logger.info("Start validation of ",formula);

        Iterator<IElement> itElem = MolecularFormulaManipulator.elements(formula).iterator();
        while (itElem.hasNext()) {
            IElement element = itElem.next();
            Integer maximum = hashMap.get(element.getSymbol());
            if (maximum == null) {
                // No limit tabulated for this element: it is not taken into account.
                continue;
            }
            int occur = MolecularFormulaManipulator.getElementCount(formula, element);
            if (occur > maximum) {
                return 0.0;
            }
        }
        return 1.0;
    }

    /**
     * Picks the limit table belonging to a database / mass range combination.
     *
     * @param database  the database the limits come from
     * @param rangeMass the mass range of the limits
     * @return a freshly built limit table, or null if no table exists for the combination
     */
    private HashMap<String, Integer> selectLimits(Database database, RangeMass rangeMass) {
        if (database == Database.DNP) {
            switch (rangeMass) {
                case Minus500:  return getDNP_500();
                case Minus1000: return getDNP_1000();
                case Minus2000: return getDNP_2000();
                case Minus3000: return getDNP_3000();
            }
        } else if (database == Database.WILEY) {
            switch (rangeMass) {
                case Minus500:  return getWisley_500();
                case Minus1000: return getWisley_1000();
                case Minus2000: return getWisley_2000();
                default:        return null; // no Wiley table for masses up to 3000 Da
            }
        }
        return null;
    }

    /**
     * Get the map linking the symbol of the element and number maximum of occurrence.
     * For the analysis with the DNP database and mass lower than 500 Da.
     *
     * @return The HashMap of the symbol linked with the maximum occurrence
     */
    private HashMap<String, Integer> getDNP_500(){
        HashMap<String, Integer> map = new HashMap<String, Integer>();

        map.put("C", 29);
        map.put("H", 72);
        map.put("N", 10);
        map.put("O", 18);
        map.put("P", 4);
        map.put("S", 7);
        map.put("F", 15);
        map.put("Cl", 8);
        map.put("Br", 5);

        return map;
    }

    /**
     * Get the map linking the symbol of the element and number maximum of occurrence.
     * For the analysis with the DNP database and mass lower than 1000 Da.
     *
     * @return The HashMap of the symbol linked with the maximum occurrence
     */
    private HashMap<String, Integer> getDNP_1000(){
        HashMap<String, Integer> map = new HashMap<String, Integer>();

        map.put("C", 66);
        map.put("H", 126);
        map.put("N", 25);
        map.put("O", 27);
        map.put("P", 6);
        map.put("S", 8);
        map.put("F", 16);
        map.put("Cl", 11);
        map.put("Br", 8);

        return map;
    }

    /**
     * Get the map linking the symbol of the element and number maximum of occurrence.
     * For the analysis with the DNP database and mass lower than 2000 Da.
     *
     * @return The HashMap of the symbol linked with the maximum occurrence
     */
    private HashMap<String, Integer> getDNP_2000(){
        HashMap<String, Integer> map = new HashMap<String, Integer>();

        map.put("C", 115);
        map.put("H", 236);
        map.put("N", 32);
        map.put("O", 63);
        map.put("P", 6);
        map.put("S", 8);
        map.put("F", 16);
        map.put("Cl", 11);
        map.put("Br", 8);

        return map;
    }

    /**
     * Get the map linking the symbol of the element and number maximum of occurrence.
     * For the analysis with the DNP database and mass lower than 3000 Da.
     *
     * @return The HashMap of the symbol linked with the maximum occurrence
     */
    private HashMap<String, Integer> getDNP_3000(){
        HashMap<String, Integer> map = new HashMap<String, Integer>();

        map.put("C", 162);
        map.put("H", 208);
        map.put("N", 48);
        map.put("O", 78);
        map.put("P", 6);
        map.put("S", 9);
        map.put("F", 16);
        map.put("Cl", 11);
        map.put("Br", 8);

        return map;
    }

    /**
     * Get the map linking the symbol of the element and number maximum of occurrence.
     * For the analysis with the Wiley database and mass lower than 500 Da.
     *
     * @return The HashMap of the symbol linked with the maximum occurrence
     */
    private HashMap<String, Integer> getWisley_500(){
        HashMap<String, Integer> map = new HashMap<String, Integer>();

        map.put("C", 39);
        map.put("H", 72);
        map.put("N", 20);
        map.put("O", 20);
        map.put("P", 9);
        map.put("S", 10);
        map.put("F", 16);
        map.put("Cl", 10);
        map.put("Br", 4);
        // Was a second put on "Br", which overwrote the Br limit and left Si without an entry.
        map.put("Si", 8);

        return map;
    }

    /**
     * Get the map linking the symbol of the element and number maximum of occurrence.
     * For the analysis with the Wiley database and mass lower than 1000 Da.
     *
     * @return The HashMap of the symbol linked with the maximum occurrence
     */
    private HashMap<String, Integer> getWisley_1000(){
        HashMap<String, Integer> map = new HashMap<String, Integer>();

        map.put("C", 78);
        map.put("H", 126);
        map.put("N", 20);
        map.put("O", 27);
        map.put("P", 9);
        map.put("S", 14);
        map.put("F", 34);
        map.put("Cl", 12);
        map.put("Br", 8);
        map.put("Si", 14);

        return map;
    }

    /**
     * Get the map linking the symbol of the element and number maximum of occurrence.
     * For the analysis with the Wiley database and mass lower than 2000 Da.
     *
     * @return The HashMap of the symbol linked with the maximum occurrence
     */
    private HashMap<String, Integer> getWisley_2000(){
        HashMap<String, Integer> map = new HashMap<String, Integer>();

        map.put("C", 156);
        map.put("H", 180);
        map.put("N", 20);
        map.put("O", 40);
        map.put("P", 9);
        map.put("S", 14);
        map.put("F", 48);
        map.put("Cl", 12);
        map.put("Br", 10);
        map.put("Si", 15);

        return map;
    }
}