/* $Revision$ $Author$ $Date$
*
* Copyright (C) 2007 Miguel Rojasch <miguelrojasch@users.sf.net>
*
* Contact: cdk-devel@lists.sourceforge.net
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
* All we ask is that proper credit is given for our work, which includes
* - but is not limited to - adding the above copyright notice to the beginning
* of your source code files, and to any copyright notice that you may distribute
* with programs based on this work.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
* */
package org.openscience.cdk.tools.manipulator;
import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.annotations.TestClass;
import org.openscience.cdk.annotations.TestMethod;
import org.openscience.cdk.config.AtomTypeFactory;
import org.openscience.cdk.config.IsotopeFactory;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.*;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
/**
* Class with convenience methods that provide methods to manipulate
* {@link IMolecularFormula}'s. For example:
*
*
* @cdk.module formula
* @author miguelrojasch
* @cdk.created 2007-11-20
*/
@TestClass("org.openscience.cdk.formula.MolecularFormulaManipulatorTest")
public class MolecularFormulaManipulator {
/**
* Checks a set of Nodes for the occurrence of each isotopes
* instance in the molecular formula. In short number of atoms.
*
* @param formula The MolecularFormula to check
* @return The occurrence total
*/
@TestMethod("testGetAtomCount_IMolecularFormula")
public static int getAtomCount(IMolecularFormula formula){
int count = 0;
for (IIsotope isotope : formula.isotopes()) {
count += formula.getIsotopeCount(isotope);
}
return count;
}
/**
* Checks a set of Nodes for the occurrence of the isotopes in the
* molecular formula from a particular IElement. It returns 0 if the
* element does not exist. The search is based only on the IElement.
*
*@param formula The MolecularFormula to check
*@param element The IElement object
*@return The occurrence of this element in this molecular formula
*/
@TestMethod("testGetElementCount_IMolecularFormula_IElement")
public static int getElementCount(IMolecularFormula formula, IElement element){
int count = 0;
for (IIsotope isotope : formula.isotopes()) {
if (isotope.getSymbol().equals(element.getSymbol()))
count += formula.getIsotopeCount(isotope);
}
return count;
}
/**
* Get a list of IIsotope from a given IElement which is contained
* molecular. The search is based only on the IElement.
*
*@param formula The MolecularFormula to check
*@param element The IElement object
*@return The list with the IIsotopes in this molecular formula
*/
@TestMethod("testGetIsotopes_IMolecularFormula_IElement")
public static List<IIsotope> getIsotopes(IMolecularFormula formula, IElement element){
List<IIsotope> isotopeList = new ArrayList<IIsotope>();
for (IIsotope isotope : formula.isotopes()) {
if (isotope.getSymbol().equals(element.getSymbol()))
isotopeList.add(isotope);
}
return isotopeList;
}
/**
* Get a list of all Elements which are contained
* molecular.
*
*@param formula The MolecularFormula to check
*@return The list with the IElements in this molecular formula
*/
@TestMethod("testElements_IMolecularFormula")
public static List<IElement> elements(IMolecularFormula formula){
List<IElement> elementList = new ArrayList<IElement>();
List<String> stringList = new ArrayList<String>();
for (IIsotope isotope : formula.isotopes()) {
if (!stringList.contains(isotope.getSymbol())) {
elementList.add(isotope);
stringList.add(isotope.getSymbol());
}
}
return elementList;
}
/**
* True, if the MolecularFormula contains the given element as IIsotope object.
*
* @param formula IMolecularFormula molecularFormula
* @param element The element this MolecularFormula is searched for
* @return True, if the MolecularFormula contains the given element object
*/
@TestMethod("testContainsElement_IMolecularFormula_IElement")
public static boolean containsElement(IMolecularFormula formula, IElement element){
for (IIsotope isotope : formula.isotopes()) {
if (element.getSymbol().equals(isotope.getSymbol())) return true;
}
return false;
}
/**
* Removes all isotopes from a given element in the MolecularFormula.
*
* @param formula IMolecularFormula molecularFormula
* @param element The IElement of the IIsotopes to be removed
* @return The molecularFormula with the isotopes removed
*/
@TestMethod("testRemoveElement_IMolecularFormula_IElement")
public static IMolecularFormula removeElement(IMolecularFormula formula, IElement element){
for (IIsotope isotope : getIsotopes(formula, element)) {
formula.removeIsotope(isotope);
}
return formula;
}
/**
* Returns the string representation of the molecule formula.
*
* @param formula The IMolecularFormula Object
* @param orderElements The order of Elements
* @param setOne True, when must be set the value 1 for elements with
* one atom
* @return A String containing the molecular formula
*
* @see #getHTML(IMolecularFormula)
* @see #generateOrderEle()
* @see #generateOrderEle_Hill_NoCarbons()
* @see #generateOrderEle_Hill_WithCarbons()
*
*/
@TestMethod("testGetString_IMolecularFormula_arrayString_boolean")
public static String getString(IMolecularFormula formula, String[] orderElements, boolean setOne) {
String stringMF = "";
List<IIsotope> isotopesList = putInOrder(orderElements, formula);
for (IIsotope isotope : isotopesList) {
int elemCount = getElementCount(formula, isotope);
if (elemCount == 1 && !setOne)
stringMF = stringMF + isotope.getSymbol();
else
stringMF = stringMF + isotope.getSymbol() + getElementCount(formula, isotope);
}
return stringMF;
}
/**
* Returns the string representation of the molecule formula.
* Based on Hill System. The Hill system is a system of writing
* chemical formulas such that the number of carbon atoms in a
* molecule is indicated first, the number of hydrogen atoms next,
* and then the number of all other chemical elements subsequently,
* in alphabetical order. When the formula contains no carbon, all
* the elements, including hydrogen, are listed alphabetically.
*
* @param formula The IMolecularFormula Object
* @return A String containing the molecular formula
*
* @see #getHTML(IMolecularFormula)
*/
@TestMethod("testGetString_IMolecularFormula")
public static String getString(IMolecularFormula formula) {
return getString(formula, false);
}
/**
* Returns the string representation of the molecule formula.
* Based on Hill System. The Hill system is a system of writing
* chemical formulas such that the number of carbon atoms in a
* molecule is indicated first, the number of hydrogen atoms next,
* and then the number of all other chemical elements subsequently,
* in alphabetical order. When the formula contains no carbon, all
* the elements, including hydrogen, are listed alphabetically.
*
* @param formula The IMolecularFormula Object
* @param setOne True, when must be set the value 1 for elements with
* one atom
* @return A String containing the molecular formula
*
* @see #getHTML(IMolecularFormula)
*/
@TestMethod("testGetString_IMolecularFormula_boolean")
public static String getString(IMolecularFormula formula, boolean setOne) {
if(containsElement(formula, formula.getBuilder().newElement("C")))
return getString(formula, generateOrderEle_Hill_WithCarbons(), setOne);
else
return getString(formula, generateOrderEle_Hill_NoCarbons(), setOne);
}
public static List<IIsotope> putInOrder(String[] orderElements, IMolecularFormula formula) {
List<IIsotope> isotopesList = new ArrayList<IIsotope>();
for (String orderElement : orderElements) {
IElement element = formula.getBuilder().newElement(orderElement);
if (containsElement(formula, element)) {
List<IIsotope> isotopes = getIsotopes(formula, element);
for (IIsotope isotope : isotopes) {
isotopesList.add(isotope);
}
}
}
return isotopesList;
}
@TestMethod("testGetHillString_IMolecularFormula")
public static String getHillString(IMolecularFormula formula) {
StringBuffer hillString = new StringBuffer();
Map<String, Integer> hillMap = new TreeMap<String, Integer>();
for (IIsotope isotope : formula.isotopes()) {
String symbol = isotope.getSymbol();
if (hillMap.containsKey(symbol))
hillMap.put(symbol, hillMap.get(symbol) + formula.getIsotopeCount(isotope));
else hillMap.put(symbol, formula.getIsotopeCount(isotope));
}
// if we have a C append it and also add in the H
// and then remove these elements
int count;
if (hillMap.containsKey("C")) {
hillString.append("C");
count = hillMap.get("C");
if (count> 1) hillString.append(count);
hillMap.remove("C");
if (hillMap.containsKey("H")) {
hillString.append("H");
count = hillMap.get("H");
if (count > 1) hillString.append(count);
hillMap.remove("H");
}
}
// now take all the rest in alphabetical order
for (String key : hillMap.keySet()) {
hillString.append(key);
count = hillMap.get(key);
if (count > 1) hillString.append(count);
}
return hillString.toString();
}
/**
* Returns the string representation of the molecule formula with
* numbers wrapped in <sub></sub> tags.
* Useful for displaying formulae in Swing components or on the web.
*
*
* @param formula The IMolecularFormula object
* @return A HTML representation of the molecular formula
* @see #getHTML(IMolecularFormula, boolean, boolean)
*
*/
@TestMethod("testGetHTML_IMolecularFormula")
public static String getHTML(IMolecularFormula formula) {
return getHTML(formula, false, false);
}
/**
* Returns the string representation of the molecule formula with
* numbers wrapped in <sub></sub> tags and the isotope
* of each Element in <sup></sup> tags and the total
* charge of IMolecularFormula in <sup></sup> tags.
* Useful for displaying formulae in Swing components or on the web.
*
*
* @param formula The IMolecularFormula object
* @param chargeB True, If it has to show the charge
* @param isotopeB True, If it has to show the Isotope mass
* @return A HTML representation of the molecular formula
* @see #getHTML(IMolecularFormula)
*
*/
@TestMethod("testGetHTML_IMolecularFormula_boolean_boolean")
public static String getHTML(IMolecularFormula formula, boolean chargeB, boolean isotopeB) {
String htmlString = "";
String[] orderElements = generateOrderEle();
for (String orderElement : orderElements) {
IElement element = formula.getBuilder().newElement(orderElement);
if (containsElement(formula, element)) {
if (!isotopeB) {
String eleToAdd = element.getSymbol() + "<sub>" + getElementCount(formula, element) + "</sub>";
htmlString += eleToAdd;
} else {
for (IIsotope isotope : getIsotopes(formula, element)) {
String isoToAdd = "<sup>" + isotope.getMassNumber() + "</sup>"
+ isotope.getSymbol() + "<sub>" + formula.getIsotopeCount(isotope) + "</sub>";
htmlString += isoToAdd;
}
}
}
}
if(chargeB){
Integer charge = formula.getCharge();
if((charge == CDKConstants.UNSET) || (charge == 0)){
return htmlString;
} else if (charge < 0) {
return htmlString + "<sup>" + charge * -1 + "-" + "</sup>";
} else {
return htmlString + "<sup>" + charge +"+" + "</sup>";
}
}
return htmlString;
}
/**
* Construct an instance of IMolecularFormula, initialized with a molecular
* formula string. The string is immediately analyzed and a set of Nodes
* is built based on this analysis
* <p> The hydrogens must be implicit.
*
* @param stringMF The molecularFormula string
* @param builder a IChemObjectBuilder which is used to construct atoms
* @return The filled IMolecularFormula
* @see #getMolecularFormula(String,IMolecularFormula)
*/
@TestMethod("testGetMolecularFormula_String_IChemObjectBuilder")
public static IMolecularFormula getMolecularFormula(String stringMF, IChemObjectBuilder builder) {
return getMolecularFormula(stringMF, false, builder);
}
/**
* Construct an instance of IMolecularFormula, initialized with a molecular
* formula string. The string is immediately analyzed and a set of Nodes
* is built based on this analysis. The hydrogens must be implicit. Major
* isotopes are being used.
*
* @param stringMF The molecularFormula string
* @param builder a IChemObjectBuilder which is used to construct atoms
* @return The filled IMolecularFormula
* @see #getMolecularFormula(String,IMolecularFormula)
*/
@TestMethod("testGetMajorIsotopeMolecularFormula_String_IChemObjectBuilder")
public static IMolecularFormula getMajorIsotopeMolecularFormula(String stringMF, IChemObjectBuilder builder) {
return getMolecularFormula(stringMF, true, builder);
}
private static IMolecularFormula getMolecularFormula(String stringMF, boolean assumeMajorIsotope, IChemObjectBuilder builder) {
IMolecularFormula formula = builder.newMolecularFormula();
return getMolecularFormula(stringMF, formula, assumeMajorIsotope);
}
/**
* add in a instance of IMolecularFormula the elements extracts form
* molecular formula string. The string is immediately analyzed and a set of Nodes
* is built based on this analysis
* <p> The hydrogens must be implicit.
*
* @param stringMF The molecularFormula string
* @return The filled IMolecularFormula
* @see #getMolecularFormula(String, IChemObjectBuilder)
*/
@TestMethod("testGetMolecularFormula_String_IMolecularFormula")
public static IMolecularFormula getMolecularFormula(String stringMF, IMolecularFormula formula) {
return getMolecularFormula(stringMF, formula, false);
}
/**
* Add to an instance of IMolecularFormula the elements extracts form
* molecular formula string. The string is immediately analyzed and a set of Nodes
* is built based on this analysis. The hydrogens are assumed to be implicit.
* The boolean indicates if the major isotope is to be assumed, or if no
* assumption is to be made.
*
* @param stringMF The molecularFormula string
* @param assumeMajorIsotope If true, it will take the major isotope for each element
* @return The filled IMolecularFormula
* @see #getMolecularFormula(String, org.openscience.cdk.interfaces.IChemObjectBuilder)
* @see #getMolecularFormula(String, boolean, org.openscience.cdk.interfaces.IChemObjectBuilder)
*/
private static IMolecularFormula getMolecularFormula(String stringMF, IMolecularFormula formula, boolean assumeMajorIsotope) {
if(stringMF.contains(".") || stringMF.contains("(") || stringMF.charAt(0) >= '0' && stringMF.charAt(0) <= '9')
stringMF = simplifyMolecularFormula(stringMF);
// Extract charge from String when contains []X- format
Integer charge = null;
if((stringMF.contains("[") && stringMF.contains("]") )
&& (stringMF.contains("+") || stringMF.contains("-"))){
charge = extractCharge(stringMF);
stringMF = cleanMFfromCharge(stringMF);
}
// FIXME: MF: variables with lower case first char
char ThisChar;
/*
* Buffer for
*/
String RecentElementSymbol = new String();
String RecentElementCountString = new String("0");
/*
* String to be converted to an integer
*/
int RecentElementCount;
if (stringMF.length() == 0) {
return null;
}
for (int f = 0; f < stringMF.length(); f++) {
ThisChar = stringMF.charAt(f);
if (f < stringMF.length()) {
if (ThisChar >= 'A' && ThisChar <= 'Z') {
/*
* New Element begins
*/
RecentElementSymbol = java.lang.String.valueOf(ThisChar);
RecentElementCountString = "0";
}
if (ThisChar >= 'a' && ThisChar <= 'z') {
/*
* Two-letter Element continued
*/
RecentElementSymbol += ThisChar;
}
if (ThisChar >= '0' && ThisChar <= '9') {
/*
* Two-letter Element continued
*/
RecentElementCountString += ThisChar;
}
}
if (f == stringMF.length() - 1 || (stringMF.charAt(f + 1) >= 'A' && stringMF.charAt(f + 1) <= 'Z')) {
/*
* Here an element symbol as well as its number should have been read completely
*/
RecentElementCount = Integer.valueOf(RecentElementCountString);
if (RecentElementCount == 0) {
RecentElementCount = 1;
}
IIsotope isotope = formula.getBuilder().newIsotope(RecentElementSymbol);
if (assumeMajorIsotope) {
try {
isotope = IsotopeFactory.getInstance(isotope.getBuilder()).getMajorIsotope(RecentElementSymbol);
} catch (IOException e) {
throw new RuntimeException("Cannot load the IsotopeFactory");
}
}
formula.addIsotope(isotope, RecentElementCount);
}
}
if(charge != null)
formula.setCharge(charge);
return formula;
}
/**
* Extract the molecular formula when it is defined with charge. e.g. [O3S]2-.
* @param formula The formula to inspect
* @return The corrected formula
*/
private static String cleanMFfromCharge(String formula) {
if(!(formula.contains("[") && formula.contains("]") ))
return formula;
boolean startBreak = false;
String finalFormula = "";
for (int f = 0; f < formula.length(); f++) {
char thisChar = formula.charAt(f);
if(thisChar == '['){
// start
startBreak = true;
}else if(thisChar == ']'){
break;
}else if(startBreak)
finalFormula += thisChar;
}
return finalFormula;
}
/**
* Extract the charge given a molecular formula format [O3S]2-.
*
* @param formula The formula to inspect
* @return The charge
*/
private static int extractCharge(String formula) {
if(!((formula.contains("[") && formula.contains("]"))
&& (formula.contains("+") || formula.contains("-"))))
return 0;
boolean finishBreak = false;
String multiple = "";
for (int f = 0; f < formula.length(); f++) {
char thisChar = formula.charAt(f);
if(thisChar == ']'){
// finish
finishBreak = true;
}else if(thisChar == '-'){
multiple = thisChar + multiple;
break;
}else if(thisChar == '+' )
break;
else if(finishBreak)
multiple += thisChar;
}
if(multiple.equals("") || multiple.equals("-"))
multiple += 1;
return new Integer(multiple);
}
/**
* Get the summed exact mass of all isotopes from an MolecularFormula. It
* assumes isotope masses to be preset, and returns 0.0 if not.
*
* @param formula The IMolecularFormula to calculate
* @return The summed exact mass of all atoms in this MolecularFormula
*/
@TestMethod("testGetTotalExactMass_IMolecularFormula")
public static double getTotalExactMass(IMolecularFormula formula) {
Double mass = 0.0;
IChemObjectBuilder builder = formula.getBuilder();
for (IIsotope isotope : formula.isotopes()) {
if (isotope.getExactMass() == CDKConstants.UNSET) {
try {
mass += IsotopeFactory.getInstance(builder).getMajorIsotope(isotope.getSymbol()).getExactMass() *
formula.getIsotopeCount(isotope);
} catch (IOException e) {
throw new RuntimeException("Could not instantiate the IsotopeFactory.");
}
} else
mass += isotope.getExactMass() * formula.getIsotopeCount(isotope);
}
if(formula.getCharge() != null)
mass = correctMass(mass,formula.getCharge());
return mass;
}
/**
* Correct the mass according the charge of the IMmoleculeFormula.
* Negative charge will add the mass of one electron to the mass.
*
* @param mass The mass to correct
* @param charge The charge
* @return The mass with the correction
*/
private static double correctMass(double mass, Integer charge) {
double massE = 0.00054857990927;
if(charge > 0)
mass -= massE*charge;
else if(charge < 0)
mass += massE*Math.abs(charge);
return mass;
}
/**
* Get the summed mass number of all isotopes from an MolecularFormula. It
* assumes isotope masses to be preset, and returns 0.0 if not.
*
* @param formula The IMolecularFormula to calculate
* @return The summed nominal mass of all atoms in this MolecularFormula
*/
@TestMethod("testGetTotalMassNumber_IMolecularFormula")
public static double getTotalMassNumber(IMolecularFormula formula) {
double mass = 0.0;
for (IIsotope isotope : formula.isotopes()) {
try {
IIsotope isotope2 = IsotopeFactory.getInstance(formula.getBuilder()).getMajorIsotope(isotope.getSymbol());
mass += isotope2.getAtomicNumber() * formula.getIsotopeCount(isotope);
} catch (IOException e) {
e.printStackTrace();
}
}
return mass;
}
/**
* Get the summed natural mass of all elements from an MolecularFormula.
*
* @param formula The IMolecularFormula to calculate
* @return The summed exact mass of all atoms in this MolecularFormula
*/
@TestMethod("testGetNaturalExactMass_IMolecularFormula")
public static double getNaturalExactMass(IMolecularFormula formula) {
double mass = 0.0;
IsotopeFactory factory;
try {
factory = IsotopeFactory.getInstance(formula.getBuilder());
} catch (IOException e) {
throw new RuntimeException("Could not instantiate the IsotopeFactory.");
}
for (IIsotope isotope : formula.isotopes()) {
IElement isotopesElement = isotope.getBuilder().newElement(isotope);
mass += factory.getNaturalMass(isotopesElement) * formula.getIsotopeCount(isotope);
}
return mass;
}
/**
* Get the summed major isotopic mass of all elements from an MolecularFormula.
*
* @param formula The IMolecularFormula to calculate
* @return The summed exact major isotope masses of all atoms in this MolecularFormula
*/
@TestMethod("testGetMajorIsotopeMass_IMolecularFormula")
public static double getMajorIsotopeMass(IMolecularFormula formula) {
double mass = 0.0;
IsotopeFactory factory;
try {
factory = IsotopeFactory.getInstance(formula.getBuilder());
} catch (IOException e) {
throw new RuntimeException("Could not instantiate the IsotopeFactory.");
}
for (IIsotope isotope : formula.isotopes()) {
IIsotope major = factory.getMajorIsotope(isotope.getSymbol());
mass += major.getExactMass() * formula.getIsotopeCount(isotope);
}
return mass;
}
/**
* Get the summed natural abundance of all isotopes from an MolecularFormula. Assumes
* abundances to be preset, and will return 0.0 if not.
*
* @param formula The IMolecularFormula to calculate
* @return The summed natural abundance of all isotopes in this MolecularFormula
*/
@TestMethod("testGetTotalNaturalAbundance_IMolecularFormula")
public static double getTotalNaturalAbundance(IMolecularFormula formula) {
double abundance = 1.0;
for (IIsotope isotope : formula.isotopes()) {
if (isotope.getNaturalAbundance() == null) return 0.0;
abundance = abundance * Math.pow(isotope.getNaturalAbundance(), formula.getIsotopeCount(isotope));
}
return abundance/Math.pow(100,getAtomCount(formula));
}
/**
* Returns the number of double bond equivalents in this molecule.
*
* @param formula The IMolecularFormula to calculate
* @return The number of DBEs
* @throws CDKException if DBE cannot be be evaluated
*
* @cdk.keyword DBE
* @cdk.keyword double bond equivalent
*/
@TestMethod("testGetDBE_IMolecularFormula")
public static double getDBE(IMolecularFormula formula) throws CDKException{
int valencies[] = new int[5];
IAtomContainer ac = getAtomContainer(formula);
AtomTypeFactory factory = AtomTypeFactory.getInstance("org/openscience/cdk/config/data/structgen_atomtypes.xml", ac.getBuilder());
for (int f = 0; f < ac.getAtomCount(); f++) {
IAtomType[] types = factory.getAtomTypes(ac.getAtom(f).getSymbol());
if(types.length==0)
throw new CDKException("Calculation of double bond equivalents not possible due to problems with element "+ac.getAtom(f).getSymbol());
// valencies[(int) (types[0].getBondOrderSum() + ac.getAtom(f).getFormalCharge())]++;
valencies[types[0].getBondOrderSum().intValue()]++;
}
return 1 + (valencies[4]) + (valencies[3] /2) - (valencies[1] /2);
}
/**
* Method that actually does the work of convert the atomContainer
* to IMolecularFormula.
* <p> The hydrogens must be implicit.
*
* @param atomContainer IAtomContainer object
* @see #getMolecularFormula(IAtomContainer,IMolecularFormula)
* @return a molecular formula object
*/
@TestMethod("testGetMolecularFormula_IAtomContainer")
public static IMolecularFormula getMolecularFormula(IAtomContainer atomContainer) {
IMolecularFormula formula = atomContainer.getBuilder().newMolecularFormula();
return getMolecularFormula(atomContainer, formula);
}
/**
* Method that actually does the work of convert the atomContainer
* to IMolecularFormula given a IMolecularFormula.
* <p> The hydrogens must be implicit.
*
* @param atomContainer IAtomContainer object
* @param formula IMolecularFormula molecularFormula to put the new Isotopes
* @return the filled AtomContainer
* @see #getMolecularFormula(IAtomContainer)
*/
@TestMethod("testGetMolecularFormula_IAtomContainer_IMolecularFormula")
public static IMolecularFormula getMolecularFormula(IAtomContainer atomContainer, IMolecularFormula formula) {
int charge = 0;
for (IAtom iAtom : atomContainer.atoms()) {
formula.addIsotope(iAtom);
charge += iAtom.getFormalCharge();
}
formula.setCharge(charge);
return formula;
}
/**
* Method that actually does the work of convert the IMolecularFormula
* to IAtomContainer.
* <p> The hydrogens must be implicit.
*
* @param formula IMolecularFormula object
* @return the filled AtomContainer
* @see #getAtomContainer(IMolecularFormula, IAtomContainer)
*/
@TestMethod("testGetAtomContainer_IMolecularFormula")
public static IAtomContainer getAtomContainer(IMolecularFormula formula) {
IAtomContainer atomContainer = formula.getBuilder().newAtomContainer();
return getAtomContainer(formula, atomContainer);
}
/**
* Method that actually does the work of convert the IMolecularFormula
* to IAtomContainer given a IAtomContainer.
* <p> The hydrogens must be implicit.
*
* @param formula IMolecularFormula object
* @param atomContainer IAtomContainer to put the new Elements
* @return the filled AtomContainer
* @see #getAtomContainer(IMolecularFormula)
*/
@TestMethod("testGetAtomContainer_IMolecularFormula_IAtomContainer")
public static IAtomContainer getAtomContainer(IMolecularFormula formula, IAtomContainer atomContainer) {
for (IIsotope isotope : formula.isotopes()) {
int occur = formula.getIsotopeCount(isotope);
for (int i = 0; i < occur; i++)
atomContainer.addAtom(formula.getBuilder().newAtom(isotope));
}
return atomContainer;
}
/**
* Generate the order of the Elements according probability occurrence.,
* beginning the C, H, O, N, Si, P, S, F, Cl, Br, I, Sn, B, Pb, Tl, Ba, In, Pd,
* Pt, Os, Ag, Zr, Se, Zn, Cu, Ni, Co, Fe, Cr, Ti, Ca, K, Al, Mg, Na, Ce,
* Hg, Au, Ir, Re, W, Ta, Hf, Lu, Yb, Tm, Er, Ho, Dy, Tb, Gd, Eu, Sm, Pm,
* Nd, Pr, La, Cs, Xe, Te, Sb, Cd, Rh, Ru, Tc, Mo, Nb, Y, Sr, Rb, Kr, As,
* Ge, Ga, Mn, V, Sc, Ar, Ne, Be, Li, Tl, Pb, Bi, Po, At, Rn, Fr, Ra, Ac,
* Th, Pa, U, Np, Pu.
*
* @return Array with the elements ordered
*
*/
public static String[] generateOrderEle(){
return new String[]{
"C", "H", "O", "N", "Si", "P", "S", "F", "Cl",
"Br", "I", "Sn", "B", "Pb", "Tl", "Ba", "In", "Pd",
"Pt", "Os", "Ag", "Zr", "Se", "Zn", "Cu", "Ni", "Co",
"Fe", "Cr", "Ti", "Ca", "K", "Al", "Mg", "Na", "Ce",
"Hg", "Au", "Ir", "Re", "W", "Ta", "Hf", "Lu", "Yb",
"Tm", "Er", "Ho", "Dy", "Tb", "Gd", "Eu", "Sm", "Pm",
"Nd", "Pr", "La", "Cs", "Xe", "Te", "Sb", "Cd", "Rh",
"Ru", "Tc", "Mo", "Nb", "Y", "Sr", "Rb", "Kr", "As",
"Ge", "Ga", "Mn", "V", "Sc", "Ar", "Ne", "Be", "Li",
"Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac",
"Th", "Pa", "U", "Np", "Pu",
"R"}; // Extract object typically from molecular formulas
}
/**
* Generate the order of the Elements according Hill system
* when doesn't contain carbons.
*
* @return Array with the elements ordered
*/
private static String[] generateOrderEle_Hill_NoCarbons(){
return new String[]{
"Ac", "Ag", "Al", "Ar", "As", "At", "Au",
"B", "Ba", "Be", "Bi", "Br", "Ca", "Cd", "Ce", "Cl", "Co", "Cr", "Cs", "Cu",
"Dy", "Er", "Eu", "F", "Fe", "Fr",
"Ga", "Gd", "Ge", "H", "Hf", "Hg", "Ho", "I", "In", "Ir",
"K", "Kr", "La", "Li", "Lu", "Mg", "Mn", "Mo",
"N", "Na", "Nb", "Nd", "Ne", "Ni", "Np", "O", "Os",
"P", "Pa", "Pb", "Pd", "Pm", "Po", "Pr", "Pt", "Pu",
"Ra", "Rb", "Re", "Rh", "Rn", "Ru",
"S", "Sb", "Sc", "Se", "Si", "Sr", "Sm", "Sn",
"Ta", "Tb", "Tc", "Te", "Th", "Ti", "Tl", "Tm",
"U", "V", "W", "Xe", "Y", "Yb", "Zn", "Zr",
"R"}; // Extract object typically from molecular formulas
}
/**
* Generate the order of the Elements according Hill system
* when contains carbons.
*
* @return Array with the elements ordered
*/
private static String[] generateOrderEle_Hill_WithCarbons(){
return new String[]{
"C", "H", "Ac", "Ag", "Al", "Ar", "As", "At", "Au",
"B", "Ba", "Be", "Bi", "Br", "Ca", "Cd", "Ce", "Cl", "Co", "Cr", "Cs", "Cu",
"Dy", "Er", "Eu", "F", "Fe", "Fr",
"Ga", "Gd", "Ge", "Hf", "Hg", "Ho", "I", "In", "Ir",
"K", "Kr", "La", "Li", "Lu", "Mg", "Mn", "Mo",
"N", "Na", "Nb", "Nd", "Ne", "Ni", "Np", "O", "Os",
"P", "Pa", "Pb", "Pd", "Pm", "Po", "Pr", "Pt", "Pu",
"Ra", "Rb", "Re", "Rh", "Rn", "Ru",
"S", "Sb", "Sc", "Se", "Si", "Sr", "Sm", "Sn",
"Ta", "Tb", "Tc", "Te", "Th", "Ti", "Tl", "Tm",
"U", "V", "W", "Xe", "Y", "Yb", "Zn", "Zr",
"R"}; // Extract object typically from molecular formulas
}
/**
* Compare two IMolecularFormula looking at type and number of IIsotope and
* charge of the formula.
*
* @param formula1 The first IMolecularFormula
* @param formula2 The second IMolecularFormula
* @return True, if the both IMolecularFormula are the same
*/
@TestMethod("testCompare_IMolecularFormula_IMolecularFormula")
public static boolean compare(IMolecularFormula formula1, IMolecularFormula formula2){
if(formula1.getCharge() != formula2.getCharge())
return false;
if(formula1.getIsotopeCount() != formula2.getIsotopeCount())
return false;
for (IIsotope isotope : formula1.isotopes()) {
if (!formula2.contains(isotope))
return false;
if (formula1.getIsotopeCount(isotope) != formula2.getIsotopeCount(isotope))
return false;
}
for (IIsotope isotope : formula2.isotopes()) {
if (!formula1.contains(isotope))
return false;
if (formula2.getIsotopeCount(isotope) != formula1.getIsotopeCount(isotope))
return false;
}
return true;
}
/**
* Returns a set of nodes excluding all the hydrogens
*
* @param formula The IMolecularFormula
* @return The heavyElements value into a List
*
* @cdk.keyword hydrogen, removal
*/
@TestMethod("testGetHeavyElements_IMolecularFormula")
public static List<IElement> getHeavyElements(IMolecularFormula formula) {
List<IElement> newEle = new ArrayList<IElement>();
for (IElement element : elements(formula)) {
if (!element.getSymbol().equals("H")) {
newEle.add(element);
}
}
return newEle;
}
/**
* Simplify the molecular formula. E.g the dot '.' character convention is
* used when dividing a formula into parts. In this case any numeral following a dot refers
* to all the elements within that part of the formula that follow it.
*
* @param formula The molecular formula
* @return The simplified molecular formula
*/
@TestMethod("testSimplifyMolecularFormula_String")
public static String simplifyMolecularFormula(String formula) {
String newFormula = formula;
char thisChar;
if(formula.contains(" ")){
newFormula = newFormula.replace(" ", "");
}
if(!formula.contains("."))
return breakExtractor(formula);
List<String> listMF = new ArrayList<String>();
while(newFormula.contains(".")){
int pos = newFormula.indexOf(".");
String thisFormula = newFormula.substring(0, pos);
if(thisFormula.charAt(0) >= '0' && thisFormula.charAt(0) <= '9')
thisFormula = multipleExtractor(thisFormula);
if(thisFormula.contains("("))
thisFormula = breakExtractor(thisFormula);
listMF.add(thisFormula);
thisFormula = newFormula.substring(pos+1,newFormula.length());
if(!thisFormula.contains(".")){
if(thisFormula.charAt(0) >= '0' && thisFormula.charAt(0) <= '9')
thisFormula = multipleExtractor(thisFormula);
if(thisFormula.contains("("))
thisFormula = breakExtractor(thisFormula);
listMF.add(thisFormula);
}
newFormula = thisFormula;
}
if(newFormula.contains("("))
newFormula = breakExtractor(newFormula);
String recentElementSymbol = new String();
String recentElementCountString = new String("0");
List<String> eleSymb = new ArrayList<String>();
List<Integer> eleCount = new ArrayList<Integer>();
for(int i = 0 ; i < listMF.size(); i++){
String thisFormula = listMF.get(i);
for (int f = 0; f < thisFormula.length(); f++) {
thisChar = thisFormula.charAt(f);
if (f < thisFormula.length()) {
if (thisChar >= 'A' && thisChar <= 'Z') {
recentElementSymbol = String.valueOf(thisChar);
recentElementCountString = "0";
}
if (thisChar >= 'a' && thisChar <= 'z') {
recentElementSymbol += thisChar;
}
if (thisChar >= '0' && thisChar <= '9') {
recentElementCountString += thisChar;
}
}
if (f == thisFormula.length() - 1 || (thisFormula.charAt(f + 1) >= 'A' && thisFormula.charAt(f + 1) <= 'Z')) {
int posit = eleSymb.indexOf(recentElementSymbol);
int count = Integer.valueOf(recentElementCountString);
if(posit == -1){
eleSymb.add(recentElementSymbol);
eleCount.add(count);
}else{
int countP = Integer.valueOf(recentElementCountString);
if(countP == 0)
countP = 1;
int countA = eleCount.get(posit);
if(countA == 0)
countA = 1;
int value = countP+countA;
eleCount.remove(posit);
eleCount.add(posit,value);
}
}
}
}
String newF = new String();
for(int i = 0 ; i < eleCount.size(); i++){
String element = eleSymb.get(i);
int num = eleCount.get(i);
if(num == 0)
newF += element;
else
newF += element+num;
}
return newF;
}
/**
* The parenthesis convention is used to show a quantity by which a formula is multiplied.
* For example: (C12H20O11)2 really means that a C24H40O22 unit.
*
* @param formula Formula to correct
* @return Formula with the correction
*/
private static String breakExtractor(String formula) {
boolean finalBreak = false;
String recentformula = new String();
String multiple = new String("0");
for (int f = 0; f < formula.length(); f++) {
char thisChar = formula.charAt(f);
if (thisChar == '('){
// start
}else if(thisChar == ')'){
// final
finalBreak = true;
}else if(!finalBreak)
recentformula += thisChar;
else
multiple += thisChar;
}
String finalformula = muliplier(recentformula, Integer.valueOf(multiple));
return finalformula;
}
/**
* The starting with numeric value is used to show a quantity by which a formula is multiplied.
* For example: 2H2O really means that a H4O2 unit.
*
* @param formula Formula to correct
* @return Formula with the correction
*/
private static String multipleExtractor(String formula) {
String recentCompoundCount = new String("0");
String recentCompound = new String();
boolean found = false;
for (int f = 0; f < formula.length(); f++) {
char thisChar = formula.charAt(f);
if (thisChar >= '0' && thisChar <= '9'){
if(!found){
recentCompoundCount += thisChar;
}else
recentCompound += thisChar;
}else{
found = true;
recentCompound += thisChar;
}
}
return muliplier(recentCompound, Integer.valueOf(recentCompoundCount));
}
/**
* This method multiply all the element over a value.
*
* @param formula Formula to correct
* @param factor Factor to multiply
* @return Formula with the correction
*/
private static String muliplier(String formula, int factor) {
String finalformula = new String();
String recentElementSymbol = new String();
String recentElementCountString = new String("0");
for (int f = 0; f < formula.length(); f++) {
char thisChar = formula.charAt(f);
if (f < formula.length()) {
if (thisChar >= 'A' && thisChar <= 'Z') {
recentElementSymbol = String.valueOf(thisChar);
recentElementCountString = "0";
}
if (thisChar >= 'a' && thisChar <= 'z') {
recentElementSymbol += thisChar;
}
if (thisChar >= '0' && thisChar <= '9') {
recentElementCountString += thisChar;
}
}
if (f == formula.length() - 1 || (formula.charAt(f + 1) >= 'A' && formula.charAt(f + 1) <= 'Z')) {
Integer recentElementCount = Integer.valueOf(recentElementCountString);
if(recentElementCount == 0)
finalformula += recentElementSymbol+factor;
else
finalformula += recentElementSymbol+recentElementCount*factor;
}
}
return finalformula;
}
}