/* $RCSfile$ * $Author$ * $Date$ * $Revision$ * * Copyright (C) 2003 University of Manchester * Copyright (C) 2003-2007 The Chemistry Development Kit (CDK) Project * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * (or see http://www.gnu.org/copyleft/lesser.html) */ package org.openscience.cdk.iupac.parser; import java.util.Iterator; import java.util.Vector; import org.openscience.cdk.Atom; import org.openscience.cdk.Bond; import org.openscience.cdk.Molecule; import org.openscience.cdk.Ring; import org.openscience.cdk.aromaticity.CDKHueckelAromaticityDetector; import org.openscience.cdk.atomtype.CDKAtomTypeMatcher; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomType; import org.openscience.cdk.interfaces.IBond; import org.openscience.cdk.interfaces.IBond.Order; import org.openscience.cdk.templates.MoleculeFactory; import org.openscience.cdk.tools.CDKHydrogenAdder; import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; import org.openscience.cdk.tools.manipulator.AtomTypeManipulator; /** * Takes in parsed Tokens from NomParser and contains rules * to convert those tokens to a Molecule. * * @see Token * @author David Robinson * @cdk.githash * @author Bhupinder Sandhu * @author Stephen Tomkinson * * @cdk.require ant1.6 */ public class MoleculeBuilder { /** The molecule which is worked upon throughout the class and returned at the end */ private Molecule currentMolecule = new Molecule(); private IAtom endOfChain; /** * Builds the main chain which may act as a foundation for futher working groups. * * @param mainChain The parsed prefix which depicts the chain's length. * @param isMainCyclic A flag to show if the molecule is a ring. 0 means not a ring, 1 means is a ring. * @return A Molecule containing the requested chain. */ private Molecule buildChain(int length, boolean isMainCyclic) { Molecule currentChain; if (length > 0) { //If is cyclic if (isMainCyclic) { //Rely on CDK's ring class constructor to generate our cyclic molecules. currentChain = new Molecule(); currentChain.add(new Ring(length, "C")); } //Else must not be cyclic else { currentChain = MoleculeFactory.makeAlkane(length); } } else { currentChain = new Molecule(); } return currentChain; } /** * Initiates the building of the molecules functional group(s). * Adds the functional group to atom 0 if only one group exists or runs * down the list of positions adding groups as required. * * @param attachedGroups A vector of AttachedGroup's representing functional groups. * @see #addFunGroup */ private void buildFunGroups(Vector attachedGroups) { Iterator groupsIterator = attachedGroups.iterator(); while (groupsIterator.hasNext()) { AttachedGroup attachedGroup = (AttachedGroup) groupsIterator.next(); Iterator locationsIterator = attachedGroup.getLocations().iterator(); while (locationsIterator.hasNext()) { Token locationToken = (Token) locationsIterator.next(); addFunGroup(attachedGroup.getName(), Integer.parseInt(locationToken.image) - 1); } } } /** * Adds a functional group to a given atom in the current molecule. * * @param funGroupToken The token which denotes this specific functional group. * @param addPos The atom to add the group to. */ private void addFunGroup(String funGroupToken, int addPos) { //BOND MODIFICATION //Alkanes - Single bond if (funGroupToken == "an") { //Do nothing since all bonds are single by default. } //Alkenes - Double bond else if (funGroupToken == "en") { //If functional group hasn't had a location specified: if (addPos < 0) { //Set the first bond to an order of 2 (i.e. a double bond) currentMolecule.getBond(0).setOrder(IBond.Order.DOUBLE); } else { //Set the addPos'th bond to an order of 2 (i.e. a double bond) currentMolecule.getBond(addPos).setOrder(IBond.Order.DOUBLE); } } //Alkynes - Tripple bond else if (funGroupToken == "yn") { //If functional group hasn't had a location specified: if (addPos < 0) { //Set the first bond to an order of 3 (i.e. a tripple bond) currentMolecule.getBond(0).setOrder(IBond.Order.TRIPLE); } else { //Set the addPos'th bond to an order of 3 (i.e. a tripple bond) currentMolecule.getBond(addPos).setOrder(IBond.Order.TRIPLE); } } //FUNCTIONAL GROUP SUFFIXES //Ending "e" else if (funGroupToken == "e") { //Do nothing, since the "e" is found at the end of chain names //with a bond modifer but no functional groups. } //Alcohols else if (funGroupToken == "ol" || funGroupToken == "hydroxy") { //If functional group hasn't had a location specified: if (addPos < 0) { addAtom("O", endOfChain, IBond.Order.SINGLE, 1); } else { addAtom("O", currentMolecule.getAtom(addPos), IBond.Order.SINGLE, 1); } } //Aldehydes else if (funGroupToken == "al") { addAtom("O", endOfChain, IBond.Order.DOUBLE, 0); } //Carboxylic acid else if (funGroupToken == "oic acid") { addAtom("O", endOfChain, IBond.Order.DOUBLE, 0); addAtom("O", endOfChain, IBond.Order.SINGLE, 1); } //Carboxylic Acid Chloride else if (funGroupToken == "oyl chloride") { addAtom("O", endOfChain, IBond.Order.DOUBLE, 0); addAtom("Cl", endOfChain, IBond.Order.SINGLE, 0); } //PREFIXES //Halogens //Chlorine else if (funGroupToken == "chloro") { //If functional group hasn't had a location specified: if (addPos < 0) { addAtom("Cl", currentMolecule.getFirstAtom(), IBond.Order.SINGLE, 0); } else { addAtom("Cl", currentMolecule.getAtom(addPos), IBond.Order.SINGLE, 0); } } //Fluorine else if (funGroupToken == "fluoro") { //If functional group hasn't had a location specified: if (addPos < 0) { addAtom("F", currentMolecule.getFirstAtom(), IBond.Order.SINGLE, 0); } else { addAtom("F", currentMolecule.getAtom(addPos), IBond.Order.SINGLE, 0); } } //Bromine else if (funGroupToken == "bromo") { //If functional group hasn't had a location specified: if (addPos < 0) { addAtom("Br", currentMolecule.getFirstAtom(), IBond.Order.SINGLE, 0); } else { addAtom("Br", currentMolecule.getAtom(addPos), IBond.Order.SINGLE, 0); } } //Iodine else if (funGroupToken == "iodo") { //If functional group hasn't had a location specified: if (addPos < 0) { addAtom("I", currentMolecule.getFirstAtom(), IBond.Order.SINGLE, 0); } else { addAtom("I", currentMolecule.getAtom(addPos), IBond.Order.SINGLE, 0); } } //Nitro else if (funGroupToken == "nitro") { //If functional group hasn't had a location specified: if (addPos < 0) { addAtom("N", currentMolecule.getFirstAtom(), IBond.Order.SINGLE, 0); } else { addAtom("N", currentMolecule.getAtom(addPos), IBond.Order.SINGLE, 0); } //Stuff which applied no matter where the N atom is: IAtom nitrogenAtom = currentMolecule.getLastAtom(); nitrogenAtom.setFormalCharge(+1); addAtom("O", nitrogenAtom, IBond.Order.SINGLE, 0); currentMolecule.getLastAtom().setFormalCharge(-1); addAtom("O", nitrogenAtom, IBond.Order.DOUBLE, 0); } //Oxo else if (funGroupToken == "oxo") { //If functional group hasn't had a location specified: if (addPos < 0) { addAtom("O", currentMolecule.getFirstAtom(), IBond.Order.DOUBLE, 0); } else { addAtom("O", currentMolecule.getAtom(addPos), IBond.Order.DOUBLE, 0); } } //Nitrile else if (funGroupToken == "nitrile" ) { addAtom("N", currentMolecule.getFirstAtom(), IBond.Order.TRIPLE, 0); } //Benzene else if (funGroupToken == "phenyl" ) { Molecule benzene = MoleculeFactory.makeBenzene(); //Detect Aromacity in the benzene ring. try { AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(benzene); CDKHueckelAromaticityDetector.detectAromaticity(benzene); } catch (Exception exc) { // logger.debug("No atom detected"); } currentMolecule.add(benzene); Bond joiningBond; //If functional group hasn't had a location specified: if (addPos < 0) { joiningBond = new Bond(currentMolecule.getFirstAtom(), benzene.getFirstAtom()); } else { joiningBond = new Bond(currentMolecule.getAtom(addPos), benzene.getFirstAtom()); } currentMolecule.addBond(joiningBond); } else if (funGroupToken == "amino" ) { //If functional group hasn't had a location specified: if (addPos < 0) { addAtom("N", currentMolecule.getFirstAtom(), IBond.Order.SINGLE, 2); } else { addAtom("N", currentMolecule.getAtom(addPos), IBond.Order.SINGLE, 2); } } //ORGANO METALLICS ADDED AS PREFIXES else if (funGroupToken == "alumino" ) { //If functional group hasn't had a location specified: if (addPos < 0) { addAtom("Al", currentMolecule.getFirstAtom(), IBond.Order.SINGLE, 2); } else { addAtom("Al", currentMolecule.getAtom(addPos), IBond.Order.SINGLE, 2); } } else if (funGroupToken == "litho" ) { //If functional group hasn't had a location specified: if (addPos < 0) { addAtom("Li", currentMolecule.getFirstAtom(), IBond.Order.SINGLE, 2); } else { addAtom("Li", currentMolecule.getAtom(addPos), IBond.Order.SINGLE, 2); } } //PRIORITY SUBSTITUENTS //FUNCTIONAL GROUPS WHICH MAY HAVE THEIR OWN SUBSTITUENTS //Esters ("...oate") else if (funGroupToken == "oate") { addAtom("O", endOfChain, IBond.Order.DOUBLE, 0); addAtom("O", endOfChain, IBond.Order.SINGLE, 0); //Set the end of the chain to be built on for unspecified substituents. endOfChain = currentMolecule.getLastAtom(); } //Amines else if (funGroupToken == "amine") { addAtom("N", endOfChain, IBond.Order.SINGLE, 1); //Set the end of the chain to be built on for unspecified substituents. endOfChain = currentMolecule.getLastAtom(); } //Amides else if (funGroupToken =="amide") { addAtom("O", endOfChain, IBond.Order.DOUBLE, 0); addAtom("N", endOfChain, IBond.Order.SINGLE, 1); //Set the end of the chain to be built on for unspecified substituents. endOfChain = currentMolecule.getLastAtom(); } //Ketones else if (funGroupToken == "one") { addAtom("O", endOfChain, IBond.Order.DOUBLE, 2); //End of chain doesn't change in this case } //Organometals else if (getMetalAtomicSymbol (funGroupToken) != null) { currentMolecule.addAtom (new Atom (getMetalAtomicSymbol (funGroupToken))); endOfChain = currentMolecule.getLastAtom(); } else { // logger.debug("Encountered unknown group: " + funGroupToken + " at " + addPos + // "\nThe parser thinks this is valid but the molecule builder has no logic for it"); } } /** * Translates a metal's name into it's atomic symbol. * * @param metalName The name of the metal, e.g. lead * @return The given metal's atomic symbol e.g. Pb or null if none exist. */ String getMetalAtomicSymbol (String metalName) { if (metalName == "aluminium") { return "Al"; } else if (metalName == "magnesium" ) { return "Mg"; } else if (metalName == "gallium") { return "Ga"; } else if (metalName == "indium") { return "In"; } else if (metalName == "thallium") { return "Tl"; } else if (metalName == "germanium") { return "Ge"; } else if (metalName == "tin") { return "Sn"; } else if (metalName == "lead") { return "Pb"; } else if (metalName == "arsenic") { return "As"; } else if (metalName == "antimony") { return "Sb"; } else if (metalName == "bismuth") { return "Bi"; } return null; } /** * Adds an atom to the current molecule. * * @param newAtomType The atomic symbol for the atom. * @param otherConnectingAtom An atom already in the molecule which * the new one should connect to. * @param bondOrder The order of the bond to use to join the two atoms. * @param hydrogenCount The number of hydrogen atoms connected to this atom. */ private void addAtom(String newAtomType, IAtom otherConnectingAtom, Order bondOrder, int hydrogenCount) { //Create the new atom and bond. Atom newAtom = new Atom(newAtomType); newAtom.setHydrogenCount(hydrogenCount); Bond newBond = new Bond(newAtom, otherConnectingAtom, bondOrder); //Add the new atom and bond to the molecule. currentMolecule.addAtom(newAtom); currentMolecule.addBond(newBond); } /** * Adds other chains to the main chain connected at the specified atom. * * @param attachedSubstituents A vector of AttachedGroup's representing substituents. */ private void addHeads(Vector attachedSubstituents) { Iterator substituentsIterator = attachedSubstituents.iterator(); while (substituentsIterator.hasNext()) { AttachedGroup attachedSubstituent = (AttachedGroup) substituentsIterator.next(); Iterator locationsIterator = attachedSubstituent.getLocations().iterator(); while (locationsIterator.hasNext()) { Token locationToken = (Token) locationsIterator.next(); int joinLocation = Integer.parseInt(locationToken.image) - 1; IAtom connectingAtom; //If join location wasn't specified we must be dealing with the "hack" which makes //mainchains a substituent if a real substituent has already been parsed and interpreted as a main chain if (joinLocation < 0) { connectingAtom = endOfChain; } else { connectingAtom = currentMolecule.getAtom(joinLocation); } Molecule subChain = buildChain(attachedSubstituent.getLength(), false); Bond linkingBond = new Bond(subChain.getFirstAtom(), connectingAtom); currentMolecule.addBond(linkingBond); currentMolecule.add(subChain); } } } /** * Start of the process of building a molecule from the parsed data. Passes the parsed * tokens to other functions which build up the Molecule. * * @param mainChain The string representation of the length of the main chain. * @param attachedSubstituents A vector of AttachedGroup's representing substituents. * @param attachedGroups A vector of AttachedGroup's representing functional groups. * @param isMainCyclic An indiacation of if the main chain is cyclic. * @return The molecule as built from the parsed tokens. */ protected Molecule buildMolecule(int mainChain, Vector attachedSubstituents , Vector attachedGroups, boolean isMainCyclic, String name) throws ParseException, CDKException { //Set up the molecle's name currentMolecule.setID(name); //Build the main chain currentMolecule.add(buildChain(mainChain,isMainCyclic)); //Set the last atom here if a main chain has been built, //if not rely on the functional group setting one of it's atoms as last if (mainChain != 0) endOfChain = currentMolecule.getLastAtom(); //Add functional groups buildFunGroups(attachedGroups); //Add on further sub chains addHeads(attachedSubstituents); //Add the hydrogens to create a balanced molecule CDKAtomTypeMatcher matcher = CDKAtomTypeMatcher.getInstance(currentMolecule.getBuilder()); Iterator<IAtom> atoms = currentMolecule.atoms().iterator(); while (atoms.hasNext()) { IAtom atom = atoms.next(); IAtomType type = matcher.findMatchingAtomType(currentMolecule, atom); AtomTypeManipulator.configure(atom, type); } CDKHydrogenAdder hAdder = CDKHydrogenAdder.getInstance(currentMolecule.getBuilder()); hAdder.addImplicitHydrogens(currentMolecule); return currentMolecule; } }