/*
* Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.fhcrc.cpl.toolbox.chem;
import org.fhcrc.cpl.toolbox.filehandler.TabLoader;
import org.fhcrc.cpl.toolbox.ApplicationContext;
import org.openscience.cdk.interfaces.*;
import org.openscience.cdk.smiles.SmilesParser;
import org.openscience.cdk.DefaultChemObjectBuilder;
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator;
import org.openscience.cdk.tools.LonePairElectronChecker;
import org.openscience.cdk.reaction.IReactionProcess;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.exception.InvalidSmilesException;
import org.apache.log4j.Logger;
import java.util.*;
import java.io.File;
import java.io.IOException;
/**
* Represents a named chemical compound. Contains a chemical formula and a name. Lots of passing through
* of methods into the chemical formula
* User: dhmay
* Date: Apr 6, 2010
* Time: 4:06:07 PM
* To change this template use File | Settings | File Templates.
*/
public class ChemicalCompound
{
protected static Logger _log = Logger.getLogger(ChemicalCompound.class);
protected String name;
protected ChemicalFormula formula;
protected IMolecule cdkMolecule;
//this will need to get more sophisticated once we figure out what's useful
protected String compoundClass;
/**
* Creates a ChemicalCompound, does not populate peak masses and intensities
* @param name
* @param formulaString
* @throws IllegalArgumentException
*/
public ChemicalCompound(String name, String formulaString) throws IllegalArgumentException
{
this(name, formulaString, 0);
}
/**
* Creates a ChemicalCompound, populates peak masses and intensities as specified
* @param name
* @param formulaString
* @param numPeaksToPopulate
* @throws IllegalArgumentException
*/
public ChemicalCompound(String name, String formulaString, int numPeaksToPopulate)
throws IllegalArgumentException
{
this(name, new ChemicalFormula(formulaString, numPeaksToPopulate));
}
public static ChemicalCompound createFromSmiles(String name, String smilesString)
throws CDKException
{
SmilesParser smilesParser = new SmilesParser(DefaultChemObjectBuilder.getInstance());
IMolecule cdkMolecule = smilesParser.parseSmiles(smilesString);
return new ChemicalCompound(name, cdkMolecule);
}
/**
*
* @param name
* @param formula
*/
public ChemicalCompound(String name, ChemicalFormula formula)
{
this.name = name;
this.formula = formula;
}
/**
* SIDE EFFECT: converts implicit to explicit hydrogens in molecule, then converts them /back/
* @param name
* @param molecule
*/
public ChemicalCompound(String name, IMolecule molecule)
throws CDKException
{
this.name = name;
this.cdkMolecule = molecule;
AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(cdkMolecule);
//When SmilesParser gives us a molecule, the hydrogens are implicit. If you,
//e.g., create a MolecularFormula object from the Molecule and ask it for its mass
//and its formula string, they will be wrong, because they have no H's.
//So I add the hydrogens, create the formula with the hydrogens, and then remove them
AtomContainerManipulator.convertImplicitToExplicitHydrogens(cdkMolecule);
formula = ChemCalcs.CDKMolForm2ChemForm(MolecularFormulaManipulator.getMolecularFormula(cdkMolecule));
//This doesn't actually remove hydrogens in the argument, just returns a new IMolecule
cdkMolecule = (IMolecule) AtomContainerManipulator.removeHydrogens(cdkMolecule);
try
{
new LonePairElectronChecker().saturate(cdkMolecule);
}
catch (Exception e) //failed to saturate. Not sure why this happens sometimes on valid SMILES strings
{}
}
/**
* Create a new ChemicalCompound identical to this one with the additional elements added and the specified name.
* Do not populate peaks
* @param additionFormula
* @return
*/
public ChemicalCompound createCompoundWithAddition(ChemicalFormula additionFormula,
String newCompoundName)
{
return new ChemicalCompound(newCompoundName, formula.createFormulaWithAddition(additionFormula));
}
/**
* Create a new ChemicalCompound identical to this one with the specified elements removed and the specified name.
* Do not populate peaks Throw IllegalArgumentException if the formula doesn't have the specified elements
* @param subtractionFormula
* @return
*/
public ChemicalCompound createCompoundWithSubtraction(ChemicalFormula subtractionFormula,
String newCompoundName)
throws IllegalArgumentException
{
return new ChemicalCompound(newCompoundName, formula.createFormulaWithSubtraction(subtractionFormula));
}
//comparators
public static class ComparatorMassAsc implements Comparator<ChemicalCompound>
{
public int compare(ChemicalCompound o1, ChemicalCompound o2)
{
if (o1.getCommonestIsotopeMass() == o2.getCommonestIsotopeMass())
return 0;
return o1.getCommonestIsotopeMass() == o2.getCommonestIsotopeMass() ? 0 :
o1.getCommonestIsotopeMass() < o2.getCommonestIsotopeMass() ? -1 : 1;
}
}
public static class ComparatorNameAsc implements Comparator<ChemicalCompound>
{
public int compare(ChemicalCompound o1, ChemicalCompound o2)
{
return o1.name.compareTo(o2.getName());
}
}
public String toString()
{
StringBuffer buf = new StringBuffer("Compound: " + name + ", formula=" + formula);
return buf.toString();
}
//getters and setters
public String getCompoundClass()
{
return compoundClass;
}
public void setCompoundClass(String compoundClass)
{
this.compoundClass = compoundClass;
}
public double getCommonestIsotopeMass()
{
return formula.getCommonestIsotopeMass();
}
public String getName()
{
return name;
}
public void setName(String name)
{
this.name = name;
}
public ChemicalFormula getFormula()
{
return formula;
}
public void setFormula(ChemicalFormula formula)
{
this.formula = formula;
}
public double[] getPeakFrequencies()
{
return formula.getPeakFrequencies();
}
public double[] getPeakMasses()
{
return formula.getPeakMasses();
}
public double[] getPeakFrequencies(int numPeaksToCalculate)
{
return formula.getPeakFrequencies(numPeaksToCalculate);
}
public double[] getPeakMasses(int numPeaksToCalculate)
{
return formula.getPeakMasses(numPeaksToCalculate);
}
public Map<String, Integer> getElementCountMap()
{
return formula.getElementCountMap();
}
/**
* Load chemical compounds from a tsv file containing columns called name and formula (at least)
* @param file
* @param numPeaksToPopulate
* @return
* @throws IOException
*/
public static final List<ChemicalCompound> loadCompoundsFromFile(File file, int numPeaksToPopulate)
throws IOException
{
return loadCompoundsFromFile(file, numPeaksToPopulate, "name","formula","smiles");
}
/**
* Load chemical compounds from a tsv file containing columns for name and formula (at least)
*
* @param file
* @param numPeaksToPopulate
* @param nameColName
* @param formulaColName
* @return
* @throws IOException
*/
public static List<ChemicalCompound> loadCompoundsFromFile(File file, int numPeaksToPopulate,
String nameColName, String formulaColName,
String smilesColumnName)
throws IOException
{
TabLoader loader = null;
Map[] rowsAsMaps = null;
try {
loader = new TabLoader(file);
rowsAsMaps = (Map[])loader.load();
}
catch (Exception e) { throw new IOException("Error loading tab-delimited compound file: " + e.getMessage()); }
boolean foundNameCol = false;
boolean foundFormulaCol = false;
boolean foundSmilesCol = false;
for (TabLoader.ColumnDescriptor col : loader.getColumns()) {
if (col.name.equals(nameColName))
foundNameCol = true;
else if (col.name.equals(formulaColName))
foundFormulaCol = true;
else if (col.name.equals(smilesColumnName))
foundSmilesCol = true;
}
if (!foundNameCol || !foundFormulaCol || !foundSmilesCol) {
throw new IOException("Compounds file is missing at least one column of " + nameColName +
", " + formulaColName + ", " + smilesColumnName);
}
List<ChemicalCompound> result = new ArrayList<ChemicalCompound>();
_log.debug("Loading " + rowsAsMaps.length + " compounds...");
for (int i=0; i<rowsAsMaps.length; i++)
{
Map rowMap = rowsAsMaps[i];
if (i % 500 == 0)
_log.debug("\tLoaded " + i + " of " + rowsAsMaps.length + " compounds");
try
{
String name = (String) rowMap.get(nameColName);
String formulaString = rowMap.get(formulaColName).toString();
ChemicalCompound compound = null;
//Load IMolecule using the SMILES formula string
if (rowMap.containsKey(smilesColumnName) && rowMap.get(smilesColumnName) != null)
{
String smilesString = rowMap.get(smilesColumnName).toString();
try
{
compound = createFromSmiles(name, smilesString);
}
catch (Exception e)
{
ApplicationContext.infoMessage("WARNING: Failed to load SMILES formula " + smilesString + " for compound " + name + ": " + e.getMessage());
compound = new ChemicalCompound(name,
formulaString, numPeaksToPopulate);
// throw new IOException(e);
}
}
else
{
compound = new ChemicalCompound(name,
formulaString, numPeaksToPopulate);
}
//experimental
if (rowMap.containsKey("class") && rowMap.get("class") != null)
{
// System.err.println("Class: " + rowMap.get("class").toString());
compound.setCompoundClass(rowMap.get("class").toString());
}
result.add(compound);
}
catch (IllegalArgumentException e)
{
ApplicationContext.setMessage("Skipping bad compound: " + e.getMessage());
}
}
_log.debug("Loaded " + rowsAsMaps.length + " compounds.");
return result;
}
/**
* This is kind of a weird one. Apply multiple reactions. After each reaction, apply the following
* reaction to /all/ the products of the first reaction
* @param reactions
* @param otherReactants
* @return
* @throws CDKException
*/
public List<ChemicalCompound> applyReactions(List<IReactionProcess> reactions, List<IMolecule> otherReactants)
throws CDKException
{
List<ChemicalCompound> reactants = new ArrayList<ChemicalCompound>();
reactants.add(this);
for (IReactionProcess reaction : reactions)
{
List<ChemicalCompound> newReactants = new ArrayList<ChemicalCompound>();
for (ChemicalCompound reactant : reactants)
{
newReactants.addAll(reactant.applyReaction(reaction, otherReactants));
}
reactants = newReactants;
}
return reactants;
}
/**
* Apply a chemical reaction to the cdkMolecule of this compound, possibly with other reactants involved.
*
* Returns ChemicalCompounds for all possible products
*
* todo: do I need to group the products by IReaction?
* @param reaction
* @param otherReactants
* @return
* @throws CDKException
*/
public List<ChemicalCompound> applyReaction(IReactionProcess reaction, List<IMolecule> otherReactants)
throws CDKException
{
IMoleculeSet setOfReactants = DefaultChemObjectBuilder.getInstance().newMoleculeSet();
setOfReactants.addMolecule(cdkMolecule);
if (otherReactants != null)
{
for (IMolecule otherReactant : otherReactants)
setOfReactants.addMolecule(otherReactant);
}
IReactionSet setOfReactions = reaction.initiate(setOfReactants, null);
List<ChemicalCompound> result = new ArrayList<ChemicalCompound>();
for (IReaction outputReaction : setOfReactions.reactions())
{
IMoleculeSet products = outputReaction.getProducts();
for (int i=0; i<products.getMoleculeCount(); i++)
{
result.add(new ChemicalCompound(name, products.getMolecule(i)));
}
}
return result;
}
public IMolecule getCdkMolecule()
{
return cdkMolecule;
}
public void setCdkMolecule(IMolecule cdkMolecule)
{
this.cdkMolecule = cdkMolecule;
}
}