/* $RCSfile$
* $Author$
* $Date$
* $Revision$
*
* Copyright (C) 2004-2007 Rajarshi Guha <rajarshi@users.sourceforge.net>
*
* Contact: cdk-devel@lists.sourceforge.net
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*/
package org.openscience.cdk.qsar.descriptors.protein;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.openscience.cdk.Monomer;
import org.openscience.cdk.Strand;
import org.openscience.cdk.annotations.TestMethod;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IBioPolymer;
import org.openscience.cdk.qsar.DescriptorSpecification;
import org.openscience.cdk.qsar.DescriptorValue;
import org.openscience.cdk.qsar.IMolecularDescriptor;
import org.openscience.cdk.qsar.result.DoubleArrayResult;
import org.openscience.cdk.qsar.result.IDescriptorResult;
import org.openscience.cdk.tools.ILoggingTool;
import org.openscience.cdk.tools.LoggingToolFactory;
/**
* An implementation of the TAE descriptors for amino acids.
* <p/>
* The TAE descriptors ({@cdk.cite BREN1995} {@cdk.cite BREN1997} {@cdk.cite WHITE2003})
* are derived from pre-calculated quantum mechanical parameters. This class
* uses the paramaters for amino acids and thus evaluates a set of 147 descriptors for peptide
* sequences.
* <p/>
* The class expects that it will be supplied an object which implements the {@link IBioPolymer}. Thus ordinary
* AtomContainer objects will result in an exception.
* <p/>
* The descriptors are returned in the following order (see
* <a href="http://www.chem.rpi.edu/chemweb/recondoc/TAE.doc">here</a>
* for a detailed description of the individual descriptors):
* <pre>
* Energy Population VOLTAE SurfArea
* SIDel.Rho.N Del.Rho.NMin Del.Rho.NMax Del.Rho.NIA Del.Rho.NA1
* Del.Rho.NA2 Del.Rho.NA3 Del.Rho.NA4 Del.Rho.NA5 Del.Rho.NA6
* Del.Rho.NA7 Del.Rho.NA8 Del.Rho.NA9 Del.Rho.NA10 SIDel.K.N
* Del.K.Min Del.K.Max Del.K.IA Del.K.NA1 Del.K.NA2
* Del.K.NA3 Del.K.NA4 Del.K.NA5 Del.K.NA6 Del.K.NA7
* Del.K.NA8 Del.K.NA9 Del.K.NA10 SIK SIKMin
* SIKMax SIKIA SIKA1 SIKA2 SIKA3
* SIKA4 SIKA5 SIKA6 SIKA7 SIKA8
* SIKA9 SIKA10 SIDel.G.N Del.G.NMin Del.G.NMax
* Del.G.NIA Del.G.NA1 Del.G.NA2 Del.G.NA3 Del.G.NA4
* Del.G.NA5 Del.G.NA6 Del.G.NA7 Del.G.NA8 Del.G.NA9
* Del.G.NA10 SIG SIGMin SIGMax SIGIA
* SIGA1 SIGA2 SIGA3 SIGA4 SIGA5
* SIGA6 SIGA7 SIGA8 SIGA9 SIGA10
* SIEP SIEPMin SIEPMax SIEPIA SIEPA1
* SIEPA2 SIEPA3 SIEPA4 SIEPA5 SIEPA6
* SIEPA7 SIEPA8 SIEPA9 SIEPA10 EP1
* EP2 EP3 EP4 EP5 EP6
* EP7 EP8 EP9 EP10 PIPMin
* PIPMax PIPAvg PIP1 PIP2 PIP3
* PIP4 PIP5 PIP6 PIP7 PIP8
* PIP9 PIP10 PIP11 PIP12 PIP13
* PIP14 PIP15 PIP16 PIP17 PIP18
* PIP19 PIP20 Fuk FukMin FukMax
* Fuk1 Fuk2 Fuk3 Fuk4 Fuk5
* Fuk6 Fuk7 Fuk8 Fuk9 Fuk10
* Lapl LaplMin LaplMax Lapl1 Lapl2
* Lapl3 Lapl4 Lapl5 Lapl6 Lapl7
* Lapl8 Lapl9 Lapl10
* </pre>
* <p/>
* <p>This descriptor uses these parameters:
* <table border="1">
* <tr>
* <td>Name</td>
* <td>Default</td>
* <td>Description</td>
* </tr>
* <tr>
* <td></td>
* <td></td>
* <td>no parameters</td>
* </tr>
* </table>
*
* @author Rajarshi Guha
* @cdk.created 2006-08-23
* @cdk.module qsarprotein
* @cdk.githash
* @cdk.set qsar-descriptors
* @cdk.dictref qsar-descriptors:taeAminoAcid
* @see IBioPolymer
*/
public class TaeAminoAcidDescriptor implements IMolecularDescriptor {
private static ILoggingTool logger =
LoggingToolFactory.createLoggingTool(TaeAminoAcidDescriptor.class);
private Map<String, Double[]> TAEParams = new HashMap<String, Double[]>();
private int ndesc = 147;
private Map<String,String> nametrans = new HashMap<String,String>();
private List getMonomers(IBioPolymer iBioPolymer) {
List monomList = new ArrayList();
Map strands = iBioPolymer.getStrands();
Set strandKeys = strands.keySet();
for (Iterator iterator = strandKeys.iterator(); iterator.hasNext();) {
Object key = iterator.next();
Strand aStrand = (Strand) strands.get(key);
Map tmp = aStrand.getMonomers();
Set keys = tmp.keySet();
for (Iterator iterator1 = keys.iterator(); iterator1.hasNext();) {
Object o1 = iterator1.next();
monomList.add(tmp.get(o1));
}
}
return monomList;
}
private void loadTAEParams() {
String filename = "org/openscience/cdk/qsar/descriptors/data/taepeptides.txt";
InputStream ins = this.getClass().getClassLoader().getResourceAsStream(filename);
if (ins == null) {
logger.debug("Could not load the TAE peptide parameter data file");
TAEParams = null;
return;
}
try {
BufferedReader breader = new BufferedReader(new InputStreamReader(ins));
breader.readLine(); // throw away the header
for (int i = 0; i < 60; i++) {
String line = breader.readLine();
String[] components = line.split(",");
if (components.length != (ndesc + 1))
throw new CDKException("TAE peptide data table seems to be corrupt");
String key = components[0].toLowerCase().trim();
Double[] data = new Double[ndesc];
for (int j = 1; j < components.length; j++) data[j - 1] = new Double(components[j]);
TAEParams.put(key, data);
}
} catch (IOException ioe) {
ioe.printStackTrace();
TAEParams = null;
return;
} catch (CDKException e) {
e.printStackTrace();
TAEParams = null;
return;
}
logger.debug("Loaded " + TAEParams.size() + " TAE parameters for amino acids");
}
public TaeAminoAcidDescriptor() {
nametrans.put("a", "ala");
nametrans.put("c", "cys");
nametrans.put("d", "asp");
nametrans.put("e", "glu");
nametrans.put("f", "phe");
nametrans.put("g", "gly");
nametrans.put("h", "his");
nametrans.put("i", "ile");
nametrans.put("k", "lys");
nametrans.put("l", "leu");
nametrans.put("m", "met");
nametrans.put("n", "asn");
nametrans.put("p", "pro");
nametrans.put("q", "gln");
nametrans.put("r", "arg");
nametrans.put("s", "ser");
nametrans.put("t", "thr");
nametrans.put("v", "val");
nametrans.put("w", "trp");
nametrans.put("y", "tyr");
loadTAEParams();
}
public DescriptorSpecification getSpecification() {
return new DescriptorSpecification(
"http://www.blueobelisk.org/ontologies/chemoinformatics-algorithms/#taeAminoAcid",
this.getClass().getName(),
"$Id$",
"The Chemistry Development Kit");
}
/**
* Sets the parameters attribute of the TaeAminoAcidDescriptor object.
*
* @param params The new parameters value
* @throws org.openscience.cdk.exception.CDKException
* Description of the Exception
*/
public void setParameters(Object[] params) throws CDKException {
// no parameters for this descriptor
}
/**
* Gets the parameters attribute of the TaeAminoAcidDescriptor object.
*
* @return The parameters value
*/
public Object[] getParameters() {
// no parameters to return
return (null);
}
@TestMethod(value="testNamesConsistency")
public String[] getDescriptorNames() {
String[] names = new String[ndesc];
for (int i = 0; i < names.length; i++) names[i] = "TAE"+i;
return names;
}
/**
* Gets the parameterNames attribute of the TaeAminOAcidDescriptor object.
*
* @return The parameterNames value
*/
public String[] getParameterNames() {
// no param names to return
return (null);
}
/**
* Gets the parameterType attribute of the TaeAminoAcidDescriptor object.
*
* @param name Description of the Parameter
* @return The parameterType value
*/
public Object getParameterType(String name) {
return (null);
}
private DescriptorValue getDummyDescriptorValue(Exception e) {
int ndesc = getDescriptorNames().length;
DoubleArrayResult results = new DoubleArrayResult(ndesc);
for (int i = 0; i < ndesc; i++) results.add(Double.NaN);
return new DescriptorValue(getSpecification(), getParameterNames(),
getParameters(), results, getDescriptorNames(), e);
}
/**
* Calculates the 147 TAE descriptors for amino acids.
*
* @param container Parameter is the atom container which should implement {@link IBioPolymer}.
* @return A DoubleArrayResult value representing the 147 TAE descriptors
*/
public DescriptorValue calculate(IAtomContainer container) {
if (TAEParams == null) return getDummyDescriptorValue(new CDKException("TAE parameters were not initialized"));
if (!(container instanceof IBioPolymer)) return getDummyDescriptorValue(new CDKException("The molecule should be of type IBioPolymer"));
IBioPolymer peptide = (IBioPolymer) container;
// I assume that we get single letter names
//Collection aas = peptide.getMonomerNames();
double[] desc = new double[ndesc];
for (int i = 0; i < ndesc; i++) desc[i] = 0.0;
List monomers = getMonomers(peptide);
for (Iterator iterator = monomers.iterator(); iterator.hasNext();) {
Monomer monomer = (Monomer) iterator.next();
String o = monomer.getMonomerName();
if (o.length() == 0) continue;
String olc = String.valueOf(o.toLowerCase().charAt(0));
String tlc = (String) nametrans.get(olc);
logger.debug("Converted " + olc + " to " + tlc);
// get the params for this AA
Double[] params = (Double[]) TAEParams.get(tlc);
for (int i = 0; i < ndesc; i++) desc[i] += params[i];
}
DoubleArrayResult retval = new DoubleArrayResult(ndesc);
for (int i = 0; i < ndesc; i++) retval.add(desc[i]);
return new DescriptorValue(getSpecification(), getParameterNames(), getParameters(),
retval, getDescriptorNames());
}
/**
* Returns the specific type of the DescriptorResult object.
* <p/>
* The return value from this method really indicates what type of result will
* be obtained from the {@link org.openscience.cdk.qsar.DescriptorValue} object. Note that the same result
* can be achieved by interrogating the {@link org.openscience.cdk.qsar.DescriptorValue} object; this method
* allows you to do the same thing, without actually calculating the descriptor.
*
* @return an object that implements the {@link org.openscience.cdk.qsar.result.IDescriptorResult} interface indicating
* the actual type of values returned by the descriptor in the {@link org.openscience.cdk.qsar.DescriptorValue} object
*/
public IDescriptorResult getDescriptorResultType() {
return new DoubleArrayResult();
}
}