/* $RCSfile$
* $Author$
* $Date$
* $Revision$
*
* Copyright (C) 2005-2007 Christian Hoppe <chhoppe@users.sf.net>
*
* Contact: cdk-devel@lists.sourceforge.net
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
*/
package org.openscience.cdk.atomtype;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.PseudoAtom;
import org.openscience.cdk.annotations.TestMethod;
import org.openscience.cdk.config.AtomTypeFactory;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IAtomType;
import org.openscience.cdk.interfaces.IBond;
import org.openscience.cdk.tools.AtomTypeTools;
import org.openscience.cdk.tools.ILoggingTool;
import org.openscience.cdk.tools.LoggingToolFactory;
/**
* Class implements methods to assign mmff94 atom types for a specific atom in
* an molecule. The full list of mmff94 atom types is defined in the file
* <b>cdk/config/data/mmff94_atomtypes.xml</b>.
*
* @author cho
* @cdk.created 2005-18-07
* @cdk.module extra
* @cdk.githash
*/
public class MMFF94AtomTypeMatcher implements IAtomTypeMatcher {
private static ILoggingTool logger =
LoggingToolFactory.createLoggingTool(MMFF94AtomTypeMatcher.class);
IBond.Order maxBondOrder = IBond.Order.SINGLE;
private AtomTypeFactory factory = null;
AtomTypeTools atomTypeTools=null;
String [] atomTypeIds={"C","Csp2","C=","Csp","CO2M","CNN+","C%","CIM+","CR4R","CR3R","CE4R",
"Car","C5A","C5B","C5","HC","HO","HN","HOCO","HN=C","HN2",
"HOCC","HOH","HOS","HN+","HO+","HO=+","HP","O","O=","OX",
"OM","O+","O=+","OH2","Oar","N","N=C","NC=C","NSP","=N=",
"NAZT","N+","N2OX","N3OX","NC#N","NO3","N=O","NC=O","NSO","N+=",
"NCN+","NGD+","NR%","NM","N5M","NPYD","NPYL","NPD+","N5A","N5B",
"NPOX","N5OX","N5+","N5","S","S=C",">SN","SO2","SX","SO2M",
"=SO","Sthi","PTET","P","-P=C","F","CL","BR","I","SI",
"CL04","FE+2","FE+3","F-","CL-","BR-","LI+","NA+","K+","ZN+2",
"CA+2","CU+1","CU+2","MG+2","DU"};
/**
* Constructor for the MMFF94AtomTypeMatcher object.
*/
public MMFF94AtomTypeMatcher() {
atomTypeTools=new AtomTypeTools();
}
private String getSphericalMatcher(IAtomType type) throws CDKException {//NOPMD
return (String)type.getProperty(CDKConstants.SPHERICAL_MATCHER);
}
private String getSphericalMatcher(String type) throws CDKException {//NOPMD
return getSphericalMatcher(factory.getAtomType(type));
}
@TestMethod("testFindMatchingAtomType_IAtomContainer")
public IAtomType[] findMatchingAtomType(IAtomContainer atomContainer) throws CDKException {
IAtomType[] types = new IAtomType[atomContainer.getAtomCount()];
int typeCounter = 0;
for (IAtom atom : atomContainer.atoms()) {
types[typeCounter] = findMatchingAtomType(atomContainer, atom);
typeCounter++;
}
return types;
}
/**
* Assign the mmff94 atom type to a given atom.
* Before this method can be called the following has to be done:
* atomContainer=(AtomContainer)atomTypeTools.assignAtomTypePropertiesToAtom(new Molecule(atomContainer));
*
* @param atomContainer AtomContainer
* @param atomInterface the target atom
* @exception CDKException Description of the Exception
* @return the matching AtomType (AtomType class)
*/
public IAtomType findMatchingAtomType(IAtomContainer atomContainer, IAtom atomInterface) throws CDKException {
if (factory == null) {
try {
factory = AtomTypeFactory.getInstance("org/openscience/cdk/config/data/mmff94_atomtypes.xml",
atomContainer.getBuilder()
);
} catch (Exception ex1) {
logger.error(ex1.getMessage());
logger.debug(ex1);
throw new CDKException("Could not instantiate the AtomType list!", ex1);
}
}
org.openscience.cdk.Atom atom = (org.openscience.cdk.Atom)atomInterface;
//logger.debug("****** Configure MMFF94 AtomType via findMatching ******");
//logger.debug(" Symbol:" + atom.getSymbol() +" HoseCode>" + atom.getSphericalMatcher() + " ");
logger.debug(" Symbol:" + atom.getSymbol() +" HoseCode>" + atom.getProperty(CDKConstants.SPHERICAL_MATCHER) + " ");
//System.out.print("IN MMFF94AtomTypeMatcher Symbol:" + atom.getSymbol() +" HoseCode>" + atom.getProperty(CDKConstants.SPHERICAL_MATCHER) + " ");
if (atom instanceof PseudoAtom) {
return factory.getAtomTypes("DU")[0];
}
Pattern p1 = null;
Pattern p2 = null;
String ID = "";
boolean atomTypeFlag = false;
Matcher mat1=null;
Matcher mat2=null;
IBond.Order tmpMaxBondOrder;
maxBondOrder = atomContainer.getMaximumBondOrder(atom);
for (int j = 0; j < atomTypeIds.length; j++){
tmpMaxBondOrder = factory.getAtomType(atomTypeIds[j]).getMaxBondOrder();
String atomSphericalMatcher = (String)factory.getAtomType(atomTypeIds[j]).getProperty(CDKConstants.SPHERICAL_MATCHER);
logger.debug(j + " ATOM TYPE "+ tmpMaxBondOrder + " " +atomSphericalMatcher);
p1 =Pattern.compile(atomSphericalMatcher);
mat1 = p1.matcher((String)atom.getProperty(CDKConstants.SPHERICAL_MATCHER));
if (mat1.matches()) {
ID = atomTypeIds[j];
Object property = atom.getProperty(CDKConstants.CHEMICAL_GROUP_CONSTANT);
int atomChemGroupConstant = (Integer) property;
Object ringSize = atom.getProperty(CDKConstants.PART_OF_RING_OF_SIZE);
int atomRingSize = -1;
if (ringSize != null) {
atomRingSize = (Integer) ringSize;
}
if (atomTypeIds[j].equals("C")) {
if (atomChemGroupConstant != -1) {//in Ring
if (ringSize != null && maxBondOrder == IBond.Order.SINGLE){
if (atomRingSize == 3) {
ID = atomTypeIds[9];//sp3 3mem rings
}else if (atomRingSize == 4) {
ID = atomTypeIds[8];//sp3 4mem rings
}
}else{//sp2
String type13Matcher = getSphericalMatcher(atomTypeIds[13]);
p1 = Pattern.compile(type13Matcher);//C5B
mat1 = p1.matcher(atomSphericalMatcher);
String type12Matcher = getSphericalMatcher(atomTypeIds[12]);
p2 =Pattern.compile(type12Matcher);//C5A
mat2 = p2.matcher(atomSphericalMatcher);
if (mat1.matches() && atomChemGroupConstant%2==0 && atom.getFlag(CDKConstants.ISAROMATIC) && atomRingSize==5){
ID = atomTypeIds[13];
}else if (mat2.matches() && atomChemGroupConstant%2==0 && atom.getFlag(CDKConstants.ISAROMATIC) && atomRingSize==5){
ID = atomTypeIds[12];
}else if (atomChemGroupConstant%2==0 && atom.getFlag(CDKConstants.ISAROMATIC) && atomRingSize==5) {
ID = atomTypeIds[14];//C5 in het 5 ring
}else if (atom.getFlag(CDKConstants.ISAROMATIC)) {
ID = atomTypeIds[11];//Car in benzene, pyroll
}
}
}else{//not in Ring
p1 = Pattern.compile(getSphericalMatcher(atomTypeIds[66]));//S=C
mat1 = p1.matcher(atomSphericalMatcher);
if (mat1.matches()){
ID = atomTypeIds[66];//S=C
}
}
} else if (atomTypeIds[j].equals("Csp2")) {
if (atomChemGroupConstant%2==0 & atomRingSize==4 & !atom.getFlag(CDKConstants.ISAROMATIC)) {
ID = atomTypeIds[10];//CE4R
}
} else if (atomTypeIds[j].equals("C=")) {
if (atomChemGroupConstant%2==0 && atom.getFlag(CDKConstants.ISAROMATIC)) {
ID = atomTypeIds[12];//C5A
}
} else if (atomTypeIds[j].equals("N")) {
//Amid
p1 = Pattern.compile(getSphericalMatcher(atomTypeIds[48]));//NC=0
mat1 = p1.matcher(atomSphericalMatcher);
if (mat1.matches() & atomChemGroupConstant==-1) {
ID = atomTypeIds[48];//NC=O
}
//nsp3 oxide
p1 = Pattern.compile(getSphericalMatcher(atomTypeIds[44]));//sp3 n-oxide
mat1 = p1.matcher(atomSphericalMatcher);
if (mat1.matches() && maxBondOrder==tmpMaxBondOrder){
ID = atomTypeIds[44];
}
//ring sytems
p1 = Pattern.compile(getSphericalMatcher(atomTypeIds[56]));
mat1 = p1.matcher(atomSphericalMatcher);
if (atomChemGroupConstant==10){
ID = atomTypeIds[56];
}else if (atomChemGroupConstant==4){
ID = atomTypeIds[57];
}else if (atomChemGroupConstant%2==0 & atomRingSize==5 & atom.getFlag(CDKConstants.ISAROMATIC)){
ID=atomTypeIds[64];
}
//Nsp2-Oxides
p1 = Pattern.compile(getSphericalMatcher(atomTypeIds[61]));//npox
mat1 = p1.matcher(atomSphericalMatcher);
if (mat1.matches() && maxBondOrder==tmpMaxBondOrder){
ID=atomTypeIds[43];
}
if (atom.getFlag(CDKConstants.ISAROMATIC)){
if(mat1.matches()&& atomChemGroupConstant==12){
ID = atomTypeIds[61];
}else if(mat1.matches()&& atomRingSize==5){
ID = atomTypeIds[62];
}
}
//NC#N
p1 = Pattern.compile(getSphericalMatcher(atomTypeIds[45]));
mat1 = p1.matcher(getSphericalMatcher(atom));
if (mat1.matches()){
ID = atomTypeIds[45];
}
}else if (atomTypeIds[j].equals("N=C")) {
//n beta heteroaromatic ring
p1 = Pattern.compile(getSphericalMatcher(atomTypeIds[59]));
mat1 = p1.matcher(getSphericalMatcher(atom));
if (atomChemGroupConstant!=-1) {
if (mat1.matches() && atomChemGroupConstant%2==0 &&
atom.getFlag(CDKConstants.ISAROMATIC) && atomRingSize==5){
ID = atomTypeIds[59];//N5A
}else if(atomChemGroupConstant==10){//NPYD
ID = atomTypeIds[56];
}else if(atomChemGroupConstant==4){//NPYL
ID = atomTypeIds[57];
}
}
//N2OX
p1 = Pattern.compile(getSphericalMatcher(atomTypeIds[43]));
mat1 = p1.matcher(getSphericalMatcher(atom));
if (mat1.matches()){
if (atomChemGroupConstant==10){
ID = atomTypeIds[61];//npox
}else if (atom.getFlag(CDKConstants.ISAROMATIC) && atomRingSize==5){
ID = atomTypeIds[62];//n5ox
}else {
ID = atomTypeIds[43];//n2ox
}
}
}else if (atomTypeIds[j].equals("N2OX")){
//NO3
p1 = Pattern.compile(getSphericalMatcher(atomTypeIds[46]));
mat1 = p1.matcher(getSphericalMatcher(atom));
if (mat1.matches() && atomChemGroupConstant==-1){
ID = atomTypeIds[46];//NO3
}
if (atomChemGroupConstant==12){
ID = atomTypeIds[61];//NPOX
}else if (atomChemGroupConstant!=-1 && atom.getFlag(CDKConstants.ISAROMATIC) && atomRingSize==5){
ID = atomTypeIds[62];//N5OX
}
}else if (atomTypeIds[j].equals("=N=") || atomTypeIds[j].equals("NAZT")){
if (atomChemGroupConstant!=-1 && atom.getFlag(CDKConstants.ISAROMATIC)
&& atomRingSize==5){
ID = atomTypeIds[59];//aromatic N5A
}
}else if (atomTypeIds[j].equals("N+=")){
if (atomChemGroupConstant!=-1 && atom.getFlag(CDKConstants.ISAROMATIC)
&& atomRingSize==5){
ID = atomTypeIds[63];//n5+
}else if (atomChemGroupConstant==12){
ID = atomTypeIds[58];//npd+
}
}else if (atomTypeIds[j].equals("O")){
if (atomChemGroupConstant==6){
ID = atomTypeIds[35];//Oar
}
}else if (atomTypeIds[j].equals("HO")){
p1 = Pattern.compile(getSphericalMatcher(atomTypeIds[21]));
mat1 = p1.matcher(atomSphericalMatcher);
if (mat1.matches()){
ID = atomTypeIds[21];//HOCC
}
p1 = Pattern.compile(getSphericalMatcher(atomTypeIds[18]));
mat1 = p1.matcher(atomSphericalMatcher);
if (mat1.matches()){
ID = atomTypeIds[18];//HOCO
}
}else if (atomTypeIds[j].equals("P")){
p1 = Pattern.compile(getSphericalMatcher(atomTypeIds[75]));
mat1 = p1.matcher(atomSphericalMatcher);
if (mat1.matches()){
ID = atomTypeIds[75];//-P=C
}
}else if (atomTypeIds[j].equals("S")){
if (atomRingSize==5 && atom.getFlag(CDKConstants.ISAROMATIC)){
ID = atomTypeIds[72];//Sthiophen
}
}else if (atomTypeIds[j].equals("HC")){
p1 =Pattern.compile(getSphericalMatcher("HP"));
mat1 = p1.matcher((String)atom.getProperty(CDKConstants.SPHERICAL_MATCHER));
if (mat1.matches()){
ID = "HP";
}
}
atomTypeFlag = true;
logger.debug(" MATCH AtomTypeID:"+j+ " " + ID);
break;
}//IF
}//for end
if (atomTypeFlag) {
atomTypeFlag = false;
//System.out.println(" "+ID);
return factory.getAtomType(ID);
} else {
return factory.getAtomType("DU");
}
}
}