/* $RCSfile$ * $Author$ * $Date$ * $Revision$ * * Copyright (C) 2001-2007 Oliver Horlacher <oliver.horlacher@therastrat.com> * * Contact: cdk-devel@lists.sourceforge.net * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * All we ask is that proper credit is given for our work, which includes * - but is not limited to - adding the above copyright notice to the beginning * of your source code files, and to any copyright notice that you may distribute * with programs based on this work. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * */ package org.openscience.cdk.graph.invariant; import org.openscience.cdk.CDKConstants; import org.openscience.cdk.annotations.TestClass; import org.openscience.cdk.annotations.TestMethod; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.smiles.InvPair; import org.openscience.cdk.tools.periodictable.PeriodicTable; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.Iterator; import java.util.List; /** * Canonically labels an atom container implementing * the algorithm published in David Weininger et.al. {@cdk.cite WEI89}. * The Collections.sort() method uses a merge sort which is * stable and runs in n log(n). * * @cdk.module standard * @cdk.githash * * @author Oliver Horlacher <oliver.horlacher@therastrat.com> * @cdk.created 2002-02-26 * * @cdk.keyword canonicalization */ @TestClass("org.openscience.cdk.graph.invariant.CanonicalLabelerTest") public class CanonicalLabeler { public CanonicalLabeler() { } /** * Canonically label the fragment. The labels are set as atom property InvPair.CANONICAL_LABEL of type Integer, indicating the canonical order. * This is an implementation of the algorithm published in * David Weininger et.al. {@cdk.cite WEI89}. * * <p>The Collections.sort() method uses a merge sort which is * stable and runs in n log(n). * * <p>It is assumed that a chemicaly valid AtomContainer is provided: * this method does not check * the correctness of the AtomContainer. Negative H counts will * cause a NumberFormatException to be thrown. * @param atomContainer The molecule to label */ @TestMethod("testCanonLabel_IAtomContainer,testSomeMoleculeWithDifferentStartingOrder") public synchronized void canonLabel(IAtomContainer atomContainer) { if (atomContainer.getAtomCount() == 0) return; if (atomContainer.getAtomCount() == 1) { atomContainer.getAtom(0).setProperty(InvPair.CANONICAL_LABEL, 1); } ArrayList vect = createInvarLabel(atomContainer); step3(vect, atomContainer); } /** * @param v the invariance pair vector */ private void step2(ArrayList v, IAtomContainer atoms) { primeProduct(v, atoms); step3(v, atoms); } /** * @param v the invariance pair vector */ private void step3(ArrayList v, IAtomContainer atoms) { sortArrayList(v); rankArrayList(v); if (!isInvPart(v)) { step2(v, atoms); } else { //On first pass save, partitioning as symmetry classes. if (((InvPair) v.get(v.size()-1)).getCurr() < v.size()) { breakTies(v); step2(v, atoms); } // now apply the ranking for (Object aV : v) { ((InvPair) aV).commit(); } } } /** * Create initial invariant labeling corresponds to step 1 * * @return ArrayList containting the */ private ArrayList createInvarLabel(IAtomContainer atomContainer) { java.util.Iterator atoms = atomContainer.atoms().iterator(); IAtom a; StringBuffer inv; ArrayList vect = new ArrayList(); while(atoms.hasNext()) { a = (IAtom)atoms.next(); inv = new StringBuffer(); inv.append(atomContainer.getConnectedAtomsList(a).size() + (a.getHydrogenCount() == CDKConstants.UNSET ? 0 : a.getHydrogenCount())); //Num connections inv.append(atomContainer.getConnectedAtomsList(a).size()); //Num of non H bonds inv.append(PeriodicTable.getAtomicNumber(a.getSymbol())); Double charge = a.getCharge(); if (charge == CDKConstants.UNSET) charge = 0.0; if (charge < 0) //Sign of charge inv.append(1); else inv.append(0); //Absolute charge inv.append((int)Math.abs( (a.getFormalCharge() == CDKConstants.UNSET ? 0.0 : a.getFormalCharge()))); //Hydrogen count inv.append((a.getHydrogenCount() == CDKConstants.UNSET ? 0 : a.getHydrogenCount())); vect.add(new InvPair(Long.parseLong(inv.toString()), a)); } return vect; } /** * Calculates the product of the neighbouring primes. * * @param v the invariance pair vector */ private void primeProduct(ArrayList v, IAtomContainer atomContainer) { Iterator it = v.iterator(); Iterator n; InvPair inv; IAtom a; long summ; while (it.hasNext()) { inv = (InvPair) it.next(); List neighbour = atomContainer.getConnectedAtomsList(inv.getAtom()); n = neighbour.iterator(); summ = 1; while (n.hasNext()) { a = (IAtom) n.next(); int next = ((InvPair)a.getProperty(InvPair.INVARIANCE_PAIR)).getPrime(); summ = summ * next; } inv.setLast(inv.getCurr()); inv.setCurr(summ); } } /** * Sorts the vector according to the current invariance, corresponds to step 3 * * @param v the invariance pair vector * @cdk.todo can this be done in one loop? */ private void sortArrayList(ArrayList v) { Collections.sort(v, new Comparator() { public int compare(Object o1, Object o2) { return (int) (((InvPair) o1).getCurr() - ((InvPair) o2).getCurr()); } }); Collections.sort(v, new Comparator() { public int compare(Object o1, Object o2) { return (int) (((InvPair) o1).getLast() - ((InvPair) o2).getLast()); } }); } /** * Rank atomic vector, corresponds to step 4. * * @param v the invariance pair vector */ private void rankArrayList(ArrayList v) { int num = 1; int[] temp = new int[v.size()]; InvPair last = (InvPair) v.get(0); Iterator it = v.iterator(); InvPair curr; for (int x = 0; it.hasNext(); x++) { curr = (InvPair) it.next(); if (!last.equals(curr)) { num++; } temp[x] = num; last = curr; } it = v.iterator(); for (int x = 0; it.hasNext(); x++) { curr = (InvPair) it.next(); curr.setCurr(temp[x]); curr.setPrime(); } } /** * Checks to see if the vector is invariantely partitioned * * @param v the invariance pair vector * @return true if the vector is invariantely partitioned, false otherwise */ private boolean isInvPart(ArrayList v) { if (((InvPair) v.get(v.size()-1)).getCurr() == v.size()) return true; Iterator it = v.iterator(); InvPair curr; while (it.hasNext()) { curr = (InvPair) it.next(); if (curr.getCurr() != curr.getLast()) return false; } return true; } /** * Break ties. Corresponds to step 7 * * @param v the invariance pair vector */ private void breakTies(ArrayList v) { Iterator it = v.iterator(); InvPair curr; InvPair last = null; int tie = 0; boolean found = false; for (int x = 0; it.hasNext(); x++) { curr = (InvPair) it.next(); curr.setCurr(curr.getCurr() * 2); curr.setPrime(); if (x != 0 && !found && curr.getCurr() == last.getCurr()) { tie = x - 1; found = true; } last = curr; } curr = (InvPair) v.get(tie); curr.setCurr(curr.getCurr() - 1); curr.setPrime(); } }