/* $Revision$ $Author$ $Date$ * * Copyright (C) 2006-2008 Egon Willighagen <egonw@sci.kun.nl> * * Contact: cdk-devel@lists.sourceforge.net * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. */ package org.openscience.cdk.io; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.io.StringReader; import java.util.Hashtable; import java.util.Iterator; import java.util.Map; import java.util.StringTokenizer; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.vecmath.Point2d; import javax.vecmath.Point3d; import org.openscience.cdk.CDKConstants; import org.openscience.cdk.annotations.TestClass; import org.openscience.cdk.annotations.TestMethod; import org.openscience.cdk.config.IsotopeFactory; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IBond; import org.openscience.cdk.interfaces.IChemObject; import org.openscience.cdk.interfaces.IChemObjectBuilder; import org.openscience.cdk.interfaces.IMolecule; import org.openscience.cdk.interfaces.IPseudoAtom; import org.openscience.cdk.io.formats.IResourceFormat; import org.openscience.cdk.io.formats.MDLV3000Format; import org.openscience.cdk.io.setting.IOSetting; import org.openscience.cdk.tools.ILoggingTool; import org.openscience.cdk.tools.LoggingToolFactory; import org.openscience.cdk.tools.manipulator.BondManipulator; /** * Class that implements the MDL mol V3000 format. This reader reads the * element symbol and 2D or 3D coordinates from the ATOM block. * * @cdk.module io * @cdk.githash * * @author Egon Willighagen <egonw@users.sf.net> * @cdk.created 2006 * * @cdk.keyword MDL molfile V3000 * @cdk.require java1.4+ */ @TestClass("org.openscience.cdk.io.MDLV3000ReaderTest") public class MDLV3000Reader extends DefaultChemObjectReader { BufferedReader input = null; private static ILoggingTool logger = LoggingToolFactory.createLoggingTool(MDLV3000Reader.class); private Pattern keyValueTuple; private Pattern keyValueTuple2; private int lineNumber; public MDLV3000Reader(Reader in) { this(in, Mode.RELAXED); } public MDLV3000Reader(Reader in, Mode mode) { input = new BufferedReader(in); initIOSettings(); super.mode = mode; /* compile patterns */ keyValueTuple = Pattern.compile("\\s*(\\w+)=([^\\s]*)(.*)"); // e.g. CHG=-1 keyValueTuple2 = Pattern.compile("\\s*(\\w+)=\\(([^\\)]*)\\)(.*)"); // e.g. ATOMS=(1 31) lineNumber = 0; } public MDLV3000Reader(InputStream input) { this(input, Mode.RELAXED); } public MDLV3000Reader(InputStream input, Mode mode) { this(new InputStreamReader(input), mode); } public MDLV3000Reader() { this(new StringReader("")); } @TestMethod("testGetFormat") public IResourceFormat getFormat() { return MDLV3000Format.getInstance(); } @TestMethod("testSetReader_Reader") public void setReader(Reader input) throws CDKException { if (input instanceof BufferedReader) { this.input = (BufferedReader)input; } else { this.input = new BufferedReader(input); } lineNumber = 0; } @TestMethod("testSetReader_InputStream") public void setReader(InputStream input) throws CDKException { setReader(new InputStreamReader(input)); } @TestMethod("testAccepts") public boolean accepts(Class classObject) { Class[] interfaces = classObject.getInterfaces(); for (int i=0; i<interfaces.length; i++) { if (IMolecule.class.equals(interfaces[i])) return true; } Class superClass = classObject.getSuperclass(); if (superClass != null) return this.accepts(superClass); return false; } public IChemObject read(IChemObject object) throws CDKException { if (object instanceof IMolecule) { return readMolecule(object.getBuilder()); } return null; } public IMolecule readMolecule(IChemObjectBuilder builder) throws CDKException { return builder.newMolecule(readConnectionTable(builder)); } public IAtomContainer readConnectionTable(IChemObjectBuilder builder) throws CDKException { logger.info("Reading CTAB block"); IAtomContainer readData = builder.newAtomContainer(); boolean foundEND = false; String lastLine = readHeader(readData); while (isReady() && !foundEND) { String command = readCommand(lastLine); logger.debug("command found: " + command); if ("END CTAB".equals(command)) { foundEND = true; } else if ("BEGIN CTAB".equals(command)) { // that's fine } else if ("COUNTS".equals(command)) { // don't think I need to parse this } else if ("BEGIN ATOM".equals(command)) { readAtomBlock(readData); } else if ("BEGIN BOND".equals(command)) { readBondBlock(readData); } else if ("BEGIN SGROUP".equals(command)) { readSGroup(readData); } else { logger.warn("Unrecognized command: " + command); } lastLine = readLine(); } return readData; } /** * @throws CDKException when no file content is detected * @return Last line read */ public String readHeader(IAtomContainer readData) throws CDKException { // read four lines String line1 = readLine(); if (line1 == null) { throw new CDKException("Expected a header line, but found nothing."); } if (line1.length() > 0) { if (line1.startsWith("M V30")) { // no header return line1; } readData.setProperty(CDKConstants.TITLE, line1); } readLine(); String line3 = readLine(); if (line3.length() > 0) readData.setProperty(CDKConstants.COMMENT, line3); String line4 = readLine(); if (!line4.contains("3000")) { throw new CDKException("This file is not a MDL V3000 molfile."); } return readLine(); } /** * Reads the atoms, coordinates and charges. * * <p>IMPORTANT: it does not support the atom list and its negation! */ public void readAtomBlock(IAtomContainer readData) throws CDKException { logger.info("Reading ATOM block"); IsotopeFactory isotopeFactory; try { isotopeFactory = IsotopeFactory.getInstance(readData.getBuilder()); } catch (IOException exception) { throw new CDKException("Could not initiate the IsotopeFactory.", exception); } int RGroupCounter = 1; int Rnumber = 0; String[] rGroup = null; boolean foundEND = false; while (isReady() && !foundEND) { String command = readCommand(readLine()); if ("END ATOM".equals(command)) { // FIXME: should check whether 3D is really 2D foundEND = true; } else { logger.debug("Parsing atom from: " + command); IAtom atom = readData.getBuilder().newAtom(); StringTokenizer tokenizer = new StringTokenizer(command); // parse the index try { atom.setID(tokenizer.nextToken()); } catch (Exception exception) { String error = "Error while parsing atom index"; logger.error(error); logger.debug(exception); throw new CDKException(error, exception); } // parse the element String element = tokenizer.nextToken(); if (isotopeFactory.isElement(element)) { atom = isotopeFactory.configure(readData.getBuilder().newAtom(element)); } else if ("A".equals(element)) { atom = readData.getBuilder().newPseudoAtom(element); } else if ("Q".equals(element)) { atom = readData.getBuilder().newPseudoAtom(element); } else if ("*".equals(element)) { atom = readData.getBuilder().newPseudoAtom(element); } else if ("LP".equals(element)) { atom = readData.getBuilder().newPseudoAtom(element); } else if ("L".equals(element)) { atom = readData.getBuilder().newPseudoAtom(element); } else if (element.length() > 0 && element.charAt(0) == 'R'){ logger.debug("Atom ", element, " is not an regular element. Creating a PseudoAtom."); //check if the element is R rGroup = element.split("^R"); if (rGroup.length > 1){ try{ Rnumber = Integer.valueOf(rGroup[(rGroup.length-1)]).intValue(); RGroupCounter=Rnumber; }catch(Exception ex){ Rnumber=RGroupCounter; RGroupCounter++; } element="R"+Rnumber; } atom = readData.getBuilder().newPseudoAtom(element); } else { if (mode == ISimpleChemObjectReader.Mode.STRICT) { throw new CDKException("Invalid element type. Must be an existing element, or one in: A, Q, L, LP, *."); } atom = readData.getBuilder().newPseudoAtom(element); } // parse atom coordinates (in Angstrom) try { String xString = tokenizer.nextToken(); String yString = tokenizer.nextToken(); String zString = tokenizer.nextToken(); double x = Double.parseDouble(xString); double y = Double.parseDouble(yString); double z = Double.parseDouble(zString); atom.setPoint3d(new Point3d(x, y, z)); atom.setPoint2d(new Point2d(x, y)); // FIXME: dirty! } catch (Exception exception) { String error = "Error while parsing atom coordinates"; logger.error(error); logger.debug(exception); throw new CDKException(error, exception); } // atom-atom mapping String mapping = tokenizer.nextToken(); if (!mapping.equals("0")) { logger.warn("Skipping atom-atom mapping: " + mapping); } // else: default 0 is no mapping defined // the rest are key value things if (command.indexOf("=") != -1) { Map<String,String> options = parseOptions(exhaustStringTokenizer(tokenizer)); Iterator<String> keys = options.keySet().iterator(); while (keys.hasNext()) { String key = keys.next(); String value = options.get(key); try { if (key.equals("CHG")) { int charge = Integer.parseInt(value); if (charge != 0) { // zero is no charge specified atom.setFormalCharge(charge); } } else { logger.warn("Not parsing key: " + key); } } catch (Exception exception) { String error = "Error while parsing key/value " + key + "=" + value + ": " + exception.getMessage(); logger.error(error); logger.debug(exception); throw new CDKException(error, exception); } } } // store atom readData.addAtom(atom); logger.debug("Added atom: " + atom); } } } /** * Reads the bond atoms, order and stereo configuration. */ public void readBondBlock(IAtomContainer readData) throws CDKException { logger.info("Reading BOND block"); boolean foundEND = false; while (isReady() && !foundEND) { String command = readCommand(readLine()); if ("END BOND".equals(command)) { foundEND = true; } else { logger.debug("Parsing bond from: " + command); StringTokenizer tokenizer = new StringTokenizer(command); IBond bond = readData.getBuilder().newBond(); // parse the index try { String indexString = tokenizer.nextToken(); bond.setID(indexString); } catch (Exception exception) { String error = "Error while parsing bond index"; logger.error(error); logger.debug(exception); throw new CDKException(error, exception); } // parse the order try { String orderString = tokenizer.nextToken(); int order = Integer.parseInt(orderString); if (order >= 4) { logger.warn("Query order types are not supported (yet). File a bug if you need it"); } else { bond.setOrder(BondManipulator.createBondOrder((double)order)); } } catch (Exception exception) { String error = "Error while parsing bond index"; logger.error(error); logger.debug(exception); throw new CDKException(error, exception); } // parse index atom 1 try { String indexAtom1String = tokenizer.nextToken(); int indexAtom1 = Integer.parseInt(indexAtom1String); IAtom atom1 = readData.getAtom(indexAtom1 -1); bond.setAtom(atom1, 0); } catch (Exception exception) { String error = "Error while parsing index atom 1 in bond"; logger.error(error); logger.debug(exception); throw new CDKException(error, exception); } // parse index atom 2 try { String indexAtom2String = tokenizer.nextToken(); int indexAtom2 = Integer.parseInt(indexAtom2String); IAtom atom2 = readData.getAtom(indexAtom2 -1); bond.setAtom(atom2, 1); } catch (Exception exception) { String error = "Error while parsing index atom 2 in bond"; logger.error(error); logger.debug(exception); throw new CDKException(error, exception); } // the rest are key=value fields if (command.indexOf("=") != -1) { Map<String,String> options = parseOptions(exhaustStringTokenizer(tokenizer)); Iterator<String> keys = options.keySet().iterator(); while (keys.hasNext()) { String key = keys.next(); String value = options.get(key); try { if (key.equals("CFG")) { int configuration = Integer.parseInt(value); if (configuration == 0) { bond.setStereo(IBond.Stereo.NONE); } else if (configuration == 1) { bond.setStereo(IBond.Stereo.UP); } else if (configuration == 2) { bond.setStereo((IBond.Stereo)CDKConstants.UNSET); } else if (configuration == 3) { bond.setStereo(IBond.Stereo.DOWN); } } else { logger.warn("Not parsing key: " + key); } } catch (Exception exception) { String error = "Error while parsing key/value " + key + "=" + value + ": " + exception.getMessage(); logger.error(error); logger.debug(exception); throw new CDKException(error, exception); } } } // storing bond readData.addBond(bond); logger.debug("Added bond: " + bond); } } } /** * Reads labels. */ public void readSGroup(IAtomContainer readData) throws CDKException { boolean foundEND = false; while (isReady() && !foundEND) { String command = readCommand(readLine()); if ("END SGROUP".equals(command)) { foundEND = true; } else { logger.debug("Parsing Sgroup line: " + command); StringTokenizer tokenizer = new StringTokenizer(command); // parse the index String indexString = tokenizer.nextToken(); logger.warn("Skipping external index: " + indexString); // parse command type String type = tokenizer.nextToken(); // parse the external index String externalIndexString = tokenizer.nextToken(); logger.warn("Skipping external index: " + externalIndexString); // the rest are key=value fields Map<String,String> options = new Hashtable<String,String>(); if (command.indexOf("=") != -1) { options = parseOptions(exhaustStringTokenizer(tokenizer)); } // now interpret line if (type.startsWith("SUP")) { Iterator<String> keys = options.keySet().iterator(); int atomID = -1; String label = ""; while (keys.hasNext()) { String key = keys.next(); String value = options.get(key); try { if (key.equals("ATOMS")) { StringTokenizer atomsTokenizer = new StringTokenizer(value); Integer.parseInt(atomsTokenizer.nextToken()); // should be 1, int atomCount = atomID = Integer.parseInt(atomsTokenizer.nextToken()); } else if (key.equals("LABEL")) { label = value; } else { logger.warn("Not parsing key: " + key); } } catch (Exception exception) { String error = "Error while parsing key/value " + key + "=" + value + ": " + exception.getMessage(); logger.error(error); logger.debug(exception); throw new CDKException(error, exception); } if (atomID != -1 && label.length() > 0) { IAtom atom = readData.getAtom(atomID-1); if (!(atom instanceof IPseudoAtom)) { atom = readData.getBuilder().newPseudoAtom(atom); } ((IPseudoAtom)atom).setLabel(label); readData.setAtom(atomID-1, atom); } } } else { logger.warn("Skipping unrecognized SGROUP type: " + type); } } } } /** * Reads the command on this line. If the line is continued on the next, that * part is added. * * @return Returns the command on this line. */ private String readCommand(String line) throws CDKException { if (line.startsWith("M V30 ")) { String command = line.substring(7); if (command.endsWith("-")) { command = command.substring(0, command.length()-1); command += readCommand(readLine()); } return command; } else { throw new CDKException("Could not read MDL file: unexpected line: " + line); } } private Map<String,String> parseOptions(String string) throws CDKException { Map<String,String> keyValueTuples = new Hashtable<String,String>(); while (string.length() >= 3) { logger.debug("Matching remaining option string: " + string); Matcher tuple1Matcher = keyValueTuple2.matcher(string); if (tuple1Matcher.matches()) { String key = tuple1Matcher.group(1); String value = tuple1Matcher.group(2); string = tuple1Matcher.group(3); logger.debug("Found key: " + key); logger.debug("Found value: " + value); keyValueTuples.put(key, value); } else { Matcher tuple2Matcher = keyValueTuple.matcher(string); if (tuple2Matcher.matches()) { String key = tuple2Matcher.group(1); String value = tuple2Matcher.group(2); string = tuple2Matcher.group(3); logger.debug("Found key: " + key); logger.debug("Found value: " + value); keyValueTuples.put(key, value); } else { logger.warn("Quiting; could not parse: " + string + "."); string = ""; } } } return keyValueTuples; } public String exhaustStringTokenizer(StringTokenizer tokenizer) { StringBuffer buffer = new StringBuffer(); buffer.append(" "); while (tokenizer.hasMoreTokens()) { buffer.append(tokenizer.nextToken()); buffer.append(" "); } return buffer.toString(); } public String readLine() throws CDKException { String line = null; try { line = input.readLine(); lineNumber++; logger.debug("read line " + lineNumber + ":", line); } catch (Exception exception) { String error = "Unexpected error while reading file: " + exception.getMessage(); logger.error(error); logger.debug(exception); throw new CDKException(error, exception); } return line; } public boolean isReady() throws CDKException { try { return input.ready(); } catch (Exception exception) { String error = "Unexpected error while reading file: " + exception.getMessage(); logger.error(error); logger.debug(exception); throw new CDKException(error, exception); } } @TestMethod("testAccepts") public boolean accepts(IChemObject object) { if (object instanceof IMolecule) { return true; } return false; } @TestMethod("testClose") public void close() throws IOException { input.close(); } private void initIOSettings() { } public IOSetting[] getIOSettings() { return new IOSetting[0]; } }