/* $Revision$ $Author$ $Date$ * * Copyright (C) 1997-2007 The Chemistry Development Kit (CDK) project * 2009 Egon Willighagen <egonw@users.sf.net> * 2010 Mark Rijnbeek <mark_rynbeek@users.sf.net> * * Contact: cdk-devel@lists.sourceforge.net * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * All we ask is that proper credit is given for our work, which includes * - but is not limited to - adding the above copyright notice to the beginning * of your source code files, and to any copyright notice that you may distribute * with programs based on this work. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. */ package org.openscience.cdk.io; import java.io.BufferedWriter; import java.io.IOException; import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.StringWriter; import java.io.Writer; import java.text.NumberFormat; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Calendar; import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.StringTokenizer; import java.util.TimeZone; import org.openscience.cdk.CDKConstants; import org.openscience.cdk.annotations.TestClass; import org.openscience.cdk.annotations.TestMethod; import org.openscience.cdk.config.IsotopeFactory; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; import org.openscience.cdk.interfaces.IBond; import org.openscience.cdk.interfaces.IChemFile; import org.openscience.cdk.interfaces.IChemModel; import org.openscience.cdk.interfaces.IChemObject; import org.openscience.cdk.interfaces.IChemSequence; import org.openscience.cdk.interfaces.IMolecule; import org.openscience.cdk.interfaces.IPseudoAtom; import org.openscience.cdk.io.formats.IResourceFormat; import org.openscience.cdk.io.formats.MDLFormat; import org.openscience.cdk.io.setting.BooleanIOSetting; import org.openscience.cdk.io.setting.IOSetting; import org.openscience.cdk.tools.ILoggingTool; import org.openscience.cdk.tools.LoggingToolFactory; import org.openscience.cdk.tools.manipulator.ChemFileManipulator; /** * Writes MDL molfiles, which contains a single molecule. * For writing a MDL molfile you can this code: * <pre> * MDLWriter writer = new MDLWriter(new FileWriter(new File("output.mol"))); * writer.write((Molecule)molecule); * writer.close(); * </pre> * * See {@cdk.cite DAL92}. * * @cdk.module io * @cdk.githash * @cdk.keyword file format, MDL molfile */ @TestClass("org.openscience.cdk.io.MDLWriterTest") public class MDLWriter extends DefaultChemObjectWriter { private final static ILoggingTool logger = LoggingToolFactory.createLoggingTool(MDLWriter.class); private BooleanIOSetting forceWriteAs2DCoords; private BufferedWriter writer; /** * Constructs a new MDLWriter that can write an {@link IMolecule} * to the MDL molfile format. * * @param out The Writer to write to */ public MDLWriter(Writer out) { if (out instanceof BufferedWriter) { writer = (BufferedWriter)out; } else { writer = new BufferedWriter(out); } initIOSettings(); } /** * Constructs a new MDLWriter that can write an {@link IMolecule} * to a given OutputStream. * * @param output The OutputStream to write to */ public MDLWriter(OutputStream output) { this(new OutputStreamWriter(output)); } public MDLWriter() { this(new StringWriter()); } @TestMethod("testGetFormat") public IResourceFormat getFormat() { return MDLFormat.getInstance(); } public void setWriter(Writer out) throws CDKException { if (out instanceof BufferedWriter) { writer = (BufferedWriter)out; } else { writer = new BufferedWriter(out); } } public void setWriter(OutputStream output) throws CDKException { setWriter(new OutputStreamWriter(output)); } /** * Flushes the output and closes this object. */ @TestMethod("testClose") public void close() throws IOException { writer.close(); } @TestMethod("testAccepts") public boolean accepts(Class classObject) { Class[] interfaces = classObject.getInterfaces(); for (int i=0; i<interfaces.length; i++) { if (IAtomContainer.class.equals(interfaces[i])) return true; if (IChemFile.class.equals(interfaces[i])) return true; if (IChemModel.class.equals(interfaces[i])) return true; } Class superClass = classObject.getSuperclass(); if (superClass != null) return this.accepts(superClass); return false; } /** * Writes a {@link IChemObject} to the MDL molfile formated output. * It can only output ChemObjects of type {@link IChemFile}, * {@link IMolecule} and {@link IAtomContainer}. * * @param object {@link IChemObject} to write * * @see #accepts(Class) */ public void write(IChemObject object) throws CDKException { customizeJob(); try { if (object instanceof IChemFile) { writeChemFile((IChemFile)object); return; } else if (object instanceof IChemModel) { IChemFile file = object.getBuilder().newChemFile(); IChemSequence sequence = object.getBuilder().newChemSequence(); sequence.addChemModel((IChemModel)object); file.addChemSequence(sequence); writeChemFile((IChemFile)file); return; } else if (object instanceof IAtomContainer) { writeMolecule((IAtomContainer)object); return; } } catch (Exception ex) { logger.error(ex.getMessage()); logger.debug(ex); throw new CDKException("Exception while writing MDL file: " + ex.getMessage(), ex); } throw new CDKException("Only supported is writing of IChemFile, " + "IChemModel, and IAtomContainer objects."); } private void writeChemFile(IChemFile file) throws Exception { IAtomContainer bigPile = file.getBuilder().newAtomContainer(); for (IAtomContainer container : ChemFileManipulator.getAllAtomContainers(file)) { bigPile.add(container); if(container.getProperty(CDKConstants.TITLE)!=null){ if(bigPile.getProperty(CDKConstants.TITLE)!=null) bigPile.setProperty(CDKConstants.TITLE, bigPile.getProperty(CDKConstants.TITLE)+"; " +container.getProperty(CDKConstants.TITLE)); else bigPile.setProperty(CDKConstants.TITLE, container.getProperty(CDKConstants.TITLE)); } if(container.getProperty(CDKConstants.REMARK)!=null){ if(bigPile.getProperty(CDKConstants.REMARK)!=null) bigPile.setProperty(CDKConstants.REMARK, bigPile.getProperty(CDKConstants.REMARK)+"; " +container.getProperty(CDKConstants.REMARK)); else bigPile.setProperty(CDKConstants.REMARK, container.getProperty(CDKConstants.REMARK)); } } writeMolecule(bigPile); } /** * Writes a Molecule to an OutputStream in MDL sdf format. * * @param container Molecule that is written to an OutputStream */ public void writeMolecule(IAtomContainer container) throws Exception { String line = ""; List<Integer> rgroupList=null; // write header block // lines get shortened to 80 chars, that's in the spec String title = (String)container.getProperty(CDKConstants.TITLE); if (title == null) title = ""; if(title.length()>80) title=title.substring(0,80); writer.write(title); writer.newLine(); /* From CTX spec * This line has the format: * IIPPPPPPPPMMDDYYHHmmddSSssssssssssEEEEEEEEEEEERRRRRR * (FORTRAN: A2<--A8--><---A10-->A2I2<--F10.5-><---F12.5--><-I6-> ) * User's first and last initials (l), program name (P), * date/time (M/D/Y,H:m), dimensional codes (d), scaling factors (S, s), * energy (E) if modeling program input, internal registry number (R) * if input through MDL form. * A blank line can be substituted for line 2. */ writer.write(" CDK "); writer.write(new SimpleDateFormat("MMddyyHHmm").format(System.currentTimeMillis())); writer.newLine(); String comment = (String)container.getProperty(CDKConstants.REMARK); if (comment == null) comment = ""; if(comment.length()>80) comment=comment.substring(0,80); writer.write(comment); writer.newLine(); // write Counts line line += formatMDLInt(container.getAtomCount(), 3); line += formatMDLInt(container.getBondCount(), 3); line += " 0 0 0 0 0 0 0 0999 V2000"; writer.write(line); writer.newLine(); // write Atom block for (int f = 0; f < container.getAtomCount(); f++) { IAtom atom = container.getAtom(f); line = ""; if (atom.getPoint3d() != null && !forceWriteAs2DCoords.isSet()) { line += formatMDLFloat((float) atom.getPoint3d().x); line += formatMDLFloat((float) atom.getPoint3d().y); line += formatMDLFloat((float) atom.getPoint3d().z) + " "; } else if (atom.getPoint2d() != null) { line += formatMDLFloat((float) atom.getPoint2d().x); line += formatMDLFloat((float) atom.getPoint2d().y); line += " 0.0000 "; } else { // if no coordinates available, then output a number // of zeros line += formatMDLFloat((float)0.0); line += formatMDLFloat((float)0.0); line += formatMDLFloat((float)0.0) + " "; } if(container.getAtom(f) instanceof IPseudoAtom){ //according to http://www.google.co.uk/url?sa=t&ct=res&cd=2&url=http%3A%2F%2Fwww.mdl.com%2Fdownloads%2Fpublic%2Fctfile%2Fctfile.pdf&ei=MsJjSMbjAoyq1gbmj7zCDQ&usg=AFQjCNGaJSvH4wYy4FTXIaQ5f7hjoTdBAw&sig2=eSfruNOSsdMFdlrn7nhdAw an R group is written as R# IPseudoAtom pseudoAtom = (IPseudoAtom) container.getAtom(f); if (pseudoAtom.getSymbol().equals("R") && pseudoAtom.getLabel().length()>1) { line += "R# "; if (rgroupList==null) { rgroupList = new ArrayList<Integer>(); } Integer rGroupNumber = new Integer(pseudoAtom.getLabel().substring(1)); rgroupList.add(f+1); rgroupList.add(rGroupNumber); } else line += formatMDLString(((IPseudoAtom) container.getAtom(f)).getLabel(), 3); }else{ line += formatMDLString(container.getAtom(f).getSymbol(), 3); } line += " 0 0 0 0 0 0 0 0 0 0 0 0"; writer.write(line); writer.newLine(); } // write Bond block Iterator<IBond> bonds = container.bonds().iterator(); while (bonds.hasNext()) { IBond bond = (IBond) bonds.next(); if (bond.getAtomCount() != 2) { logger.warn("Skipping bond with more/less than two atoms: " + bond); } else { if (bond.getStereo() == IBond.Stereo.UP_INVERTED || bond.getStereo() == IBond.Stereo.DOWN_INVERTED) { // turn around atom coding to correct for inv stereo line = formatMDLInt(container.getAtomNumber(bond.getAtom(1)) + 1,3); line += formatMDLInt(container.getAtomNumber(bond.getAtom(0)) + 1,3); } else { line = formatMDLInt(container.getAtomNumber(bond.getAtom(0)) + 1,3); line += formatMDLInt(container.getAtomNumber(bond.getAtom(1)) + 1,3); } line += formatMDLInt((int)bond.getOrder().ordinal()+1,3); line += " "; switch(bond.getStereo()){ case UP: line += "1"; break; case UP_INVERTED: line += "1"; break; case DOWN: line += "6"; break; case DOWN_INVERTED: line += "6"; break; case UP_OR_DOWN: line += "4"; break; case E_OR_Z: line += "3"; break; default: line += "0"; } line += " 0 0 0 "; writer.write(line); writer.newLine(); } } // write formal atomic charges for (int i = 0; i < container.getAtomCount(); i++) { IAtom atom = container.getAtom(i); Integer charge = atom.getFormalCharge(); if (charge != null && charge != 0) { writer.write("M CHG 1 "); writer.write(formatMDLInt(i+1,3)); writer.write(" "); writer.write(formatMDLInt(charge,3)); writer.newLine(); } } // write formal isotope information for (int i = 0; i < container.getAtomCount(); i++) { IAtom atom = container.getAtom(i); if (!(atom instanceof IPseudoAtom)) { Integer atomicMass = atom.getMassNumber(); if (atomicMass != null) { int majorMass = IsotopeFactory.getInstance(atom.getBuilder()).getMajorIsotope(atom.getSymbol()).getMassNumber(); if (atomicMass != majorMass) { writer.write("M ISO 1 "); writer.write(formatMDLInt(i+1,3)); writer.write(" "); writer.write(formatMDLInt(atomicMass,3)); writer.newLine(); } } } } //write RGP line (max occurrence is 16 data points per line) if (rgroupList!=null) { StringBuffer rgpLine=new StringBuffer(); int cnt=0; for (int i=1; i<= rgroupList.size(); i++) { rgpLine.append(formatMDLInt((rgroupList.get(i-1)), 4)); i++; rgpLine.append(formatMDLInt((rgroupList.get(i-1)), 4)); cnt++; if (i==rgroupList.size() || i==16 ) { rgpLine.insert(0, "M RGP"+formatMDLInt(cnt, 3)); writer.write(rgpLine.toString()); writer.newLine(); rgpLine=new StringBuffer(); cnt=0; } } } // close molecule writer.write("M END"); writer.newLine(); writer.flush(); } /** * Formats an integer to fit into the connection table and changes it * to a String. * * @param i The int to be formated * @param l Length of the String * @return The String to be written into the connectiontable */ protected static String formatMDLInt(int i, int l) { String s = "", fs = ""; NumberFormat nf = NumberFormat.getNumberInstance(Locale.ENGLISH); nf.setParseIntegerOnly(true); nf.setMinimumIntegerDigits(1); nf.setMaximumIntegerDigits(l); nf.setGroupingUsed(false); s = nf.format(i); l = l - s.length(); for (int f = 0; f < l; f++) fs += " "; fs += s; return fs; } /** * Formats a float to fit into the connectiontable and changes it * to a String. * * @param fl The float to be formated * @return The String to be written into the connectiontable */ protected static String formatMDLFloat(float fl) { String s = "", fs = ""; int l; NumberFormat nf = NumberFormat.getNumberInstance(Locale.ENGLISH); nf.setMinimumIntegerDigits(1); nf.setMaximumIntegerDigits(4); nf.setMinimumFractionDigits(4); nf.setMaximumFractionDigits(4); nf.setGroupingUsed(false); s = nf.format(fl); l = 10 - s.length(); for (int f = 0; f < l; f++) fs += " "; fs += s; return fs; } /** * Formats a String to fit into the connectiontable. * * @param s The String to be formated * @param le The length of the String * @return The String to be written in the connectiontable */ protected static String formatMDLString(String s, int le) { s = s.trim(); if (s.length() > le) return s.substring(0, le); int l; l = le - s.length(); for (int f = 0; f < l; f++) s += " "; return s; } private void initIOSettings() { forceWriteAs2DCoords = new BooleanIOSetting( "ForceWriteAs2DCoordinates", IOSetting.LOW, "Should coordinates always be written as 2D?", "false" ); } public void customizeJob() { fireIOSettingQuestion(forceWriteAs2DCoords); } public IOSetting[] getIOSettings() { IOSetting[] settings = new IOSetting[1]; settings[0] = forceWriteAs2DCoords; return settings; } }