/* $RCSfile$ * $Author$ * $Date$ * $Revision$ * * Copyright (C) 2004-2007 The Chemistry Development Kit (CDK) project * * Contact: cdk-devel@lists.sourceforge.net * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * All we ask is that proper credit is given for our work, which includes * - but is not limited to - adding the above copyright notice to the beginning * of your source code files, and to any copyright notice that you may distribute * with programs based on this work. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. */ package org.openscience.cdk.io.iterator; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.util.NoSuchElementException; import org.openscience.cdk.CDKConstants; import org.openscience.cdk.DefaultChemObjectBuilder; import org.openscience.cdk.annotations.TestClass; import org.openscience.cdk.annotations.TestMethod; import org.openscience.cdk.interfaces.IChemObject; import org.openscience.cdk.interfaces.IChemObjectBuilder; import org.openscience.cdk.interfaces.IMolecule; import org.openscience.cdk.io.formats.IResourceFormat; import org.openscience.cdk.io.formats.SMILESFormat; import org.openscience.cdk.smiles.SmilesParser; import org.openscience.cdk.tools.ILoggingTool; import org.openscience.cdk.tools.LoggingToolFactory; /** * Iterating SMILES file reader. It allows to iterate over all molecules * in the SMILES file, without being read into memory all. Suitable * for very large SMILES files. These SMILES files are expected to have one * molecule on each line. * * <p>For parsing each SMILES it still uses the normal SMILESReader. * * @cdk.module smiles * @cdk.githash * * @see org.openscience.cdk.io.SMILESReader * * @author Egon Willighagen <egonw@sci.kun.nl> * @cdk.created 2004-12-16 * * @cdk.keyword file format, SMILES */ @TestClass("org.openscience.cdk.io.iterator.IteratingSMILESReaderTest") public class IteratingSMILESReader extends DefaultIteratingChemObjectReader { private BufferedReader input; private static ILoggingTool logger = LoggingToolFactory.createLoggingTool(IteratingSMILESReader.class); private String currentLine; private SmilesParser sp = null; private boolean nextAvailableIsKnown; private boolean hasNext; private IMolecule nextMolecule; /** * Contructs a new IteratingSMILESReader that can read Molecule from a given Reader. * * @param in The Reader to read from * @param builder The builder to use * @see org.openscience.cdk.DefaultChemObjectBuilder * @see org.openscience.cdk.nonotify.NoNotificationChemObjectBuilder */ @TestMethod("testSMILESFileWithNames") public IteratingSMILESReader(Reader in, IChemObjectBuilder builder) { sp = new SmilesParser(builder); setReader(in); } /** * Contructs a new IteratingSMILESReader that can read Molecule from a given InputStream. * * This method will use @link{DefaultChemObjectBuilder} to build the actual molecules * * @param in The InputStream to read from */ @TestMethod("testSMILESFileWithNames") public IteratingSMILESReader(InputStream in) { this(new InputStreamReader(in), DefaultChemObjectBuilder.getInstance()); } /** * Contructs a new IteratingSMILESReader that can read Molecule from a given InputStream and IChemObjectBuilder. * * @param in The input stream * @param builder The builder */ public IteratingSMILESReader(InputStream in, IChemObjectBuilder builder) { this(new InputStreamReader(in), builder); } /** * Get the format for this reader. * * @return An instance of {@link org.openscience.cdk.io.formats.SMILESFormat} */ @TestMethod("testGetFormat") public IResourceFormat getFormat() { return SMILESFormat.getInstance(); } /** * Checks whether there is another molecule to read. * * @return true if there are molecules to read, false otherwise */ @TestMethod("testSMILESFileWithNames,testSMILESFileWithSpacesAndTabs,testSMILESTitles,testSMILESFile") public boolean hasNext() { if (!nextAvailableIsKnown) { hasNext = false; // now try to parse the next Molecule try { if (input.ready()) { currentLine = input.readLine().trim(); logger.debug("Line: ", currentLine); int indexSpace = currentLine.indexOf(" "); if (indexSpace == -1) indexSpace = currentLine.indexOf("\t"); String SMILES = currentLine; String name = null; if (indexSpace != -1) { logger.debug("Space found at index: ", indexSpace); SMILES = currentLine.substring(0,indexSpace); name = currentLine.substring(indexSpace+1); name = name.trim(); logger.debug("Line contains SMILES and name: ", SMILES, " + " , name); } nextMolecule = sp.parseSmiles(SMILES); if (name != null) { nextMolecule.setProperty(CDKConstants.TITLE, name); } if (nextMolecule.getAtomCount() > 0) { hasNext = true; } else { hasNext = false; } } else { hasNext = false; } } catch (Exception exception) { logger.error("Error while reading next molecule: ", exception.getMessage()); logger.debug(exception); hasNext = false; } if (!hasNext) nextMolecule = null; nextAvailableIsKnown = true; } return hasNext; } /** * Get the next molecule from the stream. * * @return The next molecule */ @TestMethod("testSMILESFileWithNames,testSMILESFileWithSpacesAndTabs,testSMILESTitles,testSMILESFile") public IChemObject next() { if (!nextAvailableIsKnown) { hasNext(); } nextAvailableIsKnown = false; if (!hasNext) { throw new NoSuchElementException(); } return nextMolecule; } /** * Close the reader. * * @throws IOException if there is an error during closing */ @TestMethod("testSMILESFileWithNames,testSMILESFileWithSpacesAndTabs,testClose") public void close() throws IOException { input.close(); } @TestMethod("testRemove") public void remove() { throw new UnsupportedOperationException(); } @TestMethod("testSetReader_Reader") public void setReader(Reader reader) { if (reader instanceof BufferedReader) { input = (BufferedReader)reader; } else { input = new BufferedReader(reader); } nextMolecule = null; nextAvailableIsKnown = false; hasNext = false; } @TestMethod("testSetReader1,testSetReader_InputStream") public void setReader(InputStream reader) { setReader(new InputStreamReader(reader)); } }