/* $Revision$ $Author$ $Date$ * * Copyright (C) 2001-2007 Bradley A. Smith <bradley@baysmith.com> * 2003-2009 Egon Willighagen <egonw@users.sf.net> * * Contact: cdk-devel@lists.sourceforge.net * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. */ package org.openscience.cdk.io; import java.io.BufferedReader; import java.io.CharArrayReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.io.StringReader; import java.lang.reflect.Method; import java.util.ArrayList; import java.util.List; import java.util.StringTokenizer; import org.openscience.cdk.annotations.TestClass; import org.openscience.cdk.annotations.TestMethod; import org.openscience.cdk.io.formats.IChemFormat; import org.openscience.cdk.io.formats.IChemFormatMatcher; import org.openscience.cdk.io.formats.XYZFormat; /** * A factory for recognizing chemical file formats. Formats * of GZiped files can be detected too. * * A typical example is: * <pre> * StringReader stringReader = new StringReader("<molecule/>"); * IChemFormat format = new FormatFactory().guessFormat(stringReader); * </pre> * * @cdk.module ioformats * @cdk.githash * * @author Egon Willighagen <egonw@sci.kun.nl> * @author Bradley A. Smith <bradley@baysmith.com> */ @TestClass("org.openscience.cdk.io.FormatFactoryTest") public class FormatFactory { private final static String IO_FORMATS_LIST = "io-formats.set"; private int headerLength; private static List<IChemFormatMatcher> formats = null; /** * Constructs a ReaderFactory which tries to detect the format in the * first 65536 chars. */ public FormatFactory() { this(65536); } /** * Constructs a ReaderFactory which tries to detect the format in the * first given number of chars. * * @param headerLength length of the header in number of chars */ public FormatFactory(int headerLength) { this.headerLength = headerLength; loadFormats(); } private void loadFormats() { if (formats == null) { formats = new ArrayList<IChemFormatMatcher>(); try { BufferedReader reader = new BufferedReader(new InputStreamReader( this.getClass().getClassLoader().getResourceAsStream(IO_FORMATS_LIST) )); int formatCount = 0; while (reader.ready()) { // load them one by one String formatName = reader.readLine(); formatCount++; try { Class<? extends Object> formatClass = this.getClass().getClassLoader().loadClass(formatName); Method getinstanceMethod = formatClass.getMethod("getInstance", new Class[0]); IChemFormatMatcher format = (IChemFormatMatcher)getinstanceMethod.invoke(null, new Object[0]); formats.add(format); } catch (ClassNotFoundException exception) { } catch (Exception exception) { } } } catch (Exception exception) { } } } /** * Registers a format for detection. */ public void registerFormat(IChemFormatMatcher format) { formats.add(format); } /** * Returns the list of recognizable formats. * * @return {@link List} of {@link IChemFormat}s. */ public List<IChemFormatMatcher> getFormats(){ return formats; } /** * Creates a String of the Class name of the <code>IChemObject</code> reader * for this file format. The input is read line-by-line * until a line containing an identifying string is * found. * * <p>The ReaderFactory detects more formats than the CDK * has Readers for. * * <p>This method is not able to detect the format of gziped files. * Use <code>guessFormat(InputStream)</code> instead for such files. * * @throws IOException if an I/O error occurs * @throws IllegalArgumentException if the input is null * * @see #guessFormat(InputStream) */ public IChemFormat guessFormat(Reader input) throws IOException { if (input == null) { throw new IllegalArgumentException("input cannot be null"); } // make a copy of the header char[] header = new char[this.headerLength]; if (!input.markSupported()) { throw new IllegalArgumentException("input must support mark"); } input.mark(this.headerLength); input.read(header, 0, this.headerLength); input.reset(); BufferedReader buffer = new BufferedReader(new CharArrayReader(header)); /* Search file for a line containing an identifying keyword */ String line = null; int lineNumber = 1; while ((line = buffer.readLine()) != null) { for (int i=0; i<formats.size(); i++) { IChemFormatMatcher cfMatcher = formats.get(i); if (cfMatcher.matches(lineNumber, line)) { return cfMatcher; } } lineNumber++; } buffer = new BufferedReader(new CharArrayReader(header)); line = buffer.readLine(); // is it a XYZ file? StringTokenizer tokenizer = new StringTokenizer(line.trim()); try { int tokenCount = tokenizer.countTokens(); if (tokenCount == 1) { Integer.parseInt(tokenizer.nextToken()); // if not failed, then it is a XYZ file return (IChemFormat)XYZFormat.getInstance(); } else if (tokenCount == 2) { Integer.parseInt(tokenizer.nextToken()); if ("Bohr".equalsIgnoreCase(tokenizer.nextToken())) { return (IChemFormat)XYZFormat.getInstance(); } } } catch (NumberFormatException exception) { } return null; } @TestMethod("testGuessFormat") public IChemFormat guessFormat(InputStream input) throws IOException { if (input == null) { throw new IllegalArgumentException("input cannot be null"); } // make a copy of the header byte[] header = new byte[this.headerLength]; if (!input.markSupported()) { throw new IllegalArgumentException("input must support mark"); } input.mark(this.headerLength); input.read(header, 0, this.headerLength); input.reset(); BufferedReader buffer = new BufferedReader( new StringReader(new String(header)) ); /* Search file for a line containing an identifying keyword */ String line = null; int lineNumber = 1; while ((line = buffer.readLine()) != null) { for (int i=0; i<formats.size(); i++) { IChemFormatMatcher cfMatcher = formats.get(i); if (cfMatcher.matches(lineNumber, line)) { return cfMatcher; } } lineNumber++; } buffer = new BufferedReader( new StringReader(new String(header)) ); line = buffer.readLine(); // is it a XYZ file? StringTokenizer tokenizer = new StringTokenizer(line.trim()); try { int tokenCount = tokenizer.countTokens(); if (tokenCount == 1) { Integer.parseInt(tokenizer.nextToken()); // if not failed, then it is a XYZ file return (IChemFormat)XYZFormat.getInstance(); } else if (tokenCount == 2) { Integer.parseInt(tokenizer.nextToken()); if ("Bohr".equalsIgnoreCase(tokenizer.nextToken())) { return (IChemFormat)XYZFormat.getInstance(); } } } catch (NumberFormatException exception) { } return null; } }