/*
 * Copyright 2004-2010 Information & Software Engineering Group (188/1)
 *                     Institute of Software Technology and Interactive Systems
 *                     Vienna University of Technology, Austria
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package at.tuwien.ifs.somtoolbox.data;

import java.io.IOException;
import java.lang.reflect.Constructor;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Logger;

import at.tuwien.ifs.somtoolbox.SOMToolboxException;
import at.tuwien.ifs.somtoolbox.util.FileUtils;

/**
 * A factory class that knows how to build a {@link InputData} object from a given file name.
 * <p>
 * Readers are looked up in two static registries that are filled once when the class is loaded: one maps a file name
 * suffix to a format name, the other maps a format name to the reader class. A reader class is instantiated
 * reflectively through its constructor taking a single {@link String} (the file name).
 * </p>
 * 
 * @author Rudolf Mayer
 * @version $Id: InputDataFactory.java 3868 2010-10-21 15:52:31Z mayer $
 */
public class InputDataFactory {

    /** Shared logger; the same named logger is used for every message emitted by this class. */
    private static final Logger LOGGER = Logger.getLogger("at.tuwien.ifs.somtoolbox");

    /** Description of the file being probed, handed to the {@link FileUtils} helpers. */
    private static final String INPUT_VECTOR_FILE = "Input vector file";

    /** Format name -> reader class implementing that format. */
    private static final HashMap<String, Class<? extends AbstractSOMLibSparseInputData>> inputClasses = new HashMap<String, Class<? extends AbstractSOMLibSparseInputData>>();

    /** Supported Input File Format Types */
    public static final String[] INPUT_FILE_FORMAT_TYPES;

    /** File name suffix -> format name, used to guess the format of a file from its name. */
    private static final HashMap<String, String> knownExtensions = new HashMap<String, String>();

    static {
        knownExtensions.put(SOMLibSparseInputData.getFileNameSuffix(), SOMLibSparseInputData.getFormatName());
        inputClasses.put(SOMLibSparseInputData.getFormatName(), SOMLibSparseInputData.class);

        knownExtensions.put(ARFFFormatInputData.getFileNameSuffix(), ARFFFormatInputData.getFormatName());
        inputClasses.put(ARFFFormatInputData.getFormatName(), ARFFFormatInputData.class);

        knownExtensions.put(RandomAccessFileSOMLibInputData.getFileNameSuffix(),
                RandomAccessFileSOMLibInputData.getFormatName());
        inputClasses.put(RandomAccessFileSOMLibInputData.getFormatName(), RandomAccessFileSOMLibInputData.class);

        knownExtensions.put(SimpleMatrixInputData.getFileNameSuffix(), SimpleMatrixInputData.getFormatName());
        inputClasses.put(SimpleMatrixInputData.getFormatName(), SimpleMatrixInputData.class);

        knownExtensions.put(SOMPAKInputData.getFileNameSuffix(), SOMPAKInputData.getFormatName());
        inputClasses.put(SOMPAKInputData.getFormatName(), SOMPAKInputData.class);

        knownExtensions.put(ESOMInputData.getFileNameSuffix(), ESOMInputData.getFormatName());
        inputClasses.put(ESOMInputData.getFormatName(), ESOMInputData.class);

        // Registered by format name only: no file name suffix is mapped to this format, so it can only be
        // selected explicitly via open(String formatName, String inputFileName).
        inputClasses.put(MarsyasARFFInputData.getFormatName(), MarsyasARFFInputData.class);

        INPUT_FILE_FORMAT_TYPES = inputClasses.keySet().toArray(new String[inputClasses.size()]);
    }

    /**
     * Opens the given input vector file, guessing its format.
     * <p>
     * The format is first guessed from the file name suffix. If the suffix denotes the SOMLib format, a
     * {@link SOMLibSparseInputData} is created with all the given parameters; for any other known suffix the matching
     * reader is created via {@link #open(String, String)}, which only receives the file name. If that fails (unknown
     * suffix, or the reader could not be created), the file content is inspected: a file with SOMLib headers is read
     * as {@link SOMLibSparseInputData}, everything else - including a file whose headers cannot be read - as
     * {@link SimpleMatrixInputData}.
     * </p>
     * 
     * @param vectorFileName name of the input vector file
     * @param templateFileName name of the template vector file, passed on to {@link SOMLibSparseInputData}
     * @param sparse passed on to {@link SOMLibSparseInputData}
     * @param norm passed on to {@link SOMLibSparseInputData}
     * @param numCacheBlocks passed on to {@link SOMLibSparseInputData}
     * @param seed passed on to {@link SOMLibSparseInputData}
     * @return the input data read from the file
     */
    // FIXME: the content-based fallback only tells SOMLib and simple matrix files apart; other formats, such as ESOM
    // and SOMPAK, are recognised by their file name suffix only
    public static InputData open(String vectorFileName, String templateFileName, boolean sparse, boolean norm,
            int numCacheBlocks, long seed) {
        // step 1: try to detect the format from the file name suffix
        try {
            String inputFormat = detectInputFormatFromExtension(vectorFileName, "input");
            if (inputFormat != null) {
                if (inputFormat.equals(AbstractSOMLibSparseInputData.getFormatName())) {
                    return new SOMLibSparseInputData(vectorFileName, templateFileName, sparse, norm, numCacheBlocks,
                            seed);
                } else {
                    return open(inputFormat, vectorFileName);
                }
            }
        } catch (SOMToolboxException e) {
            // Not fatal: an unknown suffix or a reader that could not be created simply means we have to look at
            // the file content instead.
            LOGGER.info("Could not open '" + vectorFileName + "' based on its file extension (" + e.getMessage()
                    + "), inspecting the file content instead.");
        }

        // step 2: check whether the file content looks like the SOMLib format
        try {
            // NOTE(review): the reader obtained from FileUtils.openFile is not closed in this method - confirm
            // whether FileUtils.readSOMLibFileHeaders takes care of that.
            Map<String, String> headers = FileUtils.readSOMLibFileHeaders(
                    FileUtils.openFile(INPUT_VECTOR_FILE, vectorFileName), INPUT_VECTOR_FILE);
            LOGGER.fine("Headers read from '" + vectorFileName + "': " + headers);
            // Two entries of the map are not counted as SOMLib headers (see the message below) - presumably
            // bookkeeping entries added by FileUtils.readSOMLibFileHeaders; verify against that method.
            if (headers.size() > 2) { // headers found
                LOGGER.info("Found " + (headers.size() - 2) + " SOMlib headers, assuming SOMLib format.");
                return new SOMLibSparseInputData(vectorFileName, templateFileName, sparse, norm, numCacheBlocks, seed);
            } else {
                LOGGER.info("No SOMlib headers found, trying simple matrix format.");
                return new SimpleMatrixInputData(vectorFileName);
            }
        } catch (IOException e) {
            LOGGER.warning("Could not read SOMLib headers from '" + vectorFileName + "' (" + e.getMessage()
                    + "), trying simple matrix format.");
            return new SimpleMatrixInputData(vectorFileName);
        }
    }

    /**
     * Opens the given input vector file, guessing its format. Equivalent to calling
     * {@link #open(String, String, boolean, boolean, int, long)} with no template file, <code>sparse</code> and
     * <code>norm</code> set to <code>true</code>, one cache block and a seed of 7.
     * 
     * @param inputVectorFileName name of the input vector file
     * @return the input data read from the file
     */
    public static InputData open(String inputVectorFileName) {
        return open(inputVectorFileName, null, true, true, 1, 7);
    }

    /**
     * Opens the given file with the reader registered for the given format name.
     * 
     * @param formatName name of the input format, one of {@link #INPUT_FILE_FORMAT_TYPES}
     * @param inputFileName name of the file to read
     * @return the input data read from the file
     * @throws SOMToolboxException if the format name is not known, or if the reader could not be created; in the
     *             latter case the message describes the underlying problem
     */
    public static InputData open(String formatName, String inputFileName) throws SOMToolboxException {
        try {
            Class<? extends AbstractSOMLibSparseInputData> readerClass = inputClasses.get(formatName);
            if (readerClass == null) {
                throw new SOMToolboxException("Unknown Format: '" + formatName + "', possible formats are: "
                        + inputClasses.keySet());
            }
            Constructor<? extends AbstractSOMLibSparseInputData> constr = readerClass.getConstructor(String.class);
            return constr.newInstance(inputFileName);
        } catch (SOMToolboxException e) {
            throw e; // just throw it on
        } catch (Exception e) {
            // An exception thrown by the reader's constructor arrives wrapped in an InvocationTargetException,
            // which carries no message of its own - report the wrapped exception, otherwise the text would just
            // end in "null".
            Throwable cause = e.getCause() != null ? e.getCause() : e;
            String detail = cause.getMessage() != null ? cause.getMessage() : cause.toString();
            throw new SOMToolboxException("Could not instanciate reader for '" + formatName + "': " + detail);
        }
    }

    /**
     * Guesses the format of a file from its name. A trailing <code>.gz</code> is ignored, the remaining name is
     * matched against the registered file name suffixes.
     * 
     * @param inputFileName name of the file
     * @param type description of the kind of file (e.g. <code>input</code>), only used in log and error messages
     * @return the name of the format registered for the suffix of the file name
     * @throws SOMToolboxException if the file name does not end in any of the registered suffixes
     */
    public static String detectInputFormatFromExtension(String inputFileName, String type)
            throws SOMToolboxException {
        LOGGER.info("No " + type + " format specified, detecting from file extension...");

        // Match against the name without the compression suffix, but keep the original name for error reporting.
        String nameToMatch = inputFileName;
        if (nameToMatch.endsWith(".gz")) {
            nameToMatch = nameToMatch.substring(0, nameToMatch.length() - 3);
        }

        for (Map.Entry<String, String> known : knownExtensions.entrySet()) {
            String extension = known.getKey();
            if (nameToMatch.endsWith(extension)) {
                LOGGER.info("... found '" + extension + "' extension, assuming '" + known.getValue());
                return known.getValue();
            }
        }
        throw new SOMToolboxException("Unknown " + type + " format for file '" + inputFileName
                + "', please specify the " + type + " format via the option.");
    }
}