/* * Copyright 2004-2010 Information & Software Engineering Group (188/1) * Institute of Software Technology and Interactive Systems * Vienna University of Technology, Austria * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package at.tuwien.ifs.somtoolbox.data; import java.io.BufferedReader; import java.util.ArrayList; import java.util.logging.Logger; /** * This class reads input vector data from files in the SOMPak format, as provided by the MATLAB SOMToolbox. See * http://www.cis.hut.fi/projects/somtoolbox/package/docs2/som_read_data.html for format details. * * @author Rudolf Mayer * @version $Id: SOMPAKInputData.java 3583 2010-05-21 10:07:41Z mayer $ */ public class SOMPAKInputData extends SOMLibSparseInputData { public static final String INDICATOR_COMPONENTS = "#n"; public SOMPAKInputData(String vectorFileName) { super(vectorFileName); } @Override protected void readVectorFile(String vectorFileName, boolean sparse) { BufferedReader br = openFile(vectorFileName); try { // read the first header line, which gives the dimensionality String dimString = br.readLine(); dim = Integer.parseInt(dimString); String[] componentNames = null; // the second line might give us the component names, e.g. in the format of // #n SepalL SepalW PetalL PetalW String line = br.readLine(); if (line.startsWith(INDICATOR_COMPONENTS)) { line = line.substring(INDICATOR_COMPONENTS.length()).trim(); componentNames = line.split(" "); line = br.readLine(); } // we don't have any information on the number of vectors available, thus we need to first read them into a // list ArrayList<String> lines = new ArrayList<String>(); while (line != null) { if (line.trim().length() > 0) { lines.add(line.trim()); } line = br.readLine(); } // now we can compute the number of vectors, and initialise our data structures accordingly. numVectors = lines.size(); // now that we know numVectors, we can also create the template vector templateVector = new SOMLibTemplateVector(numVectors, componentNames); classInfo = new SOMLibClassInformation(); initDataStructures(sparse); // finally, process all vectors int index = 0; for (String s : lines) { String[] lineElements = s.split(" "); // TODO: add a sanity check for lineElements.length == dim (or dim+1 if we have classes) for (int ve = 0; ve < dim; ve++) { setMatrixValue(index, ve, parseDouble(lineElements[ve])); } addInstance(index, String.valueOf(index + 1)); // if we have a class info if (lineElements.length > dim) { classInfo.addItem(String.valueOf(index + 1), lineElements[dim].trim()); } index++; } } catch (Exception e) { Logger.getLogger("at.tuwien.ifs.somtoolbox").severe(ERROR_MESSAGE_FILE_FORMAT_CORRUPT); e.printStackTrace(); throw new IllegalArgumentException(e.getMessage()); } classInfo.processItems(false); Logger.getLogger("at.tuwien.ifs.somtoolbox").info("SOMPak vector file seems to be correct. Riding on ..."); } public static String getFileNameSuffix() { return ".sompak"; } public static String getFormatName() { return "SOMPak"; } }