/*
 * Copyright 2004-2010 Information & Software Engineering Group (188/1)
 *                     Institute of Software Technology and Interactive Systems
 *                     Vienna University of Technology, Austria
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package at.tuwien.ifs.somtoolbox.data;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.commons.lang.ArrayUtils;

import at.tuwien.ifs.somtoolbox.util.FileUtils;
import at.tuwien.ifs.somtoolbox.util.StringUtils;

/**
 * Reads an input data file in the ESOM format. For details on the file-format specification, see
 * http://databionic-esom.sourceforge.net/user.html#File_formats.
 *
 * @author Rudolf Mayer
 * @version $Id: ESOMInputData.java 3358 2010-02-11 14:35:07Z mayer $
 */
public class ESOMInputData extends SOMLibSparseInputData {

    /**
     * Creates the input data from the given ESOM vector file by delegating to the superclass constructor.
     *
     * @param vectorFileName name of the ESOM input data file
     */
    public ESOMInputData(String vectorFileName) {
        super(vectorFileName);
    }

    /**
     * Parses an ESOM input data file: a four-line header (number of vectors, number of columns, column types,
     * component names) followed by one data row per vector. The first field of each data row is used as the
     * instance label, the following {@code dim} fields as the vector values.
     *
     * @param vectorFileName name of the ESOM input data file
     * @param sparse not used by this implementation; the data structures are always initialised with
     *            {@code initDataStructures(false)}
     * @throws IllegalArgumentException if the file cannot be read or does not match the expected layout; the
     *             original exception is attached as the cause
     */
    @Override
    protected void readVectorFile(String vectorFileName, boolean sparse) {
        Logger logger = Logger.getLogger("at.tuwien.ifs.somtoolbox");
        BufferedReader br = null;
        try {
            br = FileUtils.openFile("ESOM input data file", vectorFileName);

            // ignore comment lines; the helper is expected to hand back the first header line
            String line = requireEsomLine(FileUtils.consumeHeaderComments(br), "number of vectors");

            // first line: numVectors
            numVectors = parseEsomHeaderCount(line);

            // second line: dimensionality. Also includes the index/label field, thus we store the value -1
            dim = parseEsomHeaderCount(requireEsomLine(br.readLine(), "dimensionality")) - 1;
            initDataStructures(false);

            // third line - column types
            // TODO: process it
            requireEsomLine(br.readLine(), "column types");

            // fourth line - component names => construct a template vector
            line = requireEsomLine(br.readLine(), "component names");
            String[] componentNames = line.split(StringUtils.REGEX_SPACE_OR_TAB);
            templateVector = new SOMLibTemplateVector(numVectors, (String[]) ArrayUtils.remove(componentNames, 0));

            // all the other lines are data
            int index = 0;
            while ((line = br.readLine()) != null) {
                line = line.trim();
                if (line.length() == 0) {
                    continue;
                }
                String[] lineElements = line.split(StringUtils.REGEX_SPACE_OR_TAB);
                // One label field plus dim values are required; additional trailing fields are tolerated.
                if (lineElements.length < dim + 1) {
                    throw new IllegalArgumentException("Data row " + (index + 1) + " has " + lineElements.length
                            + " field(s), but at least " + (dim + 1) + " are required (label + " + dim + " values).");
                }
                for (int ve = 0; ve < dim; ve++) {
                    setMatrixValue(index, ve, parseDouble(lineElements[ve + 1]));
                }
                addInstance(index, lineElements[0]);
                index++;
            }
        } catch (Exception e) {
            // Report through the logger (including the stack trace) and keep the cause on the rethrown exception.
            logger.log(Level.SEVERE, ERROR_MESSAGE_FILE_FORMAT_CORRUPT, e);
            throw new IllegalArgumentException(e.getMessage(), e);
        } finally {
            if (br != null) {
                try {
                    br.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done here: either the data was already read completely,
                    // or the primary failure has been reported above.
                }
            }
        }
        logger.info("ESOM vector file seems to be correct. Riding on ...");
    }

    /**
     * Ensures that a header line is present.
     *
     * @param line the line that was read, or {@code null} if the end of the file was reached
     * @param description what the line should contain; used in the error message
     * @return {@code line}, if it is not {@code null}
     * @throws IllegalArgumentException if {@code line} is {@code null}
     */
    private static String requireEsomLine(String line, String description) {
        if (line == null) {
            throw new IllegalArgumentException("Unexpected end of file: missing header line (" + description + ").");
        }
        return line;
    }

    /**
     * Parses a numeric header line: the line is trimmed, its first character (the header marker) is dropped, and
     * the trimmed remainder is parsed as an integer.
     *
     * @param line the header line
     * @return the integer value following the leading marker character
     */
    private static int parseEsomHeaderCount(String line) {
        return Integer.parseInt(line.trim().substring(1).trim());
    }

    /** @return the file name suffix used for ESOM files, {@code ".esom"} */
    public static String getFileNameSuffix() {
        return ".esom";
    }

    /** @return the name of this format, {@code "ESOM"} */
    public static String getFormatName() {
        return "ESOM";
    }

}