/* Copyright (C) 2006 Leonardo Bispo de Oliveira and * Daniele Sunaga de Oliveira * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ package br.com.ibmp.som.matrix; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.BufferedReader; import java.io.FileInputStream; import java.io.InputStreamReader; import java.io.FileNotFoundException; import java.util.List; import java.util.Random; import java.util.ArrayList; import java.util.StringTokenizer; import br.com.ibmp.som.exception.SOMException; import br.com.ibmp.som.matrix.vo.SOMElementVO; /** * Class that contains the sample vector structure. * * @author Leonardo Bispo de Oliveira and Daniele Sunaga de Oliveira. * @version 1.0 * */ public class SampleVectorFile implements SampleVectorInterface { /** Matrix Header. */ private List<String> header; /** Array that contains the sample vector. */ private List<SOMElementVO> vector; /** Random instance. */ private Random randomize; /** Sample name. */ private String name; /** * Constructor. * * @param file - File that contains the sample vector. * * @author Leonardo Bispo de Oliveira and Daniele Sunaga de Oliveira. * * @throws SOMException * */ public SampleVectorFile(String file) throws SOMException { if (file == null) throw new SOMException("File is not valid"); this.name = file; vector = new ArrayList<SOMElementVO>(100); header = new ArrayList<String>(100); randomize = new Random(); openFile(file); } /** * Assessor for opening a file that contains the sample vector. * * @param fileName - File to be opened. * * @author Leonardo Bispo de Oliveira and Daniele Sunaga de Oliveira. * * @throws SOMException * */ protected final void openFile(String fileName) throws SOMException { File file; String line; InputStream is; BufferedReader reader; file = new File(fileName); if (!file.exists()) throw new SOMException("File does not exists"); try { is = new FileInputStream(file); } catch (FileNotFoundException e) { throw new SOMException("File not found", e); } reader = new BufferedReader(new InputStreamReader(is)); try { parseHeader(reader.readLine()); while ((line = reader.readLine()) != null) vector.add(parseLine(line)); } catch (IOException e) { throw new SOMException("Problems with reader", e); } } /** * Assessor to normalize empty columns. * * @param line - Line to be normalized. * * @author Leonardo Bispo de Oliveira and Daniele Sunaga de Oliveira. * */ private final String normalizeLine(String line) { while (line.contains("\t\t")) line = line.replaceAll("\t\t", "\t \t"); return line; } /** Parse the header line. * * @param line - Sample unparsed line. * * @author Leonardo Bispo de Oliveira and Daniele Sunaga de Oliveira. * * @throws SOMException * */ protected final void parseHeader(String line) throws SOMException { StringTokenizer tokenizer; line = normalizeLine(line); tokenizer = new StringTokenizer(line, "\t"); while (tokenizer.hasMoreTokens()) header.add(tokenizer.nextToken()); } /** * Parse the vector sample. * * @param line - Sample unparsed line. * * @return List of parsed line. * * @author Leonardo Bispo de Oliveira and Daniele Sunaga de Oliveira. * * @throws SOMException * */ protected final SOMElementVO parseLine(String line) throws SOMException { String token; Double value; SOMElementVO element; StringTokenizer tokenizer; line = normalizeLine(line); tokenizer = new StringTokenizer(line, "\t"); if (!tokenizer.hasMoreTokens()) throw new SOMException("Problems with the Sample file"); element = new SOMElementVO(); element.setName(tokenizer.nextToken()); if (!tokenizer.hasMoreTokens()) throw new SOMException("Problems with the Sample file"); element.setDescription(tokenizer.nextToken()); while (tokenizer.hasMoreTokens()) { if ((token = tokenizer.nextToken()).equals(" ")) value = null; else value = new Double(token); element.addValue(value); } while (element.getNumberOfValues() < getColSize()) element.addValue(null); return element; } /** * Assessor for returning the sample header. * * @return List of sample header. * * @author Leonardo Bispo de Oliveira and Daniele Sunaga de Oliveira. * */ public List<String> getHeader() { return header; } /** * Assessor for returning the sample name. * * @return Sample name. * * @author Leonardo Bispo de Oliveira and Daniele Sunaga de Oliveira. * */ public String getName() { return name; } /** * Assessor for returning the Sample vector line passed with parameter. * * @param idx - Sample line to be returned. * @return The Matrix element. * * @author Leonardo Bispo de Oliveira and Daniele Sunaga de Oliveira. * */ public SOMElementVO getElement(final int idx) { return vector.get(idx); } /** * Assessor for returning a randomized weight. * * @return Randomized weight. * * @author Leonardo Bispo de Oliveira and Daniele Sunaga de Oliveira. * */ public SOMElementVO randomizeWeight() { int i; int row; SOMElementVO weight; Double value; value = null; weight = new SOMElementVO(); for (i = 0; i < getColSize(); i++) { while (value == null) { row = randomize.nextInt(getRowSize()); value = getElement(row).getValue(i); } weight.addValue(value); } return weight; } /** * Assessor for returning a randomized sample. * * @return Randomized sample. * * @author Leonardo Bispo de Oliveira and Daniele Sunaga de Oliveira. * */ public SOMElementVO randomizeSample() { int row; row = randomize.nextInt(getRowSize()); return getElement(row); } /** * Assessor for returning the row size. * * @return Row size. * * @author Leonardo Bispo de Oliveira and Daniele Sunaga de Oliveira. * */ public int getRowSize() { return vector.size(); } /** * Assessor for returning the column size. * * @return Column size. * * @author Leonardo Bispo de Oliveira and Daniele Sunaga de Oliveira. * */ public int getColSize() { return (header.size() - 2); } }