/*
 * Copyright 2004-2010 Information & Software Engineering Group (188/1)
 *                     Institute of Software Technology and Interactive Systems
 *                     Vienna University of Technology, Austria
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package at.tuwien.ifs.somtoolbox.data;

import cern.colt.matrix.DoubleMatrix1D;

import at.tuwien.ifs.somtoolbox.SOMToolboxException;
import at.tuwien.ifs.somtoolbox.layers.metrics.DistanceMetric;

/**
 * Supplies the input vectors used when training a Self-Organizing Map. The underlying data structure from which an
 * <code>InputData</code> is constructed is normally produced by a parser or a vector generator program.
 * <p>
 * All members of this interface are implicitly <code>public</code>; the two constants are additionally implicitly
 * <code>static final</code>.
 *
 * @author Michael Dittenbach
 * @author Rudolf Mayer
 * @version $Id: InputData.java 3589 2010-05-21 10:42:01Z mayer $
 */
public interface InputData {

    /**
     * Marker for a missing value. It is defined as {@link Double#NaN}, so it has to be detected with
     * {@link Double#isNaN(double)} rather than with <code>==</code>.
     */
    double MISSING_VALUE = Double.NaN;

    /** The suffix <code>".vec"</code>; presumably the file name suffix of input vector files (to be confirmed). */
    String inputFileNameSuffix = ".vec";

    /**
     * Tells whether this data set has been normalised to unit length.
     *
     * @return <code>true</code> if the data set is normalised, <code>false</code> otherwise.
     */
    boolean isNormalizedToUnitLength();

    /**
     * Returns the dimension of the input data.
     *
     * @return the dimension.
     */
    int dim();

    /**
     * Returns the size of this input data set.
     *
     * @return the number of vectors.
     */
    int numVectors();

    /**
     * Returns a random input sample from the input data set.
     *
     * @param iteration presumably the current training iteration; how it is used is up to the implementation
     *            (to be confirmed).
     * @param numIterations presumably the total number of training iterations (to be confirmed).
     * @return the random input datum.
     */
    InputDatum getRandomInputDatum(int iteration, int numIterations);

    /**
     * Returns the input datum at the given index.
     *
     * @param d the index of the input datum.
     * @return the input datum.
     */
    InputDatum getInputDatum(int d);

    /**
     * Returns the vector of the input datum at the given index.
     *
     * @param d the index of the input datum.
     * @return the vector of that input datum.
     */
    double[] getInputVector(int d);

    /**
     * Returns the input datum carrying the given label.
     *
     * @param label the name of the input datum.
     * @return the input datum.
     */
    InputDatum getInputDatum(String label);

    /**
     * Returns the input data carrying the given labels, as an array.
     *
     * @param labels the labels of the input data.
     * @return the input data.
     */
    InputDatum[] getInputDatum(String[] labels);

    /**
     * Returns the labels of all input data.
     *
     * @return an array containing every label.
     */
    String[] getLabels();

    /**
     * Returns the label of the input vector at the given index.
     *
     * @param index the index of the input vector.
     * @return the label of that input vector.
     */
    String getLabel(int index);

    /**
     * Returns the mean vector of the input vectors.
     *
     * @return the mean vector.
     */
    DoubleMatrix1D getMeanVector();

    /**
     * Returns the mean vector of those vectors that are identified by the given labels.
     *
     * @param labels label names of the input data.
     * @return the mean vector.
     */
    DoubleMatrix1D getMeanVector(String[] labels);

    /**
     * Returns the template vector associated with this input data.
     *
     * @return the template vector, or <code>null</code> if the template vector was not specified.
     */
    TemplateVector templateVector();

    /**
     * Returns the class info associated with this input data.
     *
     * @return the class info, or <code>null</code> if the class info file was not specified.
     */
    SOMLibClassInformation classInformation();

    /**
     * Associates a template vector with this input data.
     *
     * @param templateVector the new template vector.
     */
    void setTemplateVector(TemplateVector templateVector);

    /**
     * Calculates the mean quantisation error of the top-level unit.
     *
     * @param metric the metric to use for distance calculation.
     * @return the mqe0.
     */
    double mqe0(DistanceMetric metric);

    /**
     * Returns a subset of this input data set; the members of the subset are identified by the given labels.
     *
     * @param names the label names of the desired subset data.
     * @return a subset of the data.
     */
    InputData subset(String[] names);

    /**
     * Returns the input data as a double array.
     *
     * @return a matrix of numVectors x dim.
     */
    double[][] getData();

    /**
     * Returns the minimum and maximum value of each feature.
     *
     * @return a matrix of dim x 2.
     */
    double[][] getDataIntervals();

    /**
     * Returns the value of the y-th feature of input vector x.
     *
     * @param x the index of the input vector.
     * @param y the index of the feature.
     * @return the feature value.
     */
    double getValue(int x, int y);

    /**
     * Returns the number of rows before vectorisation.
     *
     * @return the number of rows the feature matrix had before it was vectorised to an input vector, or -1 if not
     *         available.
     */
    int getFeatureMatrixRows();

    /**
     * Returns the number of columns before vectorisation.
     *
     * @return the number of columns the feature matrix had before it was vectorised to an input vector, or -1 if
     *         not available.
     */
    int getFeatureMatrixColumns();

    /**
     * Returns the content type.
     *
     * @return the content type
     */
    String getContentType();

    /**
     * Returns the content sub-type.
     *
     * @return the content sub-type
     */
    String getContentSubType();

    /**
     * Sets the class info for this input data; presumably the counterpart of {@link #classInformation()} (to be
     * confirmed against the implementations).
     *
     * @param classInfo the class info to set.
     */
    void setClassInfo(SOMLibClassInformation classInfo);

    /**
     * Returns the vectors of all inputs associated with the given class name.
     *
     * @param className the name of the class.
     * @return the vectors of the inputs belonging to that class.
     * @throws SOMToolboxException If no class information file is loaded
     */
    double[][] getData(String className) throws SOMToolboxException;

    /**
     * Returns the name/URI/etc. of the source this input data was read from.
     *
     * @return the data source.
     */
    String getDataSource();

}