DatasetInformation.java example

Explorer

sos-dendrogram-master
- somtoolbox
  - src
    - core
      - at
        tuwien
        ifs
        commons
        gui
        SOMToolboxAppChooser.java
        controls
        TitledCollapsiblePanel.java
        swing
        table
        ButtonCellEditor.java
        ButtonCellRenderer.java
        ClassColorTableModel.java
        ColorCellEditor.java
        ColorCellRenderer.java
        jsap
        BooleanEditor.java
        EnumerationEditor.java
        FileEditor.java
        FlaggedOptionControl.java
        GenericGUI.java
        IntegerEditor.java
        OptionEditor.java
        ParameterControl.java
        StringEditor.java
        SwitchControl.java
        UnflaggedOptionControl.java
        package-info.java
        util
        MaximisedJFrame.java
        models
        ClassComboBoxModel.java
        util
        MathUtils.java
        io
        ExtensionFileFilterSwing.java
        jsap
        EnumeratedStringParserLister.java
        feature
        evaluation
        SimilarityRetrieval.java
        SimilarityRetrievalGUI.java
        somtoolbox
        SOMToolboxException.java
        SOMToolboxMetaConstants.java
        apps
        DataSetViewer.java
        MapInterpolator.java
        MergedSOMDataMapper.java
        PaletteEditor.java
        QualityMeasureComputer.java
        SDHTrainingSequenceGenerator.java
        SOMToolboxApp.java
        SOMToolboxMain.java
        SecondLifeFileReaderMain.java
        UnitFileViewer.java
        VisualisationImageSaver.java
        analysis
        FeatureDistributionAnalysis.java
        PLAnalyser.java
        PLClusterSpaceAnalyser.java
        PLInputSpaceAnalyser.java
        PLOutputSpaceAnalyser.java
        PLStepSequenceAnalyser.java
        PlaylistAnalysis.java
        config
        AbstractOptionFactory.java
        OptionFactory.java
        package-info.java
        helper
        ClassInfoFileCleaner.java
        DataGenerator.java
        DataMapper.java
        DataSetGenerator.java
        DataWinnerMappingWriter.java
        DatasetRandomiser.java
        HTMLMapReader.java
        MapRotator.java
        RGBPaletteConverter.java
        SOMLibDataInfoGenerator.java
        SOMLibInputMerger.java
        SOMMerger.java
        SomFilePacker.java
        SomToInputConvertor.java
        StringReplacer.java
        TemplateVectorComparator.java
        UnitDescriptionMappingRewriter.java
        VectorFileChopper.java
        VectorFileMerger.java
        VectorFilePrefixAdder.java
        VectorFileRewriter.java
        VectorFileToRandomAccessFileConverter.java
        VectorSimilarityWriter.java
        initEval
        EvaluationMain.java
        Measure.java
        metricEval
        MetricEvaluation.java
        package-info.java
        server
        HTMLMapInformation.java
        LabelDescription.java
        LabelImageCreator.java
        MapPreviewServlet.java
        MapSectionServlet.java
        MapServlet.java
        SOMMap.java
        SOMPageParameters.java
        ServerPalettes.java
        ServerSOM.java
        ServerVisualizations.java
        package-info.java
        trainer
        GHSOMSettingsPanel.java
        GrowingSOMSettingsPanel.java
        MnemonicSOMSettingsPanel.java
        SOMModelSettingsPanel.java
        SOMTrainer.java
        viewer
        ArrowPNode.java
        CommonSOMViewerStateData.java
        ComponentPlaneClusteringFrame.java
        ControlCollector.java
        DocViewPanel.java
        ExportDialog.java
        GeneralUnitPNode.java
        GeneralUnitPNodeSerializer.java
        GenericPNodeScrollPane.java
        InputPNode.java
        ItemSelectionListener.java
        LabellingDialog.java
        MapPNode.java
        MyPCanvas.java
        PieChartPNode.java
        RhythmPattern.java
        RhythmPatternsVisWindow.java
        SOMFrame.java
        SOMPane.java
        SOMViewer.java
        SharedSOMVisualisationDataDialog.java
        StarPNode.java
        StatusBar.java
        ToolTipPNode.java
        TuxRacerExportDialog.java
        UnitSelectionListener.java
        VisualizationChangeListener.java
        controls
        AbstractSelectionPanel.java
        AbstractViewerControl.java
        AutoRoutePanel.java
        ClassLegendPane.java
        ClusterBoderColorChooser.java
        ClusteringControl.java
        ColorChooser.java
        ColourLegendTable.java
        ComparisonPanel.java
        DendogrammPane.java
        DocSOMPanel.java
        GHSOMNavigationPanel.java
        LabelEditColorChooser.java
        MapDetailPanel.java
        MapOverviewPane.java
        MultichannelPlaybackPanel.java
        PaletteDisplayer.java
        PalettePanel.java
        PlaySOMPanel.java
        PlaygroundPanel.java
        PlaylistEvaluationPane.java
        QuerySOMPanel.java
        ShiftsControlPanel.java
        VisualizationControl.java
        multichannelPlayback
        ActivityGrid.java
        ActivityGridModel.java
        Commons.java
        ControlFrame.java
        DecoderThread.java
        FindMeLoopThread.java
        LayoutTable.java
        LineListModel.java
        MyGridCellRenderer.java
        PlaybackThread.java
        TPlaybackThreadDataRecord.java
        TimeUpdateThread.java
        package-info.java
        player
        AudioPlayThread.java
        AudioPlayer.java
        FlatAudioPlayThread.java
        MP3PlayThread.java
        PlayList.java
        PlayListListener.java
        PlaySOMPlayer.java
        PlayerControl.java
        PlayerListener.java
        StandalonePlaySOMPlayer.java
        psomserver
        PathMerger.java
        PocketSOMConnector.java
        httphandler
        HttpErrorHandler.java
        MapInformationProvider.java
        PocketSOMConfigProvider.java
        SongProvider.java
        fileutils
        ExportUtils.java
        LabelXmlUtils.java
        MIMETypes.java
        MySOMVisualisationDataFileFilter.java
        PlayList.java
        PocketSOMFormatUtils.java
        handlers
        AbstractDragSequenceEventHandler.java
        ClusterSelectionEventHandler.java
        EditLabelEventListener.java
        LineSelectionEventHandler.java
        LoggingHandler.java
        MyInputDragSequenceEventHandler.java
        MyLabelDragSequenceEventHandler.java
        MyMapInputEventHandler.java
        MyRectangleSelectionEventHandler.java
        MyWheelZoomEventHandler.java
        OrderedPSelectionEventHandler.java
        package-info.java
        package-info.java
        audio
        Constants.java
        DecoderThread.java
        PlaybackListener.java
        PlaybackThread.java
        PlaybackThreadDataRecord.java
        clustering
        Cluster.java
        ClusteringAlgorithm.java
        ClusteringTools.java
        DistanceFunctionType.java
        HierarchicalCluster.java
        HierarchicalClusteringAlgorithm.java
        WardClustering.java
        functions
        ClusterElementFunctions.java
        ComponentLine2DDistance.java
        ComponentLine3DDistance.java
        DoubleVector2DDistance.java
        UnitClusteringFunction.java
        data
        ARFFFormatInputData.java
        AbstractSOMLibSparseInputData.java
        AbstractSOMLibTemplateVector.java
        ClassInfoHeaderNotFoundException.java
        DataBaseSOMLibSparseInputData.java
        DataBaseSOMLibTemplateVector.java
        DataDimensionException.java
        ESOMClassInformation.java
        ESOMInputData.java
        InputData.java
        InputDataFactory.java
        InputDataFileFormatConverter.java
        InputDataWriter.java
        InputDatum.java
        MarsyasARFFInputData.java
        RandomAccessFileSOMLibInputData.java
        SOMLibClassInformation.java
        SOMLibDataInformation.java
        SOMLibRegressInformation.java
        SOMLibSparseInputData.java
        SOMLibSparseInputDataNames.java
        SOMLibTemplateVector.java
        SOMLibVectorNormalization.java
        SOMLibZeroVectorRemover.java
        SOMPAKInputData.java
        SOMVisualisationData.java
        SharedSOMVisualisationData.java
        SimpleMatrixInputData.java
        TemplateVector.java
        TemplateVectorElement.java
        distance
        AbstractMemoryInputVectorDistanceMatrix.java
        DistanceMatrixWriter.java
        FullMemoryInputVectorDistanceMatrix.java
        InputVectorDistanceMatrix.java
        LeightWeightMemoryInputVectorDistanceMatrix.java
        RandomAccessFileInputVectorDistanceMatrix.java
        metadata
        AbstractVectorMetaData.java
        AudioVectorMetaData.java
        EmtpyVectorMetaData.java
        MP3VectorMetaData.java
        normalisation
        AbstractNormaliser.java
        MinMaxNormaliser.java
        StandardScoreNormaliser.java
        UnitLengthNormaliser.java
        package-info.java
        database
        DBConnector.java
        Index2DatabaseImporter.java
        MySQLConnector.java
        VectorFile2DatabaseImporter.java
        package-info.java
        doc
        RunnablesReferenceCreator.java
        input
        AbstractSOMInputReader.java
        DataItemLinkageMap.java
        ESOMFormatInputReader.java
        InputCorrections.java
        MapFileFormatConverter.java
        MnemonicSOMLibFormatInputReader.java
        SOMInputReader.java
        SOMLibDataWinnerMapping.java
        SOMLibFileFormatException.java
        SOMLibFormatInputReader.java
        SOMLibMapDescription.java
        SOMPAKFormatInputReader.java
        SecondLifeInputFileReader.java
        package-info.java
        layers
        AdaptiveCoordinatesVirtualLayer.java
        AdaptiveCoordinatesVirtualUnit.java
        GrowingCellLayer.java
        GrowingCellUnit.java
        GrowingLayer.java
        InputContainer.java
        Label.java
        Layer.java
        LayerAccessException.java
        MappedInputs.java
        MnemonicGrowingLayer.java
        ToroidLayer.java
        TrainingInterruptionListener.java
        Unit.java
        UnitPair.java
        initialisation
        LayerInitializer.java
        PCAInitializer.java
        RandomInitializer.java
        RandomSamplingInitializer.java
        SOMInitializer.java
        metrics
        AbstractMetric.java
        AbstractWeightedMetric.java
        CosineMetric.java
        DistanceMetric.java
        L1Metric.java
        L2Metric.java
        L2MetricFast.java
        L2MetricSparse.java
        L2MetricWeighted.java
        LInfinityMetric.java
        LnAlphaMetric.java
        LnMetric.java
        MahalanobisMetric.java
        MetricException.java
        Metrics.java
        MissingValueMetricWrapper.java
        MnemonicSOMMetric.java
        package-info.java
        package-info.java
        quality
        AbstractQualityMeasure.java
        EntropyAndPurityCalculator.java
        EntropyMeasure.java
        IntrinsicDistance.java
        InversionMeasure.java
        MetricMultiScaling.java
        PseudoSilhouetteValue.java
        QualityMeasure.java
        QualityMeasureNotFoundException.java
        QuantizationError.java
        SOMDistortion.java
        SOMSilhouetteValue.java
        SammonMeasure.java
        SilhouetteValue.java
        SpearmanCoefficient.java
        TopographicError.java
        TopographicFunction.java
        TopographicProduct.java
        Trustworthiness_NeighborhoodPreservation.java
        package-info.java
        models
        AbstractNetworkModel.java
        GHSOM.java
        GHSOMHierarchyRoot.java
        GHSOMLevelLayer.java
        GrowingCellStructures.java
        GrowingSOM.java
        MnemonicSOM.java
        NetworkModel.java
        package-info.java
        output
        AttendeeMapper.java
        ESOMMapOutputter.java
        GrowingCellStructuresMapOutputter.java
        HTMLOutputter.java
        SOMLibMapOutputter.java
        SOMPAKMapOutputter.java
        TrajectoryOutputter.java
        labeling
        AbstractLabeler.java
        LabelSOM.java
        Labeler.java
        LagusKeywordLabeler.java
        package-info.java
        package-info.java
        properties
        FileProperties.java
        GHSOMProperties.java
        PropertiesException.java
        SOMProperties.java
        SOMViewerProperties.java
        package-info.java
        reportgenerator
        DatasetInformation.java
        EditableReportProperties.java
        GGSOMTestRunResult.java
        GHSOMTestRunResult.java
        MySOMLibMapDescription.java
        QEContainers
        InputQEContainer.java
        MapQEContainer.java
        QEContainer.java
        QMConfigurationProfile.java
        QMContainer.java
        UnitQEContainer.java
        ReportGenerator.java
        SemanticClass.java
        SemanticInterpreterGrid.java
        SemanticNode.java
        TestRunResult.java
        TestRunResultCollection.java
        TextualDescriptionProvider.java
        gui
        InputVectorSelectorPane.java
        ReportGenWindow.java
        RunResultSpecifierPane.java
        output
        OutputReport.java
        OutputReportHtml.java
        OutputReportLATEX.java
        ReportFileWriter.java
        SOMDescriptionHTML.java
        SOMDescriptionLATEX.java
        SOMGGDescriptionHTML.java
        SOMGGDescriptionLATEX.java
        SOMGHSOMDescriptionHTML.java
        SOMGHSOMDescriptionLATEX.java
        structures
        ComponentLine2D.java
        ComponentLine3D.java
        DoubleVector2D.java
        ElementWithIndex.java
        summarisation
        Summariser.java
        SummariserGUI.java
        gui
        DocumentListPanel.java
        MultiDocumentSummarisationPanel.java
        NavigationPanel.java
        OptionPanel.java
        PalettePanel.java
        SearchPanel.java
        SingleDocumentSummarisationPanel.java
        methods
        CombinedMethod.java
        KeywordMethod.java
        LocationMethod.java
        PartOfSpeech.java
        TFxIDF.java
        TitleMethod.java
        output
        DocumentDisplayer.java
        MultiDocumentHandler.java
        ResultHandler.java
        parser
        Scorer.java
        SentenceParser.java
        util
        AdaptiveStdErrProgressWriter.java
        ArrayUtils.java
        CentredDialog.java
        CollectionUtils.java
        Cuboid.java
        DateUtils.java
        ElementCounter.java
        FileUtils.java
        GridBagConstraintsIFS.java
        HTMLUtils.java
        ImageUtils.java
        Indices2D.java
        InverseComparator.java
        JMultiLineRadioButtonMenuItem.java
        JMultiLineToolTip.java
        LabelPNodeGenerator.java
        LeastRecentelyUsedImageCache.java
        LeastRecentlyUsedCache.java
        MultiLineToolTipUI.java
        NumberUtils.java
        PCA.java
        Point3d.java
        ProgressListener.java
        ProgressListenerFactory.java
        RandomTools.java
        StdErrProgressWriter.java
        StringIntegerComparator.java
        StringUtils.java
        SubClassFinder.java
        SwingWorker.java
        TogglablePanel.java
        UiUtils.java
        VectorTools.java
        VisualisationUtils.java
        comparables
        ComponentRegionCount.java
        InputDistance.java
        InputNameDistance.java
        UnitDistance.java
        growingCellStructures
        GrowingCellDrawSurface.java
        GrowingCellTetraheder.java
        inputVerifier
        DoubleNumberInputVerifier.java
        mnemonic
        MapPanel.java
        MnemonicSOMGenerator.java
        package-info.java
        package-info.java
        visualization
        AbstractBackgroundImageVisualizer.java
        AbstractItemVisualizer.java
        AbstractMatrixVisualizer.java
        ActivityHistogram.java
        AdaptiveCoordinatesVisualizer.java
        BackgroundImageVisualizer.java
        BackgroundImageVisualizerInstance.java
        CategoryPieChartVisualizer.java
        ClusterConnectionsVisualizer.java
        ColorGradient.java
        ColorGradientFactory.java
        ComparisonVisualizer.java
        ComponentPlanesVisualizer.java
        EntropyVisualizer.java
        FlowBorderlineVisualizer.java
        FuzzyColourCodingVisualiser.java
        GapVisualiser.java
        HitHistogramVisualizer.java
        IntrinsicDistanceVisualizer.java
        MappingDistortionVisualizer.java
        MatrixVisualizer.java
        MetroMapVisualizer.java
        MinimumSpanningTreeVisualizer.java
        NeighbourhoodGraph.java
        PMatrix.java
        Palette.java
        Palettes.java
        QualityMeasureVisualizer.java
        QuantizationErrorVisualizer.java
        RegressionVisualiser.java
        RhythmPatternsAttributeVisualizer.java
        SearchResultHistogramVisualizer.java
        SilhouetteVisualizer.java
        SmoothedCountHistograms.java
        SmoothedDataHistograms.java
        Snapper.java
        Snapper2D.java
        Snapper3D.java
        ThematicClassMapVisualizer.java
        TopographicErrorVisualizer.java
        TopographicProductVisualizer.java
        TrustwothinessVisualizer.java
        UMatrix.java
        VisualizationUpdateListener.java
        Visualizations.java
        clustering
        AbstractWardsLinkageTreeBuilder.java
        BorderPNode.java
        Cluster.java
        ClusterElementsStorage.java
        ClusterEquivalence.java
        ClusterLabel.java
        ClusterNode.java
        ClusteringAbortedException.java
        ClusteringTree.java
        ColoredClusterPNode.java
        CompleteLinkageTreeBuilder.java
        Covariance.java
        KMeans.java
        KMeansTreeBuilder.java
        LabelCoordinates.java
        LabelPositioning.java
        MoreCentresThanKException.java
        NodeDistance.java
        NonHierarchicalTreeBuilder.java
        SingleLinkageTreeBuilder.java
        TreeBuilder.java
        UnitKMeans.java
        WardsLinkageTreeBuilder.java
        WardsLinkageTreeBuilderAll.java
        package-info.java
        comparison
        QuiverPNode.java
        SOMComparison.java
        Shift.java
        contourplot
        ContourPlot.java
        metromap
        JetColorMap.java
        MetroColorMap.java
        package-info.java
        minimumSpanningTree
        Edge.java
        Graph.java
        InputdataGraph.java
        Node.java
        SomGraph.java
        package-info.java
        thematicmap
        Grid.java
        RegionManager.java
        RndIndexGenerator.java
        SOMClass.java
        SOMRegion.java
        SegDistComperator.java
        Segment.java
        package-info.java

/*
 * Copyright 2004-2010 Information & Software Engineering Group (188/1)
 *                     Institute of Software Technology and Interactive Systems
 *                     Vienna University of Technology, Austria
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package at.tuwien.ifs.somtoolbox.reportgenerator;

import java.awt.Color;
import java.io.File;
import java.util.Vector;
import java.util.logging.Logger;

import at.tuwien.ifs.somtoolbox.apps.viewer.CommonSOMViewerStateData;
import at.tuwien.ifs.somtoolbox.data.InputData;
import at.tuwien.ifs.somtoolbox.data.InputDatum;
import at.tuwien.ifs.somtoolbox.data.SOMLibClassInformation;
import at.tuwien.ifs.somtoolbox.data.SOMVisualisationData;
import at.tuwien.ifs.somtoolbox.data.TemplateVector;
import at.tuwien.ifs.somtoolbox.util.PCA;
import at.tuwien.ifs.somtoolbox.visualization.clustering.ClusterLabel;
import at.tuwien.ifs.somtoolbox.visualization.clustering.ClusterNode;

/**
 * FIXME: most probably all the methods in this class should be part of {@link InputData} and
 * {@link SOMLibClassInformation}, respectively ! <br>
 * This class collects all available information about the values in the input dataset (from the input file, the
 * template vector file, ...) and may compute some properties of its own. Its job is to provide one centralized place
 * where the actual report generators (the output objects) can ask for the data.
 * 
 * @author Sebastian Skritek (0226286, Sebastian.Skritek@gmx.at)
 * @version $Id: DatasetInformation.java 3590 2010-05-21 10:43:45Z mayer $
 */
public class DatasetInformation {

    /** Selector for {@link #getNumericalDataProps(int, int)}: minimal value of an attribute. */
    public static final int MIN_VALUE = 1;

    /** Selector for {@link #getNumericalDataProps(int, int)}: maximal value of an attribute. */
    public static final int MAX_VALUE = 2;

    /** Selector for {@link #getNumericalDataProps(int, int)}: mean value of an attribute. */
    public static final int MEAN_VALUE = 3;

    /** Selector for {@link #getNumericalDataProps(int, int)}: variance of an attribute. */
    public static final int VAR_VALUE = 4;

    /** Selector for {@link #getNumericalDataProps(int, int)}: number of 0-values of an attribute. */
    public static final int ZERO_VALUE = 5;

    /** Selector for {@link #getBoolDataProps(int, int)}: whether an attribute only contains the values 0 and 1. */
    public static final int ONLY01 = 6;

    /** Selector for {@link #getBoolDataProps(int, int)}: whether an attribute only contains integer values. */
    public static final int DISCRETE = 7;

    /** indices of the input items selected for more information (as passed to the constructor) */
    private Vector<Integer> selectedIndices;

    /** the input data, taken from the viewer state in the constructor */
    private InputData inputData;

    /** the path to the file containing the input data (as passed to the constructor) */
    private String inputDataFilename;

    /** the path to the file containing the template vector (as passed to the constructor) */
    private String tvFilename;

    /** the template vector, obtained from {@link #inputData} in the constructor */
    private TemplateVector inputTemplate;

    /** the class information, or <code>null</code> if none is available */
    private SOMLibClassInformation classInfo;

    private String[] classNames = null;

    /** only set if the constructor itself had to load the class information from this file */
    private String classInformationFilename = null;

    /** the customized report features of the semantic report (as passed to the constructor) */
    private EditableReportProperties EP;

    /*
     * Variables containing information about the data distribution. They are null until first requested; the
     * accessors of this class call checkDatatypes() while they are still null.
     */

    /** per dimension: whether all values are either 0 or 1 */
    private boolean only01[] = null;

    /** only an estimation - we call values discrete if they are integer values */
    private boolean discrete[] = null;

    /** holds for each dimension the minimal value */
    private double[] min = null;

    /** holds for each dimension the maximal value */
    private double[] max = null;

    /** holds for each dimension the mean value */
    private double[] mean = null;

    /** holds for each dimension the variance */
    private double[] var = null;

    /** holds for each dimension the number of 0 - values. Using this we estimate the missing values */
    private int zeroValues[] = null;

    boolean denseData = true;

    // Prepare the input data
    /**
     * Creates a new object storing information about a given dataset. Delegates to the constructor taking a
     * {@link CommonSOMViewerStateData}, passing a freshly created state object.
     * 
     * @param selectedIndices Vector of indices of the input items selected for more information
     * @param inputDataFilename the path to the file containing the input data
     * @param tvFilename the path to the file containing the template vector
     * @param classInformationFile the path to the file containing the class information
     * @param EP the customized Report Features of the Semantic Report
     */
    public DatasetInformation(Vector<Integer> selectedIndices, String inputDataFilename, String tvFilename,
            String classInformationFile, EditableReportProperties EP) {
        this(selectedIndices, inputDataFilename, tvFilename, classInformationFile, EP, new CommonSOMViewerStateData());
    }

    public DatasetInformation(Vector<Integer> selectedIndices, String inputDataFilename, String tvFilename,
            String classInformationFile, EditableReportProperties EP, CommonSOMViewerStateData state) {

        // check what files we already have in the state, and try to load the missing ones
        if (state.inputDataObjects.getInputData() == null) { // need to load the input data
            state.inputDataObjects.setFileName(SOMVisualisationData.INPUT_VECTOR, inputDataFilename);
            state.inputDataObjects.readAvailableData();
        }
        this.inputData = state.inputDataObjects.getInputData();

        if (state.inputDataObjects.getClassInfo() == null) {
            if (new File(classInformationFile).exists()) {
                this.classInformationFilename = classInformationFile;
                state.inputDataObjects.setFileName(SOMVisualisationData.CLASS_INFO, classInformationFile);
                state.inputDataObjects.readAvailableData();
                if (state.inputDataObjects.getClassInfo() == null) {
                    Logger.getLogger("at.tuwien.ifs.somtoolbox.reports").warning(
                            "Could not read class information file from '" + classInformationFile
                                    + "' - generating report without class information.");
                }
                this.classInfo = state.inputDataObjects.getClassInfo();
            } else {
                Logger.getLogger("at.tuwien.ifs.somtoolbox.reports").warning(
                        "No class information file specified - generating report without class information.");
            }
        } else {
            classInfo = state.inputDataObjects.getClassInfo();
        }
        this.EP = EP;
        this.tvFilename = tvFilename;
        this.selectedIndices = selectedIndices;
        this.inputTemplate = this.inputData.templateVector();
        this.inputDataFilename = inputDataFilename;
    }

    /**
     * Tells whether class information is attached to the input vectors, i.e. whether a class information object is
     * held by this instance.
     * 
     * @return true if class information is present, false otherwise
     */
    public boolean classInfoAvailable() {
        return this.classInfo != null;
    }

    /**
     * Returns the class information held by this instance.
     * 
     * @return the class information, or <code>null</code> if none is available (see {@link #classInfoAvailable()})
     */
    public SOMLibClassInformation getClassInfo() {
        return this.classInfo;
    }

    /**
     * Returns the number of input vectors, as reported by the input data ({@link InputData#numVectors()}).
     * 
     * @return the number of input vectors contained in the input data
     */
    public int getNumberOfInputVectors() {
        return this.inputData.numVectors();
    }

    /**
     * Computes the mean vector of all input items belonging to the given class. Each member vector is looked up by
     * name in the input data, and every component is divided by the number of class members before being added up.
     * 
     * @param classId the id of the class for which the mean vector shall be calculated
     * @return the mean vector of the class; its length is the dimension of the input vectors
     */
    public double[] getClassMeanVector(int classId) {
        // a newly allocated double[] is already filled with 0.0, so no explicit reset is needed
        double[] classMean = new double[this.getVectorDim()];
        int memberCount = this.classInfo.getNumberOfClassMembers(classId);
        String[] memberNames = this.classInfo.getDataNamesInClass(this.getNameOfClass(classId));

        for (int m = 0; m < memberNames.length; m++) {
            double[] values = this.inputData.getInputDatum(memberNames[m]).getVector().toArray();
            for (int d = 0; d < values.length; d++) {
                classMean[d] += values[d] / memberCount;
            }
        }
        return classMean;
    }

    /**
     * Returns the dimension of the input vectors, i.e. the number of attributes used to describe the objects, as
     * reported by the input data ({@link InputData#dim()}).
     * 
     * @return the dimension of the input vectors
     */
    public int getVectorDim() {
        return this.inputData.dim();
    }

    /**
     * Returns whether the values in the given dimension are all only 0 or 1. The underlying statistics are requested
     * via checkDatatypes() on first use.
     * 
     * @param index the dimension (starting with 0) for which this property is requested
     * @return true if all input vectors contain only 0 or 1 in this dimension, false otherwise - also if the index is
     *         out of range (negative, or not smaller than the number of dimensions)
     */
    public boolean is01(int index) {
        if (this.only01 == null) {
            this.checkDatatypes();
        }
        // guard both ends: a negative index must not end in an ArrayIndexOutOfBoundsException
        if (index < 0 || index >= this.only01.length) {
            return false;
        }

        return this.only01[index];
    }

    /**
     * Returns whether our heuristic estimates this dimension to contain discrete values. This is the case if all
     * values in this dimension are exact integer values. The underlying statistics are requested via checkDatatypes()
     * on first use.
     * 
     * @param index the dimension (starting with 0) for which the estimation is requested
     * @return true if all input vectors have only plain integers as values in this dimension, false otherwise - also
     *         if the index is out of range (negative, or not smaller than the number of dimensions)
     */
    public boolean isDiscrete(int index) {
        if (this.discrete == null) {
            this.checkDatatypes();
        }
        // guard both ends: a negative index must not end in an ArrayIndexOutOfBoundsException
        if (index < 0 || index >= this.discrete.length) {
            return false;
        }

        return this.discrete[index];
    }

    /**
     * Returns the number of input vectors that have 0 as value in the given dimension. The underlying statistics are
     * requested via checkDatatypes() on first use.
     * 
     * @param index the dimension (starting with 0) for which the number is requested
     * @return the number of input vectors having the value 0 in the given dimension, or -1 if the index is out of
     *         range (negative, or not smaller than the number of dimensions)
     */
    public int getNumberOfZeroValues(int index) {
        if (this.zeroValues == null) {
            this.checkDatatypes();
        }
        // guard both ends: a negative index must not end in an ArrayIndexOutOfBoundsException
        if (index < 0 || index >= this.zeroValues.length) {
            return -1;
        }

        return this.zeroValues[index];
    }

    /**
     * Returns whether the input set has been normalized. This simply forwards the result of
     * {@link InputData#isNormalizedToUnitLength()}.
     * 
     * @return true if the data set is normalized to unit length, false if not
     */
    public boolean isNormalized() {
        boolean unitLength = this.inputData.isNormalizedToUnitLength();
        return unitLength;
    }

    /**
     * FIXME: split this into simple single getter methods... !<br>
     * Returns a numerical property describing the distribution of the input values in one dimension. The property is
     * selected by one of the constants of this class:
     * <ul>
     * <li>{@link #MIN_VALUE}: min value of all input vectors</li>
     * <li>{@link #MAX_VALUE}: max value of all input vectors</li>
     * <li>{@link #MEAN_VALUE}: mean value of all input vectors</li>
     * <li>{@link #VAR_VALUE}: variance of the values in the input vectors</li>
     * <li>{@link #ZERO_VALUE}: the number of input vectors having 0 as value (is in fact int, not double)</li>
     * </ul>
     * If the requested statistic has not been set yet, checkDatatypes() is called first. The attribute index is not
     * range-checked here.
     * 
     * @param type specifies the type of information to be returned: allowed are the constants listed above
     * @param attribute the index of the attribute for which the value shall be returned (starting with 0)
     * @return the requested value. If the requested type is not available, a warning is logged and -1 is returned
     */
    public double getNumericalDataProps(int type, int attribute) {
        if (type == MIN_VALUE) {
            if (this.min == null) {
                this.checkDatatypes();
            }
            return this.min[attribute];
        }
        if (type == MAX_VALUE) {
            if (this.max == null) {
                this.checkDatatypes();
            }
            return this.max[attribute];
        }
        if (type == MEAN_VALUE) {
            if (this.mean == null) {
                this.checkDatatypes();
            }
            return this.mean[attribute];
        }
        if (type == VAR_VALUE) {
            if (this.var == null) {
                this.checkDatatypes();
            }
            return this.var[attribute];
        }
        if (type == ZERO_VALUE) {
            if (this.zeroValues == null) {
                this.checkDatatypes();
            }
            // int counter, implicitly widened to double
            return this.zeroValues[attribute];
        }

        // none of the known selectors matched
        String message = "Requested unknown dataset distribution value: " + type
                + " (in DatasetInformation.getNumericalDataProps())";
        Logger.getLogger("at.tuwien.ifs.somtoolbox.reports").warning(message);
        return -1;
    }

    /**
     * FIXME: split this into simple single getter methods... !<br>
     * Returns a boolean property describing the distribution of the input values in one dimension. The property is
     * selected by one of the constants of this class:
     * <ul>
     * <li>{@link #DISCRETE}: whether all values are plain integers</li>
     * <li>{@link #ONLY01}: whether all values are either 0 or 1</li>
     * </ul>
     * If the requested statistic has not been set yet, checkDatatypes() is called first. The attribute index is not
     * range-checked here.
     * 
     * @param type specifies the type of information to be returned: allowed are the constants listed above
     * @param attribute the index of the attribute for which the value shall be returned (starting with 0)
     * @return the requested value. If the requested type is not available, a warning is logged and false is returned
     */
    public boolean getBoolDataProps(int type, int attribute) {
        if (type == DISCRETE) {
            if (this.discrete == null) {
                this.checkDatatypes();
            }
            return this.discrete[attribute];
        }
        if (type == ONLY01) {
            if (this.only01 == null) {
                this.checkDatatypes();
            }
            return this.only01[attribute];
        }

        // none of the known selectors matched
        String message = "Requested unknown dataset distribution value: " + type
                + " (in DatasetInformation.getBoolDataProps())";
        Logger.getLogger("at.tuwien.ifs.somtoolbox.reports").warning(message);
        return false;
    }

    /**
     * Looks up the label of an attribute, i.e. the name the input template holds for that dimension. Without an
     * input template the index itself, rendered as text, serves as label.
     *
     * @param dim the index within the vector of the attribute whose label shall be returned
     * @return the label taken from the input template, or the index of the attribute as String if there is no
     *         template
     */
    public String getAttributeLabel(int dim) {
        return inputTemplate == null ? String.valueOf(dim) : inputTemplate.getLabel(dim);
    }

    /**
     * Reports how many classes the attached class information knows.
     *
     * @return the number of classes, or -1 if no class information is attached to the data
     */
    public int getNumberOfClasses() {
        return classInfo == null ? -1 : classInfo.numClasses();
    }

    /**
     * Returns the name of the class specified by the index. The array of class names is fetched from the class
     * information on first use and cached afterwards.
     *
     * @param c the index of the class (starting with 0)
     * @return the name of the class specified by the index; the empty String if no class information is attached or
     *         if the index is negative or not smaller than the number of class names
     */
    public String getNameOfClass(int c) {
        if (this.classInfo == null) {
            return "";
        }
        if (this.classNames == null) {
            this.classNames = this.classInfo.classNames();
        }
        // an index below 0 is as invalid as one past the end; both must yield "" instead of an exception
        if (this.classNames == null || c < 0 || c >= this.classNames.length) {
            return "";
        }
        return this.classNames[c];
    }

    /**
     * Collects the labels of all input items that belong to the given class. The class id is first translated into
     * the class name, which is then handed to the class information.
     *
     * @param classId the id of the class for which the input items are requested
     * @return the labels of the input items belonging to this class
     */
    public String[] getInputLabelsofClass(int classId) {
        String className = getNameOfClass(classId);
        return classInfo.getDataNamesInClass(className);
    }

    /**
     * Returns an array of length three containing the r,g,b values of the colour used to colour the specified class.
     * White (255, 255, 255) is returned whenever no colour can be determined: no class information attached, no
     * colour array available, index negative or beyond the number of colours, or no colour stored at that index.
     *
     * @param c the index of the class for which the colour is requested
     * @return a new array containing the r, g and b components (in this order) of the colour
     */
    public int[] getClassColorRGB(int c) {
        // start with the fallback colour (white) and overwrite it only if a real colour is found
        int[] rgb = { 255, 255, 255 };

        if (this.classInfo == null) {
            return rgb;
        }

        Color[] colors = this.classInfo.getClassColors();
        if (colors != null && c >= 0 && c < colors.length && colors[c] != null) {
            rgb[0] = colors[c].getRed();
            rgb[1] = colors[c].getGreen();
            rgb[2] = colors[c].getBlue();
        }

        return rgb;
    }

    /**
     * Reports how many input elements belong to the given class.
     *
     * @param c the index of the class (starting with 0)
     * @return the number of elements belonging to this class, or -1 if no class information is attached to this
     *         input
     */
    public int getNumberOfClassmembers(int c) {
        return classInfo == null ? -1 : classInfo.getNumberOfClassMembers(c);
    }

    /**
     * Asks the class information for the class of an input vector.
     *
     * @param inputLabel the label identifying the input vector
     * @return the class index the class information reports for this label
     */
    public int getClassIndexOfInput(String inputLabel) {
        final int classIndex = classInfo.getClassIndex(inputLabel);
        return classIndex;
    }

    /**
     * Gives the path of the file containing the class information.
     *
     * @return the stored path to the class information file
     */
    public String getClassInformationFilename() {
        return classInformationFilename;
    }

    /**
     * Runs over all dimensions of the input vectors and gathers some information about their data ranges and other
     * properties. For each dimension the following is determined:
     * <ul>
     * <li>min and max value (this.min, this.max)</li>
     * <li>arithmetic mean and sample variance, i.e. squared deviations divided by n-1 (this.mean, this.var)</li>
     * <li>does the dimension contain only 0/1 values (this.only01)</li>
     * <li>does the dimension contain only plain integer values (this.discrete)</li>
     * <li>how many 0 (=missing?) values are in the dimension (this.zeroValues)</li>
     * </ul>
     * The results are stored in the appropriate arrays, which are (re)allocated on every call. If there are no input
     * vectors, the arrays keep their default contents (0 / false). With exactly one input vector the variance is
     * reported as 0, because the division by n-1 is undefined in that case.
     */
    private void checkDatatypes() {

        int d = this.getVectorDim(); // dimension of input vectors
        int n = this.getNumberOfInputVectors(); // the number of vectors
        double temp;

        // freshly allocated arrays are already filled with 0 / false
        this.only01 = new boolean[d];
        this.discrete = new boolean[d];
        this.min = new double[d];
        this.max = new double[d];
        this.mean = new double[d];
        this.var = new double[d];
        this.zeroValues = new int[d];

        if (n <= 0) {
            // nothing to analyse; the initialization below would need the input datum at index 0
            return;
        }

        // check all dimensions
        for (int i = 0; i < d; i++) {
            // initialization of the arrays for this dimension; min and max start at the first vector's value
            temp = this.inputData.getInputDatum(0).getVector().get(i);
            this.only01[i] = true;
            this.discrete[i] = true;
            this.min[i] = temp;
            this.max[i] = temp;
            this.mean[i] = 0;
            this.var[i] = 0;
            this.zeroValues[i] = 0;

            // and now for each input vector in this dimension
            for (int j = 0; j < n; j++) {

                temp = this.inputData.getInputDatum(j).getVector().get(i); // retrieve the current value

                // discrete or 01?
                if ((int) temp != temp) {
                    // value changes when cast to int (fractional part, NaN, or outside the int range):
                    // not discrete, and therefore not 0/1 either
                    this.only01[i] = false;
                    this.discrete[i] = false;
                } else if (temp != 0 && temp != 1) {
                    // an integer, but neither 0 nor 1 (this also rejects negative values such as -1)
                    this.only01[i] = false;
                }

                // min/max values:
                if (temp < this.min[i]) {
                    this.min[i] = temp;
                }
                if (temp > this.max[i]) {
                    this.max[i] = temp;
                }

                // mean value:
                this.mean[i] += temp / n;

                // zero Values
                if (temp == 0) {
                    this.zeroValues[i]++;
                }
            }

            // now that we have the mean value for this dimension, we can calculate the variance;
            // with a single vector n - 1 is 0, so the variance stays at 0 instead of becoming NaN
            if (n > 1) {
                for (int j = 0; j < n; j++) {
                    temp = this.inputData.getInputDatum(j).getVector().get(i); // retrieve the current value
                    this.var[i] += (temp - this.mean[i]) * (temp - this.mean[i]) / (n - 1);
                }
            }
        }
    }

    /**
     * Gives access to the InputData object that stores the input data used for training the SOM. Objects of type
     * TestRunResult need it for some of their analysis.
     *
     * @return the input data used to train the SOM
     */
    public InputData getInputData() {
        return inputData;
    }

    /**
     * Fetches a single input datum by its label; the lookup is delegated to the input data.
     *
     * @param name the label of the requested input datum
     * @return the InputDatum the input data returns for this label
     */
    public InputDatum getInputDatum(String name) {
        return inputData.getInputDatum(name);
    }

    /**
     * Fetches a single input datum by its position; the lookup is delegated to the input data.
     *
     * @param d the index of the requested input datum
     * @return the InputDatum the input data returns for this index
     */
    public InputDatum getInputDatum(int d) {
        return inputData.getInputDatum(d);
    }

    /**
     * Counts the inputs the user has selected in order to get information about their position on the SOM.
     *
     * @return the size of the list of selected inputs
     */
    public int getNumberOfSelectedInputs() {
        return selectedIndices.size();
    }

    /**
     * Returns the id of the input vector at position index in the list of selected inputs. Each input vector is
     * identified by an id, which is its index in the complete input. The vectors selected by the user (to display
     * their position on the SOM) are also stored in a list. To retrieve the "real" id of the vector at position index
     * in this list, this function should be used.
     *
     * @param index the index of the vector in the list of selected inputs
     * @return the id of the corresponding input, that is the index in the complete input list; -1 if index is
     *         negative or not smaller than the number of selected inputs
     */
    public int getSelectedInputId(int index) {
        // reject both ends of the invalid range, so that the documented -1 is returned instead of an exception
        if (index < 0 || index >= this.selectedIndices.size()) {
            return -1;
        }
        return this.selectedIndices.get(index).intValue();
    }

    /**
     * Gives the complete filename (i.e. including the path) of the file containing the input data. The stored
     * string is handed out as is; it is not verified to point to a valid input file (or any file at all).
     *
     * @return the complete filename (including absolute path) of the input file
     */
    public String getInputDataFilename() {
        return inputDataFilename;
    }

    /**
     * Gives the complete filename (i.e. including the path) of the file containing the template data. The stored
     * string is handed out as is; it is not verified to point to a valid template file (or any file at all).
     *
     * @return the complete filename (including absolute path) of the template file
     */
    public String getTemplateFilename() {
        return tvFilename;
    }

    /**
     * Tries to name a cluster by the input data mapped to units lying within the cluster. For naming the cluster,
     * some very simple heuristics are used:
     * <ol>
     * <li>If a label of the cluster corresponds to a 0/1 attribute and its value in the cluster is exactly 0 (or 1),
     * the name of this attribute is included in the name of the cluster. Attributes of 0/1 type are supposed to
     * encode "has this property" yes/no information: the value 1 is interpreted as "cluster has this property",
     * whereas 0 is interpreted as "has not".</li>
     * <li>For every label that does not correspond to a 0/1 attribute it is checked whether both sub-clusters have the
     * same value at the same label position. If yes, the label and its value are included in the name of the cluster.
     * This step is skipped if the node has no two children with labels.</li>
     * <li>If none of the above produced a text, the first nodeDepth-1 labels of the cluster suggested by the
     * clustering algorithm are used (at least for the animal map this works quite well).</li>
     * </ol>
     * Labels are matched to attributes by comparing their names with the labels of the input template; without an
     * input template no label is treated as a 0/1 attribute.
     *
     * @param node the node representing the cluster that shall be named
     * @param clusterByValue indicates whether the labels for the cluster shall be created by value (is handed
     *            unchanged to ClusterNode.getLabels(clusterByValue, boolean))
     * @param nodeDepth the depth of the node in the tree, whereby the root (i.e. the cluster containing the whole
     *            map) node has depth 1
     * @return the list of labels found for this cluster; contains the single entry "no label found" if the node has
     *         no labels at all
     */
    public Vector<String> getClusterName(ClusterNode node, int clusterByValue, int nodeDepth) {

        Vector<String> texts = new Vector<String>();

        ClusterLabel[] labels = node.getLabels(clusterByValue, false);
        if (labels == null || labels.length == 0) {
            texts.add("no label found");
            return texts;
        }

        // index of the attribute each label belongs to, -1 if the label name matches no attribute
        int[] inds = new int[labels.length];
        // text created for each label; null as long as no text has been created for it
        String[] createdLabels = new String[labels.length];

        // first check whether we have 0/1 values (indications for "has/has not" a given property)
        for (int i = 0; i < labels.length; i++) {
            String curLabel = labels[i].getName();
            inds[i] = -1;
            if (this.inputTemplate != null) {
                for (int j = 0; j < this.getVectorDim(); j++) {
                    if (this.inputTemplate.getLabel(j).equals(curLabel)) {
                        inds[i] = j;
                        break;
                    }
                }
            }

            if (inds[i] >= 0 && this.is01(inds[i])) {
                if (labels[i].getValue() == 0) {
                    // yes - strike
                    createdLabels[i] = "has/is no/not " + curLabel;
                } else if (labels[i].getValue() == 1) {
                    // ok - strike too
                    createdLabels[i] = "has/is " + curLabel;
                }
            }
        }

        // then check for all labels left, whether they are the same in both child nodes
        ClusterNode child1 = node.getChild1();
        ClusterNode child2 = node.getChild2();

        ClusterLabel[] labels1 = child1 == null ? null : child1.getLabels(clusterByValue, false);
        ClusterLabel[] labels2 = child2 == null ? null : child2.getLabels(clusterByValue, false);

        // without two labelled children there is nothing to compare against
        if (labels1 != null && labels2 != null) {
            for (int i = 0; i < labels.length; i++) {
                if (createdLabels[i] != null) {
                    continue; // already done
                }
                if (inds[i] >= 0 && is01(inds[i])) {
                    continue; // 0/1 attributes (including the one at index 0) were handled in the first step
                }
                if (labels1.length < i + 1 || labels2.length < i + 1) {
                    break; // one of the children has no label at this position
                }

                if (labels[i].getValue() == labels1[i].getValue() && labels[i].getValue() == labels2[i].getValue()) {
                    createdLabels[i] = labels[i].getName() + " = " + String.format("%.6f", labels[i].getValue());
                }
            }
        }

        // collect the created texts in the order of the labels
        for (int i = 0; i < createdLabels.length; i++) {
            if (createdLabels[i] != null) {
                texts.add(createdLabels[i]);
            }
        }

        if (texts.size() == 0) {
            /*
             * we need some labels. the question is: which and how many. The idea is to stick to the labeling algorithm provided by the toolbox, and
             * to simply pick the first k ones. We make k dependent on the depth of the node, by the idea that ideally each new cluster introduces a
             * new dimension.
             */
            for (int i = 0; i < nodeDepth - 1 && i < labels.length; i++) {
                texts.add(labels[i].getName() + " = " + String.format("%.6f", labels[i].getValue()));
            }

        }
        return texts;
    }

    /**
     * Ranks the dimensions of the dataset by the values a PCA of the input data stores in <code>pca.info</code>,
     * largest value first. Dimensions with equal values are ranked by ascending index, and every dimension appears at
     * most once in the result.
     * <p>
     * The number of ranked dimensions is the number of metro map components taken from the editable report
     * properties. All dimensions are ranked if there are no such properties or if that number is negative or larger
     * than the vector dimension.
     * <p>
     * For each rank k, <code>result[k][0]</code> holds the index of the dimension and <code>result[k][1]</code> holds
     * its <code>pca.info</code> value divided by the vector dimension, which is used as quality measure (share of the
     * total variance). NOTE(review): this quotient is the true share only if the values in <code>pca.info</code> sum
     * up to the vector dimension - confirm against the PCA class.
     *
     * @return new array with the most important dimensions ranked decreasingly
     */
    public double[][] getPCAdeterminedDims() {
        int dim = this.getVectorDim();

        // take all PCA components unless a valid number was specified
        int wanted = dim;
        if (this.EP != null) {
            int requested = this.EP.getMetroMapComponents();
            if (requested >= 0 && requested <= dim) {
                wanted = requested;
            }
        }

        double[][] resultArray = new double[wanted][2];
        if (wanted == 0) {
            return resultArray; // nothing to rank, so the PCA need not be computed
        }

        double[][] data = inputData.getData();
        PCA pca = new PCA(data);

        // marks the axes that already got a rank, so that equal values cannot hide an axis or repeat one
        boolean[] taken = new boolean[dim];

        for (int rank = 0; rank < wanted; rank++) {
            // find the not yet ranked axis with the largest value; wanted <= dim guarantees that one is left
            int best = -1;
            for (int axis = 0; axis < dim; axis++) {
                if (!taken[axis] && (best < 0 || pca.info[axis] > pca.info[best])) {
                    best = axis;
                }
            }
            taken[best] = true;
            resultArray[rank][0] = best; /* save the best dim index on index 0 */
            resultArray[rank][1] = pca.info[best] / dim; /* and its corresponding variance value on index 1 */
        }
        return resultArray;
    }

    /**
     * Small helper used to display the dimensions in the top part of the output document: adds up the values stored
     * at index 1 of every entry returned by {@link #getPCAdeterminedDims()}.
     *
     * @return the sum of these values
     */
    public double calculateAccumulatedVariance() {
        double[][] rankedDims = getPCAdeterminedDims();
        double accumulated = 0.0;
        for (int i = 0; i < rankedDims.length; i++) {
            accumulated += rankedDims[i][1];
        }
        return accumulated;
    }

    /**
     * Returns the names of the 3 files used for training, each reduced to its last path element.
     *
     * @return an array of length 3: [0] the input data file, [1] the template vector file or "no template vector
     *         file" if none is set, [2] the class information file or "no class information file" if none is set
     */
    public String[] getTrainingDataInfo() {
        String[] list = new String[3];
        list[0] = applyNameFix(this.inputDataFilename);
        // the template and the class information are optional, so their filenames may be missing
        if (this.tvFilename == null) {
            list[1] = "no template vector file";
        } else {
            list[1] = applyNameFix(this.tvFilename);
        }
        if (this.classInformationFilename == null) {
            list[2] = "no class information file";
        } else {
            list[2] = applyNameFix(this.classInformationFilename);
        }

        return list;
    }

    /**
     * Small helper method for getTrainingDataInfo: strips the directory part from a path. Everything up to and
     * including the last separator is removed, where '/', '\' and the platform's file separator all count as
     * separators, so that paths written on another platform are shortened as well.
     *
     * @param target the path to shorten, must not be null
     * @return the part of target behind the last separator; target itself if it contains no separator
     */
    private static String applyNameFix(String target) {
        int lastSeparator = Math.max(target.lastIndexOf('/'), target.lastIndexOf('\\'));
        lastSeparator = Math.max(lastSeparator, target.lastIndexOf(System.getProperty("file.separator")));
        // lastSeparator is -1 if there is no separator, which makes substring(0) return the whole string
        return target.substring(lastSeparator + 1);
    }

    /**
     * Gives access to the editable report properties for the semantic report.
     *
     * @return the stored EditableReportProperties object
     */
    public EditableReportProperties getEP() {
        return EP;
    }

}