/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
* <p>
* @author Written by Cristobal Romero (Universidad de Córdoba) 10/10/2007
* @version 0.1
* @since JDK 1.5
*</p>
*/
package keel.Algorithms.Decision_Trees.M5;
/**
* A utility class that contains summary information on
* the values that appear in a dataset for a particular attribute.
*/
public class M5AttrStats {

    /** The number of int-like values (values equal to their integer truncation). */
    public int intCount = 0;

    /** The number of real-like values (i.e. have a fractional part). */
    public int realCount = 0;

    /** The number of missing values. */
    public int missingCount = 0;

    /** The number of distinct values. */
    public int distinctCount = 0;

    /** The number of values that only appear once. */
    public int uniqueCount = 0;

    /**
     * The total number of values (i.e. number of instances). This class never
     * updates it; it only reads it as the denominator of the percentages
     * printed by {@link #toString()}.
     */
    public int totalCount = 0;

    /**
     * Stats on numeric value distributions. When {@code null}, no numeric
     * statistics are accumulated by {@link #addDistinct(double, int)}.
     */
    public SimpleStatistics numericStats;

    /**
     * Counts of each nominal value, indexed by the value truncated to
     * {@code int}. When {@code null}, the attribute is reported as numeric
     * by {@link #toString()}.
     */
    public int[] nominalCounts;

    /**
     * Updates the counters for one more observed distinct value.
     *
     * <p>{@code distinctCount} is incremented on every call, even when
     * {@code count} is not positive; all other counters are only touched
     * when {@code count > 0}.</p>
     *
     * @param value the value that has just been seen
     * @param count the number of times the value appeared
     * @throws ArrayIndexOutOfBoundsException if {@code nominalCounts} is set
     *         and {@code (int) value} is not a valid index into it
     */
    protected void addDistinct(double value, int count) {
        if (count > 0) {
            if (count == 1) {
                uniqueCount++;
            }
            // A value that equals its own integer truncation is "int-like".
            if (M5StaticUtils.eq(value, (double) ((int) value))) {
                intCount += count;
            } else {
                realCount += count;
            }
            if (nominalCounts != null) {
                nominalCounts[(int) value] = count;
            }
            if (numericStats != null) {
                numericStats.add(value, count);
                numericStats.calculateDerived();
            }
        }
        distinctCount++;
    }

    /**
     * Returns a human readable representation of this attribute summary:
     * a header line followed by one line of values, each terminated by a
     * newline.
     *
     * @return a String representing these attribute statistics.
     */
    @Override
    public String toString() {
        final int[] counts = nominalCounts;
        final boolean nominal = counts != null;
        StringBuilder sb = new StringBuilder();

        // Header line.
        sb.append(M5StaticUtils.padLeft("Type", 4));
        sb.append(M5StaticUtils.padLeft("Nom", 5));
        sb.append(M5StaticUtils.padLeft("Int", 5));
        sb.append(M5StaticUtils.padLeft("Real", 5));
        sb.append(M5StaticUtils.padLeft("Missing", 12));
        sb.append(M5StaticUtils.padLeft("Unique", 12));
        sb.append(M5StaticUtils.padLeft("Dist", 6));
        if (nominal) {
            sb.append(' ');
            for (int i = 0; i < counts.length; i++) {
                sb.append(M5StaticUtils.padLeft("C[" + i + "]", 5));
            }
        }
        sb.append('\n');

        // Value line. The int-like share is reported under "Nom" for nominal
        // attributes and under "Int" for numeric ones; the other column is 0.
        long intPercent = percentOfTotal(intCount);
        if (nominal) {
            sb.append(M5StaticUtils.padLeft("Nom", 4)).append(' ');
            appendPercent(sb, intPercent);
            appendPercent(sb, 0);
        } else {
            sb.append(M5StaticUtils.padLeft("Num", 4)).append(' ');
            appendPercent(sb, 0);
            appendPercent(sb, intPercent);
        }
        appendPercent(sb, percentOfTotal(realCount));
        sb.append(M5StaticUtils.padLeft("" + missingCount, 5)).append(" /");
        appendPercent(sb, percentOfTotal(missingCount));
        sb.append(M5StaticUtils.padLeft("" + uniqueCount, 5)).append(" /");
        appendPercent(sb, percentOfTotal(uniqueCount));
        sb.append(M5StaticUtils.padLeft("" + distinctCount, 5)).append(' ');
        if (nominal) {
            for (int i = 0; i < counts.length; i++) {
                sb.append(M5StaticUtils.padLeft("" + counts[i], 5));
            }
        }
        sb.append('\n');
        return sb.toString();
    }

    /**
     * Computes {@code count} as a rounded percentage of {@code totalCount}.
     *
     * <p>Returns 0 when {@code totalCount} is 0. Without this guard the
     * floating-point division yields NaN or Infinity, and
     * {@code Math.round(Infinity)} is {@code Long.MAX_VALUE}, which would be
     * printed as a nonsensical percentage.</p>
     *
     * @param count the counter to express as a percentage
     * @return the rounded percentage, or 0 if there are no values at all
     */
    private long percentOfTotal(int count) {
        if (totalCount == 0) {
            return 0;
        }
        return Math.round(100.0 * count / totalCount);
    }

    /**
     * Appends a percentage cell: the number passed through
     * {@code M5StaticUtils.padLeft} with width 3, followed by {@code "% "}.
     *
     * @param sb the buffer to append to
     * @param percent the percentage value to print
     */
    private static void appendPercent(StringBuilder sb, long percent) {
        sb.append(M5StaticUtils.padLeft("" + percent, 3)).append("% ");
    }
}