/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/*
* Attribute.java
*
*/
package keel.Dataset;
import java.util.*;
/**
* <p>
* <b> Attribute </b>
* </p>
* It contains an attribute representation. The class attributes are enough to
* describe an attribute completely: name, type, possible values, minimums and
* maximums, etc. It offers a collection of functions to get all this information.
*
* @author Albert Orriols Puig
* @version keel0.1
*/
public class Attribute {

    /////////////////////////////////////////////////////////////////////////////
    //////////////// CONSTANTS OF THE ATTRIBUTE CLASS ///////////////////////////
    /////////////////////////////////////////////////////////////////////////////

    /**
     * Label for NOMINAL values.
     */
    public final static int NOMINAL = 0;

    /**
     * Label for INTEGER values.
     */
    public final static int INTEGER = 1;

    /**
     * Label for REAL values.
     */
    public final static int REAL = 2;

    /**
     * Label to identify INPUT attributes.
     */
    public final static int INPUT = 1;

    /**
     * Label to identify OUTPUT attributes.
     */
    public final static int OUTPUT = 2;

    /**
     * Label to identify attributes that have been defined neither as input
     * nor as output.
     */
    public final static int DIR_NOT_DEF = -1;

    /**
     * Internal marker stored in {@link #type} until {@link #setType(int)} is called.
     */
    private static final int TYPE_NOT_DEF = -1;

    /////////////////////////////////////////////////////////////////////////////
    /////////////// ATTRIBUTES OF THE ATTRIBUTE CLASS ///////////////////////////
    /////////////////////////////////////////////////////////////////////////////

    /**
     * Direction of the attribute: {@link #INPUT}, {@link #OUTPUT} or
     * {@link #DIR_NOT_DEF}.
     */
    private int dirAttribute;

    /**
     * Type of the attribute: {@link #NOMINAL}, {@link #INTEGER} or
     * {@link #REAL}; -1 while the type has not been set.
     */
    private int type;

    /**
     * Name of the attribute.
     */
    private String name;

    /**
     * Values that a nominal attribute can take. Only allocated when the type
     * is set to {@link #NOMINAL}; null otherwise.
     */
    private Vector nominalValues;

    /**
     * Minimum value that an integer or real attribute can take.
     */
    private double min;

    /**
     * Maximum value that an integer or real attribute can take.
     */
    private double max;

    /**
     * True until the first call to {@link #enlargeBounds(double)} after the
     * type has been set.
     */
    private boolean firstTime;

    /**
     * Indicates whether the bounds of the attribute were fixed in its definition.
     */
    private boolean fixedBounds;

    /**
     * Number of values processed by {@link #enlargeBounds(double)}.
     */
    private int countValues;

    /**
     * True when a nominal value that was not in the known list has been added
     * through {@link #addTestNominalValue(String)}.
     */
    private boolean newValuesInTest;

    /**
     * Nominal values added through {@link #addTestNominalValue(String)}.
     * Only allocated for nominal attributes.
     */
    private Vector newValuesList;

    /**
     * Frequency of each nominal value, indexed as [class][value position].
     */
    private int[][] classFrequencies;

    /**
     * Most used nominal value per class; filled by {@link #finishStatistics()}.
     */
    private String[] mostUsedValue;

    /**
     * Per-class sum of the values, turned into the per-class mean by
     * {@link #finishStatistics()}.
     */
    private double[] meanValue;

    /**
     * Number of statistic updates per class.
     */
    private int[] numStatUpdates;

    /**
     * Indicates whether statistics have to be gathered.
     */
    private boolean makeStatistics;

    /////////////////////////////////////////////////////////////////////////////
    ///////////////// METHODS OF THE ATTRIBUTE CLASS ////////////////////////////
    /////////////////////////////////////////////////////////////////////////////

    /**
     * Creates an attribute with no type, no direction and statistics disabled.
     */
    public Attribute() {
        type = TYPE_NOT_DEF;
        countValues = 0;
        dirAttribute = DIR_NOT_DEF;
        makeStatistics = false;
    }//end Attribute

    /**
     * It sets the attribute type. The type can only be set once: a second call
     * prints an error message and terminates the JVM with exit code 1.
     * @param _type is the type to be set ({@link #NOMINAL}, {@link #INTEGER}
     * or {@link #REAL}).
     */
    public void setType(int _type) {
        if (type != TYPE_NOT_DEF) {
            System.err.println("Type already fixed !!");
            System.exit(1);
        }
        type = _type;
        firstTime = true;
        // A nominal attribute needs the lists that hold its possible values.
        if (type == NOMINAL) {
            nominalValues = new Vector();
            newValuesList = new Vector();
        }
        // In all cases, the bounds start as not fixed.
        fixedBounds = false;
    }//end setType

    /**
     * It returns the type of the attribute.
     * @return an int that contains the type of the attribute.
     */
    public int getType() {
        return type;
    }//end getType

    /**
     * It sets the attribute name.
     * @param _name is the name to be set.
     */
    public void setName(String _name) {
        name = _name;
    }//end setName

    /**
     * It gets the attribute name.
     * @return a String with the attribute name.
     */
    public String getName() {
        return name;
    }//end getName

    /**
     * It sets the bounds of an integer or real attribute and marks them as
     * fixed. It does nothing for any other type.
     * @param _min is the minimum value that the attribute can take.
     * @param _max is the maximum value that the attribute can take.
     */
    public void setBounds(double _min, double _max) {
        if (type != REAL && type != INTEGER) return;
        fixedBounds = true;
        min = _min;
        max = _max;
    }//end setBounds

    /**
     * It returns the variable fixedBounds.
     * @return a boolean that indicates if the bounds are fixed.
     */
    public boolean getFixedBounds() {
        return fixedBounds;
    }//end getFixedBounds

    /**
     * It sets the fixedBounds value.
     * @param fBounds is the value that has to be fixed to fixedBounds.
     */
    public void setFixedBounds(boolean fBounds) {
        fixedBounds = fBounds;
    }//end setFixedBounds

    /**
     * It enlarges the attribute bounds so that they include the given value.
     * Fixed bounds are never modified. It does nothing for attributes that
     * are neither integer nor real.
     * @param value is the value read from the BD file.
     */
    public void enlargeBounds(double value) {
        if (type != REAL && type != INTEGER) return;
        if (firstTime) {
            // On the first update, non-fixed bounds collapse to the value read.
            if (!fixedBounds) {
                min = value;
                max = value;
            }
            firstTime = false;
        }
        countValues++;
        if (fixedBounds) return;
        if (value < min) min = value;
        if (value > max) max = value;
    }//end enlargeBounds

    /**
     * It clamps a value to the [min, max] interval of the attribute: values
     * below the minimum are replaced by the minimum and values above the
     * maximum by the maximum.
     * @param value is the value read from the test file.
     * @return a double with the rectified value.
     */
    public double rectifyValueInBounds(double value) {
        if (value < min) return min;
        if (value > max) return max;
        return value;
    }//end rectifyValueInBounds

    /**
     * It checks if the value passed as an argument is bounded by the
     * [min, max] interval (both ends included).
     * @param val is the value to be checked.
     * @return a boolean that indicates if the value is bounded.
     */
    public boolean isInBounds(double val) {
        return (val >= min && val <= max);
    }//end isInBounds

    /**
     * It returns if the value passed is in the list of nominal values.
     * @param val is the value to be checked.
     * @return a boolean indicating if the value is a possible nominal;
     * false when the attribute has no list of nominal values.
     */
    public boolean isNominalValue(String val) {
        // The list only exists for nominal attributes.
        return nominalValues != null && nominalValues.contains(val);
    }//end isNominalValue

    /**
     * It returns the minimum possible value in an integer or real attribute.
     * @return a double with the minimum value.
     */
    public double getMinAttribute() {
        return min;
    }//end getMinAttribute

    /**
     * It returns the maximum possible value in an integer or real attribute.
     * @return a double with the maximum value.
     */
    public double getMaxAttribute() {
        return max;
    }//end getMaxAttribute

    /**
     * This method adds a new value to the list of possible values of a nominal
     * attribute. Values already in the list are not added again. It does
     * nothing if the attribute is not nominal.
     * @param value is the new value to be added.
     * @throws NullPointerException if the attribute is nominal and value is null.
     */
    public void addNominalValue(String value) {
        if (type != NOMINAL) return;
        if (value == null) throw new NullPointerException("value");
        if (!nominalValues.contains(value)) {
            nominalValues.addElement(value);
        }
    }//end addNominalValue

    /**
     * It returns the most frequent value for the class.
     * @param whichClass is the class for which the most frequent value is wanted.
     * @return a String with the most used value, or null if statistics are
     * not available, the attribute is not nominal or the class is out of range.
     */
    public String getMostFrequentValue(int whichClass) {
        if (!makeStatistics || type != NOMINAL || mostUsedValue == null) return null;
        if (whichClass < 0 || whichClass >= mostUsedValue.length) return null;
        return mostUsedValue[whichClass];
    }//end getMostFrequentValue

    /**
     * It returns the mean value of the attribute for a class.
     * @param whichClass is the integer value for the class.
     * @return a double with the mean value, or 0 if statistics are not
     * available, the attribute is neither integer nor real or the class is
     * out of range.
     */
    public double getMeanValue(int whichClass) {
        if (!makeStatistics || (type != REAL && type != INTEGER) || meanValue == null) return 0;
        if (whichClass < 0 || whichClass >= meanValue.length) return 0;
        return meanValue[whichClass];
    }//end getMeanValue

    /**
     * It initializes the variables used to gather statistics. Nominal
     * attributes get one frequency counter per class and known nominal value;
     * any other attribute gets one accumulator per class.
     * @param classNumber is the number of classes.
     */
    void initStatistics(int classNumber) {
        makeStatistics = true;
        // Freshly allocated arrays are already zero-filled.
        numStatUpdates = new int[classNumber];
        if (type == NOMINAL) {
            classFrequencies = new int[classNumber][nominalValues.size()];
        } else {
            meanValue = new double[classNumber];
        }
    }//end initStatistics

    /**
     * It finishes the statistics process. For nominal attributes it selects,
     * per class, the value with the highest frequency (the first one on ties);
     * for the other attributes it turns the accumulated sums into means.
     * Classes without any update keep a mean of 0, and a nominal attribute
     * without values keeps null as most used value.
     */
    void finishStatistics() {
        if (!makeStatistics) return;
        if (type == NOMINAL) {
            if (classFrequencies == null) return;
            mostUsedValue = new String[classFrequencies.length];
            for (int i = 0; i < mostUsedValue.length; i++) {
                int[] frequencies = classFrequencies[i];
                if (frequencies.length == 0) continue; // no nominal value to choose from
                int best = 0;
                for (int j = 1; j < frequencies.length; j++) {
                    if (frequencies[j] > frequencies[best]) best = j;
                }
                mostUsedValue[i] = (String) nominalValues.elementAt(best);
            }
        } else {
            if (meanValue == null) return;
            for (int i = 0; i < meanValue.length; i++) {
                // Skip classes never updated: dividing by zero would yield NaN.
                if (numStatUpdates[i] > 0) meanValue[i] /= (double) numStatUpdates[i];
            }
        }
    }//end finishStatistics

    /**
     * It increments the frequency with which a nominal value has been used
     * in a class. It's called when a new value is read. The call is ignored
     * when statistics are not enabled, the class is out of range, or the
     * value has no frequency counter (unknown value, or value added after
     * the statistics were initialized).
     * @param whichClass is the class whose frequency has to be increased.
     * @param value is the nominal value whose frequency has to be increased.
     */
    void increaseClassFrequency(int whichClass, String value) {
        if (!makeStatistics || classFrequencies == null) return;
        if (whichClass < 0 || whichClass >= classFrequencies.length) return;
        int[] frequencies = classFrequencies[whichClass];
        int position = convertNominalValue(value);
        if (frequencies == null || position < 0 || position >= frequencies.length) return;
        frequencies[position]++;
        numStatUpdates[whichClass]++;
    }//end increaseClassFrequency

    /**
     * It adds the new value to the mean values vector. The call is ignored
     * when statistics are not enabled, no mean accumulators exist or the
     * class is out of range.
     * @param whichClass is the class where to add the new value.
     * @param value is the value to be added.
     */
    public void addInMeanValue(int whichClass, double value) {
        if (!makeStatistics || meanValue == null) return;
        if (whichClass < 0 || whichClass >= meanValue.length) return;
        numStatUpdates[whichClass]++;
        meanValue[whichClass] += value;
    }//end addInMeanValue

    /**
     * Adds a new value for a nominal that has been read in the test file.
     * The value is also recorded in the list of new test values.
     * @param value is the new value to be added.
     * @return a boolean indicating if value didn't exist in the list; always
     * false for non-nominal attributes.
     * @throws NullPointerException if the attribute is nominal and value is null.
     */
    public boolean addTestNominalValue(String value) {
        if (type != NOMINAL) return false;
        if (value == null) throw new NullPointerException("value");
        if (nominalValues.contains(value)) return false;
        nominalValues.addElement(value);
        newValuesList.addElement(value);
        newValuesInTest = true;
        return true;
    }//end addTestNominalValue

    /**
     * It returns a vector with all new nominal values read in test.
     * @return the internal Vector with all new nominal values (not a copy);
     * null if the attribute is not nominal.
     */
    public Vector getNewValuesInTest() {
        return newValuesList;
    }//end getNewValuesInTest

    /**
     * It returns true if new values have appeared in test.
     * @return a boolean indicating if new values have been read in test.
     */
    public boolean areNewNominalValuesInTest() {
        return newValuesInTest;
    }//end areNewNominalValuesInTest

    /**
     * It returns the number of different values that a nominal attribute can take.
     * @return an int with the number of different values, or -1 if the
     * attribute is not nominal.
     */
    public int getNumNominalValues() {
        if (type != NOMINAL) return -1;
        return nominalValues.size();
    }//end getNumNominalValues

    /**
     * Returns all the possible nominal values.
     * @return the internal Vector with the possible values (not a copy);
     * null if the attribute is not nominal.
     */
    public Vector getNominalValuesList() {
        return nominalValues;
    }//end getNominalValuesList

    /**
     * It returns the ith value of that nominal attribute.
     * @param pos indicates which attribute value is wanted.
     * @return a String with the value, or null if the attribute is not nominal.
     */
    public String getNominalValue(int pos) {
        if (type != NOMINAL) return null;
        return (String) nominalValues.elementAt(pos);
    }//end getNominalValue

    /**
     * It converts a nominal value to an integer: its position in the list of
     * nominal values.
     * @param value is the value that is wanted to be converted.
     * @return an int with the converted value, or -1 if the value is not in
     * the list or the attribute has no list of nominal values.
     */
    public int convertNominalValue(String value) {
        return (nominalValues == null) ? -1 : nominalValues.indexOf(value);
    }//end convertNominalValue

    /**
     * It compares two attributes by name, type and, for nominal attributes,
     * list of nominal values. Note that this method overloads, and does not
     * override, {@link Object#equals(Object)}.
     * @param attr is the second attribute of the comparison.
     * @return a boolean that indicates if the attributes are equal; false if
     * attr is null.
     */
    public boolean equals(Attribute attr) {
        if (attr == null) return false;
        if (attr == this) return true;
        boolean sameName = (name == null) ? attr.name == null : name.equals(attr.name);
        if (!sameName) return false;
        if (attr.type != type) return false;
        if (type == NOMINAL) {
            return nominalValues.equals(attr.nominalValues);
        }
        return true;
    }//end equals

    /**
     * It sets if the attribute is an input or an output attribute.
     * @param _dirAtt is the direction (input/output) of the attribute.
     */
    public void setDirectionAttribute(int _dirAtt) {
        dirAttribute = _dirAtt;
    }//end setDirectionAttribute

    /**
     * It returns the direction of the attribute.
     * @return an int that indicates if it's an input or output attribute.
     */
    public int getDirectionAttribute() {
        return dirAttribute;
    }//end getDirectionAttribute

    /**
     * It normalizes a value. Integer values are shifted by the minimum, real
     * values are scaled to the [min, max] range, and any other type is
     * returned unchanged. A real attribute whose range has zero width
     * normalizes to 0 instead of dividing by zero.
     * @param val is the value to be normalized.
     * @return a double with the normalized value.
     */
    public double normalizeValue(double val) {
        if (type == INTEGER) return val - min;
        if (type == REAL) {
            double range = max - min;
            if (range == 0) return 0;
            return (val - min) / range;
        }
        return val;
    }//end normalizeValue

    /**
     * It returns a String with the attribute information in keel format.
     * @return a String with the attribute information.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("@attribute ").append(name);
        switch (type) {
            case NOMINAL:
                text.append('{');
                for (int i = 0; i < nominalValues.size(); i++) {
                    if (i > 0) text.append(',');
                    text.append((String) nominalValues.elementAt(i));
                }
                text.append('}');
                break;
            case INTEGER:
                text.append(" integer[").append((int) min)
                    .append(',').append((int) max).append(']');
                break;
            case REAL:
                text.append(" real[").append(min)
                    .append(',').append(max).append(']');
                break;
        }
        return text.toString();
    }//end toString

    /**
     * This method prints the attribute information to the standard output.
     */
    public void print() {
        String[] typesConv = {"Nominal", "Integer", "Real"};
        System.out.println(" > Name: " + name + ".");
        System.out.println(" > Type: " + type);
        // The type is -1 until setType is called, so it cannot index the table blindly.
        String typeName = (type >= 0 && type < typesConv.length) ? typesConv[type] : "Not defined";
        System.out.println(" > Type: " + typeName + ".");
        System.out.print (" > Input/Output: ");
        switch (dirAttribute) {
            case INPUT:
                System.out.println("INPUT");
                break;
            case OUTPUT:
                System.out.println("OUTPUT");
                break;
            default:
                System.out.println("NOT DEFINED");
        }
        System.out.print(" > Range: ");
        switch (type) {
            case NOMINAL:
                System.out.print("{");
                for (int i = 0; i < nominalValues.size(); i++) {
                    System.out.print((String) nominalValues.elementAt(i) + " ");
                }
                System.out.print("}");
                break;
            case INTEGER:
                System.out.print("[" + (int) min + "," + (int) max + "]");
                break;
            default:
                System.out.print("[" + min + "," + max + "]");
        }
        if (type == NOMINAL) {
            if (mostUsedValue != null) {
                System.out.println("\n > Most used value: ");
                for (int i = 0; i < mostUsedValue.length; i++) {
                    System.out.print(" > class " + i + ":" + mostUsedValue[i]);
                    int position = convertNominalValue(mostUsedValue[i]);
                    // No counter exists when the class has no most used value.
                    int count = (position >= 0 && position < classFrequencies[i].length)
                            ? classFrequencies[i][position] : 0;
                    System.out.println(" (" + count + ")." );
                }
            }
        } else if (meanValue != null) {
            System.out.println("\n > Mean used value: ");
            for (int i = 0; i < meanValue.length; i++) {
                System.out.println(" > class " + i + ": " + meanValue[i]);
            }
        }
        System.out.println();
    }//end print
}//end of class Attribute