/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/**
* <p>
* @author Written by Cristobal Romero (Universidad de Córdoba) 10/10/2007
* @version 0.1
* @since JDK 1.5
*</p>
*/
package keel.Algorithms.Decision_Trees.M5;
import java.io.*;
import java.util.*;
/**
 * Class for handling an attribute.
 *
 * <p>Three attribute types are supported:</p>
 * <ul>
 * <li><b>numeric</b>: represents a floating-point number. Numeric attributes
 *     carry no value list and no lookup table.</li>
 * <li><b>nominal</b>: represents a fixed set of nominal values supplied at
 *     construction time.</li>
 * <li><b>string</b>: represents a dynamically expanding set of nominal
 *     values. String attributes are not used by the learning schemes. They
 *     can be used, for example, to store an identifier with each instance
 *     in a dataset.</li>
 * </ul>
 *
 * <p>The public API is read-only except for the two {@code addStringValue}
 * methods, which extend the value list of a string attribute. The
 * package-private mutators {@code addValue}, {@code delete} and
 * {@code setValue} replace the internal value list with a fresh copy before
 * modifying it, so attributes produced by {@link #copy()} (which share the
 * value list) are not affected by them.</p>
 *
 * <p>Values longer than an internal threshold are stored wrapped in a
 * {@code SerializedObject} instead of as a plain {@code String}; they are
 * unwrapped again by {@link #value(int)} and {@link #enumerateValues()}.</p>
 */
public class M5Attribute implements Serializable {

    /** Constant set for numeric attributes. */
    public final static int NUMERIC = 0;

    /** Constant set for nominal attributes. */
    public final static int NOMINAL = 1;

    /** Constant set for attributes with string values. */
    public final static int STRING = 2;

    /** Strings longer than this will be stored compressed. */
    private final static int STRING_COMPRESS_THRESHOLD = 200;

    /** The attribute's name. */
    private String m_Name;

    /** The attribute's type (one of NUMERIC, NOMINAL, STRING). */
    private int m_Type;

    /** The attribute's values (if nominal or string), null if numeric. */
    private M5Vector m_Values;

    /** Mapping of values to indices (if nominal or string), null if numeric. */
    private Hashtable m_Hashtable;

    /** The attribute's index, -1 while unset. */
    private int m_Index;

    /**
     * Constructor for a numeric attribute. The index is initialised to -1.
     *
     * @param attributeName the name for the attribute
     */
    public M5Attribute(String attributeName) {
        m_Name = attributeName;
        m_Index = -1;
        m_Values = null;
        m_Hashtable = null;
        m_Type = NUMERIC;
    }

    /**
     * Constructor for nominal attributes and string attributes.
     * If a null vector of attribute values is passed to the method,
     * the attribute is assumed to be a string. The index is initialised
     * to -1.
     *
     * @param attributeName the name for the attribute
     * @param attributeValues a vector of strings denoting the
     * attribute values. Null if the attribute is a string attribute.
     */
    public M5Attribute(String attributeName,
                       M5Vector attributeValues) {
        m_Name = attributeName;
        m_Index = -1;
        if (attributeValues == null) {
            m_Values = new M5Vector();
            m_Hashtable = new Hashtable();
            m_Type = STRING;
        } else {
            int count = attributeValues.size();
            m_Values = new M5Vector(count);
            m_Hashtable = new Hashtable(count);
            for (int i = 0; i < count; i++) {
                // Every supplied value must be a String (the cast fails otherwise).
                Object store = compressIfLong(
                        (String) attributeValues.elementAt(i),
                        "Couldn't compress nominal attribute value -"
                        + " storing uncompressed.");
                m_Values.addElement(store);
                // A value supplied twice ends up mapped to its last index.
                m_Hashtable.put(store, Integer.valueOf(i));
            }
            m_Type = NOMINAL;
        }
    }

    /**
     * Constructor for a numeric attribute with a particular index.
     *
     * @param attributeName the name for the attribute
     * @param index the attribute's index
     */
    M5Attribute(String attributeName, int index) {
        this(attributeName);
        m_Index = index;
    }

    /**
     * Constructor for nominal attributes and string attributes with
     * a particular index.
     * If a null vector of attribute values is passed to the method,
     * the attribute is assumed to be a string.
     *
     * @param attributeName the name for the attribute
     * @param attributeValues a vector of strings denoting the attribute values.
     * Null if the attribute is a string attribute.
     * @param index the attribute's index
     */
    M5Attribute(String attributeName, M5Vector attributeValues,
                int index) {
        this(attributeName, attributeValues);
        m_Index = index;
    }

    /**
     * Returns the object under which a value is stored: the string itself,
     * or a {@code SerializedObject} wrapping it when the string is longer
     * than {@link #STRING_COMPRESS_THRESHOLD}. If wrapping fails, the given
     * message is printed to standard error and the plain string is used.
     *
     * @param value the value to store or look up
     * @param failureMessage the message printed when wrapping fails
     * @return the plain string or its wrapped form
     */
    private static Object compressIfLong(String value, String failureMessage) {
        if (value.length() > STRING_COMPRESS_THRESHOLD) {
            try {
                return new SerializedObject(value, true);
            } catch (Exception ex) {
                // Deliberate best effort: fall back to the plain string.
                System.err.println(failureMessage);
            }
        }
        return value;
    }

    /**
     * Returns the index already registered for the given stored value, or
     * appends the value to the value list, registers it and returns the
     * newly assigned index.
     *
     * @param store the value in its stored form
     * @return the existing or newly assigned index
     */
    private int indexOfOrAdd(Object store) {
        Integer existing = (Integer) m_Hashtable.get(store);
        if (existing != null) {
            return existing.intValue();
        }
        int newIndex = m_Values.size();
        m_Values.addElement(store);
        m_Hashtable.put(store, Integer.valueOf(newIndex));
        return newIndex;
    }

    /**
     * Produces a shallow copy of this attribute. For nominal and string
     * attributes the copy shares the value list and the lookup table with
     * this attribute.
     *
     * @return a copy of this attribute with the same index
     */
    public Object copy() {
        return copy(m_Name);
    }

    /**
     * Produces a shallow copy of this attribute with a new name. For nominal
     * and string attributes the copy shares the value list and the lookup
     * table with this attribute.
     *
     * @param newName the name of the new attribute
     * @return a copy of this attribute with the same index
     */
    final M5Attribute copy(String newName) {
        M5Attribute copy = new M5Attribute(newName);
        copy.m_Index = m_Index;
        if (!isNominal() && !isString()) {
            return copy;
        }
        copy.m_Type = m_Type;
        copy.m_Values = m_Values;
        copy.m_Hashtable = m_Hashtable;
        return copy;
    }

    /**
     * Returns an enumeration of all the attribute's values if
     * the attribute is nominal or a string, null otherwise. Values that
     * are stored wrapped are unwrapped before being returned.
     *
     * @return enumeration of all the attribute's values, or null for a
     * numeric attribute
     */
    public final Enumeration enumerateValues() {
        if (isNominal() || isString()) {
            final Enumeration stored = m_Values.elements();
            return new Enumeration() {
                public boolean hasMoreElements() {
                    return stored.hasMoreElements();
                }

                public Object nextElement() {
                    Object next = stored.nextElement();
                    if (next instanceof SerializedObject) {
                        return ((SerializedObject) next).getObject();
                    }
                    return next;
                }
            };
        }
        return null;
    }

    /**
     * Tests if given attribute is equal to this attribute. Two attributes
     * are equal when they are of exactly the same class, have equal names,
     * and are either both numeric or both non-numeric with equal value
     * lists (same size, equal elements at every position). The index is
     * not compared.
     *
     * @param other the Object to be compared to this attribute
     * @return true if the given attribute is equal to this attribute
     */
    @Override
    public final boolean equals(Object other) {
        if ((other == null) || !(other.getClass().equals(this.getClass()))) {
            return false;
        }
        M5Attribute att = (M5Attribute) other;
        if (!m_Name.equals(att.m_Name)) {
            return false;
        }
        if (isNumeric() && att.isNumeric()) {
            return true;
        }
        if (isNumeric() || att.isNumeric()) {
            return false;
        }
        if (m_Values.size() != att.m_Values.size()) {
            return false;
        }
        for (int i = 0; i < m_Values.size(); i++) {
            if (!m_Values.elementAt(i).equals(att.m_Values.elementAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns a hash code consistent with {@link #equals(Object)}: equal
     * attributes always have equal names, so the name's hash code is used.
     *
     * @return the hash code of the attribute's name
     */
    @Override
    public final int hashCode() {
        return m_Name.hashCode();
    }

    /**
     * Returns the index of this attribute.
     *
     * @return the index of this attribute
     */
    public final int index() {
        return m_Index;
    }

    /**
     * Returns the index of a given attribute value, as recorded in the
     * internal lookup table. If the same value was registered more than
     * once, the most recently registered index is the one recorded.
     *
     * @param value the value for which the index is to be returned
     * @return the index of the given attribute value if attribute
     * is nominal or a string, -1 if it is numeric or the value
     * can't be found
     */
    public final int indexOfValue(String value) {
        if (!isNominal() && !isString()) {
            return -1;
        }
        Object key = compressIfLong(value,
                "Couldn't compress string attribute value -"
                + " searching uncompressed.");
        Integer val = (Integer) m_Hashtable.get(key);
        return (val == null) ? -1 : val.intValue();
    }

    /**
     * Test if the attribute is nominal.
     *
     * @return true if the attribute is nominal
     */
    public final boolean isNominal() {
        return (m_Type == NOMINAL);
    }

    /**
     * Tests if the attribute is numeric.
     *
     * @return true if the attribute is numeric
     */
    public final boolean isNumeric() {
        return (m_Type == NUMERIC);
    }

    /**
     * Tests if the attribute is a string.
     *
     * @return true if the attribute is a string
     */
    public final boolean isString() {
        return (m_Type == STRING);
    }

    /**
     * Returns the attribute's name.
     *
     * @return the attribute's name as a string
     */
    public final String name() {
        return m_Name;
    }

    /**
     * Returns the number of attribute values. Returns 0 for numeric attributes.
     *
     * @return the number of attribute values
     */
    public final int numValues() {
        if (!isNominal() && !isString()) {
            return 0;
        }
        return m_Values.size();
    }

    /**
     * Returns a description of this attribute in ARFF format: the keyword
     * {@code @attribute}, the quoted name, and then either the
     * comma-separated, brace-enclosed list of quoted values (nominal), the
     * word {@code numeric}, or the word {@code string}. Quoting is delegated
     * to {@code M5StaticUtils.quote}.
     *
     * @return a description of this attribute as a string
     */
    @Override
    public final String toString() {
        StringBuilder text = new StringBuilder();
        text.append("@attribute ").append(M5StaticUtils.quote(m_Name)).append(" ");
        if (isNominal()) {
            text.append('{');
            Enumeration values = enumerateValues();
            while (values.hasMoreElements()) {
                text.append(M5StaticUtils.quote((String) values.nextElement()));
                if (values.hasMoreElements()) {
                    text.append(',');
                }
            }
            text.append('}');
        } else if (isNumeric()) {
            text.append("numeric");
        } else {
            text.append("string");
        }
        return text.toString();
    }

    /**
     * Returns the attribute's type as an integer.
     *
     * @return the attribute's type.
     */
    public final int type() {
        return m_Type;
    }

    /**
     * Returns a value of a nominal or string attribute.
     * Returns an empty string if the attribute is neither
     * nominal nor a string attribute.
     *
     * @param valIndex the value's index
     * @return the attribute's value as a string
     */
    public final String value(int valIndex) {
        if (!isNominal() && !isString()) {
            return "";
        }
        Object val = m_Values.elementAt(valIndex);
        // If we're storing strings compressed, uncompress it.
        if (val instanceof SerializedObject) {
            val = ((SerializedObject) val).getObject();
        }
        return (String) val;
    }

    /**
     * Adds a string value to the list of valid strings for attributes
     * of type STRING and returns the index of the string. If the value is
     * already registered, its existing index is returned and nothing is
     * added.
     *
     * @param value The string value to add
     * @return the index assigned to the string, or -1 if the attribute is not
     * of type M5Attribute.STRING
     */
    public int addStringValue(String value) {
        if (!isString()) {
            return -1;
        }
        Object store = compressIfLong(value,
                "Couldn't compress string attribute value -"
                + " storing uncompressed.");
        return indexOfOrAdd(store);
    }

    /**
     * Adds a string value to the list of valid strings for attributes
     * of type STRING and returns the index of the string. This method is
     * more efficient than addStringValue(String) for long strings because
     * the value is taken over in its already-stored form from the source
     * attribute instead of being wrapped again.
     *
     * @param src The Attribute containing the string value to add.
     * @param index the index of the string value in the source attribute.
     * @return the index assigned to the string, or -1 if the attribute is not
     * of type M5Attribute.STRING
     */
    public int addStringValue(M5Attribute src, int index) {
        if (!isString()) {
            return -1;
        }
        return indexOfOrAdd(src.m_Values.elementAt(index));
    }

    /**
     * Adds an attribute value. Creates a fresh list of attribute
     * values (and a fresh lookup table) before adding it.
     *
     * @param value the attribute value
     */
    final void addValue(String value) {
        m_Values = (M5Vector) m_Values.copy();
        m_Hashtable = (Hashtable) m_Hashtable.clone();
        forceAddValue(value);
    }

    /**
     * Removes a value of a nominal or string attribute. Creates a
     * fresh list of attribute values before removing it, and rebuilds the
     * lookup table so that the removed index is dropped and every higher
     * index is shifted down by one.
     *
     * @param index the value's index
     * @exception IllegalArgumentException if the attribute is neither
     * nominal nor string
     */
    final void delete(int index) {
        if (!isNominal() && !isString()) {
            throw new IllegalArgumentException(
                    "Can only remove value of nominal or string attribute!");
        }
        m_Values = (M5Vector) m_Values.copy();
        m_Values.removeElementAt(index);
        Hashtable hash = new Hashtable(m_Hashtable.size());
        Enumeration keys = m_Hashtable.keys();
        while (keys.hasMoreElements()) {
            Object key = keys.nextElement();
            Integer valIndexObject = (Integer) m_Hashtable.get(key);
            int valIndex = valIndexObject.intValue();
            if (valIndex > index) {
                hash.put(key, Integer.valueOf(valIndex - 1));
            } else if (valIndex < index) {
                hash.put(key, valIndexObject);
            }
            // valIndex == index: the entry of the removed value is dropped.
        }
        m_Hashtable = hash;
    }

    /**
     * Adds an attribute value in place, without first copying the value
     * list or the lookup table.
     *
     * @param value the attribute value
     */
    final void forceAddValue(String value) {
        Object store = compressIfLong(value,
                "Couldn't compress string attribute value -"
                + " storing uncompressed.");
        m_Values.addElement(store);
        m_Hashtable.put(store, Integer.valueOf(m_Values.size() - 1));
    }

    /**
     * Sets the index of this attribute.
     *
     * @param index the index of this attribute
     */
    final void setIndex(int index) {
        m_Index = index;
    }

    /**
     * Sets a value of a nominal attribute or string attribute.
     * Creates a fresh list of attribute values (and a fresh lookup table)
     * before it is set.
     *
     * @param index the value's index
     * @param string the value
     * @exception IllegalArgumentException if the attribute is not nominal or
     * string.
     */
    final void setValue(int index, String string) {
        if (!isNominal() && !isString()) {
            throw new IllegalArgumentException(
                    "Can only set value of nominal or string attribute!");
        }
        m_Values = (M5Vector) m_Values.copy();
        m_Hashtable = (Hashtable) m_Hashtable.clone();
        Object store = compressIfLong(string,
                "Couldn't compress string attribute value -"
                + " storing uncompressed.");
        // Unregister the value being replaced before registering the new one.
        m_Hashtable.remove(m_Values.elementAt(index));
        m_Values.setElementAt(store, index);
        m_Hashtable.put(store, Integer.valueOf(index));
    }

    /**
     * Simple main method for testing this class. Builds a few attributes
     * and prints the results of the public accessors to standard output.
     *
     * @param ops command-line arguments (not used)
     */
    public static void main(String[] ops) {
        try {
            // Create numeric attributes "length" and "weight"
            M5Attribute length = new M5Attribute("length");
            M5Attribute weight = new M5Attribute("weight");
            // Create vector to hold nominal values "first", "second", "third"
            M5Vector my_nominal_values = new M5Vector(3);
            my_nominal_values.addElement("first");
            my_nominal_values.addElement("second");
            my_nominal_values.addElement("third");
            // Create nominal attribute "position"
            M5Attribute position = new M5Attribute("position",
                    my_nominal_values);
            // Print the name of "position"
            System.out.println("Name of \"position\": " + position.name());
            // Print the values of "position"
            Enumeration attValues = position.enumerateValues();
            while (attValues.hasMoreElements()) {
                String string = (String) attValues.nextElement();
                System.out.println("Value of \"position\": " + string);
            }
            // Shallow copy attribute "position"
            M5Attribute copy = (M5Attribute) position.copy();
            // Test if attributes are the same
            System.out.println("Copy is the same as original: " +
                               copy.equals(position));
            // Print index of attribute "weight" (should be unset: -1)
            System.out.println("Index of attribute \"weight\" (should be -1): " +
                               weight.index());
            // Print index of value "first" of attribute "position"
            System.out.println(
                    "Index of value \"first\" of \"position\" (should be 0): " +
                    position.indexOfValue("first"));
            // Tests type of attribute "position"
            System.out.println("\"position\" is numeric: " + position.isNumeric());
            System.out.println("\"position\" is nominal: " + position.isNominal());
            System.out.println("\"position\" is string: " + position.isString());
            // Prints name of attribute "position"
            System.out.println("Name of \"position\": " + position.name());
            // Prints number of values of attribute "position"
            System.out.println("Number of values for \"position\": " +
                               position.numValues());
            // Prints the values (again)
            for (int i = 0; i < position.numValues(); i++) {
                System.out.println("Value " + i + ": " + position.value(i));
            }
            // Prints the attribute "position" in ARFF format
            System.out.println(position);
            // Checks type of attribute "position" using constants
            switch (position.type()) {
            case M5Attribute.NUMERIC:
                System.out.println("\"position\" is numeric");
                break;
            case M5Attribute.NOMINAL:
                System.out.println("\"position\" is nominal");
                break;
            case M5Attribute.STRING:
                System.out.println("\"position\" is string");
                break;
            default:
                System.out.println("\"position\" has unknown type");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}