/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* HyperPipes.java
* Copyright (C) 2002 University of Waikato
*
*/
package weka.classifiers.misc;
import weka.classifiers.Evaluation;
import weka.classifiers.Classifier;
import weka.classifiers.DistributionClassifier;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Utils;
import weka.core.UnsupportedAttributeTypeException;
import weka.core.UnsupportedClassTypeException;
import java.io.*;
/**
* Class implementing a HyperPipe classifier. For each category a
* HyperPipe is constructed that contains all points of that category
* (essentially records the attribute bounds observed for each category).
* Test instances are classified according to the category that most
* contains the instance.
* Does not handle numeric class. Missing values are skipped: they do not
* widen a HyperPipe during training and are never counted as contained
* when classifying. Extremely simple algorithm, but has the advantage of
* being extremely fast, and works quite well when you have a very large
* number of attributes.
*
* @author Lucio de Souza Coelho (lucio@intelligenesis.net)
* @author Len Trigg (len@reeltwo.com)
* @version $Revision: 1.1.1.1 $
*/
public class HyperPipes extends DistributionClassifier {

  /** The index of the class attribute */
  protected int m_ClassIndex;

  /** The structure of the training data */
  protected Instances m_Instances;

  /** Stores the HyperPipe for each class */
  protected HyperPipe [] m_HyperPipes;

  /**
   * Represents an n-dimensional structure that bounds all instances
   * passed to it (generally all of a given class value).
   * This is an inner (non-static) class because it reads the enclosing
   * classifier's m_ClassIndex to skip the class attribute.
   */
  class HyperPipe implements Serializable {

    /**
     * Numeric bounds per attribute: [i][0] is the smallest and [i][1] the
     * largest value seen. The entry is null for non-numeric attributes.
     */
    protected double [][] m_NumericBounds;

    /**
     * Nominal bounds per attribute: [i][v] is true once value index v has
     * been seen. The entry is null for non-nominal attributes.
     */
    protected boolean [][] m_NominalBounds;

    /**
     * Creates the HyperPipe as the n-dimensional parallel-piped
     * with minimum volume containing all the given instances. Missing
     * values are ignored.
     *
     * @param instances all instances belonging to the same class
     * @exception Exception if an attribute is neither numeric nor nominal
     */
    public HyperPipe(Instances instances) throws Exception {

      int numAttributes = instances.numAttributes();
      m_NumericBounds = new double [numAttributes][];
      m_NominalBounds = new boolean [numAttributes][];

      for (int i = 0; i < numAttributes; i++) {
        switch (instances.attribute(i).type()) {
        case Attribute.NUMERIC:
          // Start with an empty (inverted) interval so that the first
          // observed value sets both the lower and the upper bound.
          m_NumericBounds[i] = new double [2];
          m_NumericBounds[i][0] = Double.POSITIVE_INFINITY;
          m_NumericBounds[i][1] = Double.NEGATIVE_INFINITY;
          break;
        case Attribute.NOMINAL:
          m_NominalBounds[i] = new boolean [instances.attribute(i).numValues()];
          break;
        default:
          throw new UnsupportedAttributeTypeException("Cannot process string attributes!");
        }
      }

      for (int i = 0; i < instances.numInstances(); i++) {
        addInstance(instances.instance(i));
      }
    }

    /**
     * Updates the bounds arrays with a single instance. Missing values
     * are ignored (i.e. they don't change the bounds for that attribute),
     * and the class attribute is skipped.
     *
     * @param instance the instance
     * @exception Exception declared for interface compatibility; this
     * implementation does not throw it for missing values
     */
    public void addInstance(Instance instance) throws Exception {

      for (int j = 0; j < instance.numAttributes(); j++) {
        if ((j == m_ClassIndex) || instance.isMissing(j)) {
          continue;
        }
        double current = instance.value(j);
        if (m_NumericBounds[j] != null) { // i.e. a numeric attribute
          if (current < m_NumericBounds[j][0]) {
            m_NumericBounds[j][0] = current;
          }
          if (current > m_NumericBounds[j][1]) {
            m_NumericBounds[j][1] = current;
          }
        } else { // i.e. a nominal attribute
          m_NominalBounds[j][(int) current] = true;
        }
      }
    }

    /**
     * Returns the fraction of the non-class attributes of a given instance
     * with values lying within the corresponding bounds of the HyperPipe.
     * Missing values are never counted as contained, but the attribute
     * still counts towards the denominator. If the instance has no
     * attributes other than the class, 0 is returned.
     *
     * @param instance the instance
     * @return the fraction of contained attribute values, in [0, 1]
     * @exception Exception declared for interface compatibility; this
     * implementation does not throw it for missing values
     */
    public double partialContains(Instance instance) throws Exception {

      // Number of predictor attributes (everything except the class).
      int numPredictors = instance.numAttributes() - 1;
      if (numPredictors <= 0) {
        // Avoid 0/0 = NaN, which would poison the class distribution.
        return 0;
      }

      int count = 0;
      for (int i = 0; i < instance.numAttributes(); i++) {
        if ((i == m_ClassIndex) || instance.isMissing(i)) {
          continue;
        }
        double current = instance.value(i);
        if (m_NumericBounds[i] != null) { // i.e. a numeric attribute
          if ((current >= m_NumericBounds[i][0])
              && (current <= m_NumericBounds[i][1])) {
            count++;
          }
        } else { // i.e. a nominal attribute
          if (m_NominalBounds[i][(int) current]) {
            count++;
          }
        }
      }
      return ((double) count) / numPredictors;
    }
  }

  /**
   * Generates the classifier: one initially empty HyperPipe is created per
   * class value, and then every training instance is added to the
   * HyperPipe of its class.
   *
   * @param instances set of instances serving as training data
   * @exception Exception if no class attribute is assigned, the class is
   * not nominal, or an attribute type is not supported
   */
  public void buildClassifier(Instances instances) throws Exception {

    if (instances.classIndex() == -1) {
      throw new Exception("No class attribute assigned");
    }
    if (!instances.classAttribute().isNominal()) {
      throw new UnsupportedClassTypeException("HyperPipes: class attribute needs to be nominal!");
    }

    m_ClassIndex = instances.classIndex();
    m_Instances = new Instances(instances, 0); // Copy the structure for ref

    // Create the HyperPipe for each class (from an empty copy of the data,
    // so only the attribute structure is used here)
    m_HyperPipes = new HyperPipe [instances.numClasses()];
    for (int i = 0; i < m_HyperPipes.length; i++) {
      m_HyperPipes[i] = new HyperPipe(new Instances(instances, 0));
    }

    // Add the instances
    for (int i = 0; i < instances.numInstances(); i++) {
      updateClassifier(instances.instance(i));
    }
  }

  /**
   * Updates the classifier with a single training instance. Instances
   * with a missing class value are ignored.
   *
   * @param instance the instance to be put into the classifier
   * @exception Exception if the instance could not be included successfully
   */
  public void updateClassifier(Instance instance) throws Exception {

    if (instance.classIsMissing()) {
      return;
    }
    m_HyperPipes[(int) instance.classValue()].addInstance(instance);
  }

  /**
   * Computes the class distribution for the given test instance. Each
   * class is scored by the fraction of attribute values that fall inside
   * its HyperPipe, and the scores are normalized to sum to one. If no
   * class contains any attribute value of the instance (all scores are
   * zero), a uniform distribution is returned, because normalizing an
   * all-zero array is not possible.
   *
   * @param instance the instance to be classified
   * @return the class membership distribution for the instance
   * @exception Exception if the instance can't be classified
   */
  public double [] distributionForInstance(Instance instance) throws Exception {

    double [] dist = new double[m_HyperPipes.length];
    double sum = 0;
    for (int j = 0; j < m_HyperPipes.length; j++) {
      dist[j] = m_HyperPipes[j].partialContains(instance);
      sum += dist[j];
    }

    if (sum > 0) {
      Utils.normalize(dist);
    } else {
      // Utils.normalize rejects a zero sum; fall back to "no information".
      for (int j = 0; j < dist.length; j++) {
        dist[j] = 1.0 / dist.length;
      }
    }
    return dist;
  }

  /**
   * Returns a description of this classifier.
   *
   * @return a description of this classifier as a string.
   */
  public String toString() {

    if (m_HyperPipes == null) {
      return ("HyperPipes classifier");
    }

    StringBuffer text = new StringBuffer("HyperPipes classifier\n");
    // The bounds of the individual HyperPipes are currently not printed.
    return text.toString();
  }

  /**
   * Main method for testing this class.
   *
   * @param argv should contain command line arguments for evaluation
   * (see Evaluation).
   */
  public static void main(String [] argv) {

    try {
      System.out.println(Evaluation.evaluateModel(new HyperPipes(), argv));
    } catch (Exception e) {
      System.err.println(e.getMessage());
    }
  }
}