/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* Clusterer.java
* Copyright (C) 1999 Mark Hall
*
*/
package weka.clusterers;
import java.io.Serializable;
import java.util.*;
import weka.core.*;
import weka.core.metrics.*;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;
import weka.clusterers.initializers.*;
import weka.clusterers.ClusterEvaluation;
/**
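* Abstract base class for centroid-based clusterers. Each instance is
* assigned to the nearest centroid under a pluggable metric, and the
* centroids are recomputed until the assignments stop changing.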
*
* @author Mark Hall (mhall@cs.waikato.ac.nz)
* @version $Revision: 1.1.1.1 $
*/
public abstract class Clusterer implements Cloneable, Serializable, OptionHandler {

  /** Instances currently assigned to each cluster; rebuilt on every assignment pass. */
  public Instances[] instanceses;

  /** Number of clusters (set with the -N option). */
  public int K = 0;

  /** Training data as stored by buildClusterer (after the filter has been applied). */
  public Instances instances;

  /** For each cluster, the Integer indices (into the training data) of its members. */
  public ArrayList[] clusters;

  /** Cluster index of each training instance from the latest assignment pass. */
  public int[] assignments;

  /** Distance measure used to compare an instance with a centroid. */
  public Metric metric = new Euclidean();

  /** Current centroids; when empty at build time they are produced by the initializer. */
  public Instances centroids = new Instances("Centroids");

  /** Strategy that supplies the starting centroids. */
  public Initializer initializer = new RandomInitializer();

  /** Filter applied to the training data before clustering; may be null to skip filtering. */
  public Filter filter = new ReplaceMissingValues();

  // ===============
  // Public methods.
  // ===============

  /**
   * Generates a clusterer. Filters the data, builds the metric, obtains
   * starting centroids (unless some are already set) and then alternates
   * between assigning instances and recomputing centroids until an
   * assignment pass changes nothing.
   *
   * @param instances set of instances serving as training data
   * @exception Exception if the clusterer has not been
   * generated successfully
   */
  public void buildClusterer(Instances instances) throws Exception {
    if (instances == null) {
      throw new Exception("No training instances set");
    }
    if (filter != null) {
      filter.setInputFormat(instances);
      instances = Filter.useFilter(instances, filter);
    }
    this.instances = instances;
    metric.build(instances);
    if (centroids.numInstances() == 0) {
      initializer.setClusterer(this);
      centroids = initializer.initialize();
    }
    // Checked after initialisation because the initializer is handed this
    // object and could still adjust the public K field.
    if (K <= 0) {
      throw new Exception("Number of clusters must be positive, but K=" + K);
    }
    instanceses = new Instances[K];
    clusters = new ArrayList[K];
    assignments = new int[instances.numInstances()];
    // -1 is never a valid cluster index, so the first pass always registers
    // as a change. With the default zeros, a first pass that put everything
    // into cluster 0 looked converged and the centroids were never updated.
    Arrays.fill(assignments, -1);

    // NOTE(review): there is no iteration cap; the loop ends only when an
    // assignment pass leaves every instance in its previous cluster.
    int loop = 0;
    while (true) {
      System.out.println("===***=== " + loop + " ===***===");
      if (clusterInstances(instances)) {
        break;
      }
      clusterCentroids();
      for (int i = 0; i < K; i++) {
        System.out.print(instanceses[i].numInstances() + "\t");
      }
      System.out.println();
      loop++;
    }
  }

  /**
   * Generates a clusterer from the training data already stored in the
   * instances field.
   *
   * @exception Exception if the clusterer has not been
   * generated successfully
   */
  public void buildClusterer() throws Exception {
    buildClusterer(instances);
  }

  /**
   * Evaluates this clusterer by delegating to ClusterEvaluation.
   *
   * @return the string produced by ClusterEvaluation.evaluateClusterer
   * @exception Exception if the evaluation fails
   */
  public String evaluate() throws Exception {
    return ClusterEvaluation.evaluateClusterer(this);
  }

  /**
   * Assigns a given instance to the cluster whose centroid is nearest
   * according to the metric. Ties go to the lowest cluster index.
   *
   * @param instance the instance to be assigned to a cluster
   * @return the index of the assigned cluster
   * @exception Exception if instance could not be clustered
   * successfully
   */
  public int clusterInstance(Instance instance) throws Exception {
    // Start from +infinity so that any finite distance, however large, wins.
    double min = Double.POSITIVE_INFINITY;
    int assignment = 0;
    for (int i = 0; i < K; i++) {
      double d = metric.distance(instance, centroids.instance(i));
      if (d < min) {
        min = d;
        assignment = i;
      }
    }
    return assignment;
  }

  /**
   * Runs one assignment pass over the stored training data.
   *
   * @return true if no instance changed cluster during the pass
   * @exception Exception if an instance could not be clustered
   */
  public boolean clusterInstances() throws Exception {
    return clusterInstances(instances);
  }

  /**
   * Runs one assignment pass: empties the per-cluster containers, assigns
   * every instance to its nearest centroid and records the result.
   *
   * @param instances the instances to assign
   * @return true if no instance changed cluster during the pass
   * @exception Exception if an instance could not be clustered
   */
  public boolean clusterInstances(Instances instances) throws Exception {
    for (int i = 0; i < K; i++) {
      instanceses[i] = new Instances(instances, 0);
      clusters[i] = new ArrayList();
    }
    boolean done = true;
    for (int i = 0; i < instances.numInstances(); i++) {
      Instance instance = instances.instance(i);
      int assignment = clusterInstance(instance);
      instanceses[assignment].add(instance);
      clusters[assignment].add(new Integer(i));
      if (assignment != assignments[i]) {
        done = false;
        assignments[i] = assignment;
      }
    }
    return done;
  }

  /**
   * Replaces the centroids with the meanOrMode() of each cluster's
   * current members.
   */
  public void clusterCentroids() {
    centroids = new Instances(instances, 0);
    for (int i = 0; i < K; i++) {
      centroids.add(instanceses[i].meanOrMode());
    }
  }

  /**
   * Returns the number of clusters.
   *
   * @return the number of clusters generated for a training dataset.
   * @exception Exception if number of clusters could not be returned
   * successfully
   */
  public int numberOfClusters() throws Exception {
    return K;
  }

  /**
   * Returns the cluster index of every training instance as a new array
   * of doubles. The assignments array is allocated by buildClusterer.
   *
   * @return a copy of the assignments, widened to double
   */
  public double[] getAssignments() {
    double[] array = new double[assignments.length];
    for (int i = 0; i < assignments.length; i++) {
      array[i] = assignments[i];
    }
    return array;
  }

  /**
   * Creates a new instance of a clusterer given its class name and
   * (optional) arguments to pass to its setOptions method.
   *
   * @param clustererName the fully qualified class name of the clusterer
   * @param options an array of options suitable for passing to setOptions. May
   * be null.
   * @return the newly created clusterer, ready for use.
   * @exception Exception if the clusterer class name is invalid, or the
   * options supplied are not acceptable to the clusterer.
   */
  public static Clusterer forName(String clustererName, String[] options) throws Exception {
    return (Clusterer) Utils.forName(Clusterer.class, clustererName, options);
  }

  /**
   * Creates copies of the given clusterer. The model is serialized once
   * and each copy is obtained from that serialized form, so the Clusterer
   * object must be fully Serializable.
   *
   * @param model an example clusterer to copy
   * @param num the number of clusterer copies to create.
   * @return an array of clusterers.
   * @exception Exception if an error occurs
   */
  public static Clusterer[] makeCopies(Clusterer model, int num) throws Exception {
    if (model == null) {
      throw new Exception("No model clusterer set");
    }
    Clusterer[] clusterers = new Clusterer[num];
    SerializedObject so = new SerializedObject(model);
    for (int i = 0; i < clusterers.length; i++) {
      clusterers[i] = (Clusterer) so.getObject();
    }
    return clusterers;
  }

  /**
   * Returns an enumeration describing the available options
   * (-N, -M and -I).
   *
   * @return an enumeration of Option objects
   */
  public Enumeration listOptions() {
    Vector vector = new Vector(3);
    vector.addElement(new Option("\tnumber of clusters.", "N", 1, "-N <num>"));
    vector.addElement(new Option("\tmetric.\tdefault=weka.core.metrics.Euclidean", "M", 1, "-M <metric class>"));
    vector.addElement(new Option("\tinitializer.\tdefault=weka.clusterers.initializers.RandomInitializer", "I", 1, "-I <initializer class>"));
    return vector.elements();
  }

  /**
   * Parses the given options. An option that is absent leaves the
   * corresponding field untouched.
   *
   * -N num: number of clusters.
   * -M class: metric class name.
   * -I class: initializer class name.
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    String string;
    string = Utils.getOption('N', options);
    if (string.length() != 0) {
      K = Integer.parseInt(string);
    }
    // NOTE(review): the same options array is forwarded to the metric and the
    // initializer; confirm Utils.forName accepts whatever flags are left in it.
    string = Utils.getOption('M', options);
    if (string.length() != 0) {
      metric = (Metric) Utils.forName(Metric.class, string, options);
    }
    string = Utils.getOption('I', options);
    if (string.length() != 0) {
      initializer = (Initializer) Utils.forName(Initializer.class, string, options);
    }
  }

  /**
   * Gets the current settings as -N, -M and -I flags, each followed by
   * its value.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String[] getOptions() {
    String[] options = new String[6];
    int current = 0;
    options[current++] = "-N";
    options[current++] = Integer.toString(K);
    options[current++] = "-M";
    options[current++] = metric.getClass().getName();
    options[current++] = "-I";
    options[current++] = initializer.getClass().getName();
    return options;
  }
}