/*
* BIRCHCluster.java
* Copyright (C) 2001 Gabi Schmidberger.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
package weka.datagenerators;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.OptionHandler;
import weka.core.Option;
import weka.core.Utils;
import java.io.Serializable;
import java.util.Random;
import java.util.Enumeration;
import java.util.Vector;
/**
* Cluster data generator designed for the BIRCH System
*
* Dataset is generated with instances in K clusters.
* Instances are 2-d data points.
* Each cluster is characterized by the number of data points in it,
* its radius and its center. The location of the cluster centers is
* determined by the pattern parameter. Three patterns can be selected:
* grid, sine and random (the sine pattern is currently not implemented
* separately, its centers are placed like those of the random pattern).
* todo:
*
* (out of: BIRCH: An Efficient Data Clustering Method for Very Large
* Databases; T. Zhang, R. Ramkrishnan, M. Livny; 1996 ACM)
*
*
* Instances are generated cluster by cluster. Every attribute value of
* an instance is drawn from a Gaussian distribution around the cluster
* center, using a standard deviation of radius / sqrt(2).<p>
*
* If a class attribute is requested, each instance is labelled with the
* name of the cluster it was generated for (c0, c1, ...).<p>
*
* Valid options are:<p>
*
* -G <br>
* The pattern for instance generation is grid.<br>
* This flag cannot be used at the same time as flag I.
* The pattern is random, if neither flag G nor flag I is set.<p>
*
* -I <br>
* The pattern for instance generation is sine.<br>
* This flag cannot be used at the same time as flag G.
* The pattern is random, if neither flag G nor flag I is set.<p>
*
* -N num .. num <br>
* The range of the number of instances in each cluster (default 1..50).<br>
* Lower number must be between 0 and 2500, upper number must be between
* 50 and 2500.<p>
*
* -R num .. num <br>
* The range of the radius of the clusters (default 0.1 .. SQRT(2)).<br>
* Lower number must be between 0 and SQRT(2), upper number must be between<br>
* SQRT(2) and SQRT(32).<p>
*
* -M num <br>
* Distance multiplier, only used if pattern is grid (default 4). <p>
*
* -C num <br>
* Number of cycles, only used if pattern is sine (default 4). <p>
*
* -O <br>
* Flag for input order is ordered. If flag is not set then input
* order is randomized.<p>
*
* -P num<br>
* Noise rate in percent. Can be between 0% and 30% (default 0%).<br>
* (Remark: The original algorithm only allows noise up to 10%.)<p>
*
* -S seed <br>
* Random number seed for random function used (default 1). <p>
*
* @author Gabi Schmidberger (gabi@cs.waikato.ac.nz)
* @version $Revision: 1.1.1.1 $
**/
public class BIRCHCluster extends ClusterGenerator
implements OptionHandler,
Serializable {
/** @serial Lower boundary of the number of instances per cluster (option N). */
private int m_MinInstNum = 1;
/** @serial Upper boundary of the number of instances per cluster (option N). */
private int m_MaxInstNum = 50;
/** @serial Lower boundary of the cluster radius (option R). */
private double m_MinRadius= 0.1;
/** @serial Upper boundary of the cluster radius (option R), default sqrt(2). */
private double m_MaxRadius = Math.sqrt(2.0);
/** Pattern constant: cluster centers are placed on a grid (flag G). */
public static final int GRID = 0;
/** Pattern constant: sine pattern (flag I). */
public static final int SINE = 1;
/** Pattern constant: cluster centers are placed randomly (default). */
public static final int RANDOM = 2;
/** @serial Selected pattern, one of GRID, SINE or RANDOM (changed with flags G or I). */
private int m_Pattern = RANDOM;
/** @serial Distance multiplier, used to compute the grid width (option M). */
private double m_DistMult = 4.0;
/** @serial Number of cycles (option C). */
private int m_NumCycles = 4;
/** Input order constant: instances are emitted cluster after cluster (flag O). */
public static final int ORDERED = 0;
/** Input order constant: instances are emitted in random order (default). */
public static final int RANDOMIZED = 1;
/** @serial Selected input order, ORDERED or RANDOMIZED (changed with flag O). */
private int m_InputOrder = RANDOMIZED;
/** @serial Noise rate in percent (option P, documented range 0 to 30). */
private double m_NoiseRate = 0.0;
/** @serial Seed of the random number generator (option S). */
private int m_Seed = 1;
/** @serial Dataset format, set by defineDataFormat(). */
private Instances m_DatasetFormat = null;
/** @serial Random number generator, created lazily from the seed in getRandom(). */
private Random m_Random = null;
/**
 * @serial Debug flag.
 * NOTE(review): no visible code in this class reads or sets it - confirm
 * whether it is still needed.
 */
private int m_Debug = 0;
/** @serial List of Cluster objects, filled by defineDataFormat(). */
private FastVector m_ClusterList;
// The following fields are only used when the pattern is GRID.
/** @serial Number of grid positions per dimension. */
private int m_GridSize;
/** @serial Factor that turns a grid position into a coordinate. */
private double m_GridWidth;
/**
 * Describes one cluster: the number of instances it contains, its
 * radius and the coordinates of its center.
 */
private class Cluster implements Serializable {

  /** number of instances generated for this cluster */
  private int m_InstNum;

  /** radius of the cluster; the variance is radius ** 2 / 2 */
  private double m_Radius;

  /** coordinates of the cluster center, one value per attribute */
  private double [] m_Center;

  /**
   * Constructor used for pattern = RANDOM. Every coordinate of the
   * center is drawn uniformly from [0, number of clusters).
   *
   * @param instNum the number of instances
   * @param radius radius of the cluster
   * @param random generator used to draw the center coordinates
   */
  private Cluster(int instNum, double radius, Random random) {
    double [] center = new double[m_NumAttributes];
    for (int dim = 0; dim < center.length; dim++) {
      center[dim] = random.nextDouble() * (double) m_NumClusters;
    }
    m_Center = center;
    m_Radius = radius;
    m_InstNum = instNum;
  }

  /**
   * Constructor used for pattern = GRID. The center coordinate in each
   * dimension is (grid position + 1) * gridWidth.
   *
   * @param instNum the number of instances
   * @param radius radius of the cluster
   * @param gridVector vector with the grid position of the center
   * @param gridWidth factor applied to the grid position
   */
  private Cluster(int instNum,
                  double radius,
                  int [] gridVector,
                  double gridWidth) {
    double [] center = new double[m_NumAttributes];
    for (int dim = 0; dim < center.length; dim++) {
      center[dim] = ((double) gridVector[dim] + 1.0) * gridWidth;
    }
    m_Center = center;
    m_Radius = radius;
    m_InstNum = instNum;
  }

  /** @return the number of instances of this cluster */
  private int getInstNum () {
    return m_InstNum;
  }

  /** @return the radius of this cluster */
  private double getRadius () {
    return m_Radius;
  }

  /** @return the variance, i.e. radius ** 2 / 2 */
  private double getVariance () {
    return Math.pow(m_Radius, 2.0) / 2.0;
  }

  /** @return the standard deviation, i.e. radius / sqrt(2) */
  private double getStdDev () {
    return (m_Radius / Math.pow(2.0, 0.5));
  }

  /** @return the center coordinates (the internal array, not a copy) */
  private double [] getCenter () {
    return m_Center;
  }

  /**
   * Returns one coordinate of the center.
   *
   * @param dimension index of the requested coordinate
   * @return the center value in the given dimension
   * @exception Exception if the center has no such dimension
   */
  private double getCenterValue (int dimension) throws Exception {
    if (dimension < m_Center.length) {
      return m_Center[dimension];
    }
    throw new Exception("Current system has only " +
                        m_Center.length + " dimensions.");
  }
} // end class Cluster
/**
 * Counter with a fixed number of digits and a common base. It is used
 * to enumerate the grid positions at which cluster centers are placed.
 */
private class GridVector implements Serializable {

  /** the digits of the counter, lowest digit first */
  private int [] m_GridVector;

  /** one higher than the highest value a single digit can take */
  private int m_Base;

  /** number of digits */
  private int m_Size;

  /**
   * Creates a counter with all digits set to zero.
   *
   * @param numDim number of dimensions = number of attributes
   * @param base one higher than the highest possible value of a digit
   */
  private GridVector(int numDim, int base) {
    m_Base = base;
    m_Size = numDim;
    // a freshly allocated int array already contains only zeros
    m_GridVector = new int [numDim];
  }

  /**
   * Returns the digits.
   *
   * @return the integer array (the internal array, not a copy)
   */
  private int [] getGridVector() {
    return m_GridVector;
  }

  /**
   * A digit has overflowed if it is zero after it was incremented.
   *
   * @param digit the digit to test
   * @return true if digit is 0
   */
  private boolean overflow(int digit) {
    return digit == 0;
  }

  /**
   * Increments a single digit and wraps around to zero once the base
   * is reached.
   *
   * @param digit the digit to increment
   * @return the incremented digit
   */
  private int addOne(int digit) {
    int next = digit + 1;
    return (next >= m_Base) ? 0 : next;
  }

  /**
   * Increments the whole counter by one, carrying into the next digit
   * for as long as the previous digit wrapped around.
   */
  private void addOne() {
    m_GridVector[0] = addOne(m_GridVector[0]);
    for (int pos = 1;
         pos < m_Size && overflow(m_GridVector[pos - 1]);
         pos++) {
      m_GridVector[pos] = addOne(m_GridVector[pos]);
    }
  }
} // end class GridVector
/**
 * Describes this data generator in one sentence.
 *
 * @return a description of the data generator suitable for
 * displaying in the explorer/experimenter gui
 */
public String globalInfo() {
  String description = "A data generator that produces data points in clusters.";
  return description;
}
/**
 * Sets the lower and upper boundary for instances per cluster.
 * Whitespace around the two numbers is ignored. Nothing is changed if
 * the string cannot be parsed.
 *
 * @param fromTo the string containing the lower and upper boundary for
 * instances per cluster separated by .. (for example "1..50")
 * @exception IllegalArgumentException if the .. separator is missing;
 * if one of the boundaries is not an integer, a NumberFormatException
 * (a subclass of IllegalArgumentException) is thrown
 */
public void setInstNums(String fromTo) {
  int sep = fromTo.indexOf("..");
  if (sep < 0) {
    throw new IllegalArgumentException(
      "Range of instances per cluster must be given as <num>..<num>: "
      + fromTo);
  }
  // parse both boundaries before any state is changed, so that a
  // malformed upper boundary cannot leave a half-updated range behind
  int from = Integer.parseInt(fromTo.substring(0, sep).trim());
  int to = Integer.parseInt(fromTo.substring(sep + 2).trim());
  setMinInstNum(from);
  setMaxInstNum(to);
}
/**
 * Builds the range string for the instances per cluster.
 *
 * @return the string containing the lower and upper boundary for
 * instances per cluster separated by ..
 */
public String getInstNums() {
  StringBuffer range = new StringBuffer();
  range.append(getMinInstNum());
  range.append("..");
  range.append(getMaxInstNum());
  return range.toString();
}
/**
 * Returns the lower boundary for instances per cluster.
 *
 * @return the lower boundary for instances per cluster
 */
public int getMinInstNum() {
  return this.m_MinInstNum;
}
/**
 * Changes the lower boundary for instances per cluster.
 *
 * @param newMinInstNum new lower boundary for instances per cluster
 */
public void setMinInstNum(int newMinInstNum) {
  this.m_MinInstNum = newMinInstNum;
}
/**
 * Returns the upper boundary for instances per cluster.
 *
 * @return the upper boundary for instances per cluster
 */
public int getMaxInstNum() {
  return this.m_MaxInstNum;
}
/**
 * Changes the upper boundary for instances per cluster.
 *
 * @param newMaxInstNum new upper boundary for instances per cluster
 */
public void setMaxInstNum(int newMaxInstNum) {
  this.m_MaxInstNum = newMaxInstNum;
}
/**
 * Sets the lower and upper boundary for the radius of the clusters.
 * Nothing is changed if the string cannot be parsed.
 *
 * @param fromTo the string containing the lower and upper boundary for
 * the radius of the clusters, separated by .. (for example "0.1..1.4")
 * @exception IllegalArgumentException if the .. separator is missing;
 * if one of the boundaries is not a number, a NumberFormatException
 * (a subclass of IllegalArgumentException) is thrown
 */
public void setRadiuses(String fromTo) {
  int sep = fromTo.indexOf("..");
  if (sep < 0) {
    throw new IllegalArgumentException(
      "Range of radius must be given as <num>..<num>: " + fromTo);
  }
  // parse both boundaries before any state is changed, so that a
  // malformed upper boundary cannot leave a half-updated range behind
  double from = Double.valueOf(fromTo.substring(0, sep).trim()).doubleValue();
  double to = Double.valueOf(fromTo.substring(sep + 2).trim()).doubleValue();
  setMinRadius(from);
  setMaxRadius(to);
}
/**
 * Builds the range string for the radius of the clusters. Both
 * boundaries are formatted with Utils.doubleToString using two
 * decimal places.
 *
 * @return the string containing the lower and upper boundary for
 * the radius of the clusters, separated by ..
 */
public String getRadiuses() {
  String lower = Utils.doubleToString(getMinRadius(), 2);
  String upper = Utils.doubleToString(getMaxRadius(), 2);
  return lower + ".." + upper;
}
/**
 * Returns the lower boundary for the radiuses of the clusters.
 *
 * @return the lower boundary for the radiuses of the clusters
 */
public double getMinRadius() {
  return this.m_MinRadius;
}
/**
 * Changes the lower boundary for the radiuses of the clusters.
 *
 * @param newMinRadius new lower boundary for the radiuses of the clusters
 */
public void setMinRadius(double newMinRadius) {
  this.m_MinRadius = newMinRadius;
}
/**
 * Returns the upper boundary for the radiuses of the clusters.
 *
 * @return the upper boundary for the radiuses of the clusters
 */
public double getMaxRadius() {
  return this.m_MaxRadius;
}
/**
 * Changes the upper boundary for the radiuses of the clusters.
 *
 * @param newMaxRadius new upper boundary for the radiuses of the clusters
 */
public void setMaxRadius(double newMaxRadius) {
  this.m_MaxRadius = newMaxRadius;
}
/**
 * Tells whether the grid pattern is selected (option G).
 *
 * @return true if grid flag is set
 */
public boolean getGridFlag() {
  return GRID == m_Pattern;
}
/**
 * Tells whether the sine pattern is selected (option I).
 *
 * @return true if sine flag is set
 */
public boolean getSineFlag() {
  return SINE == m_Pattern;
}
/**
 * Returns the pattern type (GRID, SINE or RANDOM).
 *
 * @return the current pattern type
 */
public int getPattern() {
  return this.m_Pattern;
}
/**
 * Changes the pattern type. The value is stored without validation.
 *
 * @param newPattern new pattern type
 */
public void setPattern(int newPattern) {
  this.m_Pattern = newPattern;
}
/**
 * Returns the distance multiplier.
 *
 * @return the distance multiplier
 */
public double getDistMult() {
  return this.m_DistMult;
}
/**
 * Changes the distance multiplier.
 *
 * @param newDistMult new distance multiplier
 */
public void setDistMult(double newDistMult) {
  this.m_DistMult = newDistMult;
}
/**
 * Returns the number of cycles.
 *
 * @return the number of cycles
 */
public int getNumCycles() {
  return this.m_NumCycles;
}
/**
 * Changes the number of cycles.
 *
 * @param newNumCycles new number of cycles
 */
public void setNumCycles(int newNumCycles) {
  this.m_NumCycles = newNumCycles;
}
/**
 * Returns the input order (ORDERED or RANDOMIZED).
 *
 * @return the current input order
 */
public int getInputOrder() {
  return this.m_InputOrder;
}
/**
 * Changes the input order. The value is stored without validation.
 *
 * @param newInputOrder new input order
 */
public void setInputOrder(int newInputOrder) {
  this.m_InputOrder = newInputOrder;
}
/**
 * Tells whether the input order is ordered (option O).
 *
 * @return true if ordered flag is set
 */
public boolean getOrderedFlag() {
  return ORDERED == m_InputOrder;
}
/**
 * Returns the noise rate in percent.
 *
 * @return the percentage of noise set
 */
public double getNoiseRate() {
  return this.m_NoiseRate;
}
/**
 * Changes the noise rate in percent. The value is stored without
 * validation.
 *
 * @param newNoiseRate new percentage of noise
 */
public void setNoiseRate(double newNoiseRate) {
  this.m_NoiseRate = newNoiseRate;
}
/**
 * Returns the random generator. If none has been set yet, a new one is
 * created from the current seed and remembered.
 *
 * @return the random generator
 */
public Random getRandom() {
  if (m_Random != null) {
    return m_Random;
  }
  m_Random = new Random (getSeed());
  return m_Random;
}
/**
 * Replaces the random generator.
 *
 * @param newRandom is the random generator.
 */
public void setRandom(Random newRandom) {
  this.m_Random = newRandom;
}
/**
 * Returns the random number seed.
 *
 * @return the random number seed.
 */
public int getSeed() {
  return this.m_Seed;
}
/**
 * Changes the random number seed. An already created random generator
 * is not touched by this method.
 *
 * @param newSeed the new random number seed.
 */
public void setSeed(int newSeed) {
  this.m_Seed = newSeed;
}
/**
 * Returns the dataset format.
 *
 * @return the dataset format, or null if none has been set.
 */
public Instances getDatasetFormat() {
  return this.m_DatasetFormat;
}
/**
 * Changes the dataset format.
 *
 * @param newDatasetFormat the new dataset format.
 */
public void setDatasetFormat(Instances newDatasetFormat) {
  this.m_DatasetFormat = newDatasetFormat;
}
/**
 * Returns the single mode flag. This generator always answers false,
 * because generateExample always throws an exception.
 *
 * @return true if method generateExample can be used.
 */
public boolean getSingleModeFlag() {
  return false;
}
/**
 * Returns an enumeration describing the available options.
 * The listed names match the options parsed by setOptions: the flags
 * G, I and O take no argument, all other options take one.
 *
 * @return an enumeration of all the available options
 */
public Enumeration listOptions() {
  Vector newVector = new Vector(9);
  newVector.addElement(new Option(
    "\tSet pattern to grid (default is random).",
    "G", 0, "-G"));
  // the sine flag is I; S is reserved for the random number seed
  newVector.addElement(new Option(
    "\tSet pattern to sine (default is random).",
    "I", 0, "-I"));
  newVector.addElement(new Option(
    "\tThe range of number of instances per cluster (default 1..50).",
    "N", 1, "-N <num>..<num>"));
  newVector.addElement(new Option(
    "\tThe range of radius per cluster (default 0.1..sqrt(2)).",
    "R", 1, "-R <num>..<num>"));
  newVector.addElement(new Option(
    "\tThe distance multiplier (default 4).",
    "M", 1, "-M <num>"));
  newVector.addElement(new Option(
    "\tThe number of cycles (default 4).",
    "C", 1, "-C <num>"));
  newVector.addElement(new Option(
    "\tSet input order to ordered (default is randomized).",
    "O", 0, "-O"));
  newVector.addElement(new Option(
    "\tThe noise rate in percent (default 0).",
    "P", 1, "-P <num>"));
  newVector.addElement(new Option(
    "\tThe Seed for random function (default 1).",
    "S", 1, "-S <num>"));
  return newVector.elements();
}
/**
 * Resets every option of this generator to its default value:
 * instances per cluster 1..50, radius 0.1..sqrt(2), pattern RANDOM,
 * distance multiplier 4, number of cycles 4, input order RANDOMIZED,
 * noise rate 0 and seed 1. <p>
 */
public void setDefaultOptions() {
  // pattern and the pattern specific parameters
  m_Pattern = RANDOM;
  m_DistMult = 4.0;
  m_NumCycles = 4;
  // ranges for cluster size and radius
  m_MinInstNum = 1;
  m_MaxInstNum = 50;
  m_MinRadius = 0.1;
  m_MaxRadius = Math.sqrt(2.0);
  // remaining settings
  m_InputOrder = RANDOMIZED;
  m_NoiseRate = 0.0;
  m_Seed = 1;
}
/**
 * Parses a list of options for this object. All options are first
 * reset to their defaults, then every option found in the array
 * overrides its default.<p>
 *
 * For list of valid options see class description.<p>
 *
 * @param options the list of options as an array of strings
 * @exception Exception if the flags G and I are both set, if option M
 * is given without flag G, or if option C is given without flag I
 */
public void setOptions(String[] options) throws Exception {
  setDefaultOptions();

  // ranges
  String instRange = Utils.getOption('N', options);
  if (instRange.length() > 0) {
    setInstNums(instRange);
  }
  String radiusRange = Utils.getOption('R', options);
  if (radiusRange.length() > 0) {
    setRadiuses(radiusRange);
  }

  // pattern
  boolean grid = Utils.getFlag('G', options);
  boolean sine = Utils.getFlag('I', options);
  if (grid && sine) {
    throw new Exception("Flags G and I can only be set mutually exclusiv.");
  }
  if (grid) {
    setPattern(GRID);
  } else if (sine) {
    setPattern(SINE);
  }

  // pattern specific parameters
  String distMult = Utils.getOption('M', options);
  if (distMult.length() > 0) {
    if (!grid) {
      throw new Exception("Option M can only be used with GRID pattern.");
    }
    setDistMult(Double.parseDouble(distMult));
  }
  String cycles = Utils.getOption('C', options);
  if (cycles.length() > 0) {
    if (!sine) {
      throw new Exception("Option C can only be used with SINE pattern.");
    }
    // the value is parsed as a double and truncated to an int
    setNumCycles((int) Double.parseDouble(cycles));
  }

  // input order, noise and seed
  if (Utils.getFlag('O', options)) {
    setInputOrder(ORDERED);
  }
  String noise = Utils.getOption('P', options);
  if (noise.length() > 0) {
    setNoiseRate(Double.parseDouble(noise));
  }
  String seed = Utils.getOption('S', options);
  if (seed.length() > 0) {
    setSeed(Integer.parseInt(seed));
  }
}
/**
 * Gets the current settings of the datagenerator BIRCHCluster.
 * Every emitted option uses the letter that setOptions parses, so the
 * returned array can be passed back to setOptions. Unused entries at
 * the end of the array are empty strings.
 *
 * @return an array of strings suitable for passing to setOptions
 */
public String [] getOptions() {
  String [] options = new String [20];
  int i = 0;
  options[i++] = "-N"; options[i++] = "" + getInstNums();
  options[i++] = "-R"; options[i++] = "" + getRadiuses();
  if (getGridFlag()) {
    options[i++] = "-G";
    // the distance multiplier is option M (not D) in setOptions
    options[i++] = "-M"; options[i++] = "" + getDistMult();
  }
  if (getSineFlag()) {
    options[i++] = "-I";
    options[i++] = "-C"; options[i++] = "" + getNumCycles();
  }
  if (getOrderedFlag()) {
    options[i++] = "-O";
  }
  options[i++] = "-P"; options[i++] = "" + getNoiseRate();
  // include the seed, otherwise it is reset to its default on a round trip
  options[i++] = "-S"; options[i++] = "" + getSeed();
  while (i < options.length) {
    options[i++] = "";
  }
  return options;
}
/**
 * Initializes the format for the dataset produced. A new random
 * generator is created from the seed, the numeric attributes X0, X1, ...
 * (and, if the class flag is set, a nominal attribute "class" with the
 * values c0, c1, ...) are defined, and the clusters are drawn.
 *
 * @return the output data format
 * @exception Exception data format could not be defined
 */
public Instances defineDataFormat() throws Exception {
  // start from a fresh generator so that the output depends on the seed only
  Random random = new Random (getSeed());
  setRandom(random);

  boolean classFlag = getClassFlag();
  FastVector attributes = new FastVector(3);

  // define dataset
  for (int i = 0; i < getNumAttributes(); i++) {
    attributes.addElement(new Attribute("X" + i));
  }
  if (classFlag) {
    FastVector classValues = new FastVector (m_NumClusters);
    for (int i = 0; i < m_NumClusters; i++) {
      classValues.addElement("c" + i);
    }
    attributes.addElement(new Attribute ("class", classValues));
  }

  Instances dataset = new Instances(getRelationName(), attributes, 0);
  if (classFlag) {
    dataset.setClassIndex(m_NumAttributes);
  }

  // set dataset format of this class (an empty copy of the header)
  setDatasetFormat(new Instances(dataset, 0));

  m_ClusterList = defineClusters(random);

  // diagnostics must not end up on standard output
  if (m_Debug > 0) {
    System.err.println("dataset" + dataset.numAttributes());
  }
  return dataset;
}
/**
 * Generating a single example is not supported by this generator.
 *
 * @return never returns normally
 * @exception Exception always, because examples cannot be generated
 * one by one
 */
public Instance generateExample() throws Exception {
  String message = "Examples cannot be generated one by one.";
  throw new Exception(message);
}
/**
 * Generates all examples of the dataset, using the random generator
 * and the dataset format of this object. Only the ordered input order
 * is implemented.
 *
 * @return the instances generated
 * @exception Exception if format not defined or if the input order is
 * not ORDERED
 */
public Instances generateExamples() throws Exception {
  Random random = getRandom();
  Instances data = getDatasetFormat();
  if (data == null) {
    throw new Exception("Dataset format not defined.");
  }
  if (!getOrderedFlag()) {
    throw new Exception("RANDOMIZED is not yet implemented.");
  }
  // generate examples
  return generateExamples(random, data);
}
/**
* Generate all examples of the dataset.
* @return the instance generated
* @exception Exception if format not defined
*/
public Instances generateExamples(Random random,
Instances format) throws Exception {
Instance example = null;
if (format == null) throw new Exception("Dataset format not defined.");
// generate examples for one cluster after another
int cNum = 0;
for (Enumeration enum = m_ClusterList.elements();
enum.hasMoreElements(); cNum++) {
Cluster cl = (Cluster) enum.nextElement();
double stdDev = cl.getStdDev();
int instNum = cl.getInstNum();
double [] center = cl.getCenter();
String cName = "c" + cNum;
for (int i = 0; i < instNum; i++) {
// generate example
example = generateInstance (format,
random,
stdDev,
center,
cName);
if (example != null)
example.setDataset(format);
format.add(example);
}
}
return (format);
}
/**
 * Generates one instance of a cluster. Each attribute value is a
 * Gaussian random number scaled by stdDev and shifted by the
 * corresponding center coordinate. If the class flag is set, the class
 * value is set to the cluster name.
 *
 * @param format the dataset the instance belongs to
 * @param randomG the random number generator
 * @param stdDev the standard deviation of the cluster
 * @param center the center of the cluster
 * @param cName the name of the cluster, used as class value
 * @return the instance generated
 */
private Instance generateInstance (Instances format,
                                   Random randomG,
                                   double stdDev,
                                   double [] center,
                                   String cName
                                   ) {
  boolean withClass = getClassFlag();
  // one extra slot for the class attribute, if there is one
  int numValues = withClass ? m_NumAttributes + 1 : m_NumAttributes;

  Instance example = new Instance(numValues);
  example.setDataset(format);

  for (int att = 0; att < m_NumAttributes; att++) {
    double value = randomG.nextGaussian() * stdDev + center[att];
    example.setValue(att, value);
  }

  if (withClass) {
    example.setClassValue(cName);
  }
  return example;
}
/**
 * Defines the clusters. The GRID pattern has its own placement of the
 * centers, every other pattern uses the random placement.
 *
 * @param random random number generator
 * @return the list of clusters
 * @exception Exception passed on from the pattern specific methods
 */
private FastVector defineClusters(Random random)
  throws Exception {
  return (m_Pattern == GRID)
    ? defineClustersGRID(random)
    : defineClustersRANDOM(random);
}
/**
 * Defines the clusters if pattern is GRID. The grid size is the
 * smallest integer that is not below the m_NumAttributes-th root of
 * the number of clusters, the grid width is the mean of minimal and
 * maximal radius times the distance multiplier. The number of
 * instances and the radius of each cluster are drawn randomly from
 * their ranges.
 *
 * @param random random number generator
 * @return the list of clusters
 * @exception Exception declared for symmetry with the other
 * defineClusters methods
 */
private FastVector defineClustersGRID(Random random)
  throws Exception {
  FastVector clusters = new FastVector(m_NumClusters);
  double diffInstNum = (double) (m_MaxInstNum - m_MinInstNum);
  double minInstNum = (double) m_MinInstNum;
  double diffRadius = m_MaxRadius - m_MinRadius;

  // compute gridsize: round the root up to the next integer
  double gs = Math.pow(m_NumClusters, 1.0 / m_NumAttributes);
  m_GridSize = (int) Math.ceil(gs);

  // compute gridwidth
  m_GridWidth = ((m_MaxRadius + m_MinRadius) / 2) * m_DistMult;

  // diagnostics must not end up on standard output
  if (m_Debug > 0) {
    System.err.println("GridSize= " + m_GridSize);
    System.err.println("GridWidth= " + m_GridWidth);
  }

  // initialize gridvector with zeros
  GridVector gv = new GridVector(m_NumAttributes, m_GridSize);

  for (int i = 0; i < m_NumClusters; i++) {
    int instNum = (int) (random.nextDouble() * diffInstNum
                         + minInstNum);
    double radius = (random.nextDouble() * diffRadius) + m_MinRadius;

    // center is defined in the constructor of cluster, which copies
    // the current grid position before it is advanced
    Cluster cluster = new Cluster(instNum, radius,
                                  gv.getGridVector(), m_GridWidth);
    clusters.addElement(cluster);
    gv.addOne();
  }
  return clusters;
}
/**
 * Defines the clusters if pattern is RANDOM. For every cluster the
 * number of instances and the radius are drawn from their ranges,
 * afterwards the center is drawn by the cluster constructor.
 *
 * @param random random number generator
 * @return the list of clusters
 * @exception Exception declared for symmetry with the other
 * defineClusters methods
 */
private FastVector defineClustersRANDOM(Random random)
  throws Exception {
  double instNumSpan = (double) (m_MaxInstNum - m_MinInstNum);
  double instNumBase = (double) m_MinInstNum;
  double radiusSpan = m_MaxRadius - m_MinRadius;

  FastVector result = new FastVector(m_NumClusters);
  int made = 0;
  while (made < m_NumClusters) {
    // the order of the draws matters for reproducibility:
    // size first, then radius, then the center (in the constructor)
    int instNum = (int) (random.nextDouble() * instNumSpan
                         + instNumBase);
    double radius = (random.nextDouble() * radiusSpan) + m_MinRadius;
    result.addElement(new Cluster(instNum, radius, random));
    made++;
  }
  return result;
}
/**
 * Compiles documentation about the data generation after
 * the generation process. This generator has nothing to report, so
 * only an empty comment block is returned.
 *
 * @return string with additional information about generated dataset
 * @exception Exception never thrown here; declared for callers that
 * expect it
 */
public String generateFinished() throws Exception {
  // string is empty
  return "\n%\n%\n";
}
/**
* Compiles documentation about the data generation before
* the generation process
*
* @return string with additional information
*/
public String generateStart() {
StringBuffer docu = new StringBuffer();
// string is empty
docu.append("\n%\n%\n");
int sumInst = 0;
int cNum = 0;
for (Enumeration enum = m_ClusterList.elements();
enum.hasMoreElements(); cNum++) {
Cluster cl = (Cluster) enum.nextElement();
docu.append("%\n");
docu.append("% Cluster: c"+ cNum + "\n");
docu.append("% ----------------------------------------------\n");
docu.append("% StandardDeviation: "
+ Utils.doubleToString(cl.getStdDev(), 2) + "\n");
docu.append("% Number of instances: "
+ cl.getInstNum() + "\n");
sumInst += cl.getInstNum();
double [] center = cl.getCenter();
docu.append("% ");
for (int i = 0; i < center.length - 1; i++) {
docu.append(Utils.doubleToString(center[i], 2) + ", ");
}
docu.append(Utils.doubleToString(center[center.length - 1], 2) + "\n");
}
docu.append("\n% ----------------------------------------------\n");
docu.append("% Total number of instances: " + sumInst + "\n");
docu.append("% in " + cNum + " clusters\n");
docu.append("% Pattern chosen : ");
if (getGridFlag()) docu.append("GRID, "
+ "distance multiplier = " +
Utils.doubleToString(m_DistMult, 2) + "\n");
else
if (getSineFlag()) docu.append("SINE\n");
else
docu.append("RANDOM\n");
return docu.toString();
}
/**
 * Main method for testing this class. Hands a new BIRCHCluster and the
 * arguments to ClusterGenerator.makeData; the message of any exception
 * is printed to standard output.
 *
 * @param argv should contain arguments for the data producer:
 */
public static void main(String [] argv) {
  BIRCHCluster generator = new BIRCHCluster();
  try {
    ClusterGenerator.makeData(generator, argv);
  } catch (Exception failure) {
    String message = failure.getMessage();
    System.out.println(message);
  }
}
}