/*
* Generator.java
* Copyright (C) 2000 Gabi Schmidberger
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
package weka.datagenerators;
import java.lang.Exception;
import java.io.FileOutputStream;
import java.io.PrintWriter;
import java.io.Serializable;
import java.util.Enumeration;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Utils;
import weka.core.Attribute;
import weka.core.FastVector;
/**
* Abstract class for cluster data generators.
*
* ------------------------------------------------------------------- <p>
*
* General options are: <p>
*
* -r string <br>
* Name of the relation of the generated dataset. <br>
* (default = name built using name of used generator and options) <p>
*
* -a num <br>
* Number of attributes. (default = 2) <p>
*
* -k num <br>
* Number of clusters. (default = 4) <p>
*
* -c <br>
* Class Flag. If set, cluster is listed in extra class attribute.<p>
*
* -o filename<br>
* writes the generated dataset to the given file using ARFF-Format.
* (default = stdout).
*
* ------------------------------------------------------------------- <p>
*
* Example usage as the main of a datagenerator called RandomGenerator:
* <code> <pre>
* public static void main(String [] args) {
* try {
* DataGenerator.makeData(new RandomGenerator(), argv);
* } catch (Exception e) {
* System.err.println(e.getMessage());
* }
* }
* </pre> </code>
* <p>
*
* ------------------------------------------------------------------ <p>
*
*
* @author Gabi Schmidberger (gabi@cs.waikato.ac.nz)
* @version $Revision: 1.1.1.1 $
*/
public abstract class ClusterGenerator implements Serializable {
/** @serial Debugging mode */
private boolean m_Debug = false;
/** @serial The format for the generated dataset */
private Instances m_Format = null;
/** @serial Relation name the dataset should have */
private String m_RelationName = "";
/** @serial Number of attribute the dataset should have */
protected int m_NumAttributes = 2;
/** @serial Number of Clusters the dataset should have */
protected int m_NumClusters = 4;
/** @serial class flag */
private boolean m_ClassFlag = false;
/** @serial Number of instances that should be produced into the dataset
* this number is by default m_NumExamples,
* but can be reset by the generator
*/
private int m_NumExamplesAct = 0;
/** @serial PrintWriter */
private PrintWriter m_Output = null;
/**
* Initializes the format for the dataset produced.
* Must be called before the generateExample or generateExamples
* methods are used.
*
* @return the format for the dataset
* @exception Exception if the generating of the format failed
*/
abstract Instances defineDataFormat() throws Exception;
/**
* Generates one example of the dataset.
*
* @return the generated example
* @exception Exception if the format of the dataset is not yet defined
* @exception Exception if the generator only works with generateExamples
* which means in non single mode
*/
abstract Instance generateExample() throws Exception;
/**
* Generates all examples of the dataset.
*
* @return the generated dataset
* @exception Exception if the format of the dataset is not yet defined
* @exception Exception if the generator only works with generateExample,
* which means in single mode
*/
abstract Instances generateExamples() throws Exception;
/**
* Generates a comment string that documentates the data generator.
* By default this string is added at the beginning of the produced output
* as ARFF file type, next after the options.
*
* @return string contains info about the generated rules
* @exception Exception if the generating of the documentation fails
*/
abstract String generateStart () throws Exception;
/**
* Generates a comment string that documentates the data generator.
* By default this string is added at the end of the produced output
* as ARFF file type.
*
* @return string contains info about the generated rules
* @exception Exception if the generating of the documentation fails
*/
abstract String generateFinished () throws Exception;
/**
* Return if single mode is set for the given data generator
* mode depends on option setting and or generator type.
*
* @return single mode flag
* @exception Exception if mode is not set yet
*/
abstract boolean getSingleModeFlag () throws Exception;
/**
* Sets the class flag, if class flag is set,
* the cluster is listed as class atrribute in an extra attribute.
* @param classFlag the new class flag
*/
public void setClassFlag(boolean classFlag) { m_ClassFlag = classFlag; }
/**
* Gets the class flag.
* @return the class flag
*/
public boolean getClassFlag() {
boolean b = m_ClassFlag;
return m_ClassFlag; }
/**
* Sets the debug flag.
* @param debug the new debug flag
*/
public void setDebug(boolean debug) {
m_Debug = debug;
}
/**
* Gets the debug flag.
* @return the debug flag
*/
public boolean getDebug() { return m_Debug; }
/**
* Sets the relation name the dataset should have.
* @param relationName the new relation name
*/
public void setRelationName(String relationName) {
if (relationName.length() == 0) {
// build relationname
StringBuffer name = new StringBuffer(this.getClass().getName());
String [] options = getGenericOptions();
for (int i = 0; i < options.length; i++) {
name = name.append(options[i].trim());
}
if (this instanceof OptionHandler) {
options = ((OptionHandler)this).getOptions();
for (int i = 0; i < options.length; i++) {
name = name.append(options[i].trim());
}
}
m_RelationName = name.toString();
}
else
m_RelationName = relationName;
}
/**
* Gets the relation name the dataset should have.
* @return the relation name the dataset should have
*/
public String getRelationName() { return m_RelationName; }
/**
* Sets the number of clusters the dataset should have.
* @param numClusters the new number of clusters
*/
public void setNumClusters(int numClusters) { m_NumClusters = numClusters; }
/**
* Gets the number of clusters the dataset should have.
* @return the number of clusters the dataset should have
*/
public int getNumClusters() { return m_NumClusters; }
/**
* Sets the number of attributes the dataset should have.
* @param numAttributes the new number of attributes
*/
public void setNumAttributes(int numAttributes) {
m_NumAttributes = numAttributes;
}
/**
* Gets the number of attributes that should be produced.
* @return the number of attributes that should be produced
*/
public int getNumAttributes() { return m_NumAttributes; }
/**
* Sets the number of examples the dataset should have.
* @param numExamplesAct the new number of examples
*/
public void setNumExamplesAct(int numExamplesAct) {
m_NumExamplesAct = numExamplesAct;
}
/**
* Gets the number of examples the dataset should have.
* @return the number of examples the dataset should have
*/
public int getNumExamplesAct() { return m_NumExamplesAct; }
/**
* Sets the print writer.
* @param newOutput the new print writer
*/
public void setOutput(PrintWriter newOutput) {
m_Output = newOutput;
}
/**
* Gets the print writer.
* @return print writer object
*/
public PrintWriter getOutput() { return m_Output; }
/**
* Sets the format of the dataset that is to be generated.
* @param the new dataset format of the dataset
*/
protected void setFormat(Instances newFormat) {
m_Format = new Instances(newFormat, 0);
}
/**
* Gets the format of the dataset that is to be generated.
* @return the dataset format of the dataset
*/
protected Instances getFormat() {
Instances format = new Instances(m_Format, 0);
return format;
}
/**
* Returns a string representing the dataset in the instance queue.
* @return the string representing the output data format
*/
protected String toStringFormat() {
if (m_Format == null)
return "";
return m_Format.toString();
}
/**
* Calls the data generator.
*
* @param dataGenerator one of the data generators
* @param options options of the data generator
* @exception Exception if there was an error in the option list
*/
public static void makeData(ClusterGenerator generator, String [] options)
throws Exception {
PrintWriter output = null;
// read options /////////////////////////////////////////////////
try {
setOptions(generator, options);
} catch (Exception ex) {
String specificOptions = "";
if (generator instanceof OptionHandler) {
specificOptions = generator.listSpecificOptions(generator);
}
String genericOptions = listGenericOptions(generator);
throw new Exception('\n' + ex.getMessage()
+ specificOptions + genericOptions);
}
// define dataset format ///////////////////////////////////////
// computes actual number of examples to be produced
generator.setFormat(generator.defineDataFormat());
// get print writer /////////////////////////////////////////////
output = generator.getOutput();
// output of options ////////////////////////////////////////////
output.println("% ");
output.print("% " + generator.getClass().getName() + " ");
String [] outOptions = generator.getGenericOptions();
for (int i = 0; i < outOptions.length; i++) {
output.print(outOptions[i] + " ");
}
outOptions = ((OptionHandler) generator).getOptions();
for (int i = 0; i < outOptions.length; i++) {
output.print(outOptions[i] + " ");
}
output.println("\n%");
// comment at beginning of ARFF File ////////////////////////////
String commentAtStart = generator.generateStart();
if (commentAtStart.length() > 0) {
output.println(commentAtStart);
}
// ask data generator which mode ////////////////////////////////
boolean singleMode = generator.getSingleModeFlag();
// start data producer ////////////////////////////////////////
if (singleMode) {
// output of dataset header //////////////////////////////////
output.println(generator.toStringFormat());
for (int i = 0; i < generator.getNumExamplesAct(); i++) {
// over all examples to be produced
Instance inst = generator.generateExample();
output.println(inst);
}
} else { // generator produces all instances at once
Instances dataset = generator.generateExamples();
// output of dataset ////////////////////////////////////////////
output.println(dataset);
}
// comment at end of ARFF File /////////////////////////////////////
String commentAtEnd = generator.generateFinished();
if (commentAtEnd.length() > 0) {
output.println(commentAtEnd);
}
if (output != null) {
output.close();
}
}
/**
* Makes a string with the options of the specific data generator.
*
* @param generator the datagenerator that is used
* @return string with the options of the data generator used
*/
private String listSpecificOptions(ClusterGenerator generator) {
String optionString = "";
if (generator instanceof OptionHandler) {
optionString += "\nData Generator options:\n\n";
Enumeration enum = ((OptionHandler)generator).listOptions();
while (enum.hasMoreElements()) {
Option option = (Option) enum.nextElement();
optionString += option.synopsis() + '\n'
+ option.description() + "\n";
}
}
return optionString;
}
/**
* Sets the generic options and specific options.
*
* @param generator the data generator used
* @param options the generic options and the specific options
* @exception Exception if help request or any invalid option
*/
private static void setOptions(ClusterGenerator generator,
String[] options) throws Exception {
boolean helpRequest = false;
String outfileName = new String("");
PrintWriter output;
// get help
helpRequest = Utils.getFlag('h', options);
if (Utils.getFlag('d', options)) { generator.setDebug(true); }
// get relationname
String relationName = Utils.getOption('r', options);
// set relation name at end of method after all options are set
// get outputfilename
outfileName = Utils.getOption('o', options);
// get num of clusters
String num = Utils.getOption('k', options);
if (num.length() != 0)
generator.setNumClusters(Integer.parseInt(num));
// get class flag
if (Utils.getFlag('c', options))
generator.setClassFlag(true);
// get num of attributes
String numAttributes = Utils.getOption('a', options);
if (numAttributes.length() != 0)
generator.setNumAttributes(Integer.parseInt(numAttributes));
if (generator instanceof OptionHandler) {
((OptionHandler)generator).setOptions(options);
}
// all options are set, now set relation name
generator.setRelationName(relationName);
// End read options
Utils.checkForRemainingOptions(options);
if (helpRequest) {
throw new Exception("Help requested.\n");
}
if (outfileName.length() != 0) {
output = new PrintWriter(new FileOutputStream(outfileName));
} else {
output = new PrintWriter(System.out);
}
generator.setOutput(output);
}
/**
* Method for listing generic options.
*
* @param generator the data generator
* @return string with the generic data generator options
*/
private static String listGenericOptions (ClusterGenerator generator) {
String genericOptions = "\nGeneral options:\n\n"
+ "-h\n"
+ "\tGet help on available options.\n"
+ "-r <relation name>\n"
+ "\tThe name of the relation for the produced dataset.\n"
+ "-a <number of attributes>\n"
+ "\tThe number of attributes for the produced dataset.\n"
+ "-k <number of clusters>\n"
+ "\tThe number of clusters the dataset is produced in.\n"
+ "-c \n"
+ "\tThe class flag, if set, the cluster is listed in the class "
+ "attribute.\n"
+ "-o <file>\n"
+ "\tThe name of the file output instances will be written to.\n"
+ "\tIf not supplied the instances will be written to stdout.\n";
return genericOptions;
}
/**
* Gets the current generic settings of the datagenerator.
*
* @return an array of strings suitable for passing to setOptions
*/
private String [] getGenericOptions() {
String [] options = new String [10];
int i = 0;
String name = getRelationName();
if (name.length() > 0) {
options[i++] = "-r";
options[i++] = "" + getRelationName();
}
options[i++] = "-a"; options[i++] = "" + getNumAttributes();
options[i++] = "-k"; options[i++] = "" + getNumClusters();
if (getClassFlag()) {
options[i++] = "-c"; options[i++] = "";
}
while (i < options.length) {
options[i++] = "";
}
return options;
}
}