/*
* SEAGenerator.java
* Copyright (C) 2008 University of Waikato, Hamilton, New Zealand
* @author Albert Bifet
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
package tr.gov.ulakbim.jDenetX.streams.generators;
import tr.gov.ulakbim.jDenetX.core.InstancesHeader;
import tr.gov.ulakbim.jDenetX.core.ObjectRepository;
import tr.gov.ulakbim.jDenetX.options.AbstractOptionHandler;
import tr.gov.ulakbim.jDenetX.options.FlagOption;
import tr.gov.ulakbim.jDenetX.options.IntOption;
import tr.gov.ulakbim.jDenetX.streams.InstanceStream;
import tr.gov.ulakbim.jDenetX.tasks.TaskMonitor;
import weka.core.*;
import java.util.Random;
// Generator described in paper:
// W. Nick Street and YongSeog Kim
// "A streaming ensemble algorithm (SEA) for large-scale classification",
// KDD '01: Proceedings of the seventh ACM SIGKDD international conference on Knowledge discovery and data mining
// 377-382 2001.
// Notes:
// The built-in classification functions follow the definitions given in the paper.
/**
 * Instance stream that generates the SEA concepts.
 *
 * <p>Every instance has three numeric attributes, each drawn as
 * {@code 10 * Random.nextDouble()}, and a two-valued nominal class
 * ("groupA" / "groupB"). The class is decided by comparing
 * {@code attrib1 + attrib2} with a threshold selected through
 * {@link #functionOption}; {@code attrib3} never influences the class.
 * After the class has been determined it is flipped with a probability of
 * {@link #noisePercentageOption} percent.
 *
 * <p>All randomness comes from a single {@link Random} seeded with
 * {@link #instanceRandomSeedOption}, so a restarted stream replays the same
 * sequence as long as the options are unchanged.
 */
public class SEAGenerator extends AbstractOptionHandler implements
        InstanceStream {

    private static final long serialVersionUID = 1L;

    @Override
    public String getPurposeString() {
        return "Generates SEA concepts functions.";
    }

    /** Selects which of the four classification functions is used (1-based). */
    public IntOption functionOption = new IntOption("function", 'f',
            "Classification function used, as defined in the original paper.",
            1, 1, 4);

    /** Seed of the random generator that produces attributes and noise. */
    public IntOption instanceRandomSeedOption = new IntOption(
            "instanceRandomSeed", 'i',
            "Seed for random generation of instances.", 1);

    /** When set, emitted instances alternate strictly between the two classes. */
    public FlagOption balanceClassesOption = new FlagOption("balanceClasses",
            'b', "Balance the number of instances of each class.");

    /**
     * Number of instances per concept. This option is declared here but is
     * not read anywhere inside this class.
     */
    public IntOption numInstancesConcept = new IntOption("numInstancesConcept", 'n',
            "The number of instances for each concept.", 0, 0, Integer.MAX_VALUE);

    /**
     * Percentage (0-100) of instances whose class label is flipped.
     *
     * <p>The command line character is 'p': 'n' is already claimed by
     * {@link #numInstancesConcept}, and two options that share one character
     * cannot both be addressed from the command line.
     */
    public IntOption noisePercentageOption = new IntOption("noisePercentage",
            'p', "Percentage of noise to add to the data.", 10, 0, 100);

    /** Maps the three attribute values of an instance to a class index. */
    protected interface ClassFunction {

        /**
         * @param attrib1 first attribute value
         * @param attrib2 second attribute value
         * @param attrib3 third attribute value
         * @return the class index for the given attribute values
         */
        int determineClass(double attrib1, double attrib2, double attrib3);
    }

    /**
     * The four built-in concepts, indexed by {@code functionOption - 1}.
     * Each one yields class 0 when {@code attrib1 + attrib2} does not exceed
     * its threshold and class 1 otherwise.
     */
    protected static ClassFunction[] classificationFunctions = {
            sumThresholdFunction(8),   // function 1
            sumThresholdFunction(9),   // function 2
            sumThresholdFunction(7),   // function 3
            sumThresholdFunction(9.5)  // function 4
    };

    /** Builds a function returning 0 if attrib1 + attrib2 <= threshold, else 1. */
    private static ClassFunction sumThresholdFunction(final double threshold) {
        return new ClassFunction() {
            public int determineClass(double attrib1, double attrib2, double attrib3) {
                return (attrib1 + attrib2 <= threshold) ? 0 : 1;
            }
        };
    }

    /** Header describing the three numeric attributes and the class attribute. */
    protected InstancesHeader streamHeader;

    /** Source of all randomness; re-created by {@link #restart()}. */
    protected Random instanceRandom;

    /** Class expected from the next instance when class balancing is enabled. */
    protected boolean nextClassShouldBeZero;

    /**
     * Builds the stream header (three numeric attributes plus a nominal class
     * with the labels "groupA" and "groupB", the class being the last
     * attribute) and then resets the generator state.
     *
     * @param monitor    not used by this implementation
     * @param repository not used by this implementation
     */
    @Override
    protected void prepareForUseImpl(TaskMonitor monitor,
                                     ObjectRepository repository) {
        FastVector classLabels = new FastVector();
        classLabels.addElement("groupA");
        classLabels.addElement("groupB");

        FastVector attributes = new FastVector();
        attributes.addElement(new Attribute("attrib1"));
        attributes.addElement(new Attribute("attrib2"));
        attributes.addElement(new Attribute("attrib3"));
        attributes.addElement(new Attribute("class", classLabels));

        // The relation name is the CLI string that recreates this stream.
        this.streamHeader = new InstancesHeader(new Instances(
                getCLICreationString(InstanceStream.class), attributes, 0));
        this.streamHeader.setClassIndex(this.streamHeader.numAttributes() - 1);
        restart();
    }

    /**
     * @return always -1; presumably the framework's marker for a stream of
     *         unknown or unbounded length (confirm against InstanceStream)
     */
    public long estimatedRemainingInstances() {
        return -1;
    }

    /** @return the header built in {@code prepareForUseImpl} */
    public InstancesHeader getHeader() {
        return this.streamHeader;
    }

    /** @return always {@code true}; this generator never runs out of instances */
    public boolean hasMoreInstances() {
        return true;
    }

    /** @return always {@code true} */
    public boolean isRestartable() {
        return true;
    }

    /**
     * Generates the next instance of the stream.
     *
     * <p>Attribute triples are drawn until one is accepted. Without class
     * balancing the first draw is always accepted; with balancing a draw is
     * accepted only if its class matches the class expected next, after which
     * the expected class is toggled. Noise is applied after acceptance, so
     * the balancing refers to the noise-free labels.
     *
     * @return a new instance attached to this stream's header
     */
    public Instance nextInstance() {
        // Neither option can change while this call is running, so both are
        // looked up once instead of on every rejected draw.
        final ClassFunction classifier =
                classificationFunctions[this.functionOption.getValue() - 1];
        final boolean balanceClasses = this.balanceClassesOption.isSet();

        double attrib1;
        double attrib2;
        double attrib3;
        int group;
        do {
            attrib1 = 10 * this.instanceRandom.nextDouble();
            attrib2 = 10 * this.instanceRandom.nextDouble();
            attrib3 = 10 * this.instanceRandom.nextDouble();
            group = classifier.determineClass(attrib1, attrib2, attrib3);
        } while (balanceClasses
                && group != (this.nextClassShouldBeZero ? 0 : 1));

        if (balanceClasses) {
            this.nextClassShouldBeZero = !this.nextClassShouldBeZero;
        }

        // nextInt(100) is uniform over 0..99, so exactly noisePercentage of
        // the 100 possible outcomes flip the label (0 -> never, 100 -> always).
        if (this.instanceRandom.nextInt(100) < this.noisePercentageOption.getValue()) {
            group = (group == 0) ? 1 : 0;
        }

        InstancesHeader header = getHeader();
        Instance inst = new DenseInstance(header.numAttributes());
        inst.setValue(0, attrib1);
        inst.setValue(1, attrib2);
        inst.setValue(2, attrib3);
        // The dataset has to be attached before the class value is set,
        // because the class index is taken from the header.
        inst.setDataset(header);
        inst.setClassValue(group);
        return inst;
    }

    /**
     * Re-seeds the random generator from {@link #instanceRandomSeedOption}
     * and resets the class-balancing state so that class 1 is expected first.
     */
    public void restart() {
        this.instanceRandom = new Random(this.instanceRandomSeedOption
                .getValue());
        this.nextClassShouldBeZero = false;
    }

    /**
     * Intentionally appends nothing; no textual description is implemented
     * for this generator.
     *
     * @param sb     builder that would receive the description
     * @param indent indentation level requested by the caller
     */
    public void getDescription(StringBuilder sb, int indent) {
        // TODO: describe the generator configuration.
    }
}