/*
* AgrawalGenerator.java
* Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
* @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
package tr.gov.ulakbim.jDenetX.streams.generators;
import tr.gov.ulakbim.jDenetX.core.InstancesHeader;
import tr.gov.ulakbim.jDenetX.core.ObjectRepository;
import tr.gov.ulakbim.jDenetX.options.AbstractOptionHandler;
import tr.gov.ulakbim.jDenetX.options.FlagOption;
import tr.gov.ulakbim.jDenetX.options.FloatOption;
import tr.gov.ulakbim.jDenetX.options.IntOption;
import tr.gov.ulakbim.jDenetX.streams.InstanceStream;
import tr.gov.ulakbim.jDenetX.tasks.TaskMonitor;
import weka.core.*;
import java.util.Random;
// Generator described in paper:
// Rakesh Agrawal, Tomasz Imielinski, and Arun Swami,
// "Database Mining: A Performance Perspective",
// IEEE Transactions on Knowledge and Data Engineering,
// 5(6), December 1993.
// Public C source code available at:
// http://www.almaden.ibm.com/cs/projects/iis/hdb/Projects/data_mining/datasets/syndata.html
//
// Notes:
// The built in functions are based on the paper (page 924),
// which turn out to be functions pred20 thru pred29 in the public C implementation
// Perturbation function works like C implementation rather than description in paper
/**
 * Endless, restartable instance stream that produces synthetic
 * loan-application records and labels each one with one of ten built-in
 * classification functions.
 *
 * <p>Every instance has nine predictive attributes (salary, commission, age,
 * elevel, car, zipcode, hvalue, hyears, loan) followed by a two-valued class
 * attribute ("groupA" = 0, "groupB" = 1). The class is computed from the
 * un-perturbed attribute values; the numeric attributes are perturbed
 * afterwards, so perturbation acts as attribute noise and never changes the
 * label that was assigned.
 */
public class AgrawalGenerator extends AbstractOptionHandler implements
        InstanceStream {

    @Override
    public String getPurposeString() {
        return "Generates one of ten different pre-defined loan functions.";
    }

    private static final long serialVersionUID = 1L;

    /**
     * Upper bound of the house value range. hvalue is drawn as
     * (9 - zipcode) * 100000 * (0.5 + u) with zipcode in 0..8 and u in [0, 1),
     * so it never reaches 9 * 100000 * 1.5 = 1350000.
     */
    private static final double MAX_HVALUE = 1350000.0;

    /** 1-based index (1..10) of the classification function used for labelling. */
    public IntOption functionOption = new IntOption("function", 'f',
            "Classification function used, as defined in the original paper.",
            1, 1, 10);

    /** Seed of the random generator that is (re)created by {@link #restart()}. */
    public IntOption instanceRandomSeedOption = new IntOption(
            "instanceRandomSeed", 'i',
            "Seed for random generation of instances.", 1);

    /**
     * Fraction (0..1) of an attribute's range used as the maximum perturbation
     * amplitude; a value of 0 disables perturbation.
     */
    public FloatOption peturbFractionOption = new FloatOption("peturbFraction",
            'p',
            "The amount of peturbation (noise) introduced to numeric values.",
            0.05, 0.0, 1.0);

    /** When set, generated instances alternate strictly between the two classes. */
    public FlagOption balanceClassesOption = new FlagOption("balanceClasses",
            'b', "Balance the number of instances of each class.");

    /**
     * A labelling rule: maps the nine attribute values of a record to a class
     * index (0 or 1 for the built-in functions).
     */
    protected interface ClassFunction {
        public int determineClass(double salary, double commission, int age,
                int elevel, int car, int zipcode, double hvalue, int hyears,
                double loan);
    }

    /**
     * The ten built-in labelling rules; function k is stored at index k - 1.
     */
    protected static ClassFunction[] classificationFunctions = {
        // function 1: depends on age only
        new ClassFunction() {
            public int determineClass(double salary, double commission,
                    int age, int elevel, int car, int zipcode,
                    double hvalue, int hyears, double loan) {
                return ((age < 40) || (60 <= age)) ? 0 : 1;
            }
        },
        // function 2: salary band selected by age group
        new ClassFunction() {
            public int determineClass(double salary, double commission,
                    int age, int elevel, int car, int zipcode,
                    double hvalue, int hyears, double loan) {
                if (age < 40) {
                    return ((50000 <= salary) && (salary <= 100000)) ? 0 : 1;
                } else if (age < 60) { // 40 <= age < 60
                    return ((75000 <= salary) && (salary <= 125000)) ? 0 : 1;
                } else { // age >= 60
                    return ((25000 <= salary) && (salary <= 75000)) ? 0 : 1;
                }
            }
        },
        // function 3: education levels selected by age group
        new ClassFunction() {
            public int determineClass(double salary, double commission,
                    int age, int elevel, int car, int zipcode,
                    double hvalue, int hyears, double loan) {
                if (age < 40) {
                    return ((elevel == 0) || (elevel == 1)) ? 0 : 1;
                } else if (age < 60) { // 40 <= age < 60
                    return ((elevel == 1) || (elevel == 2) || (elevel == 3)) ? 0
                            : 1;
                } else { // age >= 60
                    return ((elevel == 2) || (elevel == 3) || (elevel == 4)) ? 0
                            : 1;
                }
            }
        },
        // function 4: salary band selected by age group and education level
        new ClassFunction() {
            public int determineClass(double salary, double commission,
                    int age, int elevel, int car, int zipcode,
                    double hvalue, int hyears, double loan) {
                if (age < 40) {
                    if ((elevel == 0) || (elevel == 1)) {
                        return ((25000 <= salary) && (salary <= 75000)) ? 0 : 1;
                    }
                    return ((50000 <= salary) && (salary <= 100000)) ? 0 : 1;
                } else if (age < 60) { // 40 <= age < 60
                    if ((elevel == 1) || (elevel == 2) || (elevel == 3)) {
                        return ((50000 <= salary) && (salary <= 100000)) ? 0
                                : 1;
                    }
                    return ((75000 <= salary) && (salary <= 125000)) ? 0 : 1;
                } else { // age >= 60
                    if ((elevel == 2) || (elevel == 3) || (elevel == 4)) {
                        return ((50000 <= salary) && (salary <= 100000)) ? 0
                                : 1;
                    }
                    return ((25000 <= salary) && (salary <= 75000)) ? 0 : 1;
                }
            }
        },
        // function 5: loan band selected by age group and salary band
        new ClassFunction() {
            public int determineClass(double salary, double commission,
                    int age, int elevel, int car, int zipcode,
                    double hvalue, int hyears, double loan) {
                if (age < 40) {
                    if ((50000 <= salary) && (salary <= 100000)) {
                        return ((100000 <= loan) && (loan <= 300000)) ? 0 : 1;
                    }
                    return ((200000 <= loan) && (loan <= 400000)) ? 0 : 1;
                } else if (age < 60) { // 40 <= age < 60
                    if ((75000 <= salary) && (salary <= 125000)) {
                        return ((200000 <= loan) && (loan <= 400000)) ? 0 : 1;
                    }
                    return ((300000 <= loan) && (loan <= 500000)) ? 0 : 1;
                } else { // age >= 60
                    if ((25000 <= salary) && (salary <= 75000)) {
                        return ((300000 <= loan) && (loan <= 500000)) ? 0 : 1;
                    }
                    return ((100000 <= loan) && (loan <= 300000)) ? 0 : 1;
                }
            }
        },
        // function 6: like function 2, but on salary + commission
        new ClassFunction() {
            public int determineClass(double salary, double commission,
                    int age, int elevel, int car, int zipcode,
                    double hvalue, int hyears, double loan) {
                double totalSalary = salary + commission;
                if (age < 40) {
                    return ((50000 <= totalSalary) && (totalSalary <= 100000)) ? 0
                            : 1;
                } else if (age < 60) { // 40 <= age < 60
                    return ((75000 <= totalSalary) && (totalSalary <= 125000)) ? 0
                            : 1;
                } else { // age >= 60
                    return ((25000 <= totalSalary) && (totalSalary <= 75000)) ? 0
                            : 1;
                }
            }
        },
        // function 7: disposable income after loan cost
        new ClassFunction() {
            public int determineClass(double salary, double commission,
                    int age, int elevel, int car, int zipcode,
                    double hvalue, int hyears, double loan) {
                double disposable = (2.0 * (salary + commission) / 3.0
                        - loan / 5.0 - 20000.0);
                return disposable > 0 ? 0 : 1;
            }
        },
        // function 8: disposable income after education cost
        new ClassFunction() {
            public int determineClass(double salary, double commission,
                    int age, int elevel, int car, int zipcode,
                    double hvalue, int hyears, double loan) {
                double disposable = (2.0 * (salary + commission) / 3.0
                        - 5000.0 * elevel - 20000.0);
                return disposable > 0 ? 0 : 1;
            }
        },
        // function 9: disposable income after education and loan cost
        new ClassFunction() {
            public int determineClass(double salary, double commission,
                    int age, int elevel, int car, int zipcode,
                    double hvalue, int hyears, double loan) {
                double disposable = (2.0 * (salary + commission) / 3.0
                        - 5000.0 * elevel - loan / 5.0 - 10000.0);
                return disposable > 0 ? 0 : 1;
            }
        },
        // function 10: disposable income including home equity
        new ClassFunction() {
            public int determineClass(double salary, double commission,
                    int age, int elevel, int car, int zipcode,
                    double hvalue, int hyears, double loan) {
                // equity only accrues once the house has been owned 20 years
                double equity = 0.0;
                if (hyears >= 20) {
                    equity = hvalue * (hyears - 20.0) / 10.0;
                }
                double disposable = (2.0 * (salary + commission) / 3.0
                        - 5000.0 * elevel + equity / 5.0 - 10000.0);
                return disposable > 0 ? 0 : 1;
            }
        }};

    /** Header (attribute definitions) shared by all generated instances. */
    protected InstancesHeader streamHeader;

    /** Source of all randomness; re-seeded by {@link #restart()}. */
    protected Random instanceRandom;

    /** With class balancing on: whether the next emitted instance must be class 0. */
    protected boolean nextClassShouldBeZero;

    /**
     * Builds the stream header (nine predictive attributes plus the class
     * attribute, which is set as the class index) and restarts the stream.
     */
    @Override
    protected void prepareForUseImpl(TaskMonitor monitor,
            ObjectRepository repository) {
        // generate header
        FastVector attributes = new FastVector();
        attributes.addElement(new Attribute("salary"));
        attributes.addElement(new Attribute("commission"));
        attributes.addElement(new Attribute("age"));
        FastVector elevelLabels = new FastVector();
        for (int i = 0; i < 5; i++) {
            elevelLabels.addElement("level" + i);
        }
        attributes.addElement(new Attribute("elevel", elevelLabels));
        FastVector carLabels = new FastVector();
        for (int i = 0; i < 20; i++) {
            carLabels.addElement("car" + (i + 1));
        }
        attributes.addElement(new Attribute("car", carLabels));
        FastVector zipCodeLabels = new FastVector();
        for (int i = 0; i < 9; i++) {
            zipCodeLabels.addElement("zipcode" + (i + 1));
        }
        attributes.addElement(new Attribute("zipcode", zipCodeLabels));
        attributes.addElement(new Attribute("hvalue"));
        attributes.addElement(new Attribute("hyears"));
        attributes.addElement(new Attribute("loan"));
        FastVector classLabels = new FastVector();
        classLabels.addElement("groupA");
        classLabels.addElement("groupB");
        attributes.addElement(new Attribute("class", classLabels));
        this.streamHeader = new InstancesHeader(new Instances(
                getCLICreationString(InstanceStream.class), attributes, 0));
        this.streamHeader.setClassIndex(this.streamHeader.numAttributes() - 1);
        restart();
    }

    /** @return always -1; this stream never reports a remaining count */
    public long estimatedRemainingInstances() {
        return -1;
    }

    /** @return the header built by {@code prepareForUseImpl} */
    public InstancesHeader getHeader() {
        return this.streamHeader;
    }

    /** @return always {@code true}; instances are generated on demand */
    public boolean hasMoreInstances() {
        return true;
    }

    /** @return always {@code true} */
    public boolean isRestartable() {
        return true;
    }

    /**
     * Generates the next labelled instance.
     *
     * <p>Attribute values are drawn at random and labelled with the selected
     * classification function. If class balancing is enabled, records are
     * redrawn until one with the required class appears, and the required
     * class then flips. Finally, if the perturbation fraction is positive, the
     * numeric attributes are perturbed (the label is left as computed).
     *
     * @return a new instance attached to the stream header
     */
    public Instance nextInstance() {
        double salary = 0, commission = 0, hvalue = 0, loan = 0;
        int age = 0, elevel = 0, car = 0, zipcode = 0, hyears = 0, group = 0;
        boolean desiredClassFound = false;
        while (!desiredClassFound) {
            // generate attributes
            salary = 20000.0 + 130000.0 * this.instanceRandom.nextDouble();
            commission = (salary >= 75000.0) ? 0
                    : (10000.0 + 65000.0 * this.instanceRandom.nextDouble());
            // true to c implementation:
            // if (instanceRandom.nextDouble() < 0.5 && salary < 75000.0)
            // commission = 10000.0 + 65000.0 * instanceRandom.nextDouble();
            age = 20 + this.instanceRandom.nextInt(61);
            elevel = this.instanceRandom.nextInt(5);
            car = this.instanceRandom.nextInt(20);
            zipcode = this.instanceRandom.nextInt(9);
            hvalue = (9.0 - zipcode) * 100000.0
                    * (0.5 + this.instanceRandom.nextDouble());
            hyears = 1 + this.instanceRandom.nextInt(30);
            loan = this.instanceRandom.nextDouble() * 500000.0;
            // determine class
            group = classificationFunctions[this.functionOption.getValue() - 1]
                    .determineClass(salary, commission, age, elevel, car,
                            zipcode, hvalue, hyears, loan);
            if (!this.balanceClassesOption.isSet()) {
                desiredClassFound = true;
            } else {
                // balance the classes
                if ((this.nextClassShouldBeZero && (group == 0))
                        || (!this.nextClassShouldBeZero && (group == 1))) {
                    desiredClassFound = true;
                    this.nextClassShouldBeZero = !this.nextClassShouldBeZero;
                } // else keep searching
            }
        }
        // perturb values
        if (this.peturbFractionOption.getValue() > 0.0) {
            salary = perturbValue(salary, 20000, 150000);
            // a zero commission means "no commission" and is left untouched
            if (commission > 0) {
                commission = perturbValue(commission, 10000, 75000);
            }
            age = (int) Math.round(perturbValue(age, 20, 80));
            // The perturbation amplitude scales with the zipcode-specific
            // range; the clamp must allow the full hvalue domain, otherwise
            // nearly every house value collapses onto the upper bound.
            hvalue = perturbValue(hvalue, (9.0 - zipcode) * 100000.0, 0,
                    MAX_HVALUE);
            hyears = (int) Math.round(perturbValue(hyears, 1, 30));
            loan = perturbValue(loan, 0, 500000);
        }
        // construct instance
        InstancesHeader header = getHeader();
        Instance inst = new DenseInstance(header.numAttributes());
        inst.setValue(0, salary);
        inst.setValue(1, commission);
        inst.setValue(2, age);
        inst.setValue(3, elevel);
        inst.setValue(4, car);
        inst.setValue(5, zipcode);
        inst.setValue(6, hvalue);
        inst.setValue(7, hyears);
        inst.setValue(8, loan);
        inst.setDataset(header);
        inst.setClassValue(group);
        return inst;
    }

    /**
     * Perturbs {@code val} using {@code max - min} as the range and clamps the
     * result to {@code [min, max]}.
     *
     * @param val value to perturb
     * @param min lower clamp bound
     * @param max upper clamp bound
     * @return the perturbed, clamped value
     */
    protected double perturbValue(double val, double min, double max) {
        return perturbValue(val, max - min, min, max);
    }

    /**
     * Adds a random offset in
     * {@code [-range * fraction, +range * fraction)} to {@code val}, where
     * {@code fraction} is the current value of the perturbation option, and
     * clamps the result to {@code [min, max]}. Consumes one random double.
     *
     * @param val   value to perturb
     * @param range width that the perturbation fraction is applied to
     * @param min   lower clamp bound
     * @param max   upper clamp bound
     * @return the perturbed, clamped value
     */
    protected double perturbValue(double val, double range, double min,
            double max) {
        val += range * (2.0 * (this.instanceRandom.nextDouble() - 0.5))
                * this.peturbFractionOption.getValue();
        if (val < min) {
            val = min;
        } else if (val > max) {
            val = max;
        }
        return val;
    }

    /**
     * Re-creates the random generator from the configured seed and resets the
     * class-balancing state, so that the first balanced instance is class 1.
     */
    public void restart() {
        this.instanceRandom = new Random(this.instanceRandomSeedOption
                .getValue());
        this.nextClassShouldBeZero = false;
    }

    /**
     * Currently appends nothing to {@code sb}.
     *
     * @param sb     builder that a description would be appended to
     * @param indent indentation level (unused)
     */
    public void getDescription(StringBuilder sb, int indent) {
        // TODO Auto-generated method stub
    }
}