package tr.gov.ulakbim.jDenetX.experiments.wrappers;
/*
*
* Copyright (C) 2010 TÜBİTAK ULAKBİM, Ankara, Turkey
* @author caglar (caglar@ulakbim.gov.tr)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your ption) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
import tr.gov.ulakbim.jDenetX.classifiers.AbstractClassifier;
import tr.gov.ulakbim.jDenetX.classifiers.Classifier;
import tr.gov.ulakbim.jDenetX.classifiers.SelfOzaBoostID;
import tr.gov.ulakbim.jDenetX.core.VotedInstance;
import tr.gov.ulakbim.jDenetX.core.VotedInstancePool;
import tr.gov.ulakbim.jDenetX.evaluation.BasicClassificationPerformanceEvaluator;
import tr.gov.ulakbim.jDenetX.evaluation.ClassificationPerformanceEvaluator;
import tr.gov.ulakbim.jDenetX.evaluation.LearningEvaluation;
import tr.gov.ulakbim.jDenetX.options.ClassOption;
import tr.gov.ulakbim.jDenetX.options.IntOption;
import tr.gov.ulakbim.jDenetX.streams.CachedInstancesStream;
import tr.gov.ulakbim.jDenetX.streams.InstanceStream;
import weka.clusterers.ClusterEvaluation;
import weka.clusterers.SimpleKMeans;
import weka.clusterers.XMeans;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;
import java.util.*;
class EvalActiveBoostingID {
public String getPurposeString() {
return "Evaluates a Cotrain model on a stream.";
}
/**
* Generated Serial ID:
*/
private static final long serialVersionUID = 1586815404797353243L;
private int MaxInstances = 1000000;
private static final int INSTANCES_BETWEEN_MONITOR_UPDATES = 10;
public ClassOption modelOption = new ClassOption("model", 'm',
"Classifier to evaluate.", Classifier.class, "LearnClusterBaggingModel");
public ClassOption testStreamOption = new ClassOption("test_stream",
't', "test on a Stream to evaluate on.", InstanceStream.class,
"generators.RandomTreeGenerator");
public ClassOption streamOption = new ClassOption("stream", 's',
"Stream to evaluate on.", InstanceStream.class,
"generators.RandomTreeGenerator");
public ClassOption evaluatorOption = new ClassOption("evaluator", 'e',
"Classification performance evaluation method.",
ClassificationPerformanceEvaluator.class,
"BasicClassificationPerformanceEvaluator");
public IntOption maxInstancesOption = new IntOption("maxInstances", 'i',
"Maximum number of instances to test.", 1000000, 0,
Integer.MAX_VALUE);
public IntOption poolSizeOption = new IntOption("poolRatio", 'p',
"Maximum amount ratio for pool size.", 1000, 0,
Integer.MAX_VALUE);
private static final int SAMPLING_LIMIT = 10;
private static SelfOzaBoostID model;
private static int noOfClassesInPool = 1;
public EvalActiveBoostingID() {}
public EvalActiveBoostingID (Classifier model, InstanceStream stream,
ClassificationPerformanceEvaluator evaluator, int maxInstances) {
this.modelOption.setCurrentObject(model);
this.streamOption.setCurrentObject(stream);
this.evaluatorOption.setCurrentObject(evaluator);
this.maxInstancesOption.setValue(maxInstances);
}
public Class<?> getTaskResultType() {
return LearningEvaluation.class;
}
public int getMaxInstances() {
return MaxInstances;
}
public void setMaxInstances(int maxInstances) {
MaxInstances = maxInstances;
}
protected int selfTest(InstanceStream testStream) {
int returnStatus = 1;
Instance testInst = null;
int maxInstances = this.maxInstancesOption.getValue();
long instancesProcessed = 0;
//InstanceStream testStream = (InstanceStream) getPreparedClassOption(this.testStreamOption);
ClassificationPerformanceEvaluator evaluator = new BasicClassificationPerformanceEvaluator();
evaluator.reset();
while (testStream.hasMoreInstances()
&& ((maxInstances < 0) || (instancesProcessed < maxInstances))) {
testInst = (Instance) testStream.nextInstance().copy();
int trueClass = (int) testInst.classValue();
testInst.setClassMissing();
double[] prediction = model.getVotesForInstance(testInst);
evaluator.addClassificationAttempt(trueClass, prediction, testInst
.weight());
instancesProcessed++;
if (instancesProcessed % INSTANCES_BETWEEN_MONITOR_UPDATES == 0) {
long estimatedRemainingInstances = testStream
.estimatedRemainingInstances();
if (maxInstances > 0) {
long maxRemaining = maxInstances - instancesProcessed;
if ((estimatedRemainingInstances < 0)
|| (maxRemaining < estimatedRemainingInstances)) {
estimatedRemainingInstances = maxRemaining;
}
}
System.out.println(estimatedRemainingInstances < 0 ? -1.0
: (double) instancesProcessed
/ (double) (instancesProcessed + estimatedRemainingInstances));
}
}
return returnStatus;
}
protected void selfTrain(Instance testInst) {
int maxInstances = this.maxInstancesOption.getValue();
int poolSizeRatio = poolSizeOption.getValue();
int poolLimit = maxInstances / poolSizeRatio;
int poolCount = 0;
VotedInstancePool vInstPool = SelfOzaBoostID.getVotedInstancePool();
noOfClassesInPool = vInstPool.getNoOfClasses();
System.out.println("No of instances in the pool: " + vInstPool.getSize());
System.out.println("No of classes in the pool: " + noOfClassesInPool);
if (vInstPool.getSize() > 10) {
ArrayList<Attribute> attrs = new ArrayList<Attribute>();
for (int i = 0; i < testInst.numAttributes(); i++) {
attrs.add(testInst.attribute(i));
}
Instances instances = new Instances("instances", attrs, vInstPool.getSize());
Iterator instanceIt = vInstPool.iterator();
System.out.println("Size of pool: " + vInstPool.getSize());
while (instanceIt.hasNext() && poolCount < poolLimit) {
VotedInstance vInstance = (VotedInstance) instanceIt.next();
((Instances) instances).add(vInstance.getInstance());
poolCount++;
}
System.out.println("Size of instances: " + instances.size());
instances = clusterInstances(instances);
InstanceStream activeStream = new CachedInstancesStream((Instances) instances);
System.out.println("Selftraining have been started");
System.out.println("Number of self training instances: " + instances.numInstances());
long treeSize = vInstPool.getSize();
long limit = treeSize / SAMPLING_LIMIT;
Instance inst = null;
for (long j = 0; j < limit && activeStream.hasMoreInstances(); j++) {
inst = activeStream.nextInstance();
if (inst.numAttributes() == attrs.size()) {
model.trainOnInstance(inst);
}
}
}
}
public LearningEvaluation evalModel(InstanceStream trainStream, InstanceStream testStream, AbstractClassifier model) {
model = new SelfOzaBoostID();
InstanceStream stream = (InstanceStream)trainStream.copy();
ClassificationPerformanceEvaluator evaluator = new BasicClassificationPerformanceEvaluator();
Instance testInst = null;
int maxInstances = this.maxInstancesOption.getValue();
long instancesProcessed = 0;
System.out.println("Evaluating model...");
while (stream.hasMoreInstances()
&& ((maxInstances < 0) || (instancesProcessed < maxInstances))) {
testInst = (Instance) stream.nextInstance().copy();
int trueClass = (int) testInst.classValue();
testInst.setClassMissing();
double[] prediction = model.getVotesForInstance(testInst);
evaluator.addClassificationAttempt(trueClass, prediction, testInst
.weight());
instancesProcessed++;
if (instancesProcessed % INSTANCES_BETWEEN_MONITOR_UPDATES == 0) {
long estimatedRemainingInstances = stream
.estimatedRemainingInstances();
if (maxInstances > 0) {
long maxRemaining = maxInstances - instancesProcessed;
if ((estimatedRemainingInstances < 0)
|| (maxRemaining < estimatedRemainingInstances)) {
estimatedRemainingInstances = maxRemaining;
}
}
System.out.println(estimatedRemainingInstances < 0 ? -1.0
: (double) instancesProcessed
/ (double) (instancesProcessed + estimatedRemainingInstances));
}
}
System.out.println("Accuracy result before self-train: " + evaluator.getPerformanceMeasurements()[1]);
selfTrain(testInst);
int returnStatus = selfTest(testStream);
EvalActiveBoostingID.model.resetLearningImpl(); //Learning is completed so we can reset
return new LearningEvaluation(evaluator.getPerformanceMeasurements());
}
public Instances clusteredInstances(Instances data) {
if (data == null) {
throw new NullPointerException("Data is null at clusteredInstances method");
}
Instances sampled_data = data;
for (int i = 0; i < sampled_data.numInstances(); i++) {
sampled_data.remove(i);
}
SimpleKMeans sKmeans = new SimpleKMeans();
data.setClassIndex(data.numAttributes() - 1);
Remove filter = new Remove();
filter.setAttributeIndices("" + (data.classIndex() + 1));
List assignments = new ArrayList();
try {
filter.setInputFormat(data);
Instances dataClusterer = Filter.useFilter(data, filter);
String[] options = new String[3];
options[0] = "-I"; // max. iterations
options[1] = "500";
options[2] = "-O";
sKmeans.setNumClusters(data.numClasses());
sKmeans.setOptions(options);
sKmeans.buildClusterer(dataClusterer);
System.out.println("Kmeans\n:" + sKmeans);
System.out.println(Arrays.toString(sKmeans.getAssignments()));
assignments = Arrays.asList(sKmeans.getAssignments());
} catch (Exception e) {
e.printStackTrace();
}
System.out.println("Assignments\n: " + assignments);
ClusterEvaluation eval = new ClusterEvaluation();
eval.setClusterer(sKmeans);
try {
eval.evaluateClusterer(data);
} catch (Exception e) {
e.printStackTrace();
}
int classesToClustersMap[] = eval.getClassesToClusters();
for (int i = 0; i < classesToClustersMap.length; i++) {
if (assignments.get(i).equals(((Integer) classesToClustersMap[(int) data.get(i).classValue()]))) {
((Instances) sampled_data).add(data.get(i));
}
}
return ((Instances) sampled_data);
}
public static Instances clusterInstances(Instances data) {
XMeans xmeans = new XMeans();
Remove filter = new Remove();
Instances dataClusterer = null;
if (data == null) {
throw new NullPointerException("Data is null at clusteredInstances method");
}
//Get the attributes from the data for creating the sampled_data object
ArrayList<Attribute> attrList = new ArrayList<Attribute>();
Enumeration attributes = data.enumerateAttributes();
while (attributes.hasMoreElements()) {
attrList.add((Attribute) attributes.nextElement());
}
Instances sampled_data = new Instances(data.relationName(), attrList, 0);
data.setClassIndex(data.numAttributes() - 1);
sampled_data.setClassIndex(data.numAttributes() - 1);
filter.setAttributeIndices("" + (data.classIndex() + 1));
data.remove(0);//In Wavelet Stream of MOA always the first element comes without class
try {
filter.setInputFormat(data);
dataClusterer = Filter.useFilter(data, filter);
String[] options = new String[4];
options[0] = "-L"; // max. iterations
options[1] = Integer.toString(noOfClassesInPool - 1);
if (noOfClassesInPool > 2) {
options[1] = Integer.toString(noOfClassesInPool - 1);
xmeans.setMinNumClusters(noOfClassesInPool - 1);
} else {
options[1] = Integer.toString(noOfClassesInPool);
xmeans.setMinNumClusters(noOfClassesInPool);
}
xmeans.setMaxNumClusters(data.numClasses() + 1);
System.out.println("No of classes in the pool: " + noOfClassesInPool);
xmeans.setUseKDTree(true);
//xmeans.setOptions(options);
xmeans.buildClusterer(dataClusterer);
System.out.println("Xmeans\n:" + xmeans);
} catch (Exception e) {
e.printStackTrace();
}
//System.out.println("Assignments\n: " + assignments);
ClusterEvaluation eval = new ClusterEvaluation();
eval.setClusterer(xmeans);
try {
eval.evaluateClusterer(data);
int classesToClustersMap[] = eval.getClassesToClusters();
//check the classes to cluster map
int clusterNo = 0;
for (int i = 0; i < data.size(); i++) {
clusterNo = xmeans.clusterInstance(dataClusterer.get(i));
//Check if the class value of instance and class value of cluster matches
if ((int) data.get(i).classValue() == classesToClustersMap[clusterNo]) {
sampled_data.add(data.get(i));
}
}
} catch (Exception e) {
e.printStackTrace();
}
return ((Instances) sampled_data);
}
}