/*
* Encog(tm) Core v3.4 - Java Version
* http://www.heatonresearch.com/encog/
* https://github.com/encog/encog-java-core
* Copyright 2008-2016 Heaton Research, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* For more information on Heaton Research copyrights, licenses
* and trademarks visit:
* http://www.heatonresearch.com/copyright
*/
package org.encog.ensemble.adaboost;
import java.util.ArrayList;
import org.encog.ensemble.Ensemble;
import org.encog.ensemble.EnsembleAggregator;
import org.encog.ensemble.EnsembleML;
import org.encog.ensemble.EnsembleMLMethodFactory;
import org.encog.ensemble.EnsembleTrainFactory;
import org.encog.ensemble.EnsembleTypes;
import org.encog.ensemble.EnsembleTypes.ProblemType;
import org.encog.ensemble.EnsembleWeightedAggregator;
import org.encog.ensemble.GenericEnsembleML;
import org.encog.ensemble.data.EnsembleDataSet;
import org.encog.ensemble.data.factories.ResamplingDataSetFactory;
import org.encog.mathutil.VectorAlgebra;
import org.encog.ml.data.MLDataPair;
import org.encog.ml.data.MLDataSet;
import org.encog.ml.train.MLTrain;
/**
 * Boosting ensemble: members are trained one after another on data sets drawn
 * by a {@link ResamplingDataSetFactory}, and a per-sample weight vector
 * ({@code D}) is updated after every member so that it can steer the next
 * resampling. Each member also receives a scalar weight that is handed to the
 * {@link EnsembleWeightedAggregator}.
 */
public class AdaBoost extends Ensemble {

    /**
     * Bound used to keep the misclassification rate strictly inside (0, 1)
     * before it is fed to the logarithm in {@link #updateD}; without it a
     * perfect (or completely wrong) member produces an infinite alpha and
     * NaN sample weights.
     */
    private static final double EPSILON_FLOOR = 1e-10;

    /** Number of members this ensemble is meant to hold. */
    private int T;
    /** Helper used for the ideal/output dot product in {@link #updateD}. */
    private VectorAlgebra va;
    /** One weight per member, index-aligned with {@code members}. */
    private ArrayList<Double> weights;
    /** Current per-sample weights, one entry per sample of the data source. */
    private ArrayList<Double> D;
    /** Same object as {@code aggregator}, kept with its weighted type. */
    private EnsembleWeightedAggregator weightedAggregator;

    /**
     * Creates an empty ensemble; no member is trained here.
     *
     * @param iterationsT  number of members {@link #train} will create
     * @param dataSetSize  size passed to the resampling data set factory
     * @param mlFactory    factory producing (and re-initialising) the member models
     * @param trainFactory factory producing a trainer for a model and data set
     * @param aggregator   weighted aggregator that receives the member weights
     */
    public AdaBoost(int iterationsT, int dataSetSize, EnsembleMLMethodFactory mlFactory, EnsembleTrainFactory trainFactory, EnsembleWeightedAggregator aggregator) {
        this.dataSetFactory = new ResamplingDataSetFactory(dataSetSize);
        this.T = iterationsT;
        this.mlFactory = mlFactory;
        this.va = new VectorAlgebra();
        this.weights = new ArrayList<Double>();
        this.members = new ArrayList<EnsembleML>();
        this.trainFactory = trainFactory;
        this.weightedAggregator = aggregator;
        this.aggregator = aggregator;
        this.D = new ArrayList<Double>();
    }

    /**
     * Trains one new member on a freshly drawn data set, appends it (and its
     * weight) to the ensemble and updates the per-sample weights.
     *
     * @param targetAccuracy value forwarded to the member's training call
     * @param selectionError a member is accepted once its error on
     *                       {@code testset} is not above this value
     * @param maxIterations  value forwarded to the member's training call
     * @param maxLoops       number of training attempts after which training
     *                       is aborted
     * @param testset        data used for the acceptance check
     * @param verbose        value forwarded to the member's training call
     * @throws TrainingAborted when the attempt counter reaches {@code maxLoops}
     */
    private void createMember(double targetAccuracy, double selectionError, int maxIterations, int maxLoops, EnsembleDataSet testset, boolean verbose) throws TrainingAborted {
        dataSetFactory.setSignificance(D);
        MLDataSet thisSet = dataSetFactory.getNewDataSet();
        GenericEnsembleML newML = new GenericEnsembleML(
                mlFactory.createML(dataSetFactory.getInputData().getInputSize(), dataSetFactory.getInputData().getIdealSize()),
                mlFactory.getLabel());
        int attempts = 0;
        // Re-initialise and retrain the same model until it passes the
        // selection check. The attempt counter is tested before the selection
        // error, so the maxLoops-th attempt always aborts.
        do {
            mlFactory.reInit(newML.getMl());
            MLTrain train = trainFactory.getTraining(newML.getMl(), thisSet);
            newML.setTraining(train);
            newML.train(targetAccuracy, maxIterations, verbose);
            attempts++;
            if (attempts >= maxLoops) {
                throw new TrainingAborted("Max retraining iterations reached");
            }
        } while (newML.getError(testset) > selectionError);
        double newWeight = getWeightedError(newML, thisSet);
        members.add(newML);
        weights.add(newWeight);
        weightedAggregator.setWeights(weights);
        D = updateD(newML, dataSetFactory.getDataSource(), D);
    }

    /**
     * Grows or shrinks the ensemble to {@code newSize} members. Growing trains
     * the missing members; shrinking drops the most recently added members
     * together with their weights and pushes the trimmed weight list to the
     * aggregator. The per-sample weights {@code D} are not rewound when
     * shrinking.
     *
     * @param newSize        desired number of members, must not be negative
     * @param targetAccuracy forwarded to member creation when growing
     * @param selectionError forwarded to member creation when growing
     * @param maxIterations  forwarded to member creation when growing
     * @param maxLoops       forwarded to member creation when growing
     * @param testset        forwarded to member creation when growing
     * @param verbose        forwarded to member creation when growing
     * @throws TrainingAborted          if a new member could not be trained
     * @throws IllegalArgumentException if {@code newSize} is negative
     */
    public void resize(int newSize, double targetAccuracy, double selectionError, int maxIterations, int maxLoops, EnsembleDataSet testset, boolean verbose) throws TrainingAborted {
        if (newSize < 0) {
            throw new IllegalArgumentException("newSize must not be negative: " + newSize);
        }
        if (newSize > T) {
            for (int i = T; i < newSize; i++) {
                createMember(targetAccuracy, selectionError, maxIterations, maxLoops, testset, verbose);
            }
        } else if (newSize < T) {
            // Remove from the tail, starting at the last valid index. The
            // bounds come from the lists themselves so this is also safe when
            // fewer than T members have actually been trained.
            for (int i = members.size() - 1; i >= newSize; i--) {
                members.remove(i);
            }
            // Keep weights index-aligned with members.
            for (int i = weights.size() - 1; i >= newSize; i--) {
                weights.remove(i);
            }
            weightedAggregator.setWeights(weights);
        }
        T = newSize;
    }

    /**
     * Creates {@code T} members in sequence.
     *
     * @throws TrainingAborted if any member could not be trained
     */
    @Override
    public void train(double targetAccuracy, double selectionError, int maxIterations, int maxLoops, EnsembleDataSet testset, boolean verbose) throws TrainingAborted {
        for (int i = 0; i < T; i++) {
            createMember(targetAccuracy, selectionError, maxIterations, maxLoops, testset, verbose);
        }
    }

    /**
     * Fraction of samples in {@code dataSet} for which the member's predicted
     * class differs from the winner of the ideal vector.
     *
     * @return misclassification rate in [0, 1]; 0 for an empty data set
     */
    private double epsilon(GenericEnsembleML ml, MLDataSet dataSet) {
        int total = dataSet.size();
        if (total == 0) {
            return 0.0;
        }
        int bad = 0;
        for (MLDataPair data : dataSet) {
            if (ml.classify(data.getInput()) != ml.winner(data.getIdeal())) {
                bad++;
            }
        }
        return (double) bad / total;
    }

    /**
     * Computes the next per-sample weight vector from the current one:
     * {@code D_t(i) * exp(-alpha * <ideal_i, output_i>)} with
     * {@code alpha = ln((1 - eps) / eps)} and {@code eps} the member's
     * misclassification rate on {@code dataSet}. The result is not normalised.
     *
     * @param ml      member that was just trained
     * @param dataSet samples the weights refer to
     * @param D_t     current weights, at least {@code dataSet.size()} entries
     * @return new list with one weight per sample of {@code dataSet}
     */
    private ArrayList<Double> updateD(GenericEnsembleML ml, MLDataSet dataSet, ArrayList<Double> D_t) {
        ArrayList<Double> D_tplus1 = new ArrayList<Double>();
        double epsilon = epsilon(ml, dataSet);
        // Keep epsilon away from 0 and 1 so alpha stays finite.
        double bounded = Math.min(Math.max(epsilon, EPSILON_FLOOR), 1.0 - EPSILON_FLOOR);
        double alpha_t = Math.log((1 - bounded) / bounded);
        for (int i = 0; i < dataSet.size(); i++) {
            MLDataPair sample = dataSet.get(i);
            double agreement = va.dotProduct(sample.getIdeal().getData(), ml.compute(sample.getInput()).getData());
            double D_tplus1_i = D_t.get(i) * Math.exp(-alpha_t * agreement);
            D_tplus1.add(D_tplus1_i);
        }
        return D_tplus1;
    }

    /**
     * Appends one uniform weight ({@code 1 / size}) to {@code D} for every
     * sample of the data source.
     */
    @Override
    public void initMembers() {
        int dss = dataSetFactory.getDataSourceSize();
        for (int k = 0; k < dss; k++) {
            D.add(1.0 / dss);
        }
    }

    /**
     * Sums the significance of every sample in {@code dataSet} that the member
     * classifies correctly. Note that, despite the method name, the sum runs
     * over the correctly classified samples; the result is used as the
     * member's aggregation weight.
     */
    private double getWeightedError(GenericEnsembleML newML, MLDataSet dataSet) {
        double sum = 0;
        for (int i = 0; i < dataSet.size(); i++) {
            MLDataPair currentData = dataSet.get(i);
            if (newML.classify(currentData.getInput()) == newML.winner(currentData.getIdeal())) {
                sum += currentData.getSignificance();
            }
        }
        return sum;
    }

    /**
     * Not supported: members are only created by this class itself.
     *
     * @throws NotPossibleInThisMethod always
     */
    @Override
    public void addMember(EnsembleML newMember) throws NotPossibleInThisMethod {
        throw new NotPossibleInThisMethod();
    }

    /**
     * @return always {@link EnsembleTypes.ProblemType#CLASSIFICATION}
     */
    @Override
    public ProblemType getProblemType() {
        return EnsembleTypes.ProblemType.CLASSIFICATION;
    }
}