/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. Sánchez (luciano@uniovi.es) J. Alcalá-Fdez (jalcala@decsai.ugr.es) S. García (sglopez@ujaen.es) A. Fernández (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ /** * <p> * @author Written by Mikel Galar Idoate (Universidad Pública de Navarra) 30/5/2010 * @author Modified by Sarah Vluymans (University of Ghent) 29/01/2014 * @author Modified by Alberto Fernandez (University of Jaen) 28/05/2014 * @version 0.1 * @since JDK 1.5 *</p> */ package keel.Algorithms.ImbalancedClassification.Ensembles; import java.io.File; import keel.Algorithms.ImbalancedClassification.Ensembles.SMOTE.MSMOTE; import keel.Algorithms.ImbalancedClassification.Ensembles.SMOTE.SMOTE; import keel.Algorithms.ImbalancedClassification.Ensembles.SPIDER.SPIDER; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; import keel.Algorithms.ImbalancedClassification.Ensembles.Preprocess.Basic.Metodo; import org.core.Files; import org.core.Randomize; import keel.Algorithms.ImbalancedClassification.Auxiliar.AUC.PredPair; import keel.Algorithms.ImbalancedClassification.Ensembles.Preprocess.Instance_Selection.EUSCHCQstat.EUSCHCQstat; /** * <p>Title: Ensemble</p> * <p>Description: Class to implement the different ensemble methods for class imbalance problems * <p>Company: KEEL </p> * @author Created by Mikel Galar Idoate (UPNA) [30-05-10] * @author Modified by Alberto Fernandez (University of Jaen) 15/10/2012 * @author Modified by Sarah Vluymans (University of Ghent) * @version 1.2 * @since JDK1.6 */ class Ensemble { /* Parameters used by the ensemble */ String ensembleType; String trainMethod; int nClassifier; /* Iteration counter */ int t; /* For Cost-Sensitive */ /* Wether the costs are setled manually or they are configured depending on the IR */ String costType; /* Costs for the majority and minority classes */ double CostMaj, CostMin; int lambda; //for adaboostNC /* References to the original and the actual (modified by adaboost's data-distribution change) datasets */ myDataset originalDS, actualDS; /* weights of the instances for adaboost and weight of each classifier in the voting*/ double[] weights, alfa, penalization; /* Backup of the weights prior to the preprocessing, needed for RUSBoost and SMOTEBoost */ double[] weightsBackup; /* Percentage (N%) of the instances from the mayority class used in RUSBoost * Quantity of balancing for SMOTEBoosting and MSMOTEBoosting if N > 100 (else, the classes are balanced * in SMOTEBagging and MSMOTEBagging the classes are always balanced */ int N; /* Number of instances */ int nData; /* Number of majority and minority examples and their corresponding integer value*/ int nMaj, nMin; int majC, minC; /* Number of bags for EasyEnsemble and BalanceCascade techniques */ int nBags; /* Number of boosting iterations for EasyEnsemble and BalanceCascade */ int nBoostIterations; /* Theta values used in BalanceCascade for eliminating instances from the data-set after completing a bag */ double[] teta; /* Array to store the indexes of the previous data-set in the new one */ int[] selected; /* b parameters used in UnderOverBagging */ //ATENCION CREO QUE A NO SE USA int b; //UNDEROVERBAGGING and SMOTE/MSMOTEBagging /* Type of SPIDER preprocssing WEAK / RELABEL / STRONG */ String spiderType; /* Out-of-Bag estimation */ double e; /* ArrayList containg wether an instance was used to train a classifier of the ensemble or not * it is used in the computation of the Out-of-Bag error estimation */ ArrayList<boolean[]> trainingSetsOOB; /* Array for IIVOTES, it stores wether an instance in the out-of-bag estimation was correctly predicted or not * (if the instance was used for training, the prediction is false */ boolean[] predictions; /* Wether the preparation of the data-set is needed or not */ boolean prepareDSNeeded; /* Wether to develop a resampling in boosting procedures or to use the weights to train each tree */ boolean resampling; /* Reference to the base classifiers */ multi_C45 classifier; boolean[][] anteriores; boolean[][] salidasAnteriores; int nClassifierCounter; double[] pairwiseKappa; double[] errorMeanAUC, errorAUC; int[][] outputs; int pos; /** Constructor of the ensemble * * @param originalDS original data-set to perform the ensemble learning * @param nClassifier number of maximum classifiers in to boost * @param classifier the reference to the base classifiers */ public Ensemble(String ensembleType, myDataset originalDS, int nClassifier, int lambda, multi_C45 classifier) { /* Reading the ensemble type and the original data-set */ this.ensembleType = ensembleType; this.originalDS = originalDS; /* Initialization of helper variables */ nData = originalDS.getnData(); majC = originalDS.claseNumerica(originalDS.claseMasFrecuente()); nMaj = originalDS.numberInstances(majC); minC = majC == 0 ? 1 : 0; nMin = originalDS.numberInstances(minC); /* Initialize the weightes uniformly */ weights = new double[nData]; penalization = new double[nData]; originalDS.computeIR(); double max = 0; for (int i = 0; i < nData; i++){ weights[i] = 1.0 / (float)nData; } /* Initialize the data-set */ actualDS = originalDS; this.nClassifier = nClassifier; /* First iteration*/ t = 0; alfa = new double[nClassifier]; this.classifier = classifier; prepareDSNeeded = false; /* For KappaAUC diagrams */ pairwiseKappa = new double[nClassifier * (nClassifier - 1) / 2]; errorMeanAUC = new double[nClassifier * (nClassifier - 1) / 2]; errorAUC = new double[nClassifier]; outputs = new int[nClassifier][this.classifier.test.getnData()]; pos = 0; this.lambda = lambda; /************************************************************************* * Read the configuration depending on the ensemble type * ************************************************************************* */ int nextParameter = 6; /* IIVotes ensemble, IVotes + SPIDER! */ if (ensembleType.equalsIgnoreCase("IIVOTES")) { /* The data-set has to be preprocessed each time with SPIDER */ prepareDSNeeded = true; /* Read the seed to initialize the randomization */ Randomize.setSeed(Long.parseLong(classifier.parameters.getParameter(0))); /* Read the type of SPIDER preprocessing */ spiderType = classifier.parameters.getParameter(nextParameter++); /* Set the initial out-of-bag estimation to 0.5 and initialize * the structures used in OOB estimation */ e = 0.5; trainingSetsOOB = new ArrayList<boolean[]>(); predictions = new boolean[nData]; /* This method does not require more parameters */ return; } /* Bagging-based ensembles */ if (ensembleType.contains("BAG")) { /* Read the seed to initialize the randomization to perform the bootstrapping */ Randomize.setSeed(Long.parseLong(classifier.parameters.getParameter(0))); /* Multiples for UnderOverbagging and SMOTE/MSMOTEBagging, as recommended by their authors */ b = 10; if (ensembleType.contains("EUNDER")) { anteriores = new boolean[this.nClassifier][]; salidasAnteriores = new boolean[this.nClassifier][]; } } /* Boosting-, and Hybrid-based ensembles */ else if (ensembleType.contains("ADA") || ensembleType.contains("BOOST") || ensembleType.equalsIgnoreCase("EASYENSEMBLE") || ensembleType.equalsIgnoreCase("BALANCECASCADE")) { /* Read the train method: * RESAMPLING: resampling to obtain the desired data distribution * NORESAMPLING: use the weights to construct each tree */ trainMethod = classifier.parameters.getParameter(nextParameter++); if (trainMethod.equalsIgnoreCase("RESAMPLING")) { resampling = true; /* If resampling is used, we need to prepare the data-set and to read the seed */ prepareDSNeeded = true; Randomize.setSeed(Long.parseLong(classifier.parameters.getParameter(0))); } /* AdaC2 ensemble, Cost-sensitive Adaboost (version 2 with costs inside the exponent part of AdaBoost */ if (ensembleType.equalsIgnoreCase("ADAC2")) { /* Configure the costs, adaptive or manual */ costType = classifier.parameters.getParameter(nextParameter++); if (costType.equalsIgnoreCase("ADAPTIVE")) // Adaptive costs { CostMaj = (double)nMin / (double)nMaj; CostMin = 1.0; } else // Manual costs { CostMaj = Float.parseFloat(classifier.parameters.getParameter(nextParameter++)); CostMin = Float.parseFloat(classifier.parameters.getParameter(nextParameter++)); } } /* RUSBoost, SMOTE/MSMOTEBoost, that is, Boosting with preprocessing */ if (ensembleType.contains("RUSBOOST") || ensembleType.contains("SMOTEBOOST")) { /* Read the percentage of the majority class instances in the new data-set */ N = Integer.parseInt(classifier.parameters.getParameter(nextParameter++)); prepareDSNeeded = true; Randomize.setSeed(Long.parseLong(classifier.parameters.getParameter(0))); if (ensembleType.contains("ERUS")) { System.out.println(nextParameter); anteriores = new boolean[this.nClassifier][]; salidasAnteriores = new boolean[this.nClassifier][]; } } /* DATABOOST-IM algorithm */ else if (ensembleType.equalsIgnoreCase("DATABOOST-IM")) prepareDSNeeded = true; /* Hybrid-based ensembles, EasyEnsemble and BalanceCascade, * its unique difference is that BalanceCascade removes some instances from the data-set each iteration */ else if (ensembleType.equalsIgnoreCase("EASYENSEMBLE") || ensembleType.equalsIgnoreCase("BALANCECASCADE")) { /* Preparation is neede */ prepareDSNeeded = true; Randomize.setSeed(Long.parseLong(classifier.parameters.getParameter(0))); /* Read the number of bags */ nBags = Integer.parseInt(classifier.parameters.getParameter(nextParameter++)); /* nClassifier is the number of boosting iterations in this case */ nBoostIterations = nClassifier; /* The final number of classifiers is the number of bags times the number of adaboost iterations used in each bag */ this.nClassifier = nClassifier * nBags; alfa = new double[this.nClassifier]; /* In BalanceCascade we need to initialize the thetas */ if (ensembleType.equalsIgnoreCase("BALANCECASCADE")) teta = new double[this.nClassifier]; /* For KappaAUC diagrams */ pairwiseKappa = new double[this.nClassifier * (this.nClassifier - 1) / 2]; errorMeanAUC = new double[this.nClassifier * (this.nClassifier - 1) / 2]; errorAUC = new double[this.nClassifier]; outputs = new int[this.nClassifier][this.classifier.test.getnData()]; } } } /** Method to perform the voting strategy * * @param example instance to be predicted * @return the predicted class and the weighted voting sum */ PredPair computeClassScores(double[] example) { double sum = 0; // Weighted voting sum double confidence = 1; // Initial confidence is 1, it is used for the ensembles not using classifiers' confidences. /* Each classifier votes */ for (int t = 0; t < nClassifier; t++) { /* if alfa is 0, the classifier has not been initialized yet */ if (alfa[t] != 0) { /* Ensembles which do not use the confidences */ if (!(ensembleType.equalsIgnoreCase("ADAC2") || ensembleType.equalsIgnoreCase("ADABOOST") || ensembleType.equalsIgnoreCase("EASYENSEMBLE") || ensembleType.equalsIgnoreCase("BALANCECASCADE") || ensembleType.equalsIgnoreCase("ADABOOST.M1") || ensembleType.equalsIgnoreCase("DATABOOST-IM"))) confidence = classifier.obtainConfidence(t, example); /* A positive or a negative vote given depending on the predicted class */ if (classifier.obtainClass(t, example) == 0) sum += confidence * alfa[t]; else sum -= confidence * alfa[t]; } /* The adjusted theta is used in BalanceCascade */ if (teta != null) sum -= teta[t]; } /* The output class is selected depending on the sign of the weighted voting */ if (sum >= 0) return new PredPair(originalDS.getOutputValue(0), sum); else return new PredPair(originalDS.getOutputValue(1), sum); } /** The next iteration of the ensemble is performed depending on the type of ensemble * * @return true if the ensemble construction is finished */ boolean nextIteration() { boolean fin = false; // Whether the ensembles is finished or not /* For boosting-, and hybrid-based ensembles, the weights are updated */ if (ensembleType.contains("ADA") || ensembleType.contains("BOOST") || ensembleType.contains("EASYENSEMBLE") || ensembleType.contains("BALANCECASCADE")) fin = modifyWeights(); /* Bagging-based ensembles always use alfa = 1 */ else if (ensembleType.contains("BAG")) alfa[t] = 1; /* IIVotes needs to estimate the Out-of-bag error */ if (ensembleType.contains("IIVOTES")) { alfa[t] = 1; // OUT-OF-BAG ESTIMATION OF e(i) double e_i = outOfBagEstimation(originalDS, predictions); // if e(i) >= e(i - 1) then fin = true System.out.println("OOB error before = " + e); System.out.println("OOB error = " + e_i); e_i = 0.75 * e + 0.25 * e_i; System.out.println("OOB error = " + e_i); if (e_i < e) e = e_i; else { fin = true; alfa[t] = 0; } } /* The iteration counter is increased */ t++; /* The errors of the actual ensemble are computed and shown */ double total = classifier.classify(actualDS); System.out.println("Train err = " + total); if (1 - total < 0.001) //99.9% accuracy in train fin = true; total = classifier.classify(originalDS); System.out.println("Train original err = " + total); total = classifier.classify(classifier.test); System.out.println("Test err = " + total); return fin; } void writeAUCError(String outputTst) { String cadena = ""; for (int i = 0; i < pairwiseKappa.length; i++) { cadena += pairwiseKappa[i] + ", " + errorMeanAUC[i] + "\n"; } String sal = outputTst; sal.lastIndexOf("5-1.tst"); sal = sal.substring(0, sal.length() - 7); Files.addToFile(sal + "_KappaError.txt", cadena); Files.writeFile(outputTst + "_KappaError", cadena); } double computeKappa(int[] v1, int[] v2) { double solucion; int[][] confusion; confusion = new int[2][2]; double[] Tr = new double[2]; double[] Tc = new double[2]; for (int i = 0; i < 2; i++){ for (int j = 0; j < 2; j++) confusion[i][j] = 0; Tr[i] = Tc[i] = 0; } for (int i = 0; i < v1.length; i++){ confusion[v1[i]][v2[i]]++; Tr[v1[i]]++; Tc[v2[i]]++; } double sumDiagonales = 0.0, sumTrTc = 0.0; for(int i = 0; i < 2; i++){ sumDiagonales += confusion[i][i]; sumTrTc += Tr[i] * Tc[i]; } solucion = ((v1.length * sumDiagonales - sumTrTc) / (v1.length * v1.length - sumTrTc)); if (Double.isNaN(solucion)) solucion = 1.0; return solucion; } /** Theta is adjusted for the corresponding bag of BalanceCascade, given that the adjustation algorithm * is not explained, we eliminate those instances from the majority class correctly classified * in the current bag (with the highest confidence) * * @param bagNumber The bag number for which theta has to be adjusted * @return instances which has been correctly predicted by the current bag */ private boolean[] adjustTheta(int bagNumber) { boolean[] aciertos = new boolean[originalDS.getnData()]; double f = Math.pow((double)originalDS.numberInstances(minC) / (double)originalDS.numberInstances(majC), 1.0 / ((double)nBoostIterations - 1.0)); final Integer[] indexes = new Integer[originalDS.getnData()]; final double[] outputs = new double[originalDS.getnData()]; for (int i = 0; i < originalDS.getnData(); i++) { double[] example = originalDS.getExample(i); double sum = 0; double confidence = 1; for (int t = bagNumber * nBoostIterations; t < (bagNumber + 1) * nBoostIterations; t++) { if (alfa[t] != 0) { if (classifier.obtainClass(t, example) == 0) sum += confidence * alfa[t]; else sum -= confidence * alfa[t]; } } outputs[i] = sum; indexes[i] = i; } Arrays.sort(indexes, new Comparator<Integer>() { @Override public int compare(final Integer o1, final Integer o2) { return Double.compare(outputs[o2], outputs[o1]); } }); double FPrate = fprate(outputs, 0, aciertos); System.out.println(f + " == " +FPrate); return aciertos; } /** Computes the FPrate * * @param outputs the outputs of each instance using the current bag * @param teta theta parameter * @param corrects wether the instance has been correctly classified or not * @return FPrate */ double fprate(double[] outputs, double teta, boolean[] corrects) { double TP = 0, FP = 0, FN = 0, TN = 0; for (int i = 0; i < originalDS.getnData(); i++) { int c = (outputs[i] - teta >= 0 ? 0 : 1); int cReal = originalDS.getOutputAsInteger(i); if (c == cReal) corrects[i] = true; else corrects[i] = false; if (c == cReal && cReal == majC) TN++; else if (c == cReal && cReal != majC) TP++; else if (c != cReal && cReal == majC) FP++; else FN++; } return FP / (FP + TN); } /** Preparation of the data-set for Hybrid-based methods * The data-set is only prepared nBag times, that is, when t % nBoostIterations == 0 * For BalanceCascade, the theta has to be adjusted and the corresponding instances are eliminated */ private void prepareDatasetEasyEnsembleBalanceCascade() { /* Solo se prepara el dataset cuando nBag veces, para cascade, cuando termina * Hay que ajustar teta y eliminar del original */ if (t % nBoostIterations == 0) { if(ensembleType.equalsIgnoreCase("BALANCECASCADE") && t > 0) { /* If t > 0, adjust theta such that FPrate = FP / (Fp + TN) = f = \sqrt{T-1}{nMin/nMaj} * The adjust procedure is not well-explained in the paper, so we delete * the Majority class that have been correctly classified by the current classifier (only those that were used in training) */ boolean[] correct = adjustTheta(t / nBoostIterations - 1); // Delete the examples from the Majority class originalDS.deleteExamples(correct, selected, minC); nData = originalDS.getnData(); } // Create the actualDS with RandomUnderSampling of the majority class (50%) actualDS = new myDataset(originalDS); selected = actualDS.randomUnderSampling(originalDS, majC, 50); //N% of the total will be from the majority class /* The weights are uniformly initialized for the adaboost ensemble */ weights = new double[actualDS.size()]; for (int i = 0; i < actualDS.size(); i++) weights[i] = 1.0 / (float)actualDS.size(); } } /** * Creates a configuration file for the EUS-CHC approach. Qstat + GM approach * @param filename */ private void createConf(String filename){ String output = new String("algorithm = IS Methods\n"); output += "inputData = \"training2.txt\" \"training2.txt\" \"tst.dat\"\n" + "outputData = \"training.txt\" \"tstOutput.dat\"\n\n" + "Seed = 564545456\n" + "Population Size = 50\n" + "Number of Evaluations = 10000\n" + "Percentage of Change in restart = 0.35\n" + "0-1 in restart = 0.25\n" + "0-1 in diverge = 0.25\n" + "wrapper = k-NN\n" + "Number of Neighbors = 1\n" + "Distance Function = Euclidean\n" + "evMeasure = geometric mean\n" + "majSelection = majority_selection\n" + "EBUS = EBUS\n" + "P = 0.2\n" + "hybrid = NO smote + eus\n" + "kSMOTE = 5\n" + "ASMO = both\n" + "balance = YES\n" + "smoting = 1\n"; Files.writeFile(filename, output); } /** Preparation of the data-set for RUSBoost * The data-set is resampled and the weight distribution is changed * in order to form a distribution with the remaining weights */ private void prepareDatasetRUSBoost() { // Create the actualDS with RandomUnderSampling (N%, usually 50%) of the majority class if (ensembleType.equalsIgnoreCase("ERUSBOOST")) { Files.writeFile(multi_C45.outputTr.substring(0,multi_C45.outputTr.length()-4) + "training2.txt", originalDS.printDataSet()); Metodo m = null; createConf("EUB_M_GMConf.txt"); m = new EUSCHCQstat("EUB_M_GMConf.txt"); EUSCHCQstat m2 = (EUSCHCQstat)m; m2.setAnteriores(anteriores); m2.setSalidasAnteriores(salidasAnteriores); m.runAlgorithm(); m.run(); try { // originalDS.getIS().setAttributesAsNonStatic(); /* Read the preprocessed data-set */ actualDS = new myDataset(); actualDS.readClassificationSet(multi_C45.outputTr.substring(0,multi_C45.outputTr.length()-4) + "training.txt", false); }catch (IOException e) { System.err.println("There was a problem while reading the input preprocessed data-sets: " + e); } File f = new File(multi_C45.outputTr.substring(0,multi_C45.outputTr.length()-4) + "training.txt"); File f2 = new File(multi_C45.outputTr.substring(0,multi_C45.outputTr.length()-4) + "training2.txt"); f.delete(); f2.delete(); m2 = (EUSCHCQstat)m; anteriores[t] = m2.getBest().clone(); salidasAnteriores[t] = m2.getBestOutputs().clone(); m = null; selected = new int[actualDS.getnData()]; Arrays.fill(selected, -1); boolean[] aux = new boolean[originalDS.getnData()]; Arrays.fill(aux, false); for (int i = 0; i < actualDS.getnData(); i++) { double[] ej1 = actualDS.getExample(i); for (int j = 0; j < originalDS.getnData(); j++) { if (aux[j] == true) continue; double[] ej2 = originalDS.getExample(j); boolean fin = false; for (int k = 0; k < ej1.length && !fin; k++) { if (ej1[k] != ej2[k]) fin = true; } if (fin == false) { selected[i] = j; aux[j] = true; } } } }else { actualDS = new myDataset(originalDS); selected = actualDS.randomUnderSampling(originalDS, majC, N); //N% of the total will be from the majority class // The original weights are stored and the new weights are recalculated // Selected has the indexes of the instances from the previous data-set in the new one } weightsBackup = weights.clone(); weights = new double[selected.length]; double Z = 0; for (int i = 0; i < selected.length; i++) { if (selected[i] != -1) weights[i] = weightsBackup[selected[i]]; else weights[i] = 1.0 / (double)actualDS.getnData(); Z += weights[i]; } for (int i = 0; i < selected.length; i++) weights[i] /= Z; } /** Preparation of the data-set for SMOTEBoost and MSMOTEBoost * First, the data-set is preprocessed with SMOTE or MSMOTE and then * the weight distribution is changed in order to form a distribution with the new instances */ private void prepareDatasetSMOTEBoost() { System.out.println("Applying Preprocessing...[" + t + "]"); actualDS = null; if (ensembleType.contains("MSMOTE")) { originalDS.getIS().setAttributesAsNonStatic(); // MSMOTE configuration, the seed, kClean = 3, k = 5 or nMin, use both classes, balance, quantity of balancing, and distance = HVDM MSMOTE preprocess = new MSMOTE(originalDS.getIS(), Math.round(Randomize.Randdouble(0, 12345678.0)), 3, this.nMin > 5 ? 5 : nMin, 0, true, N < 100 ? 1.0 : (double)N / 100.0, "HVDM"); preprocess.ejecutar(); preprocess = null; } else { // SMOTE configuration, the seed, k = 5 or nMin, use both classes, balance, quantity of balancing, and distance = HVDM SMOTE preprocess = new SMOTE(originalDS.getIS(), Math.round(Randomize.Randdouble(0, 12345678.0)), this.nMin > 5 ? 5 : nMin, 0, true, N < 100 ? 1.0 : (double)N / 100.0, "HVDM"); preprocess.ejecutar(); preprocess = null; } try { /* Read the preprocessed data-set */ actualDS = new myDataset(); actualDS.readClassificationSet(multi_C45.outputTr.substring(0,multi_C45.outputTr.length()-4) + "train.tra", false); }catch (IOException e) { System.err.println("There was a problem while reading the input preprocessed data-sets: " + e); } File f = new File(multi_C45.outputTr.substring(0,multi_C45.outputTr.length()-4) + "train.tra"); f.delete(); /* Store the original weights */ weightsBackup = weights.clone(); /* Recompute the weights with the new instances */ weights = new double[actualDS.getnData()]; for (int i = 0; i < actualDS.getnData(); i++) { if (i < nData) // old ones weights[i] = weightsBackup[i] * (double)nData / (double)actualDS.getnData(); else // new ones weights[i] = 1.0 / (double)actualDS.getnData(); } } /** Preparation of the data-set for IIVotes (with SPIDER preprocessing) * First, the data-set is preprocessed with SMOTE or MSMOTE and then * the weight distribution is changed in order to form a distribution with the new instances */ private void prepareDatasetSPIDER() { System.out.println("Applying Preprocessing...[" + t + "]"); actualDS = null; /* The new data-set is formed using importance sampling which depends on the previous predictions and the OOB error estimation */ boolean[] used = null; do { actualDS = new myDataset(originalDS); used = actualDS.importanceSampling(originalDS, (int)(originalDS.getnData() / 2), predictions, e); } while (actualDS.vacio()); /* The instances of this data-set are stored to perform the OOB (after training) */ this.trainingSetsOOB.add(used.clone()); /* The SPIDER preprocessing is carried out in the actual data-set */ SPIDER preprocess = new SPIDER(actualDS.getIS(), 3, spiderType, "HVDM"); preprocess.ejecutar(); preprocess = null; actualDS.getIS().clearInstances(); actualDS = null; try { /* The preprocessed data-set is read */ actualDS = new myDataset(); actualDS.readClassificationSet(multi_C45.outputTr.substring(0,multi_C45.outputTr.length()-4) + "train.tra", false); }catch (IOException e) { System.err.println("There was a problem while reading the input preprocessed data-sets: " + e); } File f = new File(multi_C45.outputTr.substring(0,multi_C45.outputTr.length()-4) + "train.tra"); f.delete(); } /** Preparation of the data-set for DataBoost-IM * First, it identifies hard examples (seeds) and then the data-set is rebalanced */ private void prepareDatasetDataBoostIM() { System.out.println("Preparing Data-set for DataBoost-IM..."); weightsBackup = weights.clone(); if (t < 1) return; // Identify hard examples int Ns = 0; final Integer[] indexes = new Integer[nData]; for (int i = 0; i < nData; i++) { if (classifier.obtainClass(t - 1, originalDS.getExample(i)) != originalDS.getOutputAsInteger(i)) Ns++; indexes[i] = i; } Arrays.sort(indexes, new Comparator<Integer>() { @Override public int compare(final Integer o1, final Integer o2) { if (Double.compare(weights[o2], weights[o1]) == 0) if (originalDS.getOutputAsInteger(o2) == minC) return +1; else if (originalDS.getOutputAsInteger(o1) == minC) return -1; else return 0; else return Double.compare(weights[o2], weights[o1]); } }); /* Compute the number of seeds for each class */ int Nsmaj = 0, Nsmin = 0; for (int i = 0; i < Ns; i++) { if (originalDS.getOutputAsInteger(indexes[i]) == majC) Nsmaj++; else Nsmin++; } /* Compute the final number of seeds */ int Ml = Math.min(nMaj / nMin, Nsmaj); int Ms = Math.min(nMaj * Ml / nMin, Nsmin); /* Create the syntetic instances for each class and set their weights */ originalDS.computeStatisticsPerClass(); double[][] Xmaj = createSynteticData(majC, Ml, nMaj); double[][] Xmin = createSynteticData(minC, Ms, nMin); double[] weightsSeedsMaj = new double[Ml], weightsSeedsMin = new double[Ms]; int auxMl = 0, auxMs = 0; for (int i = 0; i < Ns; i++) { if (originalDS.getOutputAsInteger(i) == majC && auxMl < Ml) { weightsSeedsMaj[auxMl] = weights[indexes[i]]; auxMl++; } else if ( auxMs < Ms ) { weightsSeedsMin[auxMs] = weights[indexes[i]]; auxMs++; } } // Add syntetic data to the data-set actualDS = new myDataset(originalDS, majC, Xmaj, minC, Xmin); // Update weights int newNData = nData + Ml * nMaj + Ms * nMin; weightsBackup = weights.clone(); weights = new double[newNData]; int iAux = 0; for (int i = 0; i < nData; i++, iAux++) weights[iAux] = weightsBackup[i]; for (int i = nData; i < nData + Ml; i++) for (int j = 0; j < nMaj; j++, iAux++) weights[iAux] = weightsSeedsMaj[i - nData] / (double)nMaj; for (int i = nData + Ml; i < nData + Ml + Ms; i++) for (int j = 0; j < nMin; j++, iAux++) weights[iAux] = weightsSeedsMin[i - nData - Ml] / (double)nMin; // Rebalance! weights double Wmaj = 0, Wmin = 0; for (int i = 0; i < newNData; i++) if (actualDS.getOutputAsInteger(i) == majC) Wmaj += weights[i]; else Wmin += weights[i]; if (Wmaj > Wmin) for (int i = 0; i < newNData; i++){ if (actualDS.getOutputAsInteger(i) == minC) weights[i] *= Wmaj / Wmin; } else for (int i = 0; i < newNData; i++){ if (actualDS.getOutputAsInteger(i) == majC) weights[i] *= Wmin / Wmaj; } double Z = 0; for (int i = 0; i < newNData; i++) Z += weights[i]; for (int i = 0; i < newNData; i++) weights[i] /= Z; System.out.println("Preparartion finished!"); } /** Create synthetic instances for the given class (DataBoost-IM) * * @param c The class for which the examples must be created * @param nSets Number of instance sets to be created (number of seeds) * @param nExamples number of examples belonging to c in the original data-set */ private double[][] createSynteticData(int c, int nSets, int nExamples) { int nInputs = originalDS.getnInputs(); double[][] X = new double[nSets * nExamples][nInputs]; if (nSets == 0) return X; // Necesito, de los nominales, para cada clase los norminales // Recorro todos los ejemplos y dep de la clase añado a uno o a otro // se que de cada nominal hay nMaj y nMin, pero tengo que contar cuantos hay norminales // a la vez que cuento, guardo media y std para numericos /* We need to obtain the average and std of the numeric attributes in the class * and for the nominal attributes the nominal values of the instances */ int nNominal = 0, nNumeric = 0; double[][] numeric = new double[nInputs][2]; for (int i = 0; i < nInputs; i++) { if (originalDS.getTipo(i) == myDataset.NOMINAL || originalDS.getTipo(i) == myDataset.INTEGER) { nNominal++; numeric[i][0] = -1; numeric[i][1] = -1; } else { nNumeric++; numeric[i][0] = originalDS.getAveragePerClass()[c][i]; numeric[i][1] = originalDS.getStdPerClass()[c][i]; } } double[][] nominal = new double[nInputs][nExamples]; int nAux = 0; for (int i = 0; i < nData; i++) { if (originalDS.getOutputAsInteger(i) == c) { for (int j = 0; j < nInputs; j++) if (numeric[j][0] == -1) // Nominal nominal[j][nAux] = originalDS.getExample(i)[j]; nAux++; } } /* Generate syntetic data based on the seeds */ for (int i = 0; i < nSets; i++) { for (int k = 0; k < nInputs; k++) { if (numeric[k][0] == -1) // nominal { double[] auxNominal = nominal[k].clone(); int r; double aux; for (int j = 0; j < nExamples; j++) { r = Randomize.RandintClosed(j, nExamples - 1); aux = auxNominal[r]; auxNominal[r] = auxNominal[j]; auxNominal[j] = aux; } for (int j = 0; j < nExamples; j++) X[i * nExamples + j][k] = auxNominal[j]; } else // numeric for (int j = 0; j < nExamples; j++) X[i * nExamples + j][k] = Randomize.RandGaussian() * numeric[k][1] + numeric[k][0]; } } return X; } /** This function prepares the data-set for the next bagging-based ensembles * iteration depending on the ensemble method selected. */ private void nextBag() { if (ensembleType.equalsIgnoreCase("BAGGING")) { /* Bootstrapping */ actualDS = new myDataset(originalDS); actualDS.randomSampling(originalDS, majC, minC, nMaj, nMin); } else if (ensembleType.equalsIgnoreCase("UNDERBAGGING")) { /* Undersampling */ actualDS = new myDataset(originalDS); actualDS.randomUnderSampling(originalDS, majC, 50); } else if (ensembleType.equalsIgnoreCase("UNDERBAGGING2")) { /* Undersampling + sampling/bootstrapping of the minority */ actualDS = new myDataset(originalDS); actualDS.randomSampling(originalDS, majC, minC, nMin, nMin); } else if (ensembleType.equalsIgnoreCase("OVERBAGGING")) { /* Oversampling */ actualDS = new myDataset(originalDS); actualDS.randomUnderSampling(originalDS, minC, 50); } else if (ensembleType.equalsIgnoreCase("OVERBAGGING2")) { /* Oversampling + sampling/bootstrapping of the majority */ actualDS = new myDataset(originalDS); actualDS.randomSampling(originalDS, majC, minC, nMaj, nMaj); } else if (ensembleType.equalsIgnoreCase("UNDEROVERBAGGING")) { actualDS = new myDataset(originalDS); // The sampling rate changes being always multiple of 10 if (t + 1 > (nClassifier / 10)) b += 10; /* (b% * Nmaj) instances are taken from each class */ actualDS.randomSampling(originalDS, majC, minC, b); } else if (ensembleType.equalsIgnoreCase("SMOTEBAGGING")) { /* Both classes are balanced using SMOTE */ actualDS = new myDataset(originalDS); if (t + 1 > (nClassifier / 10)) b += 10; /* First resampling of b% * nMaj, then SMOTE to get a balanced set */ actualDS.randomSampling(originalDS, majC, minC, nMaj, b * nMaj/ nMin); N = 50 ; // Balance System.out.println("Applying Preprocessing...[" + t + "]"); // SMOTE configuration, the seed, k = 5 or nMin, use both classes, balance, quantity of balancing, and distance = HVDM SMOTE preprocess = new SMOTE(actualDS.getIS(), Math.round(Randomize.Randdouble(0, 12345678.0)), this.nMin > 5 ? 5 : nMin, 0, true, N < 100 ? 1.0 : (double)N / 100.0, "HVDM"); preprocess.ejecutar(); try { /* Read the preprocessed data-set */ actualDS = new myDataset(); actualDS.readClassificationSet(multi_C45.outputTr.substring(0,multi_C45.outputTr.length()-4) + "train.tra", false); }catch (IOException e) { System.err.println("There was a problem while reading the input preprocessed data-sets: " + e); } File f = new File(multi_C45.outputTr.substring(0,multi_C45.outputTr.length()-4) + "train.tra"); f.delete(); } else if (ensembleType.equalsIgnoreCase("MSMOTEBAGGING")) { /* Both classes are balanced using MSMOTE */ actualDS = new myDataset(originalDS); if (t + 1 > (nClassifier / 10)) b += 10; /* First resampling of b% * nMaj, then MSMOTE to get a balanced set */ actualDS.randomSampling(originalDS, majC, minC, nMaj, b * nMaj/ nMin); N = 50 ; // Balance System.out.println("Applying Preprocessing...[" + t + "]"); // MSMOTE configuration, the seed, kClean = 3, k = 5 or nMin, use both classes, balance, quantity of balancing, and distance = HVDM MSMOTE preprocess = new MSMOTE(actualDS.getIS(), Math.round(Randomize.Randdouble(0, 12345678.0)), 3, this.nMin > 5 ? 5 : nMin, 0, true, N < 100 ? 1.0 : (double)N / 100.0, "HVDM");//MSMOTE("conf.txt"); preprocess.ejecutar(); preprocess = null; try { /* Read the preprocessed data-set */ actualDS = new myDataset(); actualDS.readClassificationSet(multi_C45.outputTr.substring(0,multi_C45.outputTr.length()-4) + "train.tra", false); }catch (IOException e) { System.err.println("There was a problem while reading the input preprocessed data-sets: " + e); } File f = new File(multi_C45.outputTr.substring(0,multi_C45.outputTr.length()-4) + "train.tra"); f.delete(); } /* In bagging-based ensembles the weights are uniformly distributed, there are no weights */ weights = new double[actualDS.getnData()]; for (int i = 0; i < actualDS.getnData(); i++) weights[i] = 1.0 / (float)actualDS.getnData(); } /** It returns the weights' vector of the corresponding iteration. In case of AdaBoost and Boosting-based ensembles * the weights are given if the resampling is not selected, otherwise, the weights are uniformly distributed * because the resampling has been carried out considering the weight distribution * @return Weights' vector */ double[] getWeights() { if ((ensembleType.contains("ADA") || ensembleType.contains("BOOST") || ensembleType.contains("EASYENSEMBLE") || ensembleType.contains("BALANCECASCADE")) && !trainMethod.equalsIgnoreCase("RESAMPLING")) return weights; else { double[] uniformWeights = new double[actualDS.getnData()]; for (int i = 0; i < uniformWeights.length; i++) uniformWeights[i] = 1.0 / (double)uniformWeights.length; return uniformWeights; } } /** This function sets the costs for cost-sensitive boosting * * @param Cmaj Cost of misclassifying a majority class instance * @param Cmin Cost of misclassifying a minority class instance */ void setCosts(double Cmaj, double Cmin) { this.CostMaj = Cmaj; this.CostMin = Cmin; } /** This method obtains the dataset for the next iteration * * @return The new (prepared if it is neccessary) data-set */ myDataset getDS() { if (prepareDSNeeded) prepareDataset(); if (ensembleType.contains("BAG")) nextBag(); return actualDS; } /** This method prepares the new data-set depending on the ensemle type. * if the resampling is required (for boosting-based ensmbles), it is performed * after preparing the data-set. */ public void prepareDataset() { if (ensembleType.contains("RUSBOOST")) prepareDatasetRUSBoost(); else if (ensembleType.contains("SMOTEBOOST")) prepareDatasetSMOTEBoost(); else if (ensembleType.contains("IIVOTES")) prepareDatasetSPIDER(); else if (ensembleType.contains("DATABOOST-IM")) prepareDatasetDataBoostIM(); else if (ensembleType.contains("EASYENSEMBLE") || (ensembleType.contains("BALANCECASCADE"))) prepareDatasetEasyEnsembleBalanceCascade(); /* Boostrap the data-set instead of using the weights for training */ if (resampling) { myDataset auxDS = new myDataset(actualDS, weights); actualDS = auxDS; } } /** The appropriate function to update the weights is selected depending on the method * * @return Wether the boosting algorithm has finished or not. */ private boolean modifyWeights() { if (ensembleType.equalsIgnoreCase("ADABOOST")) return modifyWeightsAdaBoost(); else if (ensembleType.equalsIgnoreCase("ADABOOST.M1")) // Calcula diferente la forma de cambiar los pesos return modifyWeightsAdaBoostM1(); else if (ensembleType.equalsIgnoreCase("ADABOOST.M2")) // Utiliza la confianza return modifyWeightsAdaBoostM2(); else if (ensembleType.equalsIgnoreCase("ADAC2")) return modifyWeightsAdaC2(); else if (ensembleType.contains("RUSBOOST") || ensembleType.contains("SMOTEBOOST")) { weights = weightsBackup.clone(); return modifyWeightsAdaBoostM2(); } else if (ensembleType.equalsIgnoreCase("DATABOOST-IM")) { weights = weightsBackup.clone(); return modifyWeightsAdaBoostM1(); } else if (ensembleType.equalsIgnoreCase("EASYENSEMBLE") || ensembleType.equalsIgnoreCase("BALANCECASCADE")) return modifyWeightsAdaBoostActualDS(); else { if (ensembleType.equalsIgnoreCase("ADABOOST.NC")) { return modifyWeightsAdaBoostNC(); } else { return false; } } } /** AdaBoost algorithm * * @return true if the boosting has finished */ private boolean modifyWeightsAdaBoost() { double[] corrects = new double[nData]; // compute alfa_t double r = 0, Z = 0; for (int i = 0; i < nData; i++) { if (classifier.obtainClass(t, originalDS.getExample(i)) == originalDS.getOutputAsInteger(i)) corrects[i] = 1; else corrects[i] = -1; r += weights[i] * corrects[i]; } double err = (1 - r) / 2; if (err < 0.001 || err >= 0.5) { if (t > 0 && err >= 0.5) { nClassifier = t; t = t - 1; } else alfa[t] = 1.0; return true; } alfa[t] = 0.5 * Math.log((1 + r) / (1 - r)); for (int i = 0; i < nData; i++) { weights[i] *= Math.exp(-1.0 * alfa[t] * corrects[i]); Z += weights[i]; } for (int i = 0; i < nData; i++) weights[i] /= Z; return false; } /** AdaBoost.NC algorithm * * @return true if the boosting has finished */ private boolean modifyWeightsAdaBoostNC() { //Step 3. Calculate the penalty value for every example x_i //amb_t(x_i) = 1/t sum_{j=1}^{t}(||H_t = y_i|| - ||h_j = y_i||) //H_t = arg_max (y) sum_{t=1}{T}(alfa_t · ||h_t(x) = y||) ** A partir de la segunda iteración! (variable t) double [] amb_t = new double[originalDS.getnData()]; int [] Hts = new int[originalDS.getnData()]; for (int j = 0; j < originalDS.getnData(); j++){ Hts[j] = 0; double [] example = originalDS.getExample(j).clone(); if (salidaEnsemble(example) == originalDS.getOutputAsInteger(j)){ Hts[j] = 1; } } for(int i = 0; i < t; i++){ for (int j = 0; j < originalDS.getnData(); j++){ double [] example = originalDS.getExample(j).clone(); int ht = 0; if (classifier.obtainClass(i, example) == originalDS.getOutputAsInteger(j)){ ht = 1; } amb_t[j] += Math.abs(Hts[j] - ht); } } if (t > 0) for (int i = 0; i < originalDS.getnData(); i++) amb_t[i] /= t; //p_t(x_i) = 1 - |amb_t(x_i)| double [] pow = new double[originalDS.getnData()]; for (int i = 0; i < originalDS.getnData(); i++){ penalization[i] = 1 - Math.abs(amb_t[i]); pow[i] = Math.pow(penalization[i], lambda); } //Step 4. Calculate ht's weight alfa_t by error and penalty using: // alfa_t = 1/2 log (sum_{i, y_i = ht(x_i)}(D_t(x_i)·p_t(x_i)^l / sum_{i, y_i != ht(x_i)}(D_t(x_i)·p_t(x_i)^l ) double sumatoriaPos = 0; double sumatoriaNeg = 0; for (int i = 0; i < originalDS.getnData(); i++){ double [] example = originalDS.getExample(i).clone(); if (classifier.obtainClass(t, example) == originalDS.getOutputAsInteger(i)){ sumatoriaPos += this.weights[i] * pow[i]; }else{ sumatoriaNeg += this.weights[i] * pow[i]; } } alfa[t] = 1.0; if (sumatoriaNeg > 0){ alfa[t] = 0.5*Math.log(sumatoriaPos/sumatoriaNeg); }else{ return true; } //Step 5. Update data weights Dt and obtain new weights Dt+1 by error and penalty //D_{t+1}(x_i) = p_t(x_i)^l · Dt(x_i)^(-alfa_t ||h_t(x_i)= y_i||) / Z_t //Z_t is a normalization factor double max = 0; for (int i = 0; i < originalDS.getnData(); i++){ double [] example = originalDS.getExample(i).clone(); double aux = this.weights[i]; if (classifier.obtainClass(t, example) == originalDS.getOutputAsInteger(i)){ this.weights[i] = pow[i]*weights[i]*1.0/Math.pow(Math.E,alfa[t]); }else{ this.weights[i] = Math.E * pow[i] * this.weights[i]; } if (this.weights[i] > max){ max = this.weights[i]; } } for (int i = 0; i < originalDS.getnData(); i++){ this.weights[i] /= max; } return false; } //H_t = arg_max (y) sum_{t=1}{T}(alfa_t · ||h_t(x) = y||) ** From second iteration! (variable t) public int salidaEnsemble(double [] example){ int clase = 0; double [] scores = new double[originalDS.getnClasses()]; for (int i = 0; i < t; i++){ //for all sub-classifiers scores[classifier.obtainClass(i, example)] += alfa[i]; //obtain the class pointed by sub-classifier "t" //add "alpha" value to class "j" (y) } for (int i = 1; i < originalDS.getnClasses(); i++){ if (scores[i] > scores[clase]){ clase = i; } } return clase; } /** AdaBoost algorithm performed on ActualDS for BalanceCascade and EasyEnsemble * * @return true if the boosting has finished */ private boolean modifyWeightsAdaBoostActualDS() { double[] corrects = new double[actualDS.getnData()]; // compute alfa_t double r = 0, Z = 0; double sumFail = 0; for (int i = 0; i < actualDS.getnData(); i++) { if (classifier.obtainClass(t, actualDS.getExample(i)) == actualDS.getOutputAsInteger(i)) corrects[i] = 1; else { corrects[i] = -1; sumFail += weights[i]; } r += weights[i] * corrects[i]; } if (sumFail < 0.001 || sumFail >= 0.5) { if (sumFail < 0.5) alfa[t] = 1.0; t = (t / nBoostIterations + 1) * nBoostIterations - 1; if (t / nBoostIterations >= nBags - 1) return true; else { return false; } } // alfa[t] = 0.5 * Math.log((1 + r) / (1 - r)); es lo mismo alfa[t] = 0.5 * Math.log((1 - sumFail) / sumFail); for (int i = 0; i < actualDS.getnData(); i++) { weights[i] *= Math.exp(-1.0 * alfa[t] * corrects[i]); Z += weights[i]; } for (int i = 0; i < actualDS.getnData(); i++) weights[i] /= Z; return false; } /** AdaBoost.M1 algorithm * * @return true if the boosting has finished */ private boolean modifyWeightsAdaBoostM1() { double[] corrects = new double[nData]; // calcular alfa_t double Z = 0, sumFail = 0; for (int i = 0; i < nData; i++) { if (classifier.obtainClass(t, originalDS.getExample(i)) != originalDS.getOutputAsInteger(i)) { corrects[i] = -1; sumFail += weights[i]; } else corrects[i] = 1; } if (sumFail < 0.001 || sumFail >= 0.5) { if (t > 0 && sumFail > 0.5) { nClassifier = t; t = t - 1; } else alfa[t] = 1.0; return true; } double beta = sumFail / (1 - sumFail); for (int i = 0; i < nData; i++) { if (corrects[i] == 1) weights[i] *= beta; Z += weights[i]; } for (int i = 0; i < nData; i++) weights[i] /= Z; alfa[t] = Math.log(1.0 / beta); return false; } /** AdaBoost.M2 algorithm * * @return true if the boosting has finished */ private boolean modifyWeightsAdaBoostM2() { double[] corrects = new double[nData]; double[] confianza = new double[nData]; // calcular alfa_t double Z = 0, sumFail = 0; for (int i = 0; i < nData; i++) { confianza[i] = classifier.obtainConfidence(t, originalDS.getExample(i)); if (classifier.obtainClass(t, originalDS.getExample(i)) != originalDS.getOutputAsInteger(i)) { corrects[i] = -1; sumFail += 2 * weights[i] * confianza[i]; } else { corrects[i] = 1; sumFail += weights[i] * (2 - 2 * confianza[i]); } } sumFail *= 0.5; double beta = sumFail / (1 - sumFail); for (int i = 0; i < nData; i++) { if (corrects[i] == 1) weights[i] *= Math.pow(beta, confianza[i]); else weights[i] *= Math.pow(beta, 1 - confianza[i]); Z += weights[i]; } for (int i = 0; i < nData; i++) weights[i] /= Z; alfa[t] = Math.log(1.0 / beta); return false; } /** AdaC2 boosting algorithm, where costs are instroduced within the exponent part of Adaboost * * @return true if the boosting has finished */ private boolean modifyWeightsAdaC2() { double[] corrects = new double[nData]; double[] C = new double[nData]; double sumFail = 0, sumCorrect = 0; // calcular alfa_t double Z = 0; for (int i = 0; i < nData; i++) { C[i] = majC == actualDS.getOutputAsInteger(i) ? CostMaj : CostMin; if (classifier.obtainClass(t, originalDS.getExample(i)) == originalDS.getOutputAsInteger(i)) { corrects[i] = 1; sumCorrect += C[i] * weights[i]; } else { corrects[i] = -1; sumFail += C[i] * weights[i]; } } if (sumFail < 0.001 || sumFail >= sumCorrect) { if (t > 0 && sumFail >= sumCorrect) { t = t - 1; nClassifier = t; } else alfa[t] = 1.0; return true; } alfa[t] = 0.5 * Math.log(sumCorrect / sumFail); for (int i = 0; i < nData; i++) { weights[i] *= Math.exp(-1.0 * alfa[t] * corrects[i]) * C[i]; Z += weights[i]; } for (int i = 0; i < nData; i++) weights[i] /= Z; return false; } /** Out-of-Bag error estimation algorithm * * @param originalDS the original data-sets which contains all the instances * @param predictions wether the instance in the position has been correctly classified or not (only for instances which were in the bag) * @return Out-of-Bag error estimation */ private double outOfBagEstimation(myDataset originalDS, boolean[] predictions) { double total = 0; double TP = 0, FP = 0, FN = 0, TN = 0; for (int i = 0; i < originalDS.getnData(); i++) { boolean counted = false; double[] example = originalDS.getExample(i); double sum = 0; double confidence = 1; for (int t = 0; t < nClassifier; t++) { if (alfa[t] != 0 && !this.trainingSetsOOB.get(t)[i]) { if (!counted) { total++; counted = true; } confidence = classifier.obtainConfidence(t, example); if (classifier.obtainClass(t, example) == 0) sum += confidence * alfa[t]; else sum -= confidence * alfa[t]; } } int output = -1; if (sum >= 0) output = 0; else if (sum < 0) output = 1; int claseReal = originalDS.getOutputAsInteger(i); if (output == claseReal && counted) predictions[i] = true; else predictions[i] = false; if (counted){ if (claseReal == output && this.majC == output) TN++; else if (claseReal == output && this.majC != output) TP++; else if (claseReal != output && this.majC == output) FP++; else FN++; } } double TPrate = TP / (TP + FN); double TNrate = TN / (TN + FP); double gmean = Math.sqrt(TPrate * TNrate); double acc = (TN + TP) / (TN + TP + FN + FP); return 1 - acc; } }