/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program. If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.learner.functions.kernel.jmysvm.svm;

import java.util.Iterator;

import com.rapidminer.example.Attribute;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.learner.functions.kernel.AbstractMySVMLearner;
import com.rapidminer.operator.learner.functions.kernel.JMySVMLearner;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.examples.SVMExample;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.examples.SVMExamples;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.Kernel;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.optimizer.QuadraticProblem;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.optimizer.QuadraticProblemSMO;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.util.MaxHeap;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.util.MinHeap;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.LogService;
import com.rapidminer.tools.RandomGenerator;

/**
 * Abstract base class for all SVMs.
 *
 * @author Stefan Rueping, Ingo Mierswa
 * @version $Id: SVM.java,v 1.6 2008/05/09 19:23:26 ingomierswa Exp $
 */
public abstract class SVM implements SVMInterface {

    private static final int[] RAPID_MINER_VERBOSITY = { LogService.STATUS, LogService.STATUS, LogService.STATUS, LogService.MINIMUM, LogService.MINIMUM };

    protected Kernel the_kernel;
    protected SVMExamples the_examples;

    double[] alphas;
    double[] ys;

    protected int examples_total;

    // protected int verbosity;

    protected int target_count;
    protected double convergence_epsilon = 1e-3;
    protected double lambda_factor;
    protected int[] at_bound;
    protected double[] sum;
    protected boolean[] which_alpha;
    protected int[] working_set;
    protected double[] primal;
    protected double sum_alpha;
    protected double lambda_eq;
    protected int to_shrink;
    protected double feasible_epsilon;
    protected double lambda_WS;
    protected boolean quadraticLossPos = false;
    protected boolean quadraticLossNeg = false;

    boolean shrinked;

    protected double epsilon_pos = 0.0d;
    protected double epsilon_neg = 0.0d;

    private int max_iterations = 100000;

    protected int working_set_size = 10;
    protected int parameters_working_set_size = 10; // was set in parameters

    protected double is_zero = 1e-10;
    protected int shrink_const = 50;

    protected double C = 1.0d;
    protected double[] cPos;
    protected double[] cNeg;

    protected double descend = 1e-15;

    MinHeap heap_min;
    MaxHeap heap_max;

    protected QuadraticProblem qp;

    private Operator paramOperator;

    private RandomGenerator randomGenerator;

    public SVM() {}
    /**
     * Class constructor. Throws an operator exception if a non-optional
     * parameter was not set and has no default value.
     */
    public SVM(Operator paramOperator, Kernel new_kernel, SVMExamples new_examples,
               com.rapidminer.example.ExampleSet rapidMinerExamples,
               RandomGenerator randomGenerator) throws UndefinedParameterError {
        // note: new_kernel is not stored here; the kernel is supplied later via init(Kernel, SVMExamples)
        this.paramOperator = paramOperator;
        the_examples = new_examples;
        this.randomGenerator = randomGenerator;
        max_iterations = paramOperator.getParameterAsInt(AbstractMySVMLearner.PARAMETER_MAX_ITERATIONS);
        convergence_epsilon = paramOperator.getParameterAsDouble(AbstractMySVMLearner.PARAMETER_CONVERGENCE_EPSILON);
        quadraticLossPos = paramOperator.getParameterAsBoolean(JMySVMLearner.PARAMETER_QUADRATIC_LOSS_POS);
        quadraticLossNeg = paramOperator.getParameterAsBoolean(JMySVMLearner.PARAMETER_QUADRATIC_LOSS_NEG);

        cPos = new double[rapidMinerExamples.size()];
        cNeg = new double[rapidMinerExamples.size()];
        Attribute weightAttribute = rapidMinerExamples.getAttributes().getWeight();
        if (weightAttribute != null) {
            Iterator<com.rapidminer.example.Example> reader = rapidMinerExamples.iterator();
            int index = 0;
            while (reader.hasNext()) {
                com.rapidminer.example.Example example = reader.next();
                cPos[index] = cNeg[index] = example.getValue(weightAttribute);
                index++;
            }
            if (paramOperator.getParameterAsBoolean(JMySVMLearner.PARAMETER_BALANCE_COST))
                logWarning("Since the example set contains a weight attribute, the parameter balance_cost will be ignored.");
            logln(1, "Using the defined weight attribute for example weights.");
        } else {
            Attribute weightPosAttribute = rapidMinerExamples.getAttributes().getSpecial("weight_pos");
            Attribute weightNegAttribute = rapidMinerExamples.getAttributes().getSpecial("weight_neg");
            if ((weightPosAttribute != null) && (weightNegAttribute != null)) {
                Iterator<com.rapidminer.example.Example> reader = rapidMinerExamples.iterator();
                int index = 0;
                while (reader.hasNext()) {
                    com.rapidminer.example.Example example = reader.next();
                    cPos[index] = example.getValue(weightPosAttribute);
                    cNeg[index] = example.getValue(weightNegAttribute);
                    index++;
                }
                if (paramOperator.getParameterAsBoolean(JMySVMLearner.PARAMETER_BALANCE_COST))
                    logWarning("Since the example set contains weight attributes, the parameter balance_cost will be ignored.");
                logln(1, "Using the defined weight_pos and weight_neg attributes for example weights.");
            } else {
                double generalCpos = paramOperator.getParameterAsDouble(JMySVMLearner.PARAMETER_L_POS);
                double generalCneg = paramOperator.getParameterAsDouble(JMySVMLearner.PARAMETER_L_NEG);
                if (paramOperator.getParameterAsBoolean(JMySVMLearner.PARAMETER_BALANCE_COST)) {
                    // scale the class costs inversely proportional to the class frequencies
                    generalCpos *= the_examples.count_examples() / (2.0d * (the_examples.count_examples() - the_examples.count_pos_examples()));
                    generalCneg *= the_examples.count_examples() / (2.0d * the_examples.count_pos_examples());
                }
                for (int i = 0; i < cPos.length; i++) {
                    cPos[i] = generalCpos;
                    cNeg[i] = generalCneg;
                }
            }
        }

        this.C = paramOperator.getParameterAsDouble(AbstractMySVMLearner.PARAMETER_C);

        double epsilonValue = paramOperator.getParameterAsDouble(JMySVMLearner.PARAMETER_EPSILON);
        if (epsilonValue != -1) {
            epsilon_pos = epsilon_neg = epsilonValue;
        } else {
            epsilon_pos = paramOperator.getParameterAsDouble(JMySVMLearner.PARAMETER_EPSILON_PLUS);
            epsilon_neg = paramOperator.getParameterAsDouble(JMySVMLearner.PARAMETER_EPSILON_MINUS);
        }
    }
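    /*
     * Illustrative lifecycle sketch (not part of the original class; the concrete
     * subclass name is an assumption, see e.g. the pattern/regression SVMs in
     * this package):
     *
     *   SVM svm = ...;                        // some concrete SVM subclass
     *   svm.init(kernel, trainingExamples);   // supply kernel and data, scale costs by C
     *   svm.train();                          // run the working-set optimization
     *   double y = svm.predict(someExample);  // f(x) = sum_i alpha_i K(x_i, x) + b
     */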
    /**
     * Initializes the SVM.
     *
     * @param new_kernel
     *            the new kernel function
     * @param new_examples
     *            the data container
     */
    public void init(Kernel new_kernel, SVMExamples new_examples) {
        the_kernel = new_kernel;
        the_examples = new_examples;
        examples_total = the_examples.count_examples();
        parameters_working_set_size = working_set_size;
        if (this.C <= 0.0d) {
            // default: C = n / sum_i K(i, i)
            this.C = 0.0d;
            for (int i = 0; i < examples_total; i++) {
                this.C += the_kernel.calculate_K(i, i);
            }
            this.C = examples_total / this.C;
            logln(3, "C set to " + this.C);
        }
        if (cPos != null) {
            for (int i = 0; i < cPos.length; i++) {
                cPos[i] *= this.C;
                cNeg[i] *= this.C;
            }
        }
        lambda_factor = 1.0;
        lambda_eq = 0;
        target_count = 0;
        sum_alpha = 0;
        feasible_epsilon = convergence_epsilon;
        alphas = the_examples.get_alphas();
        ys = the_examples.get_ys();
    }

    /**
     * Trains the SVM.
     */
    public void train() {
        if (examples_total <= 0) {
            // exception?
            the_examples.set_b(Double.NaN);
            return;
        }
        if (examples_total == 1) {
            the_examples.set_b(ys[0]);
            return;
        }

        target_count = 0;
        shrinked = false;
        init_optimizer();
        init_working_set();

        int iteration = 0;
        boolean converged = false;
        M: while (iteration < max_iterations) {
            iteration++;
            logln(4, "optimizer iteration " + iteration);
            optimize();
            put_optimizer_values();
            converged = convergence();
            if (converged) {
                logln(3, ""); // dots
                project_to_constraint();
                if (shrinked) {
                    // check convergence for all alphas
                    logln(2, "***** Checking convergence for all variables");
                    reset_shrinked();
                    converged = convergence();
                }
                if (converged) {
                    logln(1, "*** Convergence");
                    break M;
                }
                // set variables free again
                shrink_const += 10;
                target_count = 0;
                for (int i = 0; i < examples_total; i++) {
                    at_bound[i] = 0;
                }
            }
            shrink();
            calculate_working_set();
            update_working_set();
        }

        int i;
        if ((iteration >= max_iterations) && (!converged)) {
            logln(1, "*** No convergence: Time up.");
            if (shrinked) {
                // set sums for all variables for statistics
                reset_shrinked();
            }
        }

        // calculate b, averaged over the free support vectors (0 < |alpha_i| < C)
        double new_b = 0;
        int new_b_count = 0;
        for (i = 0; i < examples_total; i++) {
            if ((alphas[i] - cNeg[i] < -is_zero) && (alphas[i] > is_zero)) {
                new_b += ys[i] - sum[i] - epsilon_neg;
                new_b_count++;
            } else if ((alphas[i] + cPos[i] > is_zero) && (alphas[i] < -is_zero)) {
                new_b += ys[i] - sum[i] + epsilon_pos;
                new_b_count++;
            }
        }
        if (new_b_count > 0) {
            the_examples.set_b(new_b / new_b_count);
        } else {
            // unlikely: no free SVs, average over the variables at zero instead
            for (i = 0; i < examples_total; i++) {
                if ((alphas[i] < is_zero) && (alphas[i] > -is_zero)) {
                    new_b += ys[i] - sum[i];
                    new_b_count++;
                }
            }
            if (new_b_count > 0) {
                the_examples.set_b(new_b / new_b_count);
            } else {
                // even unlikelier: average over all examples
                for (i = 0; i < examples_total; i++) {
                    new_b += ys[i] - sum[i];
                    new_b_count++;
                }
                the_examples.set_b(new_b / new_b_count);
            }
        }

        logln(2, "Done training: " + iteration + " iterations.");
        double now_target = 0;
        double now_target_dummy = 0;
        for (i = 0; i < examples_total; i++) {
            now_target_dummy = sum[i] / 2 - ys[i];
            if (is_alpha_neg(i)) {
                now_target_dummy += epsilon_pos;
            } else {
                now_target_dummy -= epsilon_neg;
            }
            now_target += alphas[i] * now_target_dummy;
        }
        logln(2, "Target function: " + now_target);

        print_statistics();
        exit_optimizer();
    }

    /**
     * Prints statistics about the result.
     */
    protected void print_statistics() {
        int dim = the_examples.get_dim();
        int i, j;
        double alpha;
        double[] x;
        int svs = 0;
        int bsv = 0;
        double mae = 0;
        double mse = 0;
        int countpos = 0;
        int countneg = 0;
        double y;
        double prediction;
        double min_lambda = Double.MAX_VALUE;
        double b = the_examples.get_b();

        for (i = 0; i < examples_total; i++) {
            if (lambda(i) < min_lambda) {
                min_lambda = lambda(i);
            }
            y = ys[i];
            prediction = sum[i] + b;
            mae += Math.abs(y - prediction);
            mse += (y - prediction) * (y - prediction);
            alpha = alphas[i];
            if (y < prediction - epsilon_pos) {
                countpos++;
            } else if (y > prediction + epsilon_neg) {
                countneg++;
            }
            if (alpha != 0) {
                svs++;
                if ((alpha == cPos[i]) || (alpha == -cNeg[i])) {
                    bsv++;
                }
            }
        }
        mae /= examples_total;
        mse /= examples_total;
        min_lambda = -min_lambda;

        logln(1, "Error on KKT is " + min_lambda);
        logln(1, svs + " SVs");
        logln(1, bsv + " BSVs");
        logln(1, "MAE " + mae);
        logln(1, "MSE " + mse);
        logln(1, countpos + " pos loss");
        logln(1, countneg + " neg loss");

        // print hyperplane
        double[] w = new double[dim];
        for (j = 0; j < dim; j++)
            w[j] = 0;
        for (i = 0; i < examples_total; i++) {
            x = the_examples.get_example(i).toDense(dim);
            alpha = alphas[i];
            for (j = 0; j < dim; j++) {
                w[j] += alpha * x[j];
            }
        }
        for (j = 0; j < dim; j++) {
            logln(2, "w[" + j + "] = " + w[j]);
        }
        logln(2, "b = " + b);
        if (dim == 1) {
            logln(2, "y = " + w[0] + "*x+" + b);
        }
    }

    /** Returns the weights of the features. */
    public double[] getWeights() {
        int dim = the_examples.get_dim();
        double[] w = new double[dim];
        for (int j = 0; j < dim; j++)
            w[j] = 0;
        for (int i = 0; i < examples_total; i++) {
            double[] x = the_examples.get_example(i).toDense(dim);
            double alpha = alphas[i];
            for (int j = 0; j < dim; j++) {
                w[j] += alpha * x[j];
            }
        }
        return w;
    }

    /** Returns the value of b. */
    public double getB() {
        return the_examples.get_b();
    }
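    /*
     * Illustrative sketch (not part of the original class): for the linear (dot)
     * kernel, the dense weight vector returned by getWeights() together with
     * getB() reproduces the decision function without kernel evaluations. The
     * names "svm" and "x" below are assumptions for the example.
     *
     *   double[] w = svm.getWeights();
     *   double b = svm.getB();
     *   double f = b;
     *   for (int j = 0; j < w.length; j++) {
     *       f += w[j] * x[j];   // f(x) = <w, x> + b
     *   }
     */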
    /**
     * Returns the complexity constant C of the SVM.
     */
    public double getC() {
        return C;
    }

    /**
     * Initializes the optimizer.
     */
    protected void init_optimizer() {
        which_alpha = new boolean[working_set_size];
        primal = new double[working_set_size];
        sum = new double[examples_total];
        at_bound = new int[examples_total];
        // init variables
        if (working_set_size > examples_total)
            working_set_size = examples_total;
        qp = new QuadraticProblemSMO(is_zero / 100, convergence_epsilon / 100, working_set_size * working_set_size);
        qp.set_n(working_set_size);
        // reserve workspace for calculate_working_set
        working_set = new int[working_set_size];
        heap_max = new MaxHeap(0);
        heap_min = new MinHeap(0);
        int i;
        for (i = 0; i < working_set_size; i++) {
            qp.l[i] = 0; // -is_zero;
        }
        if (quadraticLossPos) {
            for (i = 0; i < cPos.length; i++)
                cPos[i] = Double.MAX_VALUE;
        }
        if (quadraticLossNeg) {
            for (i = 0; i < cNeg.length; i++)
                cNeg[i] = Double.MAX_VALUE;
        }
        for (i = 0; i < examples_total; i++) {
            alphas[i] = 0.0;
        }
        lambda_WS = 0;
        to_shrink = 0;
        qp.set_n(working_set_size);
    }

    /**
     * Shuts the optimizer down.
     */
    protected void exit_optimizer() {
        qp = null;
    }

    /**
     * Shrinks the set of active variables: examples whose alpha has been at a
     * bound for shrink_const iterations are moved to the back and ignored.
     */
    protected void shrink() {
        // move shrinked examples to the back
        if (to_shrink > examples_total / 10) {
            int i;
            int last_pos = examples_total;
            if (last_pos > working_set_size) {
                for (i = 0; i < last_pos; i++) {
                    if (at_bound[i] >= shrink_const) {
                        // shrink i
                        sum_alpha += alphas[i];
                        last_pos--;
                        the_examples.swap(i, last_pos);
                        the_kernel.swap(i, last_pos);
                        sum[i] = sum[last_pos];
                        at_bound[i] = at_bound[last_pos];
                        if (last_pos <= working_set_size) {
                            break;
                        }
                    }
                }
                to_shrink = 0;
                shrinked = true;
                if (last_pos < examples_total) {
                    examples_total = last_pos;
                    the_kernel.set_examples_size(examples_total);
                }
            }
            logln(4, "shrinked to " + examples_total + " variables");
        }
    }

    /**
     * Resets the shrinked variables and recalculates their sums.
     */
    protected void reset_shrinked() {
        int old_ex_tot = examples_total;
        target_count = 0;
        examples_total = the_examples.count_examples();
        the_kernel.set_examples_size(examples_total);
        // unshrink, recalculate sum for all variables
        int i, j;
        // reset all sums
        for (i = old_ex_tot; i < examples_total; i++) {
            sum[i] = 0;
            at_bound[i] = 0;
        }
        double alpha;
        double[] kernel_row;
        for (i = 0; i < examples_total; i++) {
            alpha = alphas[i];
            if (alpha != 0) {
                kernel_row = the_kernel.get_row(i);
                for (j = old_ex_tot; j < examples_total; j++) {
                    sum[j] += alpha * kernel_row[j];
                }
            }
        }
        sum_alpha = 0;
        shrinked = false;
        logln(5, "Resetting shrinked from " + old_ex_tot + " to " + examples_total);
    }

    /**
     * Projects the variables onto the equality constraint sum_i alpha_i = 0 by
     * distributing the residual evenly over the free support vectors.
     */
    protected void project_to_constraint() {
        double alpha_sum = sum_alpha;
        int SVcount = 0;
        double alpha;
        int i;
        for (i = 0; i < examples_total; i++) {
            alpha = alphas[i];
            alpha_sum += alpha;
            if (((alpha > is_zero) && (alpha - cNeg[i] < -is_zero)) || ((alpha < -is_zero) && (alpha + cPos[i] > is_zero))) {
                SVcount++;
            }
        }
        if (SVcount > 0) {
            // project
            alpha_sum /= SVcount;
            for (i = 0; i < examples_total; i++) {
                alpha = alphas[i];
                if (((alpha > is_zero) && (alpha - cNeg[i] < -is_zero)) || ((alpha < -is_zero) && (alpha + cPos[i] > is_zero))) {
                    alphas[i] -= alpha_sum;
                }
            }
        }
    }
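    /*
     * Worked example for project_to_constraint() (values are hypothetical): with
     * alphas = {0.4, -0.3, 0.2} and sum_alpha = 0, the constraint residual is
     * 0.4 - 0.3 + 0.2 = 0.3. If all three variables are free (strictly between
     * their bounds), each alpha is reduced by 0.3 / 3 = 0.1, giving
     * {0.3, -0.4, 0.1}, which again satisfies sum_i alpha_i = 0.
     */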
    /**
     * Calculates the working set: picks the most infeasible variables
     * (largest KKT violation) via a min- and a max-heap.
     */
    protected void calculate_working_set() {
        // reset WSS
        if (working_set_size < parameters_working_set_size) {
            working_set_size = parameters_working_set_size;
            if (working_set_size > examples_total)
                working_set_size = examples_total;
        }
        heap_min.init(working_set_size / 2);
        heap_max.init(working_set_size / 2 + working_set_size % 2);
        int i = 0;
        double sort_value;
        double the_nabla;
        boolean is_feasible;
        int j;
        while (i < examples_total) {
            is_feasible = feasible(i);
            if (is_feasible) {
                the_nabla = nabla(i);
                if (is_alpha_neg(i)) {
                    sort_value = -the_nabla; // - : maximum inconsistency approach
                } else {
                    sort_value = the_nabla;
                }
                // add to heaps
                heap_min.add(sort_value, i);
                heap_max.add(sort_value, i);
            }
            i++;
        }
        int[] new_ws = heap_min.get_values();
        working_set_size = 0;
        int pos;
        j = heap_min.size();
        for (i = 0; i < j; i++) {
            working_set[working_set_size] = new_ws[i];
            working_set_size++;
        }
        pos = working_set_size;
        new_ws = heap_max.get_values();
        j = heap_max.size();
        for (i = 0; i < j; i++) {
            working_set[working_set_size] = new_ws[i];
            working_set_size++;
        }
        if ((!heap_min.empty()) && (!heap_max.empty())) {
            if (heap_min.top_value() >= heap_max.top_value()) {
                // the same values could be in the min- and the max-heap,
                // sort them out (this is very unlikely)
                j = 0;
                i = 0;
                while (i < pos) {
                    // working_set[i] also in max-heap?
                    j = pos;
                    while ((j < working_set_size) && (working_set[j] != working_set[i])) {
                        j++;
                    }
                    if (j < working_set_size) {
                        // working_set[i] equals working_set[j]: remove j from WS
                        working_set[j] = working_set[working_set_size - 1];
                        working_set_size--;
                    } else {
                        i++;
                    }
                }
            }
        }
        if (target_count > 0) {
            // convergence error on last iteration?
            // some more tests on the WS; unlikely to happen, so speed isn't so important
            // are all variables at the bound?
            int pos_abs;
            boolean bounded_pos = true;
            boolean bounded_neg = true;
            pos = 0;
            double alpha;
            while ((pos < working_set_size) && (bounded_pos || bounded_neg)) {
                pos_abs = working_set[pos];
                alpha = alphas[pos_abs];
                if (is_alpha_neg(pos_abs)) {
                    if (alpha - cNeg[pos_abs] < -is_zero) {
                        bounded_pos = false;
                    }
                    if (alpha > is_zero) {
                        bounded_neg = false;
                    }
                } else {
                    if (alpha + cPos[pos_abs] > is_zero) { // was cNeg; alpha variables are bounded by cPos
                        bounded_neg = false;
                    }
                    if (alpha < -is_zero) {
                        bounded_pos = false;
                    }
                }
                pos++;
            }
            if (bounded_pos) {
                // all alphas are at the upper bound;
                // need an alpha that can be moved upward: use the one with smallest lambda
                double max_lambda = Double.MAX_VALUE;
                int max_pos = examples_total;
                for (pos_abs = 0; pos_abs < examples_total; pos_abs++) {
                    alpha = alphas[pos_abs];
                    if (is_alpha_neg(pos_abs)) {
                        if (alpha - cNeg[pos_abs] < -is_zero) {
                            if (lambda(pos_abs) < max_lambda) {
                                max_lambda = lambda(pos_abs);
                                max_pos = pos_abs;
                            }
                        }
                    } else {
                        if (alpha < -is_zero) {
                            if (lambda(pos_abs) < max_lambda) {
                                max_lambda = lambda(pos_abs);
                                max_pos = pos_abs;
                            }
                        }
                    }
                }
                if (max_pos < examples_total) {
                    if (working_set_size < parameters_working_set_size) {
                        working_set_size++;
                    }
                    working_set[working_set_size - 1] = max_pos;
                }
            } else if (bounded_neg) {
                // all alphas are at the lower bound;
                // need an alpha that can be moved downward: use the one with smallest lambda
                double max_lambda = Double.MAX_VALUE;
                int max_pos = examples_total;
                for (pos_abs = 0; pos_abs < examples_total; pos_abs++) {
                    alpha = alphas[pos_abs];
                    if (is_alpha_neg(pos_abs)) {
                        if (alpha > is_zero) {
                            if (lambda(pos_abs) < max_lambda) {
                                max_lambda = lambda(pos_abs);
                                max_pos = pos_abs;
                            }
                        }
                    } else {
                        if (alpha + cPos[pos_abs] > is_zero) { // was cNeg; alpha variables are bounded by cPos
                            if (lambda(pos_abs) < max_lambda) {
                                max_lambda = lambda(pos_abs);
                                max_pos = pos_abs;
                            }
                        }
                    }
                }
                if (max_pos < examples_total) {
                    if (working_set_size < parameters_working_set_size) {
                        working_set_size++;
                    }
                    working_set[working_set_size - 1] = max_pos;
                }
            }
        }
        if ((working_set_size < parameters_working_set_size) && (working_set_size < examples_total)) {
            // fill the working set with randomly chosen examples
            pos = (int) (randomGenerator.nextDouble() * examples_total);
            int ok;
            while ((working_set_size < parameters_working_set_size) && (working_set_size < examples_total)) {
                // add pos to the WS if it isn't already contained
                ok = 1;
                for (i = 0; i < working_set_size; i++) {
                    if (working_set[i] == pos) {
                        ok = 0;
                        i = working_set_size;
                    }
                }
                if (1 == ok) {
                    working_set[working_set_size] = pos;
                    working_set_size++;
                }
                pos = (pos + 1) % examples_total;
            }
        }
        int ipos;
        for (ipos = 0; ipos < working_set_size; ipos++) {
            which_alpha[ipos] = is_alpha_neg(working_set[ipos]);
        }
    }
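    /*
     * Note on the selection heuristic above (a summary, not new behavior): every
     * feasible variable is scored by its gradient nabla(i), sign-flipped for
     * alpha^* variables, and the most extreme scores from both ends (min-heap
     * and max-heap) form the next working set. This resembles the usual
     * "maximally violating" selection of decomposition methods; random examples
     * only fill the set when too few violators remain.
     */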
    /**
     * Updates the working set, i.e. sets up the quadratic subproblem over the
     * selected variables.
     */
    protected void update_working_set() {
        // setup subproblem
        int i, j;
        int pos_i, pos_j;
        double[] kernel_row;
        double sum_WS;
        boolean[] my_which_alpha = which_alpha;
        for (pos_i = 0; pos_i < working_set_size; pos_i++) {
            i = working_set[pos_i];
            // put row i into the hessian
            kernel_row = the_kernel.get_row(i);
            sum_WS = 0;
            for (pos_j = 0; pos_j < pos_i; pos_j++) {
                j = working_set[pos_j];
                // put all elements K(i,j) into the hessian, where j is in the WS
                if (((!my_which_alpha[pos_j]) && (!my_which_alpha[pos_i])) || ((my_which_alpha[pos_j]) && (my_which_alpha[pos_i]))) {
                    // both i and j positive or negative
                    (qp.H)[pos_i * working_set_size + pos_j] = kernel_row[j];
                    (qp.H)[pos_j * working_set_size + pos_i] = kernel_row[j];
                } else {
                    // one of i and j positive, one negative
                    (qp.H)[pos_i * working_set_size + pos_j] = -kernel_row[j];
                    (qp.H)[pos_j * working_set_size + pos_i] = -kernel_row[j];
                }
            }
            for (pos_j = 0; pos_j < working_set_size; pos_j++) {
                j = working_set[pos_j];
                sum_WS += alphas[j] * kernel_row[j];
            }
            // set main diagonal
            (qp.H)[pos_i * working_set_size + pos_i] = kernel_row[i];
            // linear and box constraints
            if (!my_which_alpha[pos_i]) {
                // alpha
                (qp.A)[pos_i] = -1;
                // lin(alpha) = y_i+eps-sum_{i not in WS} alpha_i K_{ij}
                //            = y_i+eps-sum_i+sum_{i in WS}
                (qp.c)[pos_i] = ys[i] + epsilon_pos - sum[i] + sum_WS;
                primal[pos_i] = -alphas[i];
                (qp.u)[pos_i] = cPos[i];
            } else {
                // alpha^*
                (qp.A)[pos_i] = 1;
                (qp.c)[pos_i] = -ys[i] + epsilon_neg + sum[i] - sum_WS;
                primal[pos_i] = alphas[i];
                (qp.u)[pos_i] = cNeg[i];
            }
        }
        if (quadraticLossNeg) {
            for (pos_i = 0; pos_i < working_set_size; pos_i++) {
                i = working_set[pos_i];
                if (my_which_alpha[pos_i]) {
                    // add 1/C to the diagonal and remove the box constraint
                    (qp.H)[pos_i * (working_set_size + 1)] += 1 / cNeg[i];
                    (qp.u)[pos_i] = Double.MAX_VALUE;
                }
            }
        }
        if (quadraticLossPos) {
            for (pos_i = 0; pos_i < working_set_size; pos_i++) {
                i = working_set[pos_i];
                if (!my_which_alpha[pos_i]) {
                    // add 1/C to the diagonal and remove the box constraint
                    (qp.H)[pos_i * (working_set_size + 1)] += 1 / cPos[i];
                    (qp.u)[pos_i] = Double.MAX_VALUE;
                }
            }
        }
    }

    /**
     * Initializes the working set.
     */
    protected void init_working_set() {
        // calculate sum
        int i, j;
        project_to_constraint();
        // the kernel calculation can be skipped as all alphas are 0
        for (i = 0; i < examples_total; i++) {
            sum[i] = 0;
            at_bound[i] = 0;
        }
        // first working set: simply the first examples
        j = 0;
        i = 0;
        while ((i < working_set_size) && (j < examples_total)) {
            working_set[i] = j;
            which_alpha[i] = is_alpha_neg(j);
            i++;
            j++;
        }
        update_working_set();
    }

    /**
     * Calls the optimizer on the current subproblem.
     */
    protected abstract void optimize();
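    /*
     * Sketch of the subproblem handed to the optimizer (an interpretation of the
     * fields filled in update_working_set(); the exact sign conventions are those
     * of QuadraticProblemSMO):
     *
     *   qp.H : the working-set kernel matrix, sign-flipped between mixed
     *          alpha/alpha^* pairs
     *   qp.c : linear term derived from y_i, epsilon and the cached sums
     *   qp.A : +/-1 coefficients of the equality constraint (sum_i alpha_i = 0)
     *   qp.l, qp.u : box constraints 0 <= x_i <= cPos[i] or cNeg[i]
     *
     * The solution is read back in the same encoding by put_optimizer_values().
     */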
    /**
     * Stores the optimizer results back into the alphas and updates the sums.
     */
    protected void put_optimizer_values() {
        // update nabla, sum, examples:
        // sum[i] += (primal_j^* - primal_j - alpha_j^* + alpha_j) * K(i,j)
        // check for |nabla| < is_zero (nabla <-> nabla*)
        int i = 0;
        int j = 0;
        int pos_i;
        double the_new_alpha;
        double[] kernel_row;
        double alpha_diff;
        double[] my_sum = sum;
        pos_i = working_set_size;
        while (pos_i > 0) {
            pos_i--;
            if (which_alpha[pos_i]) {
                the_new_alpha = primal[pos_i];
            } else {
                the_new_alpha = -primal[pos_i];
            }
            // next three statements: keep this order!
            i = working_set[pos_i];
            alpha_diff = the_new_alpha - alphas[i];
            alphas[i] = the_new_alpha;
            if (alpha_diff != 0) {
                // update sum ( => nabla)
                kernel_row = the_kernel.get_row(i);
                for (j = examples_total - 1; j >= 0; j--) {
                    my_sum[j] += alpha_diff * kernel_row[j];
                }
            }
        }
    }

    /**
     * Checks if the optimization converged.
     *
     * @return true if the optimization converged
     */
    protected boolean convergence() {
        double the_lambda_eq = 0;
        int total = 0;
        double alpha_sum = 0;
        double alpha = 0;
        int i;
        boolean result = true;

        // actual convergence test
        total = 0;
        alpha_sum = 0;
        for (i = 0; i < examples_total; i++) {
            alpha = alphas[i];
            alpha_sum += alpha;
            if ((alpha > is_zero) && (alpha - cNeg[i] < -is_zero)) {
                // free alpha^*: alpha^* = -nabla
                the_lambda_eq += -nabla(i); // all_ys[i]-epsilon_neg-sum[i];
                total++;
            } else if ((alpha < -is_zero) && (alpha + cPos[i] > is_zero)) {
                // free alpha: alpha = nabla
                the_lambda_eq += nabla(i); // all_ys[i]+epsilon_pos-sum[i];
                total++;
            }
        }
        if (total > 0) {
            lambda_eq = the_lambda_eq / total;
            logln(4, "lambda_eq = " + lambda_eq);
        } else {
            // no free SVs: keep the lambda_eq of the working set
            lambda_eq = lambda_WS; // (lambda_eq+4*lambda_WS)/5;
            logln(4, "*** no SVs in convergence(), lambda_eq = " + lambda_eq + ".");
        }
        if (target_count > 2) {
            // estimate lambda from the WS
            if (target_count > 20) {
                // desperate attempt to get a good lambda!
                lambda_eq = ((40 - target_count) * lambda_eq + (target_count - 20) * lambda_WS) / 20;
                logln(5, "Re-Re-calculated lambda from WS: " + lambda_eq);
                if (target_count > 40) {
                    // really desperate, kick one example out!
                    i = working_set[target_count % working_set_size];
                    if (is_alpha_neg(i)) {
                        lambda_eq = -nabla(i);
                    } else {
                        lambda_eq = nabla(i);
                    }
                    logln(5, "set lambda_eq to nabla(" + i + "): " + lambda_eq);
                }
            } else {
                lambda_eq = lambda_WS;
                logln(5, "Re-calculated lambda_eq from WS: " + lambda_eq);
            }
        }

        // check the linear constraint
        if (Math.abs(alpha_sum + sum_alpha) > convergence_epsilon) {
            // equality constraint violated
            logln(4, "No convergence: equality constraint violated: |" + (alpha_sum + sum_alpha) + "| >> 0");
            project_to_constraint();
            result = false;
        }
        // check the KKT conditions for all variables
        i = 0;
        while ((i < examples_total) && (result != false)) {
            if (lambda(i) >= -convergence_epsilon) {
                i++;
            } else {
                result = false;
            }
        }
        return result;
    }
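    /*
     * Summary of the stopping criterion implemented above: the solution is
     * accepted once (a) the equality constraint |sum_i alpha_i| is satisfied up
     * to convergence_epsilon and (b) every Lagrange multiplier lambda(i) is
     * >= -convergence_epsilon, i.e. no variable violates its KKT condition by
     * more than the tolerance. lambda_eq is the multiplier of the equality
     * constraint, estimated as an average over the free support vectors.
     */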
    /** Gradient of the dual target function with respect to variable i (defined by the concrete subclass). */
    protected abstract double nabla(int i);

    /**
     * Lagrangian multiplier of variable i.
     *
     * @param i
     *            variable index
     * @return lambda
     */
    protected double lambda(int i) {
        double alpha;
        double result;
        // default = not at bound
        if (is_alpha_neg(i)) {
            result = -Math.abs(nabla(i) + lambda_eq);
        } else {
            result = -Math.abs(nabla(i) - lambda_eq);
        }
        alpha = alphas[i];
        if (alpha > is_zero) {
            // alpha^*
            if (alpha - cNeg[i] >= -is_zero) {
                // upper bound active
                result = -lambda_eq - nabla(i);
            }
        } else if (alpha >= -is_zero) {
            // lower bound active
            if (is_alpha_neg(i)) {
                result = nabla(i) + lambda_eq;
            } else {
                result = nabla(i) - lambda_eq;
            }
        } else if (alpha + cPos[i] <= is_zero) {
            // upper bound active
            result = lambda_eq - nabla(i);
        }
        return result;
    }

    /**
     * Returns whether variable i is a candidate for the working set, i.e. not
     * clearly KKT-optimal and not about to be shrunk. Also maintains the
     * at-bound counters used by shrink().
     */
    protected boolean feasible(int i) {
        boolean is_feasible = true;
        double alpha = alphas[i];
        double the_lambda = lambda(i);
        if (alpha - cNeg[i] >= -is_zero) {
            // alpha^* at upper bound
            if (the_lambda >= 0) {
                at_bound[i]++;
                if (at_bound[i] == shrink_const)
                    to_shrink++;
            } else {
                at_bound[i] = 0;
            }
        } else if ((alpha <= is_zero) && (alpha >= -is_zero)) {
            // lower bound active
            if (the_lambda >= 0) {
                at_bound[i]++;
                if (at_bound[i] == shrink_const)
                    to_shrink++;
            } else {
                at_bound[i] = 0;
            }
        } else if (alpha + cPos[i] <= is_zero) {
            // alpha at upper bound
            if (the_lambda >= 0) {
                at_bound[i]++;
                if (at_bound[i] == shrink_const)
                    to_shrink++;
            } else {
                at_bound[i] = 0;
            }
        } else {
            // not at bound
            at_bound[i] = 0;
        }
        if ((the_lambda >= feasible_epsilon) || (at_bound[i] >= shrink_const)) {
            is_feasible = false;
        }
        return is_feasible;
    }

    /** Tells whether variable i is treated as an alpha^* variable (defined by the concrete subclass). */
    protected abstract boolean is_alpha_neg(int i);

    /**
     * Logs the given message plus a newline.
     *
     * @param level
     *            verbosity level
     * @param message
     *            message text
     */
    protected void logln(int level, String message) {
        if (paramOperator != null)
            paramOperator.getLog().log(message, RAPID_MINER_VERBOSITY[level - 1]);
        else
            LogService.getGlobal().log(message, RAPID_MINER_VERBOSITY[level - 1]);
    }

    protected void logWarning(String message) {
        if (paramOperator != null)
            paramOperator.getLog().log(message, LogService.WARNING);
        else
            LogService.getGlobal().log(message, LogService.WARNING);
    }

    /**
     * Predicts values on the test set with the model.
     */
    public void predict(SVMExamples to_predict) {
        int i;
        double prediction;
        SVMExample sVMExample;
        int size = to_predict.count_examples(); // IM 04/02/12: was the_examples.count_examples()
        for (i = 0; i < size; i++) {
            sVMExample = to_predict.get_example(i);
            prediction = predict(sVMExample);
            to_predict.set_y(i, prediction);
        }
        logln(4, "Prediction generated");
    }

    public double predict(int i) {
        return predict(the_examples.get_example(i));
    }

    /**
     * Predicts a single example.
     */
    public double predict(SVMExample sVMExample) {
        int i;
        int[] sv_index;
        double[] sv_att;
        double the_sum = the_examples.get_b();
        double alpha;
        for (i = 0; i < examples_total; i++) {
            alpha = alphas[i];
            if (alpha != 0) {
                sv_index = the_examples.index[i];
                sv_att = the_examples.atts[i];
                the_sum += alpha * the_kernel.calculate_K(sv_index, sv_att, sVMExample.index, sVMExample.att);
            }
        }
        return the_sum;
    }
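    /*
     * The prediction above implements the usual SVM decision function
     *
     *   f(x) = sum_i alpha_i * K(x_i, x) + b,
     *
     * where only the support vectors (alpha_i != 0) contribute. For a
     * hypothetical model with two SVs, alpha = {0.5, -0.5}, kernel values
     * {0.8, 0.2} and b = 0.1, this yields f(x) = 0.5*0.8 - 0.5*0.2 + 0.1 = 0.4.
     */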
    /**
     * Checks the internal variables, for debugging only.
     */
    protected void check() {
        double tsum;
        int i, j;
        double s = 0;
        for (i = 0; i < examples_total; i++) {
            s += alphas[i];
            tsum = 0;
            for (j = 0; j < the_examples.count_examples(); j++) {
                tsum += alphas[j] * the_kernel.calculate_K(i, j);
            }
            if (Math.abs(tsum - sum[i]) > is_zero) {
                logln(1, "ERROR: sum[" + i + "] off by " + (tsum - sum[i]));
            }
        }
        if (Math.abs(s + sum_alpha) > is_zero) {
            logln(1, "ERROR: sum_alpha is off by " + (s + sum_alpha));
        }
    }

    /**
     * Returns a double array of performance values estimated with the xi-alpha
     * estimator. These are accuracy, precision and recall. Works only for
     * classification SVMs.
     */
    public double[] getXiAlphaEstimation(Kernel kernel) {
        // upper bound on K(j,j) - K(i,j) over all pairs
        double r_delta = 0.0d;
        for (int j = 0; j < examples_total; j++) {
            double norm_x = kernel.calculate_K(j, j);
            for (int i = 0; i < examples_total; i++) {
                double r_current = norm_x - kernel.calculate_K(i, j);
                if (r_current > r_delta) {
                    r_delta = r_current;
                }
            }
        }
        int total_pos = 0;
        int total_neg = 0;
        int estim_pos = 0;
        int estim_neg = 0;
        double xi = 0.0d;
        for (int i = 0; i < examples_total; i++) {
            double alpha = the_examples.get_alpha(i);
            double prediction = predict(i);
            double y = the_examples.get_y(i);
            if (y > 0) {
                // slack of a positive example
                xi = (prediction > 1) ? 0 : 1 - prediction;
                if (2 * alpha * r_delta + xi >= 1) {
                    estim_pos++;
                }
                total_pos++;
            } else {
                // slack of a negative example
                xi = (prediction < -1) ? 0 : 1 + prediction;
                if (2 * (-alpha) * r_delta + xi >= 1) {
                    estim_neg++;
                }
                total_neg++;
            }
        }
        double[] result = new double[3];
        result[0] = 1.0d - (double) (estim_pos + estim_neg) / (double) (total_pos + total_neg);
        result[1] = (double) (total_pos - estim_pos) / (double) (total_pos - estim_pos + estim_neg);
        result[2] = 1.0d - (double) estim_pos / (double) total_pos;
        return result;
    }
}