SVMpattern.java example

Explorer
ComplexRapidMiner-master
- operator
- src
/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.learner.functions.kernel.jmysvm.svm;

import com.rapidminer.operator.Operator;
import com.rapidminer.operator.learner.functions.kernel.JMySVMLearner;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.examples.SVMExamples;
import com.rapidminer.operator.learner.functions.kernel.jmysvm.kernel.Kernel;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.RandomGenerator;


/**
 * Class for pattern recognition SVM
 * 
 * @author Stefan Rueping, Ingo Mierswa
 * @version $Id: SVMpattern.java,v 1.4 2008/05/09 19:23:26 ingomierswa Exp $
 */
public class SVMpattern extends SVM {

	private boolean calculateXiAlpha = false;

	public SVMpattern() {}

	public SVMpattern(Operator paramOperator, Kernel kernel, SVMExamples sVMExamples, com.rapidminer.example.ExampleSet rapidMinerExamples, RandomGenerator randomGenerator) throws UndefinedParameterError {
		super(paramOperator, kernel, sVMExamples, rapidMinerExamples, randomGenerator);
		this.calculateXiAlpha = paramOperator.getParameterAsBoolean(JMySVMLearner.PARAMETER_ESTIMATE_PERFORMANCE);
	};

	/**
	 * Calls the optimizer
	 */
	protected void optimize() {
		// optimizer-specific call

		// qp.n = working_set_size;

		int i;
		int j;

		// equality constraint
		qp.b[0] = 0;
		for (i = 0; i < working_set_size; i++) {
			qp.b[0] += alphas[working_set[i]];
		};

		double[] my_primal = primal;
		// set initial optimization parameters
		double new_target = 0;
		double old_target = 0;
		double target_tmp;
		for (i = 0; i < working_set_size; i++) {
			target_tmp = my_primal[i] * qp.H[i * working_set_size + i] / 2;
			for (j = 0; j < i; j++) {
				target_tmp += my_primal[j] * qp.H[j * working_set_size + i];
			};
			target_tmp += qp.c[i];
			old_target += target_tmp * my_primal[i];
		};

		double new_constraint_sum = 0;
		double my_is_zero = is_zero;
		int sv_count = working_set_size;

		// optimize
		boolean KKTerror = true; // KKT not yet satisfied
		boolean convError = false; // KKT can still be satisfied

		qp.max_allowed_error = convergence_epsilon;

		qp.x = my_primal;
		qp.lambda_eq = lambda_eq;
		qp.solve();
		my_primal = qp.x;
		lambda_WS = qp.lambda_eq;

		// loop while some KKT condition is not valid (alpha=0)
		while (KKTerror) {
			// clip
			sv_count = working_set_size;
			new_constraint_sum = qp.b[0];
			for (i = 0; i < working_set_size; i++) {
				// check if at bound
				if (my_primal[i] <= my_is_zero) {
					// at lower bound
					my_primal[i] = qp.l[i];
					sv_count--;
				} else if (qp.u[i] - my_primal[i] <= my_is_zero) {
					// at upper bound
					my_primal[i] = qp.u[i];
					sv_count--;
				};
				new_constraint_sum -= qp.A[i] * my_primal[i];
			};

			// enforce equality constraint
			if (sv_count > 0) {
				new_constraint_sum /= sv_count;
				logln(5, "adjusting " + sv_count + " alphas by " + new_constraint_sum);
				for (i = 0; i < working_set_size; i++) {
					if ((my_primal[i] > qp.l[i]) && (my_primal[i] < qp.u[i])) {
						// real sv
						my_primal[i] += qp.A[i] * new_constraint_sum;
					};
				};
			} else if (Math.abs(new_constraint_sum) > working_set_size * is_zero) {
				// error, can't get feasible point
				logln(5, "WARNING: No SVs, constraint_sum = " + new_constraint_sum);
				old_target = -Double.MIN_VALUE;
				convError = true;
			};
			// test descend
			new_target = 0;
			for (i = 0; i < working_set_size; i++) {
				// attention: optimizer changes one triangle of H!
				target_tmp = my_primal[i] * qp.H[i * working_set_size + i] / 2.0;
				for (j = 0; j < i; j++) {
					target_tmp += my_primal[j] * qp.H[j * working_set_size + i];
				};
				target_tmp += qp.c[i];
				new_target += target_tmp * my_primal[i];
			};

			if (new_target < old_target) {
				KKTerror = false;
				if (descend < old_target - new_target) {
					target_count = 0;
				} else {
					convError = true;
				};
				logln(5, "descend = " + (old_target - new_target));
			} else if (sv_count > 0) {
				// less SVs
				// set my_is_zero to min_i(primal[i]-qp.l[i], qp.u[i]-primal[i])
				my_is_zero = Double.MAX_VALUE;
				for (i = 0; i < working_set_size; i++) {
					if ((my_primal[i] > qp.l[i]) && (my_primal[i] < qp.u[i])) {
						if (my_primal[i] - qp.l[i] < my_is_zero) {
							my_is_zero = my_primal[i] - qp.l[i];
						};
						if (qp.u[i] - my_primal[i] < my_is_zero) {
							my_is_zero = qp.u[i] - my_primal[i];
						};
					};
				};
				if (target_count == 0) {
					my_is_zero *= 2;
				};
				logln(5, "WARNING: no descend (" + (old_target - new_target) + " <= " + descend + "), adjusting is_zero to " + my_is_zero);
				logln(5, "new_target = " + new_target);
			} else {
				// nothing we can do
				logln(5, "WARNING: no descend (" + (old_target - new_target) + " <= " + descend + "), stopping.");
				KKTerror = false;
				convError = true;
			};
		};

		if (convError) {
			target_count++;
			if (old_target < new_target) {
				for (i = 0; i < working_set_size; i++) {
					my_primal[i] = qp.A[i] * alphas[working_set[i]];
				};
				logln(5, "WARNING: Convergence error, restoring old primals");
			};
		};

		if (target_count > 50) {
			// non-recoverable numerical error
			convergence_epsilon *= 2;
			feasible_epsilon = convergence_epsilon;
			logln(1, "WARNING: reducing KKT precision to " + convergence_epsilon);
			target_count = 0;
		};
	};

	protected final boolean is_alpha_neg(int i) {
		boolean result;
		if (ys[i] > 0) {
			result = true;
		} else {
			result = false;
		};
		return result;
	};

	protected final double nabla(int i) {
		double result;
		if (is_alpha_neg(i)) {
			result = (sum[i] - 1);
		} else {
			result = (-sum[i] - 1);
		};
		return result;
	};

	protected void print_statistics() {
		int dim = the_examples.get_dim();
		int i, j;
		double alpha;
		double[] x;
		int svs = 0;
		int bsv = 0;
		int correct_pos = 0;
		int correct_neg = 0;
		int total_pos = 0;
		int total_neg = 0;
		double y;
		double prediction;
		double min_lambda = Double.MAX_VALUE;
		double b = the_examples.get_b();
		double xi;
		double r_delta = 0;
		boolean do_xi_alpha = false;
		double norm_w = 0;
		double max_norm_x = 0;
		double min_norm_x = 1e20;
		double norm_x = 0;
		int estim_pos = 0;
		int estim_neg = 0;

		// xi-alpha estimators
		if (calculateXiAlpha) {
			do_xi_alpha = true;
			for (i = 0; i < examples_total; i++) {
				// needed before test-loop for performance estimators
				norm_w += alphas[i] * sum[i];

				alpha = alphas[i];
				if (alpha != 0) {
					norm_x = the_kernel.calculate_K(i, i);
					if (norm_x > max_norm_x) {
						max_norm_x = norm_x;
					};
					if (norm_x < min_norm_x) {
						min_norm_x = norm_x;
					};
				};
			};

			r_delta = 0;
			double r_current;
			for (j = 0; j < examples_total; j++) {
				norm_x = the_kernel.calculate_K(j, j);
				for (i = 0; i < examples_total; i++) {
					r_current = norm_x - the_kernel.calculate_K(i, j);
					if (r_current > r_delta) {
						r_delta = r_current;
					};
				};
			};
		};

		for (i = 0; i < examples_total; i++) {
			if (lambda(i) < min_lambda) {
				min_lambda = lambda(i);
			};
			y = ys[i];
			prediction = sum[i] + b;
			alpha = alphas[i];
			if (y > 0) {
				if (prediction > 0) {
					correct_pos++;
				};
				if (do_xi_alpha) {
					if (prediction > 1) {
						xi = 0;
					} else {
						xi = 1 - prediction;
					};
					if (2 * alpha * r_delta + xi >= 1) {
						estim_pos++;
					};
				};
				total_pos++;
			} else {
				if (prediction <= 0) {
					correct_neg++;
				};
				if (do_xi_alpha) {
					if (prediction < -1) {
						xi = 0;
					} else {
						xi = 1 + prediction;
					};
					if (2 * (-alpha) * r_delta + xi >= 1) {
						estim_neg++;
					};
				};
				total_neg++;
			};
			if (alpha != 0) {
				svs++;
				if ((alpha == cPos[i]) || (alpha == -cNeg[i])) {
					bsv++;
				};
			};
		};
		min_lambda = -min_lambda;

		logln(1, "Error on KKT is " + min_lambda);
		logln(1, svs + " SVs");
		logln(1, bsv + " BSVs");
		logln(1, "Accuracy : " + ((double) (correct_pos + correct_neg) / (double) (total_pos + total_neg)));
		logln(1, "Precision: " + ((double) correct_pos / (double) (correct_pos + total_neg - correct_neg)));
		logln(1, "Recall   : " + ((double) correct_pos / (double) total_pos));
		logln(1, "Pred:\t+\t-");
		logln(1, "\t" + correct_pos + "\t" + (total_pos - correct_pos) + "\t(true pos)");
		logln(1, "\t" + (total_neg - correct_neg) + "\t" + correct_neg + "\t(true neg)");
		if (do_xi_alpha) {
			logln(1, "Xi-Alpha Accuracy" + (1 - ((double) (estim_pos + estim_neg)) / ((double) (total_pos + total_neg))));
			logln(1, "Xi-Alpha Precision" + (((double) (total_pos - estim_pos)) / ((double) (total_pos - estim_pos + estim_neg))));
			logln(1, "Xi-Alpha Recall" + (1 - (double) estim_pos / ((double) total_pos)));
		};

		// if(verbosity >= 2){
		// print hyperplane
		double[] w = new double[dim];
		for (j = 0; j < dim; j++)
			w[j] = 0;
		for (i = 0; i < examples_total; i++) {
			x = the_examples.get_example(i).toDense(dim);
			alpha = alphas[i];
			for (j = 0; j < dim; j++) {
				w[j] += alpha * x[j];
			};
		};
		// double[] Exp = the_examples.Exp;
		// double[] Dev = the_examples.Dev;
		// if(Exp != null){
		// for(j=0;j<dim;j++){
		// if(Dev[j] != 0){
		// w[j] /= Dev[j];
		// };
		// if(0 != Dev[dim]){
		// w[j] *= Dev[dim];
		// };
		// b -= w[j]*Exp[j];
		// };
		// b += Exp[dim];
		// };
		// logln(2," ");
		for (j = 0; j < dim; j++) {
			logln(2, "w[" + j + "] = " + w[j]);
		};
		logln(2, "b = " + b);
		if (dim == 1) {
			logln(2, "y = " + w[0] + "*x+" + b);
		};
	};
	// };
};